author     Jonathan Gray <jsg@cvs.openbsd.org>	2022-02-24 01:57:18 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>	2022-02-24 01:57:18 +0000
commit     b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch)
tree       658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/broadcom
parent     57768bbb154c2879d34ec20e401b19472e77aaf7 (diff)
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/broadcom')
79 files changed, 15600 insertions, 8987 deletions
diff --git a/lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml b/lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml
new file mode 100644
index 000000000..659a4ca9c
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml
@@ -0,0 +1,49 @@
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = [ "/deqp/mustpass/gles31-master.txt" ]
+deqp_args = [
+    "--deqp-gl-config-name=rgba8888d24s8ms0",
+    "--deqp-surface-height=256",
+    "--deqp-surface-type=pbuffer",
+    "--deqp-surface-width=256",
+    "--deqp-visibility=hidden",
+]
+version_check = "GL ES 3.1.*git"
+renderer_check = "V3D"
+
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = [ "/deqp/mustpass/gles3-master.txt" ]
+deqp_args = [
+    "--deqp-gl-config-name=rgba8888d24s8ms0",
+    "--deqp-surface-height=256",
+    "--deqp-surface-type=pbuffer",
+    "--deqp-surface-width=256",
+    "--deqp-visibility=hidden",
+]
+
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = [ "/deqp/mustpass/gles2-master.txt" ]
+deqp_args = [
+    "--deqp-gl-config-name=rgba8888d24s8ms0",
+    "--deqp-surface-height=256",
+    "--deqp-surface-type=pbuffer",
+    "--deqp-surface-width=256",
+    "--deqp-visibility=hidden",
+]
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+    "/deqp/mustpass/gles31-khr-master.txt",
+    "/deqp/mustpass/gles3-khr-master.txt",
+    "/deqp/mustpass/gles2-khr-master.txt",
+]
+deqp_args = [
+    "--deqp-gl-config-name=rgba8888d24s8ms0",
+    "--deqp-surface-height=256",
+    "--deqp-surface-type=pbuffer",
+    "--deqp-surface-width=256",
+    "--deqp-visibility=hidden",
+]
diff --git a/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt b/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt
index 7a673b01f..6379afbe3 100644
--- a/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt
+++ b/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt
@@ -1,148 +1,5 @@
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_linear_stripes_z,Fail
-dEQP-VK.pipeline.logic_op.r16_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.copy_inverted,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.equivalent,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.invert,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.nand,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.nor,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.or_inverted,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.or_reverse,Fail
-dEQP-VK.pipeline.logic_op.r32_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.copy_inverted,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.equivalent,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.invert,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.nand,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.nor,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.or_inverted,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.or_reverse,Fail
-dEQP-VK.spirv_assembly.instruction.compute.vector_shuffle.vector_shuffle,Fail
-dEQP-VK.synchronization.basic.binary_semaphore.chain,Fail
-dEQP-VK.ycbcr.query.levels.geometry.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.levels.tess_control.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.levels.tess_eval.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.size_lod.geometry.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.size_lod.tess_control.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.size_lod.tess_eval.r8g8b8a8_unorm,Crash
+# This seems to fail due to the test error threshold being insufficient
+dEQP-VK.geometry.input.basic_primitive.line_strip_adjacency,Fail
+
+# CTS bug; fix submitted
+dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_single_buffer_geom,Fail
diff --git a/lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml b/lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml
new file mode 100644
index 000000000..218cb1835
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml
@@ -0,0 +1,25 @@
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = [ "/deqp/mustpass/gles2-master.txt" ]
+tests_per_group = 250
+deqp_args = [
+    "--deqp-gl-config-name=rgba8888d24s8ms0",
+    "--deqp-surface-height=256",
+    "--deqp-surface-type=pbuffer",
+    "--deqp-surface-width=256",
+    "--deqp-visibility=hidden",
+]
+version_check = "GL ES 2.0.*git"
+renderer_check = "VC4"
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [ "/deqp/mustpass/gles2-khr-master.txt" ]
+tests_per_group = 250
+deqp_args = [
+    "--deqp-gl-config-name=rgba8888d24s8ms0",
+    "--deqp-surface-height=256",
+    "--deqp-surface-type=pbuffer",
+    "--deqp-surface-width=256",
+    "--deqp-visibility=hidden",
+]
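Note: the suite description files added above are inputs to deqp-runner, which Mesa CI uses to drive dEQP. As a rough sketch of how such a file is consumed (the command line follows deqp-runner's CLI as I understand it and is not part of this commit; the output path is illustrative):

    # Run every [[deqp]] block in the suite file: each block names a dEQP
    # binary, the mustpass caselist(s) to run, and extra arguments passed
    # through to dEQP. version_check/renderer_check abort the run early if
    # the reported GL version or renderer string doesn't match the driver
    # under test.
    deqp-runner suite \
        --suite /install/deqp-v3d-rpi4-gles.toml \
        --output ./results \
        --jobs 4

tests_per_group (set in the vc4 file) shrinks how many tests each dEQP process is handed at a time; per the comment removed from gitlab-ci.yml below, the slow VC4 boards otherwise take about a minute per default 500-test group and trip the serial watchdog.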
"--deqp-gl-config-name=rgba8888d24s8ms0", + "--deqp-surface-height=256", + "--deqp-surface-type=pbuffer", + "--deqp-surface-width=256", + "--deqp-visibility=hidden", +] diff --git a/lib/mesa/src/broadcom/ci/gitlab-ci.yml b/lib/mesa/src/broadcom/ci/gitlab-ci.yml index c3d28777b..4f70ef1e1 100644 --- a/lib/mesa/src/broadcom/ci/gitlab-ci.yml +++ b/lib/mesa/src/broadcom/ci/gitlab-ci.yml @@ -2,32 +2,38 @@ extends: - .baremetal-test-armhf - .vc4-rules - - .use-arm_test + - .use-debian/arm_test variables: BM_BOOTFS: /boot/raspberrypi_armhf - BM_KERNEL_MODULES: vc4 BM_ROOTFS: /rootfs-armhf GPU_VERSION: vc4-rpi3 - DEQP_EXPECTED_RENDERER: VC4 + HWCI_KERNEL_MODULES: vc4 + FLAKES_CHANNEL: "#videocore-ci" script: - ./install/bare-metal/poe-powered.sh needs: - - job: arm_test + - job: debian/arm_test artifacts: false - - meson-armhf + - debian-armhf tags: - igalia-rpi3 -vc4-rpi3-gles2:armhf: +vc4-rpi3-gles:armhf: extends: - .vc4-rpi3-test:armhf - parallel: 4 + parallel: 2 variables: - BARE_METAL_TEST_SCRIPT: "/install/deqp-runner.sh" - DEQP_VER: gles2 - # The vc4s are so slow that it takes about a minute to get through the - # default 500 tests in a group, triggering the serial watchdog. + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" + DEQP_SUITE: vc4-rpi3-gles + +vc4-rpi3-egl:armhf: + extends: + - .vc4-rpi3-test:armhf + variables: + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" + HWCI_START_XORG: 1 DEQP_RUNNER_OPTIONS: "--tests-per-group 250" + DEQP_VER: egl .vc4-rpi3-piglit:armhf: extends: @@ -35,9 +41,9 @@ vc4-rpi3-gles2:armhf: - .vc4-rpi3-test:armhf - .test-manual variables: - BARE_METAL_TEST_SCRIPT: "/install/piglit/piglit-runner.sh" + HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh" BM_POE_TIMEOUT: 180 - BM_START_XORG: 1 + HWCI_START_XORG: 1 PIGLIT_PLATFORM: mixed_glx_egl vc4-rpi3-piglit-quick_gl:armhf: @@ -60,89 +66,72 @@ vc4-rpi3-piglit-quick_shader:armhf: extends: - .baremetal-test-armhf - .v3d-rules - - .use-arm_test + - .use-debian/arm_test variables: - BARE_METAL_TEST_SCRIPT: "/install/deqp-runner.sh" + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" BM_BOOTFS: /boot/raspberrypi_armhf - BM_KERNEL_MODULES: v3d,vc4 BM_POE_TIMEOUT: 300 BM_ROOTFS: /rootfs-armhf - DEQP_EXPECTED_RENDERER: V3D + FLAKES_CHANNEL: "#videocore-ci" GPU_VERSION: v3d-rpi4 + HWCI_KERNEL_MODULES: v3d,vc4 script: - ./install/bare-metal/poe-powered.sh needs: - - arm_test - - meson-armhf + - debian/arm_test + - debian-armhf tags: - igalia-rpi4 -v3d-rpi4-gles31:armhf: - extends: - - .v3d-rpi4-test:armhf - parallel: 2 - variables: - DEQP_VER: gles31 - -v3d-rpi4-gles3:armhf: +v3d-rpi4-gles:armhf: extends: - .v3d-rpi4-test:armhf - parallel: 4 + parallel: 8 variables: - DEQP_VER: gles3 + DEQP_SUITE: v3d-rpi4-gles -v3d-rpi4-gles2:armhf: +v3d-rpi4-egl:armhf: extends: - .v3d-rpi4-test:armhf variables: - DEQP_VER: gles2 + HWCI_START_XORG: 1 + DEQP_VER: egl -.v3d-rpi4-piglit:armhf: +v3d-rpi4-piglit:armhf: extends: - .piglit-test - .v3d-rpi4-test:armhf - - .test-manual + parallel: 4 variables: - BARE_METAL_TEST_SCRIPT: "/install/piglit/piglit-runner.sh" - BM_START_XORG: 1 + HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh" + HWCI_START_XORG: 1 PIGLIT_PLATFORM: mixed_glx_egl + PIGLIT_PROFILES: all -v3d-rpi4-piglit-quick_gl:armhf: +v3dv-rpi4-vk:arm64: extends: - - .v3d-rpi4-piglit:armhf - parallel: 2 - variables: - PIGLIT_PROFILES: quick_gl - -v3d-rpi4-piglit-quick_shader:armhf: - extends: - - .v3d-rpi4-piglit:armhf - variables: - PIGLIT_PROFILES: quick_shader - -v3dv-rpi4-vk:armhf: - extends: - - .baremetal-test-armhf - - .use-arm_test + 
- .baremetal-test + - .use-debian/arm_test - .v3dv-rules - parallel: 6 + parallel: 8 variables: - BARE_METAL_TEST_SCRIPT: "/install/deqp-runner.sh" - BM_BOOTFS: /boot/raspberrypi_armhf - BM_KERNEL_MODULES: v3d,vc4 + HWCI_TEST_SCRIPT: "/install/deqp-runner.sh" + BM_BOOTFS: /boot/raspberrypi_arm64 BM_POE_TIMEOUT: 300 - BM_ROOTFS: /rootfs-armhf - CPU: arm7hlf - DEQP_EXPECTED_RENDERER: "V3D 4.2" - DEQP_FRACTION: 7 + BM_ROOTFS: /rootfs-arm64 + DEQP_EXPECTED_RENDERER: "V3D.4.2" + DEQP_FRACTION: 5 DEQP_VER: vk + FLAKES_CHANNEL: "#videocore-ci" GPU_VERSION: v3dv-rpi4 - VK_CPU: arm7hlf + HWCI_KERNEL_MODULES: v3d,vc4 + MINIO_ARTIFACT_NAME: mesa-arm64 VK_DRIVER: broadcom script: - ./install/bare-metal/poe-powered.sh needs: - - arm_test - - meson-armhf + - debian/arm_test + - job: debian-arm64 + artifacts: false tags: - igalia-rpi4 diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt new file mode 100644 index 000000000..c0d90c2d2 --- /dev/null +++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt @@ -0,0 +1,330 @@ +glx@glx-make-current,Crash +glx@glx-multi-window-single-context,Fail +glx@glx-multithread-buffer,Fail +glx@glx-query-drawable-glx_fbconfig_id-window,Fail +glx@glx-swap-pixmap-bad,Fail +glx@glx-visuals-depth -pixmap,Crash +glx@glx-visuals-stencil -pixmap,Crash +glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail +glx@glx_arb_create_context_no_error@no error,Fail +glx@glx_ext_import_context@free context,Fail +glx@glx_ext_import_context@get context id,Fail +glx@glx_ext_import_context@get current display,Fail +glx@glx_ext_import_context@import context- multi process,Fail +glx@glx_ext_import_context@import context- single process,Fail +glx@glx_ext_import_context@imported context has same context id,Fail +glx@glx_ext_import_context@make current- multi process,Fail +glx@glx_ext_import_context@make current- single process,Fail +glx@glx_ext_import_context@query context info,Fail +shaders@glsl-bug-110796,Fail +spec@!opengl 1.0@gl-1.0-bitmap-heart-dance,Fail +spec@!opengl 1.0@gl-1.0-dlist-bitmap,Fail +spec@!opengl 1.0@gl-1.0-edgeflag,Fail +spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail +spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail +spec@!opengl 1.0@gl-1.0-no-op-paths,Fail +spec@!opengl 1.0@gl-1.0-spot-light,Fail +spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=2,Fail +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=2,Fail +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=4,Fail +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2,Fail +spec@!opengl 1.1@getteximage-depth,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT16,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT24,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT32,Fail +spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT,Fail +spec@!opengl 1.1@getteximage-formats,Fail +spec@!opengl 1.1@linestipple,Fail +spec@!opengl 1.1@linestipple@Factor 2x,Fail +spec@!opengl 1.1@linestipple@Factor 3x,Fail +spec@!opengl 1.1@linestipple@Line loop,Fail +spec@!opengl 1.1@linestipple@Line strip,Fail +spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail +spec@!opengl 1.1@point-line-no-cull,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: 
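Note: the BARE_METAL_*/BM_* to HWCI_* renames above track Mesa's shared hardware-CI harness; the job-level variables are consumed on the device side after boot. A hypothetical shell fragment, only to illustrate roughly what the harness does with these variables (the real logic lives in poe-powered.sh and the rootfs init scripts, which are outside this diff):

    # Load the DRM modules named in HWCI_KERNEL_MODULES ("v3d,vc4"),
    # optionally start an X server for the EGL and piglit jobs, then hand
    # control to the per-job test script.
    for mod in $(echo "$HWCI_KERNEL_MODULES" | tr ',' ' '); do
        modprobe "$mod"
    done
    if [ "$HWCI_START_XORG" = "1" ]; then
        Xorg :0 &
    fi
    exec "$HWCI_TEST_SCRIPT"   # e.g. /install/deqp-runner.sh

GPU_VERSION (vc4-rpi3, v3d-rpi4, v3dv-rpi4) is what keys the runner to the matching expectation files added below.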
diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt
new file mode 100644
index 000000000..c0d90c2d2
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt
@@ -0,0 +1,330 @@
+glx@glx-make-current,Crash
+glx@glx-multi-window-single-context,Fail
+glx@glx-multithread-buffer,Fail
+glx@glx-query-drawable-glx_fbconfig_id-window,Fail
+glx@glx-swap-pixmap-bad,Fail
+glx@glx-visuals-depth -pixmap,Crash
+glx@glx-visuals-stencil -pixmap,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+shaders@glsl-bug-110796,Fail
+spec@!opengl 1.0@gl-1.0-bitmap-heart-dance,Fail
+spec@!opengl 1.0@gl-1.0-dlist-bitmap,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@gl-1.0-spot-light,Fail
+spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=2,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=2,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=4,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2,Fail
+spec@!opengl 1.1@getteximage-depth,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT16,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT24,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT32,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT,Fail
+spec@!opengl 1.1@getteximage-formats,Fail
+spec@!opengl 1.1@linestipple,Fail
+spec@!opengl 1.1@linestipple@Factor 2x,Fail
+spec@!opengl 1.1@linestipple@Factor 3x,Fail
+spec@!opengl 1.1@linestipple@Line loop,Fail
+spec@!opengl 1.1@linestipple@Line strip,Fail
+spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail
+spec@!opengl 1.1@point-line-no-cull,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@texwrap formats bordercolor,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_INTENSITY12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_INTENSITY16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE12_ALPHA12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE12_ALPHA4- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE16_ALPHA16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGB12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGB16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGBA12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGBA16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_INTENSITY12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_INTENSITY16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE12_ALPHA12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE12_ALPHA4- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE16_ALPHA16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA16- swizzled- border color only,Fail
+spec@!opengl 1.1@windowoverlap,Fail
+spec@!opengl 1.4@gl-1.4-polygon-offset,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail
+spec@!opengl 2.0@max-samplers,Fail
+spec@!opengl 2.0@max-samplers border,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render-fog,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render-sanity,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render-sanity-fog,Fail
+spec@arb_compute_shader@minmax,Fail
+spec@arb_copy_buffer@targets,Fail
+spec@arb_depth_buffer_float@fbo-generatemipmap-formats,Fail
+spec@arb_depth_buffer_float@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32F,Fail
+spec@arb_depth_buffer_float@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32F NPOT,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH32F_STENCIL8- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH_COMPONENT32F- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH32F_STENCIL8- swizzled- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32F- swizzled- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats,Fail
+spec@arb_depth_buffer_float@texwrap formats@GL_DEPTH32F_STENCIL8- NPOT,Fail
+spec@arb_depth_buffer_float@texwrap formats@GL_DEPTH_COMPONENT32F- NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT16,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT16 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT NPOT,Fail
+spec@arb_depth_texture@texwrap formats bordercolor,Fail
+spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT16- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT24- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT32- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT16- swizzled- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT24- swizzled- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32- swizzled- border color only,Fail
+spec@arb_depth_texture@texwrap formats,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32- NPOT,Fail
+spec@arb_framebuffer_object@fbo-drawbuffers-none use_frag_out,Fail
+spec@arb_pixel_buffer_object@pbo-getteximage,Fail
+spec@arb_pixel_buffer_object@texsubimage array pbo,Fail
+spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail
+spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
+spec@arb_shader_storage_buffer_object@compiler@atomicmin-swizzle.vert,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+spec@arb_texture_float@fbo-blending-formats,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY16F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_RGB16F,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_RGB32F,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_RGBA32F,Fail
+spec@arb_texture_float@texwrap formats bordercolor,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_INTENSITY32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE_ALPHA32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_RGB32F- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_RGBA32F- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_INTENSITY32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE_ALPHA32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB32F- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA32F- swizzled- border color only,Fail
+spec@arb_texture_rectangle@1-1-linear-texture,Fail
+spec@arb_texture_rg@fbo-blending-formats-float,Fail
+spec@arb_texture_rg@fbo-blending-formats-float@GL_R32F,Fail
+spec@arb_texture_rg@fbo-blending-formats-float@GL_RG32F,Fail
+spec@arb_texture_rg@texwrap formats bordercolor,Fail
+spec@arb_texture_rg@texwrap formats bordercolor@GL_R16- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor@GL_RG16- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_R16- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG16- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor@GL_R32F- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor@GL_RG32F- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_R32F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_RG32F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float,Fail
+spec@arb_texture_rg@texwrap formats-float@GL_R32F- NPOT,Fail
+spec@arb_texture_rg@texwrap formats-float@GL_RG32F- NPOT,Fail
+spec@arb_transform_feedback2@change objects while paused (gles3),Fail
+spec@egl 1.4@egl-copy-buffers,Crash
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_ext_protected_content@conformance,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+spec@egl_mesa_configless_context@basic,Fail
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
+spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
+spec@ext_framebuffer_object@getteximage-formats init-by-clear-and-render,Fail
+spec@ext_framebuffer_object@getteximage-formats init-by-rendering,Fail
+spec@ext_gpu_shader4@execution@texelfetch@fs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@fs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@fs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@fs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@fs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@fs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@vs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@vs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@vs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@vs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@vs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@vs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture() 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture() 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture(bias) 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture(bias) 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture() cubeshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegrad 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegrad 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegradoffset 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegradoffset 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelod 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelod 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelodoffset 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelodoffset 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4textureoffset 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4textureoffset 1darrayshadow,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor@GL_DEPTH24_STENCIL8- border color only,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor-swizzled,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor-swizzled@GL_DEPTH24_STENCIL8- swizzled- border color only,Fail
+spec@ext_packed_depth_stencil@texwrap formats,Fail
+spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8- NPOT,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+spec@ext_texture_array@array-texture,Fail
+spec@ext_texture_array@fbo-generatemipmap-array rgb9_e5,Fail
+spec@ext_texture_array@fbo-generatemipmap-array,Fail
+spec@ext_texture_array@texsubimage array,Fail
+spec@ext_texture_integer@getteximage-clamping gl_arb_texture_rg,Fail
+spec@ext_texture_integer@getteximage-clamping,Fail
+spec@ext_texture_lod_bias@lodbias,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_ALPHA16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_INTENSITY16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_LUMINANCE16_ALPHA16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_LUMINANCE16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_R16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_RG16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_RGB16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_RGBA16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_ALPHA16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_INTENSITY16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_LUMINANCE16_ALPHA16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_LUMINANCE16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_R16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RG16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGB16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGBA16_SNORM- swizzled- border color only,Fail
+spec@arb_texture_storage@texture-storage@cube array texture,Fail
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-primitives,Fail
+spec@glsl-1.20@execution@fs-underflow-mul-compare-zero,Fail
+spec@intel_performance_query@intel_performance_query-issue_2235,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-cube.vert,Fail
+spec@nv_read_depth@read_depth_gles3,Fail
+spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3,Crash
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt
new file mode 100644
index 000000000..a17f2c79c
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt
@@ -0,0 +1,11 @@
+dEQP-GLES31.functional.compute.shared_var.basic_type.ivec3_highp
+dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.highp_mat2
+KHR-GLES31.core.shader_image_load_store.basic-glsl-earlyFragTests
+
+glx@glx_arb_sync_control@swapbuffersmsc-divisor-zero
+glx@glx_arb_sync_control@waitformsc
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=4
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=2
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=4
+spec@arb_occlusion_query@occlusion_query_order
+spec@egl_chromium_sync_control@conformance
diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt
new file mode 100644
index 000000000..e6b1076a5
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt
@@ -0,0 +1,40 @@
+# Slow tests (> 1 minute to run)
+spec@!opengl 1.1@streaming-texture-leak
+spec@!opengl 1.2@tex3d-maxsize
+spec@ext_texture_env_combine@texture-env-combine
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion
+spec@!opengl 1.0@gl-1.0-blend-func
+
+# Extensions not supported
+spec@arb_gpu_shader_fp64.*
+spec@arb_gpu_shader_gpu5.*
+spec@arb_gpu_shader_int64.*
+spec@arb_tessellation_shader.*
+spec@arb_texture_cube_map.*
+spec@glsl-1.30.*
+spec@glsl-1.40.*
+spec@glsl-1.50.*
+spec@glsl-3.*
+spec@glsl-4.*
+spec@glsl-es-3.20.*
+# Slow tests (> 1 minute to run)
+spec@!opengl 1.1@streaming-texture-leak
+spec@!opengl 1.2@tex3d-maxsize
+spec@ext_texture_env_combine@texture-env-combine
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion
+spec@!opengl 1.0@gl-1.0-blend-func
+
+# Extensions not supported
+spec@arb_gpu_shader_fp64.*
+spec@arb_gpu_shader_gpu5.*
+spec@arb_gpu_shader_int64.*
+spec@arb_tessellation_shader.*
+spec@arb_texture_cube_map.*
+spec@glsl-1.30.*
+spec@glsl-1.40.*
+spec@glsl-1.50.*
+spec@glsl-3.*
+spec@glsl-4.*
+spec@glsl-es-3.20.*
diff --git a/lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt b/lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt
new file mode 100644
index 000000000..0d22f002d
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt
@@ -0,0 +1,5 @@
+dEQP-VK.api.external.fence.opaque_fd.reset_permanent
+dEQP-VK.api.external.fence.opaque_fd.reset_temporary
+dEQP-VK.api.external.fence.opaque_fd.signal_export_import_wait_permanent
+dEQP-VK.ssbo.layout.instance_array_basic_type.std430.uvec4
+dEQP-VK.wsi.display.get_display_plane_capabilities
diff --git a/lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt b/lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt
new file mode 100644
index 000000000..bf6a82c19
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt
@@ -0,0 +1,21 @@
+# Broadcom waivers
+dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero
+dEQP-VK.rasterization.depth_bias.d32_sfloat
+
+# Timeout tests (> 1 minute to run)
+dEQP-VK.api.object_management.max_concurrent.query_pool
+dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite
+dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap
+dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap
+dEQP-VK.memory.mapping.suballocation.full.variable.implicit_unmap
+dEQP-VK.spirv_assembly.instruction.graphics.spirv_ids_abuse.lots_ids_geom
+dEQP-VK.spirv_assembly.instruction.graphics.spirv_ids_abuse.lots_ids_vert
+dEQP-VK.ssbo.layout.random.all_shared_buffer.5
+dEQP-VK.ssbo.layout.random.arrays_of_arrays.13
+dEQP-VK.ssbo.layout.random.nested_structs_arrays.0
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_linear_linear_mipmap_linear_clamp
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_linear_linear_mipmap_linear_repeat
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_nearest_linear_mipmap_linear_clamp
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_nearest_linear_mipmap_linear_repeat
+dEQP-VK.ubo.random.all_out_of_order_offsets.45
+dEQP-VK.ubo.random.all_shared_buffer.48
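Note: the three kinds of expectation files imported here divide the work as follows: *-fails.txt pins known results as a baseline (one "test,Status" pair per line), *-flakes.txt lists tests whose results are unstable and should not gate CI (presumably also what gets reported to the IRC channel named in FLAKES_CHANNEL), and *-skips.txt holds regular expressions for tests never run at all, e.g. whole unsupported extensions like spec@arb_tessellation_shader.*. A hedged sketch of how they plug into a run; the flag spellings follow deqp-runner's CLI as I recall it, and the paths are illustrative:

    # Compare results against the recorded baseline instead of treating
    # every Fail as a CI failure; skip patterns are never executed and
    # flake patterns never gate the job.
    deqp-runner run \
        --deqp /deqp/modules/gles2/deqp-gles2 \
        --caselist /deqp/mustpass/gles2-master.txt \
        --baseline v3d-rpi4-fails.txt \
        --flakes v3d-rpi4-flakes.txt \
        --skips v3d-rpi4-skips.txt \
        --output ./results \
        -- --deqp-surface-width=256 --deqp-surface-height=256

With a baseline in place the job fails only on results that differ from the expectations, so the long lists in this import are regression-tracking state rather than a set of open bugs per se.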
diff --git a/lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt b/lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt
new file mode 100644
index 000000000..d0833cd4f
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt
@@ -0,0 +1,1611 @@
+KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component16,Fail
+KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component24,Fail
+KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_short_depth_component16,Fail
+
+# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3133
+KHR-GLES2.texture_3d.copy_sub_image.negative,Fail
+KHR-GLES2.texture_3d.copy_sub_image.rgba,Fail
+
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_clamp_mirror,Fail 
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_clamp_clamp,Fail 
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_repeat_repeat,Fail 
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_repeat_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_clamp_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_clamp_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_clamp_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_mirror_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_mirror_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_mirror_repeat,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_repeat_clamp,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_repeat_mirror,Fail +KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_repeat_repeat,Fail 
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_repeat_repeat,Fail
+
+# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3134
+KHR-GLES2.texture_3d.filtering.combinations.negative,Fail
+
+KHR-GLES2.texture_3d.filtering.formats.rgba8_linear,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_nearest,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.framebuffer_texture.rgba,Fail
+KHR-GLES2.texture_3d.sub_image.rgba8,Fail
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb888_window,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.create_context.no_config,Fail
+dEQP-EGL.functional.render.multi_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.render.multi_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_context.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgb888_window,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgba8888_window,Crash
+dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
+dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
+dEQP-GLES2.functional.depth_stencil_clear.depth_stencil_masked,Fail
+dEQP-GLES2.functional.draw.draw_arrays.line_loop.multiple_attributes,Fail
+dEQP-GLES2.functional.draw.draw_arrays.line_loop.single_attribute,Fail
+dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgba,Fail
+dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgba,Fail
+dEQP-GLES2.functional.negative_api.vertex_array.vertex_attrib,Fail
+dEQP-GLES2.functional.negative_api.vertex_array.vertex_attribv,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_linear_linear_mirror_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_linear_linear_repeat_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_nearest_linear_mirror_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_nearest_linear_repeat_rgba8888,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.linear_linear_repeat_non_square,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_clamp_non_square,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_mirror_non_square,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_repeat_non_square,Fail
+dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.2d_rgba,Fail
+dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.cube_rgba,Fail
+dEQP-GLES2.functional.texture.wrap.clamp_clamp_nearest_npot_etc1,Fail
+
+glx@glx-copy-sub-buffer samples=2,Crash
+glx@glx-copy-sub-buffer samples=4,Crash
+glx@glx-make-current,Crash
+glx@glx-multithread-buffer,Fail
+glx@glx-query-drawable-glx_fbconfig_id-window,Fail
+glx@glx-swap-pixmap-bad,Fail
+glx@glx-visuals-depth -pixmap,Crash
+glx@glx-visuals-depth,Crash
+glx@glx-visuals-stencil -pixmap,Crash
+glx@glx-visuals-stencil,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+shaders@glsl-arb-fragment-coord-conventions,Fail
+shaders@glsl-bug-110796,Fail
+shaders@glsl-max-vertex-attrib,Fail
+shaders@glsl-predication-on-large-array,Fail
+spec@!opengl 1.0@gl-1.0-bitmap-heart-dance,Fail
+spec@!opengl 1.0@gl-1.0-dlist-bitmap,Crash
+spec@!opengl 1.0@gl-1.0-drawbuffer-modes,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail
+spec@!opengl 1.0@gl-1.0-logicop,Crash
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@gl-1.0-scissor-offscreen,Fail
+spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail
+spec@!opengl 1.1@clipflat,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant:
center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 
1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail 
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: 
FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- 
glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: 
FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail 
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- 
quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: 
FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 
1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- 
glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail 
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- 
glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- 
glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 
1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail +spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail +spec@!opengl 1.1@depthstencil-default_fb-blit samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-blit samples=4,Crash +spec@!opengl 1.1@depthstencil-default_fb-clear samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-clear samples=4,Crash +spec@!opengl 1.1@depthstencil-default_fb-clear,Fail +spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=4,Crash +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=4,Crash +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=4,Crash +spec@!opengl 1.1@depthstencil-default_fb-readpixels-24_8 samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-readpixels-24_8 samples=4,Crash +spec@!opengl 1.1@depthstencil-default_fb-readpixels-float-and-ushort 
samples=2,Crash +spec@!opengl 1.1@depthstencil-default_fb-readpixels-float-and-ushort samples=4,Crash +spec@!opengl 1.1@draw-pixels,Fail +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_line_loop,Fail +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_polygon,Crash +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_quad_strip,Crash +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_quads,Crash +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_triangle_fan,Fail +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_line_loop,Fail +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_polygon,Crash +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_quad_strip,Crash +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_quads,Crash +spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_triangle_fan,Fail +spec@!opengl 1.1@line-flat-clip-color,Fail +spec@!opengl 1.1@linestipple,Fail +spec@!opengl 1.1@linestipple@Baseline,Fail +spec@!opengl 1.1@linestipple@Factor 2x,Fail +spec@!opengl 1.1@linestipple@Factor 3x,Fail +spec@!opengl 1.1@linestipple@Line loop,Fail +spec@!opengl 1.1@linestipple@Line strip,Fail +spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail +spec@!opengl 1.1@polygon-mode,Fail +spec@!opengl 1.1@polygon-mode-offset,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on bottom edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on left edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on top edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail +spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail +spec@!opengl 1.1@read-front clear-front-first samples=2,Crash 
+spec@!opengl 1.1@read-front clear-front-first samples=4,Crash +spec@!opengl 1.1@read-front samples=2,Crash +spec@!opengl 1.1@read-front samples=4,Crash +spec@!opengl 1.1@tex-upside-down-miptree,Fail +spec@!opengl 1.1@texsubimage-unpack,Fail +spec@!opengl 1.1@texwrap 2d proj,Fail +spec@!opengl 1.1@texwrap 2d proj@GL_RGBA8- NPOT- projected,Fail +spec@!opengl 1.1@texwrap 2d proj@GL_RGBA8- projected,Fail +spec@!opengl 1.1@texwrap 2d proj@GL_RGBA8- swizzled- projected,Fail +spec@!opengl 1.1@texwrap 2d,Fail +spec@!opengl 1.1@texwrap 2d@GL_RGBA8,Fail +spec@!opengl 1.1@texwrap 2d@GL_RGBA8- NPOT,Fail +spec@!opengl 1.1@texwrap 2d@GL_RGBA8- swizzled,Fail +spec@!opengl 1.1@texwrap formats,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB10,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB10- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB10- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB10_A2,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB10_A2- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB10_A2- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB12,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB12- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB12- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB16,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB16- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB16- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB5,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB5- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB5- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB5_A1,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB5_A1- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB5_A1- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB8,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB8- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGB8- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA12,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA12- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA12- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA16,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA16- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA16- swizzled,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA8,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA8- NPOT,Fail +spec@!opengl 1.1@texwrap formats@GL_RGBA8- swizzled,Fail +spec@!opengl 1.1@windowoverlap,Fail +spec@!opengl 1.2@copyteximage 3d,Fail +spec@!opengl 1.2@getteximage-targets 3d,Fail +spec@!opengl 1.2@lodclamp,Fail +spec@!opengl 1.2@lodclamp-between,Fail +spec@!opengl 1.2@lodclamp-between-max,Fail +spec@!opengl 1.2@mipmap-setup,Fail +spec@!opengl 1.2@tex3d,Fail +spec@!opengl 1.2@tex3d-maxsize,Fail +spec@!opengl 1.2@texwrap 3d proj,Fail +spec@!opengl 1.2@texwrap 3d proj@GL_RGBA8- NPOT- projected,Fail +spec@!opengl 1.2@texwrap 3d proj@GL_RGBA8- projected,Fail +spec@!opengl 1.2@texwrap 3d proj@GL_RGBA8- swizzled- projected,Fail +spec@!opengl 1.2@texwrap 3d,Fail +spec@!opengl 1.2@texwrap 3d@GL_RGBA8,Fail +spec@!opengl 1.2@texwrap 3d@GL_RGBA8- NPOT,Fail +spec@!opengl 1.2@texwrap 3d@GL_RGBA8- swizzled,Fail +spec@!opengl 1.3@tex3d-depth1,Fail +spec@!opengl 1.4@gl-1.4-polygon-offset,Fail +spec@!opengl 1.4@tex-miplevel-selection,Fail +spec@!opengl 1.4@tex-miplevel-selection-lod,Fail +spec@!opengl 1.4@tex-miplevel-selection-lod-bias,Fail +spec@!opengl 1.5@depth-tex-compare,Fail +spec@!opengl 2.0@attrib-assignments,Fail +spec@!opengl 2.0@gl-2.0-edgeflag,Fail +spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail +spec@!opengl 2.0@occlusion-query-discard,Fail +spec@!opengl 2.0@tex3d-npot,Fail 
+spec@!opengl 2.1@minmax,Fail +spec@!opengl 2.1@pbo,Fail +spec@!opengl 2.1@pbo@test_polygon_stip,Fail +spec@!opengl 2.1@polygon-stipple-fs,Fail +spec@arb_arrays_of_arrays@execution@glsl-arrays-copy-size-mismatch,Fail +spec@arb_depth_texture@depth-level-clamp,Fail +spec@arb_depth_texture@texwrap formats,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16- NPOT,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16- swizzled,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24- NPOT,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24- swizzled,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32- NPOT,Fail +spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32- swizzled,Fail +spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index,Crash +spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index-user_varrays,Crash +spec@arb_es2_compatibility@texwrap formats,Fail +spec@arb_es2_compatibility@texwrap formats@GL_RGB565,Fail +spec@arb_es2_compatibility@texwrap formats@GL_RGB565- NPOT,Fail +spec@arb_es2_compatibility@texwrap formats@GL_RGB565- swizzled,Fail +spec@arb_fragment_coord_conventions@fp-arb-fragment-coord-conventions-integer,Fail +spec@arb_fragment_coord_conventions@fp-arb-fragment-coord-conventions-none,Fail +spec@arb_fragment_program@fp-indirections2,Fail +spec@arb_fragment_program@minmax,Fail +spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_depth24_stencil8,Fail +spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index1,Fail +spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index16,Fail +spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index4,Fail +spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index8,Fail +spec@arb_framebuffer_object@fbo-attachments-blit-scaled-linear,Fail +spec@arb_framebuffer_object@fbo-blit-stretch,Fail +spec@arb_framebuffer_object@fbo-generatemipmap-3d,Fail +spec@arb_framebuffer_object@fbo-mipmap-copypix,Fail +spec@arb_framebuffer_object@framebuffer-blit-levels draw stencil,Fail +spec@arb_framebuffer_object@framebuffer-blit-levels read stencil,Fail +spec@arb_framebuffer_object@mixed-buffer-sizes,Fail +spec@arb_framebuffer_object@same-attachment-glframebuffertexture2d-gl_depth_stencil_attachment,Fail +spec@arb_framebuffer_srgb@arb_framebuffer_srgb-srgb_conformance,Fail +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample disabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample disabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample enabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample enabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa disabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa disabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa enabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa enabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample disabled clear,Crash 
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample disabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample enabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample enabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa disabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa disabled render,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa enabled clear,Crash +spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa enabled render,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample disabled clear,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample disabled render,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample enabled clear,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample enabled render,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa disabled clear,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa disabled render,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa enabled clear,Crash +spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa enabled render,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample disabled clear,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample disabled render,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample enabled clear,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample enabled render,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa disabled clear,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa disabled render,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa enabled clear,Crash +spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa enabled render,Crash +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_ALPHA_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_BLUE_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_DEPTH_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_GREEN_SIZE,Fail +spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_RED_SIZE,Fail +spec@arb_internalformat_query2@api error checks,Fail +spec@arb_internalformat_query2@max dimensions related pname checks,Fail +spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_COMBINED_DIMENSIONS,Fail +spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_DEPTH,Fail +spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_HEIGHT,Fail +spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_WIDTH,Fail +spec@arb_occlusion_query2@render,Fail +spec@arb_occlusion_query@occlusion_query,Fail +spec@arb_occlusion_query@occlusion_query_conform,Fail +spec@arb_occlusion_query@occlusion_query_meta_fragments,Fail +spec@arb_occlusion_query@occlusion_query_meta_save,Fail +spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail +spec@arb_pixel_buffer_object@pbo-getteximage,Fail +spec@arb_pixel_buffer_object@texsubimage-unpack pbo,Fail +spec@arb_point_sprite@arb_point_sprite-mipmap,Fail 
+spec@arb_provoking_vertex@arb-provoking-vertex-render,Fail +spec@arb_sampler_objects@sampler-objects,Fail +spec@arb_shader_texture_lod@execution@glsl-fs-texturelod-01,Fail +spec@arb_texture_multisample@arb_texture_multisample-teximage-3d-multisample,Fail +spec@arb_texture_rectangle@1-1-linear-texture,Fail +spec@arb_texture_rectangle@copyteximage rect samples=2,Crash +spec@arb_texture_rectangle@copyteximage rect samples=4,Crash +spec@arb_texture_rectangle@texrect-many,Crash +spec@arb_texture_storage@texture-storage,Fail +spec@arb_texture_storage@texture-storage@3D mipmapped ,Fail +spec@arb_texture_storage@texture-storage@3D non-mipmapped ,Fail +spec@arb_vertex_program@minmax,Fail +spec@egl 1.4@egl-copy-buffers,Crash +spec@egl 1.4@eglterminate then unbind context,Fail +spec@egl 1.4@largest possible eglcreatepbuffersurface and then glclear,Fail +spec@egl_ext_protected_content@conformance,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail +spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,Fail +spec@egl_khr_surfaceless_context@viewport,Fail +spec@egl_mesa_configless_context@basic,Fail +spec@ext_direct_state_access@multi-texture,Crash +spec@ext_direct_state_access@multi-texture@MultiTexImage3DEXT,Fail +spec@ext_direct_state_access@multi-texture@MultiTexSubImage1DEXT,Fail +spec@ext_direct_state_access@textures,Fail +spec@ext_direct_state_access@textures@CopyTextureSubImage3DEXT + display list GL_COMPILE,Fail +spec@ext_direct_state_access@textures@CopyTextureSubImage3DEXT + display list GL_COMPILE_AND_EXECUTE,Fail +spec@ext_direct_state_access@textures@CopyTextureSubImage3DEXT,Fail +spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_1D + glTex* + display list GL_COMPILE,Fail +spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_1D + glTex* + display list GL_COMPILE_AND_EXECUTE,Fail +spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_1D + glTex*,Fail +spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_3D + glTex* + display list GL_COMPILE,Fail +spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_3D + glTex* + display list GL_COMPILE_AND_EXECUTE,Fail +spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_3D + glTex*,Fail +spec@ext_direct_state_access@textures@TextureImage3DEXT + display list GL_COMPILE,Fail +spec@ext_direct_state_access@textures@TextureImage3DEXT + display list GL_COMPILE_AND_EXECUTE,Fail +spec@ext_direct_state_access@textures@TextureImage3DEXT,Fail +spec@ext_direct_state_access@textures@TextureSubImage2DEXT + display list GL_COMPILE,Fail +spec@ext_direct_state_access@textures@TextureSubImage2DEXT + display list GL_COMPILE_AND_EXECUTE,Fail +spec@ext_direct_state_access@textures@TextureSubImage2DEXT,Fail +spec@ext_direct_state_access@textures@TextureSubImage3DEXT + display list GL_COMPILE,Fail +spec@ext_direct_state_access@textures@TextureSubImage3DEXT + display list GL_COMPILE_AND_EXECUTE,Fail +spec@ext_direct_state_access@textures@TextureSubImage3DEXT,Fail +spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail +spec@ext_framebuffer_multisample@blit-flipped 2 x,Crash +spec@ext_framebuffer_multisample@blit-flipped 2 y,Crash +spec@ext_framebuffer_multisample@blit-flipped 4 x,Crash +spec@ext_framebuffer_multisample@blit-flipped 4 y,Crash +spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail +spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 downsample,Crash +spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Crash 
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Crash +spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 downsample,Crash +spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Crash +spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Crash +spec@ext_framebuffer_multisample@enable-flag,Crash +spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail +spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail +spec@ext_framebuffer_multisample@line-smooth 2,Crash +spec@ext_framebuffer_multisample@line-smooth 4,Crash +spec@ext_framebuffer_multisample@multisample-blit 2 color linear,Crash +spec@ext_framebuffer_multisample@multisample-blit 2 color,Crash +spec@ext_framebuffer_multisample@multisample-blit 2 depth,Crash +spec@ext_framebuffer_multisample@multisample-blit 2 stencil,Crash +spec@ext_framebuffer_multisample@multisample-blit 4 color linear,Crash +spec@ext_framebuffer_multisample@multisample-blit 4 color,Crash +spec@ext_framebuffer_multisample@multisample-blit 4 depth,Crash +spec@ext_framebuffer_multisample@multisample-blit 4 stencil,Crash +spec@ext_framebuffer_multisample@no-color 2 depth combined,Crash +spec@ext_framebuffer_multisample@no-color 2 depth single,Crash +spec@ext_framebuffer_multisample@no-color 2 depth-computed combined,Crash +spec@ext_framebuffer_multisample@no-color 2 depth-computed single,Crash +spec@ext_framebuffer_multisample@no-color 2 stencil combined,Crash +spec@ext_framebuffer_multisample@no-color 2 stencil single,Crash +spec@ext_framebuffer_multisample@no-color 4 depth combined,Crash +spec@ext_framebuffer_multisample@no-color 4 depth single,Crash +spec@ext_framebuffer_multisample@no-color 4 depth-computed combined,Crash +spec@ext_framebuffer_multisample@no-color 4 depth-computed single,Crash +spec@ext_framebuffer_multisample@no-color 4 stencil combined,Crash +spec@ext_framebuffer_multisample@no-color 4 stencil single,Crash +spec@ext_framebuffer_multisample@point-smooth 2,Crash +spec@ext_framebuffer_multisample@point-smooth 4,Crash +spec@ext_framebuffer_multisample@polygon-smooth 2,Crash +spec@ext_framebuffer_multisample@polygon-smooth 4,Crash +spec@ext_framebuffer_multisample@sample-alpha-to-coverage 2 color,Fail +spec@ext_framebuffer_multisample@sample-alpha-to-coverage 2 depth,Crash +spec@ext_framebuffer_multisample@sample-alpha-to-coverage 4 color,Fail +spec@ext_framebuffer_multisample@sample-alpha-to-coverage 4 depth,Crash +spec@ext_framebuffer_multisample@sample-coverage 2 inverted,Crash +spec@ext_framebuffer_multisample@sample-coverage 2 non-inverted,Crash +spec@ext_framebuffer_multisample@sample-coverage 4 inverted,Crash +spec@ext_framebuffer_multisample@sample-coverage 4 non-inverted,Crash +spec@ext_framebuffer_multisample@unaligned-blit 2 color downsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 2 color msaa,Crash +spec@ext_framebuffer_multisample@unaligned-blit 2 color upsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 2 depth downsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 2 depth msaa,Crash +spec@ext_framebuffer_multisample@unaligned-blit 2 depth upsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 4 color downsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 4 color msaa,Crash +spec@ext_framebuffer_multisample@unaligned-blit 4 color upsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 4 depth downsample,Crash +spec@ext_framebuffer_multisample@unaligned-blit 4 depth msaa,Crash 
+spec@ext_framebuffer_multisample@unaligned-blit 4 depth upsample,Crash +spec@ext_framebuffer_multisample@upsample 2 color linear,Crash +spec@ext_framebuffer_multisample@upsample 2 color,Crash +spec@ext_framebuffer_multisample@upsample 2 depth,Crash +spec@ext_framebuffer_multisample@upsample 2 stencil,Crash +spec@ext_framebuffer_multisample@upsample 4 color linear,Crash +spec@ext_framebuffer_multisample@upsample 4 color,Crash +spec@ext_framebuffer_multisample@upsample 4 depth,Crash +spec@ext_framebuffer_multisample@upsample 4 stencil,Crash +spec@ext_framebuffer_multisample_blit_scaled@negative-blit-scaled,Crash +spec@ext_framebuffer_object@fbo-3d,Fail +spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail +spec@ext_framebuffer_object@fbo-depth-sample-compare,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index1-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index16-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index4-blit,Fail +spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index8-blit,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail +spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail +spec@ext_occlusion_query_boolean@any-samples,Fail +spec@ext_packed_depth_stencil@depth_stencil texture,Fail +spec@ext_packed_depth_stencil@fbo-depthstencil-gl_depth24_stencil8-clear,Fail +spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-blit,Fail +spec@ext_packed_depth_stencil@texwrap formats,Fail +spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8,Fail +spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8- NPOT,Fail +spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8- swizzled,Fail +spec@ext_provoking_vertex@provoking-vertex,Fail +spec@ext_texture_format_bgra8888@api-errors,Fail +spec@ext_texture_srgb@texwrap formats bordercolor,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SLUMINANCE8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SLUMINANCE8_ALPHA8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8_ALPHA8- swizzled- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor@GL_SLUMINANCE8- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor@GL_SLUMINANCE8_ALPHA8- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8- border color only,Fail +spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8_ALPHA8- border color only,Fail +spec@ext_texture_srgb@texwrap formats,Fail +spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8,Fail +spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8- NPOT,Fail +spec@ext_texture_srgb@texwrap 
formats@GL_SLUMINANCE8- swizzled,Fail +spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8_ALPHA8,Fail +spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8_ALPHA8- NPOT,Fail +spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8_ALPHA8- swizzled,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8- NPOT,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8- swizzled,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- NPOT,Fail +spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- swizzled,Fail +spec@glsl-1.10@built-in constants,Fail +spec@glsl-1.10@built-in constants@gl_MaxVertexAttribs,Fail +spec@glsl-1.10@execution@built-in-functions@fs-cos-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-cos-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-cos-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-cos-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp2-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp2-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-exp2-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log2-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log2-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-log2-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-pow-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-pow-vec2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-pow-vec3-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-pow-vec4-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-sin-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-sin-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-sin-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-sin-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@fs-tan-float,Fail +spec@glsl-1.10@execution@built-in-functions@fs-tan-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@fs-tan-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@fs-tan-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-cos-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-cos-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-cos-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-cos-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp2-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp2-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-exp2-vec4,Fail 
+spec@glsl-1.10@execution@built-in-functions@vs-log-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log2-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log2-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-log2-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-pow-float-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-pow-vec2-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-pow-vec3-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-pow-vec4-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-sin-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-sin-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-sin-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-sin-vec4,Fail +spec@glsl-1.10@execution@built-in-functions@vs-tan-float,Fail +spec@glsl-1.10@execution@built-in-functions@vs-tan-vec2,Fail +spec@glsl-1.10@execution@built-in-functions@vs-tan-vec3,Fail +spec@glsl-1.10@execution@built-in-functions@vs-tan-vec4,Fail +spec@glsl-1.10@execution@fs-texture-select,Fail +spec@glsl-1.10@execution@glsl-fs-convolution-2,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-sampler-numbering-2,Fail +spec@glsl-1.10@execution@samplers@glsl-fs-sampler-numbering-3,Fail +spec@glsl-1.10@execution@samplers@in-parameter-array,Fail +spec@glsl-1.10@execution@texture3d,Fail +spec@glsl-1.20@built-in constants,Fail +spec@glsl-1.20@built-in constants@gl_MaxVertexAttribs,Fail +spec@glsl-1.20@execution@fs-nan-builtin-max,Fail +spec@glsl-1.20@execution@fs-nan-builtin-min,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 1d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 1dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 2d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 2dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 3d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() cube,Crash +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 1d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 1dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 2d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 2dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 3d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) cube,Crash +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1d_projvec4,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 2d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 2d_projvec4,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 2dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 3d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1d_projvec4,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1dshadow,Fail 
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 2d,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 2d_projvec4,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 2dshadow,Fail +spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 3d,Fail +spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-col-row-wr,Fail +spec@glsl-1.20@execution@variable-indexing@vs-temp-array-mat4-index-col-row-wr,Fail +spec@glsl-1.20@execution@vs-nan-builtin-max,Fail +spec@glsl-1.20@execution@vs-nan-builtin-min,Fail +spec@intel_performance_query@intel_performance_query-issue_2235,Fail +spec@khr_texture_compression_astc@basic-gles,Fail +spec@khr_texture_compression_astc@miptree-gl ldr,Fail +spec@khr_texture_compression_astc@miptree-gl ldr@LDR Profile,Fail +spec@khr_texture_compression_astc@miptree-gl srgb,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-sd,Fail +spec@khr_texture_compression_astc@miptree-gl srgb-sd@sRGB skip decode,Fail +spec@khr_texture_compression_astc@miptree-gl srgb@sRGB decode,Fail +spec@khr_texture_compression_astc@miptree-gles ldr,Fail +spec@khr_texture_compression_astc@miptree-gles ldr@LDR Profile,Fail +spec@khr_texture_compression_astc@miptree-gles srgb,Fail +spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail +spec@oes_compressed_etc1_rgb8_texture@miptree,Fail diff --git a/lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt b/lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt new file mode 100644 index 000000000..895a2f767 --- /dev/null +++ b/lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt @@ -0,0 +1,39 @@ +dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_and_pos_x_and_neg_x_neg_y_neg_z +dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_and_pos_x_and_pos_y_pos_z +dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_and_pos_x_neg_y_pos_z_and_neg_x_pos_y_neg_z +dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_pos_x_and_neg_x_neg_y_pos_z_and_neg_x_pos_y_neg_z +dEQP-GLES2.functional.draw.random.51 +dEQP-GLES2.functional.fragment_ops.blend.rgb_func_alpha_func.src.one_minus_src_alpha_constant_color +dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_direct_write_dynamic_loop_subscript_read_vertex +dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.basic_mediump_int_vertex +dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.conditional_continue_vertex +dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.function_call_inout_vertex +dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.function_call_return_vertex +dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.nested_sequence_vertex +dEQP-GLES2.functional.shaders.loops.while_constant_iterations.select_iteration_count_vertex +dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.function_call_return_vertex +dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.infinite_with_conditional_break_vertex +dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.post_increment_vertex +dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.single_iteration_vertex +dEQP-GLES2.functional.shaders.operator.unary_operator.pre_decrement_result.mediump_vec3_fragment +dEQP-GLES2.functional.shaders.random.exponential.fragment.51 
+dEQP-GLES2.functional.shaders.random.texture.fragment.129 +dEQP-GLES2.functional.shaders.return.output_write_in_func_never_vertex +dEQP-GLES2.functional.texture.filtering.2d.linear_linear_clamp_rgb888_pot +dEQP-GLES2.functional.texture.filtering.cube.linear_mipmap_linear_nearest_mirror_rgba8888 +dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_rgba8888_pot +dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_linear_linear_clamp_rgba8888 +dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_linear_nearest_repeat_l8 +dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_nearest_linear_clamp_rgba8888 +dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_nearest_linear_mirror_rgba8888 +dEQP-GLES2.functional.texture.mipmap.cube.generate.rgb565_fastest +dEQP-GLES2.functional.texture.size.cube.256x256_rgb888 + +glx@glx-multi-window-single-context +shaders@glsl-vs-loop +shaders@glsl-vs-loop-nested +spec@arb_framebuffer_srgb@blit renderbuffer srgb single_sampled enabled clear +spec@egl_chromium_sync_control@conformance +spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-readpixels +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2 +spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=4 diff --git a/lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt b/lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt new file mode 100644 index 000000000..692eaff24 --- /dev/null +++ b/lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt @@ -0,0 +1,46 @@ +# Note: skips lists for CI are just a list of lines that, when +# non-zero-length and not starting with '#', will regex match to +# delete lines from the test list. Be careful. + +# This is causing a binning memory overflow problem +dEQP-GLES2.functional.fragment_ops.scissor.outside_render_line + +# These are very slow +dEQP-GLES2.functional.uniform_api.random.3 +dEQP-GLES2.functional.uniform_api.random.79 + +# Conformance issue: VC4 needs dynamic loops in the VS to cause a +# shader link failure. +# +# The issue is that the HW doesn't have an exec mask at dispatch +# for the VS, so the shouldn't-be-exec channels have undefined +# contents and may cause infinite loops, leading to GPU hangs. The +# process of GPU hang reset causes flakes in whatever other jobs are +# running simultaneously, so we can't even leave these in the flakes +# list for tracking. 
+dEQP-GLES2.functional.shaders.loops.*dynamic.*vertex + +# Timeout tests (> 1 minute to run) +KHR-GLES2.texture_3d.filtering.sizes.3x7x5_linear_mipmap_linear +KHR-GLES2.texture_3d.filtering.sizes.4x8x8_linear_mipmap_linear + +# Slow tests (> 1 minute to run) +spec@ext_framebuffer_multisample@accuracy +glx@glx-multithread-texture +spec@arb_internalformat_query2@all internalformat_<x>_type pname checks +spec@!opengl 1.1@streaming-texture-leak +spec@!opengl 1.0@gl-1.0-blend-func +shaders@glsl-predication-on-large-array + +# Extensions not supported +spec@arb_gpu_shader_fp64.* +spec@arb_gpu_shader_gpu5.* +spec@arb_gpu_shader_int64.* +spec@arb_tessellation_shader.* +spec@arb_texture_cube_map.* +spec@glsl-1.30.* +spec@glsl-1.40.* +spec@glsl-1.50.* +spec@glsl-3.* +spec@glsl-4.* +spec@glsl-es-3.* diff --git a/lib/mesa/src/broadcom/cle/v3d_decoder.c b/lib/mesa/src/broadcom/cle/v3d_decoder.c index 364419074..97dd8ce84 100644 --- a/lib/mesa/src/broadcom/cle/v3d_decoder.c +++ b/lib/mesa/src/broadcom/cle/v3d_decoder.c @@ -674,11 +674,11 @@ v3d_spec_load(const struct v3d_device_info *devinfo) for (int i = 0; i < ARRAY_SIZE(genxml_files_table); i++) { if (i != 0) { - assert(genxml_files_table[i - 1].gen_10 < - genxml_files_table[i].gen_10); + assert(genxml_files_table[i - 1].ver_10 < + genxml_files_table[i].ver_10); } - if (genxml_files_table[i].gen_10 <= devinfo->ver) { + if (genxml_files_table[i].ver_10 <= devinfo->ver) { text_offset = genxml_files_table[i].offset; text_length = genxml_files_table[i].length; } diff --git a/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml b/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml index 2fdc685ae..de80a6b64 100644 --- a/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml +++ b/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml @@ -950,11 +950,7 @@ <field name="Double-buffer in non-ms mode" size="1" start="15" type="bool"/> <field name="Multisample Mode (4x)" size="1" start="14" type="bool"/> - <field name="Maximum BPP of all render targets" size="2" start="12" type="uint"> - <value name="Render target maximum 32bpp" value="0"/> - <value name="Render target maximum 64bpp" value="1"/> - <value name="Render target maximum 128bpp" value="2"/> - </field> + <field name="Maximum BPP of all render targets" size="2" start="12" type="Internal BPP"/> <field name="Number of Render Targets" size="4" start="8" type="uint" minus_one="true"/> @@ -992,11 +988,7 @@ <field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/> <field name="Multisample Mode (4x)" size="1" start="42" type="bool"/> - <field name="Maximum BPP of all render targets" size="2" start="40" type="uint"> - <value name="Render target maximum 32bpp" value="0"/> - <value name="Render target maximum 64bpp" value="1"/> - <value name="Render target maximum 128bpp" value="2"/> - </field> + <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP"/> <field name="Image Height (pixels)" size="16" start="24" type="uint"/> <field name="Image Width (pixels)" size="16" start="8" type="uint"/> diff --git a/lib/mesa/src/broadcom/clif/clif_dump.c b/lib/mesa/src/broadcom/clif/clif_dump.c index bf84c0b96..0aaa6b6ad 100644 --- a/lib/mesa/src/broadcom/clif/clif_dump.c +++ b/lib/mesa/src/broadcom/clif/clif_dump.c @@ -52,7 +52,7 @@ clif_dump_add_address_to_worklist(struct clif_dump *clif, struct clif_dump * clif_dump_init(const struct v3d_device_info *devinfo, - FILE *out, bool pretty) + FILE *out, bool pretty, bool nobin) { struct clif_dump *clif = rzalloc(NULL, struct clif_dump); @@ -60,6 +60,7 @@ 
clif_dump_init(const struct v3d_device_info *devinfo, clif->out = out; clif->spec = v3d_spec_load(devinfo); clif->pretty = pretty; + clif->nobin = nobin; list_inithead(&clif->worklist); @@ -238,6 +239,9 @@ static void clif_dump_binary(struct clif_dump *clif, struct clif_bo *bo, uint32_t start, uint32_t end) { + if (clif->pretty && clif->nobin) + return; + if (start == end) return; diff --git a/lib/mesa/src/broadcom/clif/clif_dump.h b/lib/mesa/src/broadcom/clif/clif_dump.h index 8de3a2cbe..63f3ae77d 100644 --- a/lib/mesa/src/broadcom/clif/clif_dump.h +++ b/lib/mesa/src/broadcom/clif/clif_dump.h @@ -32,7 +32,7 @@ struct clif_dump; struct drm_v3d_submit_cl; struct clif_dump *clif_dump_init(const struct v3d_device_info *devinfo, - FILE *output, bool pretty); + FILE *output, bool pretty, bool nobin); void clif_dump(struct clif_dump *clif, const struct drm_v3d_submit_cl *submit); void clif_dump_destroy(struct clif_dump *clif); diff --git a/lib/mesa/src/broadcom/clif/clif_private.h b/lib/mesa/src/broadcom/clif/clif_private.h index 597d0b506..d96bfd12d 100644 --- a/lib/mesa/src/broadcom/clif/clif_private.h +++ b/lib/mesa/src/broadcom/clif/clif_private.h @@ -54,6 +54,11 @@ struct clif_dump { * output. */ bool pretty; + + /** + * Flag to not dump the binary resources. + */ + bool nobin; }; enum reloc_worklist_type { diff --git a/lib/mesa/src/broadcom/common/v3d_debug.c b/lib/mesa/src/broadcom/common/v3d_debug.c index 64a2426b9..508a2b7c7 100644 --- a/lib/mesa/src/broadcom/common/v3d_debug.c +++ b/lib/mesa/src/broadcom/common/v3d_debug.c @@ -34,33 +34,65 @@ #include "common/v3d_debug.h" #include "util/macros.h" -#include "util/debug.h" +#include "util/u_debug.h" #include "c11/threads.h" uint32_t V3D_DEBUG = 0; -static const struct debug_control debug_control[] = { - { "cl", V3D_DEBUG_CL}, - { "clif", V3D_DEBUG_CLIF}, - { "qpu", V3D_DEBUG_QPU}, - { "vir", V3D_DEBUG_VIR}, - { "nir", V3D_DEBUG_NIR}, - { "tgsi", V3D_DEBUG_TGSI}, - { "shaderdb", V3D_DEBUG_SHADERDB}, - { "surface", V3D_DEBUG_SURFACE}, - { "perf", V3D_DEBUG_PERF}, - { "norast", V3D_DEBUG_NORAST}, - { "fs", V3D_DEBUG_FS}, - { "gs", V3D_DEBUG_GS}, - { "vs", V3D_DEBUG_VS}, - { "cs", V3D_DEBUG_CS}, - { "always_flush", V3D_DEBUG_ALWAYS_FLUSH}, - { "precompile", V3D_DEBUG_PRECOMPILE}, - { "ra", V3D_DEBUG_RA}, - { "dump_spirv", V3D_DEBUG_DUMP_SPIRV}, - { NULL, 0 } +static const struct debug_named_value debug_control[] = { + { "cl", V3D_DEBUG_CL, + "Dump command list during creation" }, + { "cl_nobin", V3D_DEBUG_CL_NO_BIN, + "Dump command list during creation, excluding binary resources" }, + { "clif", V3D_DEBUG_CLIF, + "Dump command list (CLIF format) during creation", }, + { "qpu", V3D_DEBUG_QPU, + "Dump generated QPU instructions" }, + { "vir", V3D_DEBUG_VIR, + "Dump VIR during program compile" }, + { "nir", V3D_DEBUG_NIR, + "Dump NIR during program compile" }, + { "tgsi", V3D_DEBUG_TGSI, + "Dump TGSI during program compile" }, + { "shaderdb", V3D_DEBUG_SHADERDB, + "Dump program compile information for shader-db analysis" }, + { "surface", V3D_DEBUG_SURFACE, + "Print resource layout information" }, + { "perf", V3D_DEBUG_PERF, + "Print performance-related events during runtime" }, + { "norast", V3D_DEBUG_NORAST, + "Skip actual hardware execution of commands" }, + { "fs", V3D_DEBUG_FS, + "Dump fragment shaders" }, + { "gs", V3D_DEBUG_GS, + "Dump geometry shaders" }, + { "vs", V3D_DEBUG_VS, + "Dump vertex shaders" }, + { "cs", V3D_DEBUG_CS, + "Dump compute shaders" }, + { "always_flush", V3D_DEBUG_ALWAYS_FLUSH, + "Flush after each draw call" }, + { 
"precompile", V3D_DEBUG_PRECOMPILE, + "Precompiles shader variant at shader state creation time" }, + { "ra", V3D_DEBUG_RA, + "Dump register allocation failures" }, + { "dump_spirv", V3D_DEBUG_DUMP_SPIRV, + "Dump SPIR-V code" }, + { "tmu32", V3D_DEBUG_TMU_32BIT, + "Force 32-bit precision on all TMU operations" }, + /* This can lead to incorrect behavior for applications that do + * require full 32-bit precision, but can improve performance + * for those that don't. + */ + { "tmu16", V3D_DEBUG_TMU_16BIT, + "Force 16-bit precision on all TMU operations" }, + { "noloopunroll", V3D_DEBUG_NO_LOOP_UNROLL, + "Disable loop unrolling" }, + { NULL } }; +DEBUG_GET_ONCE_FLAGS_OPTION(v3d_debug, "V3D_DEBUG", debug_control, 0) + uint32_t v3d_debug_flag_for_shader_stage(gl_shader_stage stage) { @@ -76,20 +108,11 @@ v3d_debug_flag_for_shader_stage(gl_shader_stage stage) return flags[stage]; } -static void -v3d_process_debug_variable_once(void) -{ - V3D_DEBUG = parse_debug_string(getenv("V3D_DEBUG"), debug_control); - - if (V3D_DEBUG & V3D_DEBUG_SHADERDB) - V3D_DEBUG |= V3D_DEBUG_NORAST; -} - void v3d_process_debug_variable(void) { - static once_flag v3d_process_debug_variable_flag = ONCE_FLAG_INIT; + V3D_DEBUG = debug_get_option_v3d_debug(); - call_once(&v3d_process_debug_variable_flag, - v3d_process_debug_variable_once); + if (V3D_DEBUG & V3D_DEBUG_SHADERDB) + V3D_DEBUG |= V3D_DEBUG_NORAST; } diff --git a/lib/mesa/src/broadcom/common/v3d_debug.h b/lib/mesa/src/broadcom/common/v3d_debug.h index efa269758..72d632568 100644 --- a/lib/mesa/src/broadcom/common/v3d_debug.h +++ b/lib/mesa/src/broadcom/common/v3d_debug.h @@ -59,6 +59,10 @@ extern uint32_t V3D_DEBUG; #define V3D_DEBUG_PRECOMPILE (1 << 15) #define V3D_DEBUG_RA (1 << 16) #define V3D_DEBUG_DUMP_SPIRV (1 << 17) +#define V3D_DEBUG_TMU_32BIT (1 << 18) +#define V3D_DEBUG_TMU_16BIT (1 << 19) +#define V3D_DEBUG_NO_LOOP_UNROLL (1 << 20) +#define V3D_DEBUG_CL_NO_BIN (1 << 21) #define V3D_DEBUG_SHADERS (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \ V3D_DEBUG_VIR | V3D_DEBUG_QPU | \ @@ -81,11 +85,6 @@ extern uint32_t V3D_DEBUG; #define dbg_printf(...) fprintf(stderr, __VA_ARGS__) #endif /* HAVE_ANDROID_PLATFORM */ -#define DBG(flag, ...) 
do { \ - if (unlikely(V3D_DEBUG & (flag))) \ - dbg_printf(__VA_ARGS__); \ -} while(0) - extern uint32_t v3d_debug_flag_for_shader_stage(gl_shader_stage stage); extern void v3d_process_debug_variable(void); diff --git a/lib/mesa/src/broadcom/common/v3d_limits.h b/lib/mesa/src/broadcom/common/v3d_limits.h index a974ebc58..129e53e29 100644 --- a/lib/mesa/src/broadcom/common/v3d_limits.h +++ b/lib/mesa/src/broadcom/common/v3d_limits.h @@ -62,4 +62,6 @@ #define V3D_MAX_POINT_SIZE 512.0f #define V3D_MAX_LINE_WIDTH 32 +#define V3D_MAX_BUFFER_RANGE (1 << 27) + #endif /* V3D_LIMITS_H */ diff --git a/lib/mesa/src/broadcom/common/v3d_tiling.c b/lib/mesa/src/broadcom/common/v3d_tiling.c new file mode 100644 index 000000000..22f84811e --- /dev/null +++ b/lib/mesa/src/broadcom/common/v3d_tiling.c @@ -0,0 +1,492 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file v3d_tiling.c + * + * Handles information about the V3D tiling formats, and loading and storing + * from them. + */ + +#include <stdint.h> +#include "v3d_tiling.h" +#include "broadcom/common/v3d_cpu_tiling.h" + +/** Return the width in pixels of a 64-byte microtile. */ +uint32_t +v3d_utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + case 8: + return 4; + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** Return the height in pixels of a 64-byte microtile. */ +uint32_t +v3d_utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + return 4; + case 8: + case 16: + return 2; + default: + unreachable("unknown cpp"); + } +} + +/** + * Returns the byte address for a given pixel within a utile. + * + * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4 + * arrangement. + */ +static inline uint32_t +v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y) +{ + uint32_t utile_w = v3d_utile_width(cpp); + + assert(x < utile_w && y < v3d_utile_height(cpp)); + + return x * cpp + y * utile_w * cpp; +} + +/** + * Returns the byte offset for a given pixel in a LINEARTILE layout. + * + * LINEARTILE is a single line of utiles in either the X or Y direction. 
+ */ +static inline uint32_t +v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t utile_index_x = x / utile_w; + uint32_t utile_index_y = y / utile_h; + + assert(utile_index_x == 0 || utile_index_y == 0); + + return (64 * (utile_index_x + utile_index_y) + + v3d_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +/** + * Returns the byte offset for a given pixel in a UBLINEAR layout. + * + * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2 + * utiles), and the UIF blocks are in 1 or 2 columns in raster order. + */ +static inline uint32_t +v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y, + int ublinear_number) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t ub_w = utile_w * 2; + uint32_t ub_h = utile_h * 2; + uint32_t ub_x = x / ub_w; + uint32_t ub_y = y / ub_h; + + return (256 * (ub_y * ublinear_number + + ub_x) + + ((x & utile_w) ? 64 : 0) + + ((y & utile_h) ? 128 : 0) + + + v3d_get_utile_pixel_offset(cpp, + x & (utile_w - 1), + y & (utile_h - 1))); +} + +static inline uint32_t +v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_ublinear_pixel_offset(cpp, x, y, 2); +} + +static inline uint32_t +v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_ublinear_pixel_offset(cpp, x, y, 1); +} + +/** + * Returns the byte offset for a given pixel in a UIF layout. + * + * UIF is the general V3D tiling layout shared across 3D, media, and scanout. + * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in + * 4x4 groups, and those 4x4 groups are then stored in raster order. + */ +static inline uint32_t +v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y, + bool do_xor) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t mb_width = utile_w * 2; + uint32_t mb_height = utile_h * 2; + uint32_t log2_mb_width = ffs(mb_width) - 1; + uint32_t log2_mb_height = ffs(mb_height) - 1; + + /* Macroblock X, y */ + uint32_t mb_x = x >> log2_mb_width; + uint32_t mb_y = y >> log2_mb_height; + /* X, y within the macroblock */ + uint32_t mb_pixel_x = x - (mb_x << log2_mb_width); + uint32_t mb_pixel_y = y - (mb_y << log2_mb_height); + + if (do_xor && (mb_x / 4) & 1) + mb_y ^= 0x10; + + uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height; + uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4; + + uint32_t mb_base_addr = mb_id * 256; + + bool top = mb_pixel_y < utile_h; + bool left = mb_pixel_x < utile_w; + + /* Docs have this in pixels, we do bytes here. 
*/ + uint32_t mb_tile_offset = (!top * 128 + !left * 64); + + uint32_t utile_x = mb_pixel_x & (utile_w - 1); + uint32_t utile_y = mb_pixel_y & (utile_h - 1); + + uint32_t mb_pixel_address = (mb_base_addr + + mb_tile_offset + + v3d_get_utile_pixel_offset(cpp, + utile_x, + utile_y)); + + return mb_pixel_address; +} + +static inline uint32_t +v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true); +} + +static inline uint32_t +v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h, + uint32_t x, uint32_t y) +{ + return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false); +} + +/* Loads/stores non-utile-aligned boxes by walking over the destination + * rectangle, computing the address on the GPU, and storing/loading a pixel at + * a time. + */ +static inline void +v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + for (uint32_t y = 0; y < box->height; y++) { + void *cpu_row = cpu + y * cpu_stride; + + for (int x = 0; x < box->width; x++) { + uint32_t pixel_offset = get_pixel_offset(cpp, image_h, + box->x + x, + box->y + y); + + if (false) { + fprintf(stderr, "%3d,%3d -> %d\n", + box->x + x, box->y + y, + pixel_offset); + } + + if (is_load) { + memcpy(cpu_row + x * cpp, + gpu + pixel_offset, + cpp); + } else { + memcpy(gpu + pixel_offset, + cpu_row + x * cpp, + cpp); + } + } + } +} + +/* Breaks the image down into utiles and calls either the fast whole-utile + * load/store functions, or the unaligned fallback case. + */ +static inline void +v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + uint32_t utile_w = v3d_utile_width(cpp); + uint32_t utile_h = v3d_utile_height(cpp); + uint32_t utile_gpu_stride = utile_w * cpp; + uint32_t x1 = box->x; + uint32_t y1 = box->y; + uint32_t x2 = box->x + box->width; + uint32_t y2 = box->y + box->height; + uint32_t align_x1 = align(x1, utile_w); + uint32_t align_y1 = align(y1, utile_h); + uint32_t align_x2 = x2 & ~(utile_w - 1); + uint32_t align_y2 = y2 & ~(utile_h - 1); + + /* Load/store all the whole utiles first. */ + for (uint32_t y = align_y1; y < align_y2; y += utile_h) { + void *cpu_row = cpu + (y - box->y) * cpu_stride; + + for (uint32_t x = align_x1; x < align_x2; x += utile_w) { + void *utile_gpu = (gpu + + get_pixel_offset(cpp, image_h, x, y)); + void *utile_cpu = cpu_row + (x - box->x) * cpp; + + if (is_load) { + v3d_load_utile(utile_cpu, cpu_stride, + utile_gpu, utile_gpu_stride); + } else { + v3d_store_utile(utile_gpu, utile_gpu_stride, + utile_cpu, cpu_stride); + } + } + } + + /* If there were no aligned utiles in the middle, load/store the whole + * thing unaligned. + */ + if (align_y2 <= align_y1 || + align_x2 <= align_x1) { + v3d_move_pixels_unaligned(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, + box, + get_pixel_offset, is_load); + return; + } + + /* Load/store the partial utiles. 
*/ + struct pipe_box partial_boxes[4] = { + /* Top */ + { + .x = x1, + .width = x2 - x1, + .y = y1, + .height = align_y1 - y1, + }, + /* Bottom */ + { + .x = x1, + .width = x2 - x1, + .y = align_y2, + .height = y2 - align_y2, + }, + /* Left */ + { + .x = x1, + .width = align_x1 - x1, + .y = align_y1, + .height = align_y2 - align_y1, + }, + /* Right */ + { + .x = align_x2, + .width = x2 - align_x2, + .y = align_y1, + .height = align_y2 - align_y1, + }, + }; + for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) { + void *partial_cpu = (cpu + + (partial_boxes[i].y - y1) * cpu_stride + + (partial_boxes[i].x - x1) * cpp); + + v3d_move_pixels_unaligned(gpu, gpu_stride, + partial_cpu, cpu_stride, + cpp, image_h, + &partial_boxes[i], + get_pixel_offset, is_load); + } +} + +static inline void +v3d_move_pixels_general(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + int cpp, uint32_t image_h, + const struct pipe_box *box, + uint32_t (*get_pixel_offset)(uint32_t cpp, + uint32_t image_h, + uint32_t x, uint32_t y), + bool is_load) +{ + switch (cpp) { + case 1: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 1, image_h, box, + get_pixel_offset, + is_load); + break; + case 2: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 2, image_h, box, + get_pixel_offset, + is_load); + break; + case 4: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 4, image_h, box, + get_pixel_offset, + is_load); + break; + case 8: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 8, image_h, box, + get_pixel_offset, + is_load); + break; + case 16: + v3d_move_pixels_general_percpp(gpu, gpu_stride, + cpu, cpu_stride, + 16, image_h, box, + get_pixel_offset, + is_load); + break; + } +} + +static inline void +v3d_move_tiled_image(void *gpu, uint32_t gpu_stride, + void *cpu, uint32_t cpu_stride, + enum v3d_tiling_mode tiling_format, + int cpp, + uint32_t image_h, + const struct pipe_box *box, + bool is_load) +{ + switch (tiling_format) { + case V3D_TILING_UIF_XOR: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_uif_xor_pixel_offset, + is_load); + break; + case V3D_TILING_UIF_NO_XOR: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_uif_no_xor_pixel_offset, + is_load); + break; + case V3D_TILING_UBLINEAR_2_COLUMN: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_ublinear_2_column_pixel_offset, + is_load); + break; + case V3D_TILING_UBLINEAR_1_COLUMN: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_ublinear_1_column_pixel_offset, + is_load); + break; + case V3D_TILING_LINEARTILE: + v3d_move_pixels_general(gpu, gpu_stride, + cpu, cpu_stride, + cpp, image_h, box, + v3d_get_lt_pixel_offset, + is_load); + break; + default: + unreachable("Unsupported tiling format"); + break; + } +} + +/** + * Loads pixel data from the start (microtile-aligned) box in \p src to the + * start of \p dst according to the given tiling format. + */ +void +v3d_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box) +{ + v3d_move_tiled_image(src, src_stride, + dst, dst_stride, + tiling_format, + cpp, + image_h, + box, + true); +} + +/** + * Stores pixel data from the start of \p src into a (microtile-aligned) box in + * \p dst according to the given tiling format. 
+
+/**
+ * Stores pixel data from the start of \p src into a (microtile-aligned) box
+ * in \p dst according to the given tiling format.
+ */
+void
+v3d_store_tiled_image(void *dst, uint32_t dst_stride,
+                      void *src, uint32_t src_stride,
+                      enum v3d_tiling_mode tiling_format, int cpp,
+                      uint32_t image_h,
+                      const struct pipe_box *box)
+{
+        v3d_move_tiled_image(dst, dst_stride,
+                             src, src_stride,
+                             tiling_format,
+                             cpp,
+                             image_h,
+                             box,
+                             false);
+}
diff --git a/lib/mesa/src/broadcom/common/v3d_tiling.h b/lib/mesa/src/broadcom/common/v3d_tiling.h
new file mode 100644
index 000000000..08ae7cce8
--- /dev/null
+++ b/lib/mesa/src/broadcom/common/v3d_tiling.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef V3D_TILING_H
+#define V3D_TILING_H
+
+#include "util/u_box.h"
+
+/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These
+ * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB
+ * page. Those pages are then arranged left-to-right, top-to-bottom, to cover
+ * an image.
+ *
+ * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte
+ * utiles. Utiles may be 8x8 (8bpp), 8x4 (16bpp) or 4x4 (32bpp).
+ */
+
+/**
+ * Tiling mode enum used for v3d_resource.c, which maps directly to the Memory
+ * Format field of render target and Z/Stencil config.
+ */
+enum v3d_tiling_mode {
+        /* Untiled resources. Not valid as texture inputs. */
+        V3D_TILING_RASTER,
+
+        /* Single line of u-tiles. */
+        V3D_TILING_LINEARTILE,
+
+        /* Departure from standard 4-UIF block column format. */
+        V3D_TILING_UBLINEAR_1_COLUMN,
+
+        /* Departure from standard 4-UIF block column format. */
+        V3D_TILING_UBLINEAR_2_COLUMN,
+
+        /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+         * split 2x2 into utiles.
+         */
+        V3D_TILING_UIF_NO_XOR,
+
+        /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+         * split 2x2 into utiles.
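+         *
+         * The XOR variant below differs from NO_XOR only in an extra address
+         * swizzle applied in v3d_get_uif_pixel_offset() (the do_xor parameter
+         * used by the two wrappers in v3d_tiling.c), presumably to spread
+         * accesses across memory banks.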
+ */ + V3D_TILING_UIF_XOR, +}; + +uint32_t v3d_utile_width(int cpp) ATTRIBUTE_CONST; +uint32_t v3d_utile_height(int cpp) ATTRIBUTE_CONST; +bool v3d_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST; +void v3d_load_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); +void v3d_store_tiled_image(void *dst, uint32_t dst_stride, + void *src, uint32_t src_stride, + enum v3d_tiling_mode tiling_format, int cpp, + uint32_t image_h, + const struct pipe_box *box); + +#endif /* V3D_TILING_H */ diff --git a/lib/mesa/src/broadcom/common/v3d_util.c b/lib/mesa/src/broadcom/common/v3d_util.c new file mode 100644 index 000000000..424656fd8 --- /dev/null +++ b/lib/mesa/src/broadcom/common/v3d_util.c @@ -0,0 +1,88 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3d_util.h" +#include "util/macros.h" + +/* Choose a number of workgroups per supergroup that maximizes + * lane occupancy. We can pack up to 16 workgroups into a supergroup. + */ +uint32_t +v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo, + bool has_subgroups, + bool has_tsy_barrier, + uint32_t threads, + uint32_t num_wgs, + uint32_t wg_size) +{ + /* FIXME: subgroups may restrict supergroup packing. For now, we disable it + * completely if the shader uses subgroups. + */ + if (has_subgroups) + return 1; + + /* Compute maximum number of batches in a supergroup for this workgroup size. + * Each batch is 16 elements, and we can have up to 16 work groups in a + * supergroup: + * + * max_batches_per_sg = (wg_size * max_wgs_per_sg) / elements_per_batch + * since max_wgs_per_sg = 16 and elements_per_batch = 16, we get: + * max_batches_per_sg = wg_size + */ + uint32_t max_batches_per_sg = wg_size; + + /* QPU threads will stall at TSY barriers until the entire supergroup + * reaches the barrier. Limit the supergroup size to half the QPU threads + * available, so we can have at least 2 supergroups executing in parallel + * and we don't stall all our QPU threads when a supergroup hits a barrier. 
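+    *
+    * For example (illustrative numbers, the real ones come from devinfo):
+    * 8 QPUs running 4 threads each give 32 QPU threads, which would cap a
+    * supergroup at 16 batches here. And for the search below, a
+    * 12-invocation workgroup wastes 4 of the 16 lanes in a batch on its
+    * own, while 4 such workgroups fill exactly 3 batches (48 lanes), so
+    * wgs_per_sg = 4 would be picked.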
+    */
+   if (has_tsy_barrier) {
+      uint32_t max_qpu_threads = devinfo->qpu_count * threads;
+      max_batches_per_sg = MIN2(max_batches_per_sg, max_qpu_threads / 2);
+   }
+   uint32_t max_wgs_per_sg = max_batches_per_sg * 16 / wg_size;
+
+   uint32_t best_wgs_per_sg = 1;
+   uint32_t best_unused_lanes = 16;
+   for (uint32_t wgs_per_sg = 1; wgs_per_sg <= max_wgs_per_sg; wgs_per_sg++) {
+      /* Don't try to pack more workgroups per supergroup than the total
+       * number of workgroups dispatched.
+       */
+      if (wgs_per_sg > num_wgs)
+         return best_wgs_per_sg;
+
+      /* Compute wasted lanes for this configuration and keep track of the
+       * config with the least waste.
+       */
+      uint32_t unused_lanes = (16 - ((wgs_per_sg * wg_size) % 16)) & 0x0f;
+      if (unused_lanes == 0)
+         return wgs_per_sg;
+
+      if (unused_lanes < best_unused_lanes) {
+         best_wgs_per_sg = wgs_per_sg;
+         best_unused_lanes = unused_lanes;
+      }
+   }
+
+   return best_wgs_per_sg;
+}
diff --git a/lib/mesa/src/broadcom/common/v3d_util.h b/lib/mesa/src/broadcom/common/v3d_util.h
new file mode 100644
index 000000000..b9804f235
--- /dev/null
+++ b/lib/mesa/src/broadcom/common/v3d_util.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef V3D_UTIL_H
+#define V3D_UTIL_H
+
+#include "common/v3d_device_info.h"
+
+uint32_t
+v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
+                                         bool has_subgroups,
+                                         bool has_tsy_barrier,
+                                         uint32_t threads,
+                                         uint32_t num_wgs,
+                                         uint32_t wg_size);
+
+#endif
diff --git a/lib/mesa/src/broadcom/compiler/nir_to_vir.c b/lib/mesa/src/broadcom/compiler/nir_to_vir.c
index c70d12881..d0a89f1a7 100644
--- a/lib/mesa/src/broadcom/compiler/nir_to_vir.c
+++ b/lib/mesa/src/broadcom/compiler/nir_to_vir.c
@@ -68,6 +68,39 @@
 #define V3D_TSY_DEC_SEMAPHORE          14
 #define V3D_TSY_SET_QUORUM_FREE_ALL    15
 
+enum v3d_tmu_op_type
+{
+        V3D_TMU_OP_TYPE_REGULAR,
+        V3D_TMU_OP_TYPE_ATOMIC,
+        V3D_TMU_OP_TYPE_CACHE
+};
+
+static enum v3d_tmu_op_type
+v3d_tmu_get_type_from_op(uint32_t tmu_op, bool is_write)
+{
+        switch(tmu_op) {
+        case V3D_TMU_OP_WRITE_ADD_READ_PREFETCH:
+        case V3D_TMU_OP_WRITE_SUB_READ_CLEAR:
+        case V3D_TMU_OP_WRITE_XCHG_READ_FLUSH:
+        case V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH:
+        case V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR:
+                return is_write ?
+                        V3D_TMU_OP_TYPE_ATOMIC : V3D_TMU_OP_TYPE_CACHE;
+        case V3D_TMU_OP_WRITE_UMAX:
+        case V3D_TMU_OP_WRITE_SMIN:
+        case V3D_TMU_OP_WRITE_SMAX:
+                assert(is_write);
+                FALLTHROUGH;
+        case V3D_TMU_OP_WRITE_AND_READ_INC:
+        case V3D_TMU_OP_WRITE_OR_READ_DEC:
+        case V3D_TMU_OP_WRITE_XOR_READ_NOT:
+                return V3D_TMU_OP_TYPE_ATOMIC;
+        case V3D_TMU_OP_REGULAR:
+                return V3D_TMU_OP_TYPE_REGULAR;
+
+        default:
+                unreachable("Unknown tmu_op\n");
+        }
+}
 
 static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
 
@@ -282,6 +315,8 @@ ntq_add_pending_tmu_flush(struct v3d_compile *c,
 
         if (c->disable_tmu_pipelining)
                 ntq_flush_tmu(c);
+        else if (c->tmu.flush_count > 1)
+                c->pipelined_any_tmu = true;
 }
 
 enum emit_mode {
@@ -565,11 +600,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                            &tmu_writes);
                 }
 
-                /* The spec says that for atomics, the TYPE field is
-                 * ignored, but that doesn't seem to be the case for
-                 * CMPXCHG. Just use the number of tmud writes we did
-                 * to decide the type (or choose "32bit" for atomic
-                 * reads, which has been fine).
+                /* For atomics we use 32bit, except for CMPXCHG, which needs
+                 * to use VEC2. For the rest of the cases we use the number of
+                 * tmud writes we did to decide the type. For cache operations
+                 * the type is ignored.
                  */
                 uint32_t config = 0;
                 if (mode == MODE_EMIT) {
@@ -580,6 +614,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                         assert(tmu_writes > 0);
                         num_components = tmu_writes - 1;
                 }
+                bool is_atomic =
+                        v3d_tmu_get_type_from_op(tmu_op, !is_load) ==
+                        V3D_TMU_OP_TYPE_ATOMIC;
 
                 uint32_t perquad = is_load && !vir_in_nonuniform_control_flow(c)
@@ -587,7 +624,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                         : GENERAL_TMU_LOOKUP_PER_PIXEL;
                 config = 0xffffff00 | tmu_op << 3 | perquad;
 
-                if (num_components == 1) {
+                if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
+                        config |= GENERAL_TMU_LOOKUP_TYPE_VEC2;
+                } else if (is_atomic || num_components == 1) {
                         config |= GENERAL_TMU_LOOKUP_TYPE_32BIT_UI;
                 } else {
                         config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 +
@@ -1191,6 +1230,18 @@ out:
         return V3D_QPU_COND_IFNA;
 }
 
+static struct qreg
+ntq_emit_cond_to_bool(struct v3d_compile *c, enum v3d_qpu_cond cond)
+{
+        struct qreg result =
+                vir_MOV(c, vir_SEL(c, cond,
+                                   vir_uniform_ui(c, ~0),
+                                   vir_uniform_ui(c, 0)));
+        c->flags_temp = result.index;
+        c->flags_cond = cond;
+        return result;
+}
+
 static void
 ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
 {
@@ -1354,11 +1405,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                 enum v3d_qpu_cond cond;
                 ASSERTED bool ok = ntq_emit_comparison(c, instr, &cond);
                 assert(ok);
-                result = vir_MOV(c, vir_SEL(c, cond,
-                                            vir_uniform_ui(c, ~0),
-                                            vir_uniform_ui(c, 0)));
-                c->flags_temp = result.index;
-                c->flags_cond = cond;
+                result = ntq_emit_cond_to_bool(c, cond);
                 break;
         }
 
@@ -1438,11 +1485,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
         case nir_op_uadd_carry:
                 vir_set_pf(c, vir_ADD_dest(c, vir_nop_reg(), src[0], src[1]),
                            V3D_QPU_PF_PUSHC);
-                result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
-                                            vir_uniform_ui(c, ~0),
-                                            vir_uniform_ui(c, 0)));
-                c->flags_temp = result.index;
-                c->flags_cond = V3D_QPU_COND_IFA;
+                result = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA);
                 break;
 
         case nir_op_pack_half_2x16_split:
@@ -1627,6 +1670,15 @@ vir_emit_tlb_color_write(struct v3d_compile *c, unsigned rt)
 static void
 emit_frag_end(struct v3d_compile *c)
 {
+        /* If the shader has no non-TLB side effects and doesn't write Z
+         * we can promote it to enabling
early_fragment_tests even + * if the user didn't. + */ + if (c->output_position_index == -1 && + !(c->s->info.num_images || c->s->info.num_ssbos)) { + c->s->info.fs.early_fragment_tests = true; + } + if (c->output_sample_mask_index != -1) { vir_SETMSF_dest(c, vir_nop_reg(), vir_AND(c, @@ -1651,7 +1703,8 @@ emit_frag_end(struct v3d_compile *c) } struct qreg tlbu_reg = vir_magic_reg(V3D_QPU_WADDR_TLBU); - if (c->output_position_index != -1) { + if (c->output_position_index != -1 && + !c->s->info.fs.early_fragment_tests) { struct qinst *inst = vir_MOV_dest(c, tlbu_reg, c->outputs[c->output_position_index]); uint8_t tlb_specifier = TLB_TYPE_DEPTH; @@ -1711,17 +1764,22 @@ emit_frag_end(struct v3d_compile *c) static inline void vir_VPM_WRITE_indirect(struct v3d_compile *c, struct qreg val, - struct qreg vpm_index) + struct qreg vpm_index, + bool uniform_vpm_index) { assert(c->devinfo->ver >= 40); - vir_STVPMV(c, vpm_index, val); + if (uniform_vpm_index) + vir_STVPMV(c, vpm_index, val); + else + vir_STVPMD(c, vpm_index, val); } static void vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index) { if (c->devinfo->ver >= 40) { - vir_VPM_WRITE_indirect(c, val, vir_uniform_ui(c, vpm_index)); + vir_VPM_WRITE_indirect(c, val, + vir_uniform_ui(c, vpm_index), true); } else { /* XXX: v3d33_vir_vpm_write_setup(c); */ vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); @@ -1774,7 +1832,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset, } void -v3d_optimize_nir(struct nir_shader *s) +v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s) { bool progress; unsigned lower_flrp = @@ -1787,7 +1845,7 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS_V(s, nir_lower_vars_to_ssa); NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS(progress, s, nir_lower_phis_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_dce); @@ -1825,6 +1883,14 @@ v3d_optimize_nir(struct nir_shader *s) NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_lower_undef_to_zero); + + if (c && !c->disable_loop_unrolling && + s->options->max_unroll_iterations > 0) { + bool local_progress = false; + NIR_PASS(local_progress, s, nir_opt_loop_unroll); + c->unrolled_any_loops |= local_progress; + progress |= local_progress; + } } while (progress); nir_move_options sink_opts = @@ -1836,15 +1902,11 @@ v3d_optimize_nir(struct nir_shader *s) } static int -driver_location_compare(const void *in_a, const void *in_b) +driver_location_compare(const nir_variable *a, const nir_variable *b) { - const nir_variable *const *a = in_a; - const nir_variable *const *b = in_b; - - if ((*a)->data.driver_location == (*b)->data.driver_location) - return (*a)->data.location_frac - (*b)->data.location_frac; - - return (*a)->data.driver_location - (*b)->data.driver_location; + return a->data.driver_location == b->data.driver_location ? 
+ a->data.location_frac - b->data.location_frac : + a->data.driver_location - b->data.driver_location; } static struct qreg @@ -1984,49 +2046,36 @@ program_reads_point_coord(struct v3d_compile *c) } static void -get_sorted_input_variables(struct v3d_compile *c, - unsigned *num_entries, - nir_variable ***vars) -{ - *num_entries = 0; - nir_foreach_shader_in_variable(var, c->s) - (*num_entries)++; - - *vars = ralloc_array(c, nir_variable *, *num_entries); - - unsigned i = 0; - nir_foreach_shader_in_variable(var, c->s) - (*vars)[i++] = var; - - /* Sort the variables so that we emit the input setup in - * driver_location order. This is required for VPM reads, whose data - * is fetched into the VPM in driver_location (TGSI register index) - * order. - */ - qsort(*vars, *num_entries, sizeof(**vars), driver_location_compare); -} - -static void ntq_setup_gs_inputs(struct v3d_compile *c) { - nir_variable **vars; - unsigned num_entries; - get_sorted_input_variables(c, &num_entries, &vars); - - for (unsigned i = 0; i < num_entries; i++) { - nir_variable *var = vars[i]; + nir_sort_variables_with_modes(c->s, driver_location_compare, + nir_var_shader_in); + nir_foreach_shader_in_variable(var, c->s) { /* All GS inputs are arrays with as many entries as vertices * in the input primitive, but here we only care about the * per-vertex input type. */ - const struct glsl_type *type = glsl_without_array(var->type); + assert(glsl_type_is_array(var->type)); + const struct glsl_type *type = glsl_get_array_element(var->type); unsigned array_len = MAX2(glsl_get_length(type), 1); unsigned loc = var->data.driver_location; resize_qreg_array(c, &c->inputs, &c->inputs_array_size, (loc + array_len) * 4); + if (var->data.compact) { + for (unsigned j = 0; j < array_len; j++) { + unsigned input_idx = c->num_inputs++; + unsigned loc_frac = var->data.location_frac + j; + unsigned loc = var->data.location + loc_frac / 4; + unsigned comp = loc_frac % 4; + c->input_slots[input_idx] = + v3d_slot_from_slot_and_component(loc, comp); + } + continue; + } + for (unsigned j = 0; j < array_len; j++) { unsigned num_elements = glsl_get_vector_elements(type); for (unsigned k = 0; k < num_elements; k++) { @@ -2044,12 +2093,10 @@ ntq_setup_gs_inputs(struct v3d_compile *c) static void ntq_setup_fs_inputs(struct v3d_compile *c) { - nir_variable **vars; - unsigned num_entries; - get_sorted_input_variables(c, &num_entries, &vars); + nir_sort_variables_with_modes(c->s, driver_location_compare, + nir_var_shader_in); - for (unsigned i = 0; i < num_entries; i++) { - nir_variable *var = vars[i]; + nir_foreach_shader_in_variable(var, c->s) { unsigned var_len = glsl_count_vec4_slots(var->type, false, false); unsigned loc = var->data.driver_location; @@ -2062,6 +2109,14 @@ ntq_setup_fs_inputs(struct v3d_compile *c) if (var->data.location == VARYING_SLOT_POS) { emit_fragcoord_input(c, loc); + } else if (var->data.location == VARYING_SLOT_PRIMITIVE_ID && + !c->fs_key->has_gs) { + /* If the fragment shader reads gl_PrimitiveID and we + * don't have a geometry shader in the pipeline to write + * it then we program the hardware to inject it as + * an implicit varying. Take it from there. 
+ */ + c->inputs[loc * 4] = c->primitive_id; } else if (util_varying_is_point_coord(var->data.location, c->fs_key->point_sprite_mask)) { c->inputs[loc * 4 + 0] = c->point_x; @@ -2342,8 +2397,16 @@ ntq_emit_load_uniform(struct v3d_compile *c, nir_intrinsic_instr *instr) static void ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr) { - /* XXX: Use ldvpmv (uniform offset) or ldvpmd (non-uniform offset) - * and enable PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR. + /* XXX: Use ldvpmv (uniform offset) or ldvpmd (non-uniform offset). + * + * Right now the driver sets PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR even + * if we don't support non-uniform offsets because we also set the + * lower_all_io_to_temps option in the NIR compiler. This ensures that + * any indirect indexing on in/out variables is turned into indirect + * indexing on temporary variables instead, that we handle by lowering + * to scratch. If we implement non-uniform offset here we might be able + * to avoid the temp and scratch lowering, which involves copying from + * the input to the temp variable, possibly making code more optimal. */ unsigned offset = nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0]); @@ -2448,10 +2511,10 @@ emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr) * different offsets in the VPM and we need to use the scatter write * instruction to have a different offset for each lane. */ - if (nir_src_is_dynamically_uniform(instr->src[1])) - vir_VPM_WRITE_indirect(c, val, offset); - else - vir_STVPMD(c, offset, val); + bool is_uniform_offset = + !vir_in_nonuniform_control_flow(c) && + !nir_src_is_divergent(instr->src[1]); + vir_VPM_WRITE_indirect(c, val, offset, is_uniform_offset); if (vir_in_nonuniform_control_flow(c)) { struct qinst *last_inst = @@ -2461,33 +2524,37 @@ emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr) } static void +emit_store_output_vs(struct v3d_compile *c, nir_intrinsic_instr *instr) +{ + assert(c->s->info.stage == MESA_SHADER_VERTEX); + assert(instr->num_components == 1); + + uint32_t base = nir_intrinsic_base(instr); + struct qreg val = ntq_get_src(c, instr->src[0], 0); + + if (nir_src_is_const(instr->src[1])) { + vir_VPM_WRITE(c, val, + base + nir_src_as_uint(instr->src[1])); + } else { + struct qreg offset = vir_ADD(c, + ntq_get_src(c, instr->src[1], 1), + vir_uniform_ui(c, base)); + bool is_uniform_offset = + !vir_in_nonuniform_control_flow(c) && + !nir_src_is_divergent(instr->src[1]); + vir_VPM_WRITE_indirect(c, val, offset, is_uniform_offset); + } +} + +static void ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr) { - /* XXX perf: Use stvpmv with uniform non-constant offsets and - * stvpmd with non-uniform offsets and enable - * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR. 
- */ - if (c->s->info.stage == MESA_SHADER_FRAGMENT) { + if (c->s->info.stage == MESA_SHADER_FRAGMENT) ntq_emit_color_write(c, instr); - } else if (c->s->info.stage == MESA_SHADER_GEOMETRY) { + else if (c->s->info.stage == MESA_SHADER_GEOMETRY) emit_store_output_gs(c, instr); - } else { - assert(c->s->info.stage == MESA_SHADER_VERTEX); - assert(instr->num_components == 1); - - uint32_t base = nir_intrinsic_base(instr); - if (nir_src_is_const(instr->src[1])) { - vir_VPM_WRITE(c, - ntq_get_src(c, instr->src[0], 0), - base + nir_src_as_uint(instr->src[1])); - } else { - vir_VPM_WRITE_indirect(c, - ntq_get_src(c, instr->src[0], 0), - vir_ADD(c, - ntq_get_src(c, instr->src[1], 1), - vir_uniform_ui(c, base))); - } - } + else + emit_store_output_vs(c, instr); } /** @@ -2707,6 +2774,41 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) } } +static inline struct qreg +emit_load_local_invocation_index(struct v3d_compile *c) +{ + return vir_SHR(c, c->cs_payload[1], + vir_uniform_ui(c, 32 - c->local_invocation_index_bits)); +} + +/* Various subgroup operations rely on the A flags, so this helper ensures that + * A flags represents currently active lanes in the subgroup. + */ +static void +set_a_flags_for_subgroup(struct v3d_compile *c) +{ + /* MSF returns 0 for disabled lanes in compute shaders so + * PUSHZ will set A=1 for disabled lanes. We want the inverse + * of this but we don't have any means to negate the A flags + * directly, but we can do it by repeating the same operation + * with NORZ (A = ~A & ~Z). + */ + assert(c->s->info.stage == MESA_SHADER_COMPUTE); + vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ); + vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ); + + /* If we are under non-uniform control flow we also need to + * AND the A flags with the current execute mask. 
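+         *
+         * (Worked logic for the PUSHZ/NORZ pair above: after PUSHZ,
+         * A == Z == (msf == 0), i.e. A is set for disabled lanes. The second
+         * MSF write with NORZ computes A = ~A & ~Z = ~Z, i.e. A ends up set
+         * exactly for the active lanes.)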
+         */
+        if (vir_in_nonuniform_control_flow(c)) {
+                const uint32_t bidx = c->cur_block->index;
+                vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(),
+                                           c->execute,
+                                           vir_uniform_ui(c, bidx)),
+                           V3D_QPU_UF_ANDZ);
+        }
+}
+
 static void
 ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 {
@@ -2772,7 +2874,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_get_ssbo_size:
                 ntq_store_dest(c, &instr->dest, 0,
                                vir_uniform(c, QUNIFORM_GET_SSBO_SIZE,
-                                           nir_src_as_uint(instr->src[0])));
+                                           nir_src_comp_as_uint(instr->src[0], 0)));
                 break;
 
         case nir_intrinsic_get_ubo_size:
@@ -2830,11 +2932,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
         case nir_intrinsic_load_helper_invocation:
                 vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
-                struct qreg qdest = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
-                                                       vir_uniform_ui(c, ~0),
-                                                       vir_uniform_ui(c, 0)));
-                c->flags_temp = qdest.index;
-                c->flags_cond = V3D_QPU_COND_IFA;
+                struct qreg qdest = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA);
                 ntq_store_dest(c, &instr->dest, 0, qdest);
                 break;
 
@@ -2960,7 +3058,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 vir_emit_thrsw(c);
                 break;
 
-        case nir_intrinsic_load_num_work_groups:
+        case nir_intrinsic_load_num_workgroups:
                 for (int i = 0; i < 3; i++) {
                         ntq_store_dest(c, &instr->dest, i,
                                        vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS,
@@ -2968,27 +3066,49 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 }
                 break;
 
-        case nir_intrinsic_load_local_invocation_index:
-                ntq_store_dest(c, &instr->dest, 0,
-                               vir_SHR(c, c->cs_payload[1],
-                                       vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
+        case nir_intrinsic_load_workgroup_id: {
+                struct qreg x = vir_AND(c, c->cs_payload[0],
+                                        vir_uniform_ui(c, 0xffff));
+
+                struct qreg y = vir_SHR(c, c->cs_payload[0],
+                                        vir_uniform_ui(c, 16));
+
+                struct qreg z = vir_AND(c, c->cs_payload[1],
+                                        vir_uniform_ui(c, 0xffff));
+
+                /* We only support dispatch base in Vulkan */
+                if (c->key->environment == V3D_ENVIRONMENT_VULKAN) {
+                        x = vir_ADD(c, x,
+                                    vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 0));
+                        y = vir_ADD(c, y,
+                                    vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 1));
+                        z = vir_ADD(c, z,
+                                    vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 2));
+                }
+
+                ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, x));
+                ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, y));
+                ntq_store_dest(c, &instr->dest, 2, vir_MOV(c, z));
                 break;
+        }
 
-        case nir_intrinsic_load_work_group_id:
+        case nir_intrinsic_load_local_invocation_index:
                 ntq_store_dest(c, &instr->dest, 0,
-                               vir_AND(c, c->cs_payload[0],
-                                       vir_uniform_ui(c, 0xffff)));
-                ntq_store_dest(c, &instr->dest, 1,
-                               vir_SHR(c, c->cs_payload[0],
-                                       vir_uniform_ui(c, 16)));
-                ntq_store_dest(c, &instr->dest, 2,
-                               vir_AND(c, c->cs_payload[1],
-                                       vir_uniform_ui(c, 0xffff)));
+                               emit_load_local_invocation_index(c));
                 break;
 
-        case nir_intrinsic_load_subgroup_id:
-                ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
+        case nir_intrinsic_load_subgroup_id: {
+                /* This is basically the batch index, which is the Local
+                 * Invocation Index divided by the SIMD width.
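+                 *
+                 * For example, with V3D_CHANNELS == 16 the shift below is 4,
+                 * so local invocation indices 0..15 map to subgroup 0,
+                 * 16..31 to subgroup 1, and so on.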
+ */ + STATIC_ASSERT(util_is_power_of_two_nonzero(V3D_CHANNELS)); + const uint32_t divide_shift = ffs(V3D_CHANNELS) - 1; + struct qreg lii = emit_load_local_invocation_index(c); + ntq_store_dest(c, &instr->dest, 0, + vir_SHR(c, lii, + vir_uniform_ui(c, divide_shift))); break; + } case nir_intrinsic_load_per_vertex_input: { /* The vertex shader writes all its used outputs into @@ -3002,11 +3122,17 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) * * col: vertex index, row = varying index */ + assert(nir_src_is_const(instr->src[1])); + uint32_t location = + nir_intrinsic_io_semantics(instr).location + + nir_src_as_uint(instr->src[1]); + uint32_t component = nir_intrinsic_component(instr); + int32_t row_idx = -1; for (int i = 0; i < c->num_inputs; i++) { struct v3d_varying_slot slot = c->input_slots[i]; - if (v3d_slot_get_slot(slot) == nir_intrinsic_io_semantics(instr).location && - v3d_slot_get_component(slot) == nir_intrinsic_component(instr)) { + if (v3d_slot_get_slot(slot) == location && + v3d_slot_get_component(slot) == component) { row_idx = i; break; } @@ -3033,6 +3159,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) * VPM output header. According to docs, we should read this * using ldvpm(v,d)_in (See Table 71). */ + assert(c->s->info.stage == MESA_SHADER_GEOMETRY); ntq_store_dest(c, &instr->dest, 0, vir_LDVPMV_IN(c, vir_uniform_ui(c, 0))); break; @@ -3146,6 +3273,37 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; } + case nir_intrinsic_load_subgroup_size: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform_ui(c, V3D_CHANNELS)); + break; + + case nir_intrinsic_load_subgroup_invocation: + ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c)); + break; + + case nir_intrinsic_elect: { + set_a_flags_for_subgroup(c); + struct qreg first = vir_FLAFIRST(c); + + /* Produce a boolean result from Flafirst */ + vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(), + first, vir_uniform_ui(c, 1)), + V3D_QPU_PF_PUSHZ); + struct qreg result = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA); + ntq_store_dest(c, &instr->dest, 0, result); + break; + } + + case nir_intrinsic_load_num_subgroups: + unreachable("Should have been lowered"); + break; + + case nir_intrinsic_load_view_index: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_VIEW_INDEX, 0)); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); @@ -3632,9 +3790,15 @@ nir_to_vir(struct v3d_compile *c) c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1)); c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2)); - /* V3D 4.x can disable implicit point coordinate varyings if - * they are not used. - */ + /* V3D 4.x can disable implicit varyings if they are not used */ + c->fs_uses_primitive_id = + nir_find_variable_with_location(c->s, nir_var_shader_in, + VARYING_SLOT_PRIMITIVE_ID); + if (c->fs_uses_primitive_id && !c->fs_key->has_gs) { + c->primitive_id = + emit_fragment_varying(c, NULL, -1, 0, 0); + } + if (c->fs_key->is_points && (c->devinfo->ver < 40 || program_reads_point_coord(c))) { c->point_x = emit_fragment_varying(c, NULL, -1, 0, 0); @@ -3668,9 +3832,9 @@ nir_to_vir(struct v3d_compile *c) /* Set up the division between gl_LocalInvocationIndex and * wg_in_mem in the payload reg. 
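          *
          * For example, a 10x10x1 workgroup (wg_size = 100) rounds up to the
          * next power of two, 128, so gl_LocalInvocationIndex needs 7 bits
          * (the assert below allows at most 8).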
*/ - int wg_size = (c->s->info.cs.local_size[0] * - c->s->info.cs.local_size[1] * - c->s->info.cs.local_size[2]); + int wg_size = (c->s->info.workgroup_size[0] * + c->s->info.workgroup_size[1] * + c->s->info.workgroup_size[2]); c->local_invocation_index_bits = ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1; assert(c->local_invocation_index_bits <= 8); @@ -3678,9 +3842,9 @@ nir_to_vir(struct v3d_compile *c) if (c->s->info.shared_size) { struct qreg wg_in_mem = vir_SHR(c, c->cs_payload[1], vir_uniform_ui(c, 16)); - if (c->s->info.cs.local_size[0] != 1 || - c->s->info.cs.local_size[1] != 1 || - c->s->info.cs.local_size[2] != 1) { + if (c->s->info.workgroup_size[0] != 1 || + c->s->info.workgroup_size[1] != 1 || + c->s->info.workgroup_size[2] != 1) { int wg_bits = (16 - c->local_invocation_index_bits); int wg_mask = (1 << wg_bits) - 1; @@ -3731,46 +3895,6 @@ nir_to_vir(struct v3d_compile *c) } } -const nir_shader_compiler_options v3d_nir_options = { - .lower_add_sat = true, - .lower_all_io_to_temps = true, - .lower_extract_byte = true, - .lower_extract_word = true, - .lower_bitfield_insert_to_shifts = true, - .lower_bitfield_extract_to_shifts = true, - .lower_bitfield_reverse = true, - .lower_bit_count = true, - .lower_cs_local_id_from_index = true, - .lower_ffract = true, - .lower_fmod = true, - .lower_pack_unorm_2x16 = true, - .lower_pack_snorm_2x16 = true, - .lower_pack_unorm_4x8 = true, - .lower_pack_snorm_4x8 = true, - .lower_unpack_unorm_4x8 = true, - .lower_unpack_snorm_4x8 = true, - .lower_pack_half_2x16 = true, - .lower_unpack_half_2x16 = true, - .lower_fdiv = true, - .lower_find_lsb = true, - .lower_ffma16 = true, - .lower_ffma32 = true, - .lower_ffma64 = true, - .lower_flrp32 = true, - .lower_fpow = true, - .lower_fsat = true, - .lower_fsqrt = true, - .lower_ifind_msb = true, - .lower_isign = true, - .lower_ldexp = true, - .lower_mul_high = true, - .lower_wpos_pntc = true, - .lower_rotate = true, - .lower_to_scalar = true, - .has_fsub = true, - .has_isub = true, -}; - /** * When demoting a shader down to single-threaded, removes the THRSW * instructions (one will still be inserted at v3d_vir_to_qpu() for the @@ -3789,9 +3913,25 @@ vir_remove_thrsw(struct v3d_compile *c) c->last_thrsw = NULL; } -void -vir_emit_last_thrsw(struct v3d_compile *c) +/** + * This makes sure we have a top-level last thread switch which signals the + * start of the last thread section, which may include adding a new thrsw + * instruction if needed. We don't allow spilling in the last thread section, so + * if we need to do any spills that inject additional thread switches later on, + * we ensure this thread switch will still be the last thread switch in the + * program, which makes last thread switch signalling a lot easier when we have + * spilling. If in the end we don't need to spill to compile the program and we + * injected a new thread switch instruction here only for that, we will + * eventually restore the previous last thread switch and remove the one we + * added here. + */ +static void +vir_emit_last_thrsw(struct v3d_compile *c, + struct qinst **restore_last_thrsw, + bool *restore_scoreboard_lock) { + *restore_last_thrsw = c->last_thrsw; + /* On V3D before 4.1, we need a TMU op to be outstanding when thread * switching, so disable threads if we didn't do any TMU ops (each of * which would have emitted a THRSW). 
@@ -3800,7 +3940,7 @@ vir_emit_last_thrsw(struct v3d_compile *c) c->threads = 1; if (c->last_thrsw) vir_remove_thrsw(c); - return; + *restore_last_thrsw = NULL; } /* If we're threaded and the last THRSW was in conditional code, then @@ -3823,8 +3963,34 @@ vir_emit_last_thrsw(struct v3d_compile *c) vir_emit_thrsw(c); } + /* If we have not inserted a last thread switch yet, do it now to ensure + * any potential spilling we do happens before this. If we don't spill + * in the end, we will restore the previous one. + */ + if (*restore_last_thrsw == c->last_thrsw) { + if (*restore_last_thrsw) + (*restore_last_thrsw)->is_last_thrsw = false; + *restore_scoreboard_lock = c->lock_scoreboard_on_first_thrsw; + vir_emit_thrsw(c); + } else { + *restore_last_thrsw = c->last_thrsw; + } + + assert(c->last_thrsw); + c->last_thrsw->is_last_thrsw = true; +} + +static void +vir_restore_last_thrsw(struct v3d_compile *c, + struct qinst *thrsw, + bool scoreboard_lock) +{ + assert(c->last_thrsw); + vir_remove_instruction(c, c->last_thrsw); + c->last_thrsw = thrsw; if (c->last_thrsw) c->last_thrsw->is_last_thrsw = true; + c->lock_scoreboard_on_first_thrsw = scoreboard_lock; } /* There's a flag in the shader for "center W is needed for reasons other than @@ -3862,8 +4028,14 @@ v3d_nir_to_vir(struct v3d_compile *c) nir_to_vir(c); + bool restore_scoreboard_lock = false; + struct qinst *restore_last_thrsw; + /* Emit the last THRSW before STVPM and TLB writes. */ - vir_emit_last_thrsw(c); + vir_emit_last_thrsw(c, + &restore_last_thrsw, + &restore_scoreboard_lock); + switch (c->s->info.stage) { case MESA_SHADER_FRAGMENT: @@ -3962,6 +4134,12 @@ v3d_nir_to_vir(struct v3d_compile *c) vir_remove_thrsw(c); } + /* If we didn't spill, then remove the last thread switch we injected + * artificially (if any) and restore the previous one. 
+ */ + if (!c->spills && c->last_thrsw != restore_last_thrsw) + vir_restore_last_thrsw(c, restore_last_thrsw, restore_scoreboard_lock); + if (c->spills && (V3D_DEBUG & (V3D_DEBUG_VIR | v3d_debug_flag_for_shader_stage(c->s->info.stage)))) { diff --git a/lib/mesa/src/broadcom/compiler/qpu_schedule.c b/lib/mesa/src/broadcom/compiler/qpu_schedule.c index 8af2e8ef2..7b9891e86 100644 --- a/lib/mesa/src/broadcom/compiler/qpu_schedule.c +++ b/lib/mesa/src/broadcom/compiler/qpu_schedule.c @@ -492,7 +492,8 @@ struct choose_scoreboard { int last_thrsw_tick; int last_branch_tick; int last_setmsf_tick; - bool tlb_locked; + bool first_thrsw_emitted; + bool last_thrsw_emitted; bool fixup_ldvary; int ldvary_count; }; @@ -576,10 +577,26 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo, } static bool -pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard, +scoreboard_is_locked(struct choose_scoreboard *scoreboard, + bool lock_scoreboard_on_first_thrsw) +{ + if (lock_scoreboard_on_first_thrsw) { + return scoreboard->first_thrsw_emitted && + scoreboard->tick - scoreboard->last_thrsw_tick >= 3; + } + + return scoreboard->last_thrsw_emitted && + scoreboard->tick - scoreboard->last_thrsw_tick >= 3; +} + +static bool +pixel_scoreboard_too_soon(struct v3d_compile *c, + struct choose_scoreboard *scoreboard, const struct v3d_qpu_instr *inst) { - return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst)); + return qpu_inst_is_tlb(inst) && + !scoreboard_is_locked(scoreboard, + c->lock_scoreboard_on_first_thrsw); } static bool @@ -868,9 +885,9 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst) inst->flags.mc = inst->flags.ac; inst->flags.mpf = inst->flags.apf; inst->flags.muf = inst->flags.auf; - inst->flags.ac = V3D_QPU_PF_NONE; + inst->flags.ac = V3D_QPU_COND_NONE; inst->flags.apf = V3D_QPU_PF_NONE; - inst->flags.auf = V3D_QPU_PF_NONE; + inst->flags.auf = V3D_QPU_UF_NONE; } static bool @@ -1053,12 +1070,12 @@ retry: if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst)) continue; - /* "A scoreboard wait must not occur in the first two - * instructions of a fragment shader. This is either the - * explicit Wait for Scoreboard signal or an implicit wait - * with the first tile-buffer read or write instruction." + /* "Before doing a TLB access a scoreboard wait must have been + * done. This happens either on the first or last thread + * switch, depending on a setting (scb_wait_on_first_thrsw) in + * the shader state." */ - if (pixel_scoreboard_too_soon(scoreboard, inst)) + if (pixel_scoreboard_too_soon(c, scoreboard, inst)) continue; /* ldunif and ldvary both write r5, but ldunif does so a tick @@ -1131,12 +1148,10 @@ retry: continue; } - /* Don't merge in something that will lock the TLB. - * Hopwefully what we have in inst will release some - * other instructions, allowing us to delay the - * TLB-locking instruction until later. + /* Don't merge TLB instructions before we have acquired + * the scoreboard lock. 
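+                         * (scoreboard_is_locked() above only reports the lock
+                         * as held once three ticks have passed since the
+                         * relevant thread switch, so a TLB instruction can be
+                         * scheduled at the earliest three ticks after that
+                         * thrsw.)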
                          */
-                        if (!scoreboard->tlb_locked && qpu_inst_is_tlb(inst))
+                        if (pixel_scoreboard_too_soon(c, scoreboard, inst))
                                 continue;
 
                         /* When we successfully pair up an ldvary we then try
@@ -1273,9 +1288,6 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
 
         if (inst->sig.ldvary)
                 scoreboard->last_ldvary_tick = scoreboard->tick;
-
-        if (qpu_inst_is_tlb(inst))
-                scoreboard->tlb_locked = true;
 }
 
 static void
@@ -1490,6 +1502,11 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
                         return false;
         }
 
+        if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
+            !inst->sig_magic) {
+                return false;
+        }
+
         if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
                 return false;
 
@@ -1747,6 +1764,8 @@ emit_thrsw(struct v3d_compile *c,
                 merge_inst = inst;
         }
 
+        scoreboard->first_thrsw_emitted = true;
+
         /* If we're emitting the last THRSW (other than program end), then
          * signal that to the HW by emitting two THRSWs in a row.
          */
@@ -1758,6 +1777,7 @@ emit_thrsw(struct v3d_compile *c,
                 struct qinst *second_inst =
                         (struct qinst *)merge_inst->link.next;
                 second_inst->qpu.sig.thrsw = true;
+                scoreboard->last_thrsw_emitted = true;
         }
 
         /* Make sure the thread end executes within the program lifespan */
@@ -1981,6 +2001,17 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
         if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
                 return false;
 
+        /* The implicit ldvary destination may not be written to by a signal
+         * in the instruction following ldvary. Since we are planning to move
+         * ldvary to the previous instruction, this means we need to check if
+         * the current instruction has any other signal that could create this
+         * conflict. The only other signal that can write to the implicit
+         * ldvary destination that is compatible with ldvary in the same
+         * instruction is ldunif.
+         */
+        if (inst->sig.ldunif)
+                return false;
+
         /* The previous instruction can't write to the same destination as the
          * ldvary.
          */
diff --git a/lib/mesa/src/broadcom/compiler/v3d_compiler.h b/lib/mesa/src/broadcom/compiler/v3d_compiler.h
index f229f414e..f728327f6 100644
--- a/lib/mesa/src/broadcom/compiler/v3d_compiler.h
+++ b/lib/mesa/src/broadcom/compiler/v3d_compiler.h
@@ -299,6 +299,11 @@ enum quniform_contents {
          */
         QUNIFORM_NUM_WORK_GROUPS,
 
+        /* Base workgroup offset passed to vkCmdDispatchBase in the dimension
+         * selected by the data value.
+         */
+        QUNIFORM_WORK_GROUP_BASE,
+
         /**
          * Returns the offset of the scratch buffer for register spilling.
          */
@@ -320,6 +325,11 @@ enum quniform_contents {
          * out-of-bounds accesses into the tile state during binning.
          */
         QUNIFORM_FB_LAYERS,
+
+        /**
+         * Current value of gl_ViewIndex for Multiview rendering.
+         */
+        QUNIFORM_VIEW_INDEX,
 };
 
 static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
@@ -416,6 +426,19 @@ struct v3d_fs_key {
         uint32_t point_sprite_mask;
 
         struct pipe_rt_blend_state blend;
+
+        /* If the fragment shader reads gl_PrimitiveID then we have 2 scenarios:
+         *
+         * - If there is a geometry shader, then gl_PrimitiveID must be written
+         *   by it and the fragment shader loads it as a regular explicit input
+         *   varying. This is the only valid use case in GLES 3.1.
+         *
+         * - If there is no geometry shader (allowed since GLES 3.2 and
+         *   Vulkan 1.0), then gl_PrimitiveID must be implicitly written by
+         *   hardware and is considered an implicit input varying in the
+         *   fragment shader.
+ */ + bool has_gs; }; struct v3d_gs_key { @@ -544,10 +567,10 @@ enum v3d_compilation_result { struct v3d_compiler { const struct v3d_device_info *devinfo; struct ra_regs *regs; - unsigned int reg_class_any[3]; - unsigned int reg_class_r5[3]; - unsigned int reg_class_phys[3]; - unsigned int reg_class_phys_or_acc[3]; + struct ra_class *reg_class_any[3]; + struct ra_class *reg_class_r5[3]; + struct ra_class *reg_class_phys[3]; + struct ra_class *reg_class_phys_or_acc[3]; }; /** @@ -631,6 +654,9 @@ struct v3d_compile { bool writes_z; bool uses_implicit_point_line_varyings; + /* True if a fragment shader reads gl_PrimitiveID */ + bool fs_uses_primitive_id; + /* If the fragment shader does anything that requires to force * per-sample MSAA, such as reading gl_SampleID. */ @@ -646,12 +672,14 @@ struct v3d_compile { * TMU spills. */ bool disable_tmu_pipelining; + bool pipelined_any_tmu; /* Disable sorting of UBO loads with constant offset. This may * increase the chances of being able to compile shaders with high * register pressure. */ bool disable_constant_ubo_load_sorting; + bool sorted_any_ubo_loads; /* Emits ldunif for each new uniform, even if the uniform was already * emitted in the same block. Useful to compile shaders with high @@ -660,6 +688,10 @@ struct v3d_compile { */ bool disable_ldunif_opt; + /* Disables loop unrolling to reduce register pressure. */ + bool disable_loop_unrolling; + bool unrolled_any_loops; + /* Minimum number of threads we are willing to use to register allocate * a shader with the current compilation strategy. This only prevents * us from lowering the thread count to register allocate successfully, @@ -668,6 +700,13 @@ struct v3d_compile { */ uint32_t min_threads_for_reg_alloc; + /* Whether TMU spills are allowed. If this is disabled it may cause + * register allocation to fail. We set this to favor other compilation + * strategies that can reduce register pressure and hopefully reduce or + * eliminate TMU spills in the shader. + */ + bool tmu_spilling_allowed; + /* The UBO index and block used with the last unifa load, as well as the * current unifa offset *after* emitting that load. This is used to skip * unifa writes (and their 3 delay slot) when the next UBO load reads @@ -683,7 +722,7 @@ struct v3d_compile { struct qreg execute; bool in_control_flow; - struct qreg line_x, point_x, point_y; + struct qreg line_x, point_x, point_y, primitive_id; /** * Instance ID, which comes in before the vertex attribute payload if @@ -710,6 +749,9 @@ struct v3d_compile { struct qreg cs_shared_offset; int local_invocation_index_bits; + /* If the shader uses subgroup functionality */ + bool has_subgroups; + uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4]; uint32_t vpm_output_size; @@ -833,6 +875,8 @@ struct v3d_prog_data { bool single_seg; bool tmu_dirty_rcl; + + bool has_control_barrier; }; struct v3d_vs_prog_data { @@ -895,11 +939,16 @@ struct v3d_gs_prog_data { /* Number of GS invocations */ uint8_t num_invocations; + + bool writes_psiz; }; struct v3d_fs_prog_data { struct v3d_prog_data base; + /* Whether the program reads gl_PrimitiveID */ + bool uses_pid; + struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS]; /* Array of flat shade flags. @@ -927,19 +976,38 @@ struct v3d_compute_prog_data { /* Size in bytes of the workgroup's shared space. 
*/ uint32_t shared_size; uint16_t local_size[3]; + /* If the shader uses subgroup functionality */ + bool has_subgroups; +}; + +struct vpm_config { + uint32_t As; + uint32_t Vc; + uint32_t Gs; + uint32_t Gd; + uint32_t Gv; + uint32_t Ve; + uint32_t gs_width; }; +bool +v3d_compute_vpm_config(struct v3d_device_info *devinfo, + struct v3d_vs_prog_data *vs_bin, + struct v3d_vs_prog_data *vs, + struct v3d_gs_prog_data *gs_bin, + struct v3d_gs_prog_data *gs, + struct vpm_config *vpm_cfg_bin, + struct vpm_config *vpm_cfg); + static inline bool vir_has_uniform(struct qinst *inst) { return inst->uniform != ~0; } -extern const nir_shader_compiler_options v3d_nir_options; - const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo); void v3d_compiler_free(const struct v3d_compiler *compiler); -void v3d_optimize_nir(struct nir_shader *s); +void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s); uint64_t *v3d_compile(const struct v3d_compiler *compiler, struct v3d_key *key, @@ -981,6 +1049,7 @@ struct v3d_qpu_instr v3d_qpu_nop(void); struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); +enum v3d_qpu_cond vir_get_cond(struct qinst *inst); void vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf); void vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_uf uf); void vir_set_unpack(struct qinst *inst, int src, @@ -988,7 +1057,6 @@ void vir_set_unpack(struct qinst *inst, int src, void vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack); struct qreg vir_get_temp(struct v3d_compile *c); -void vir_emit_last_thrsw(struct v3d_compile *c); void vir_calculate_live_intervals(struct v3d_compile *c); int vir_get_nsrc(struct qinst *inst); bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst); @@ -1216,6 +1284,8 @@ VIR_A_ALU1(NEG) VIR_A_ALU1(FLAPUSH) VIR_A_ALU1(FLBPUSH) VIR_A_ALU1(FLPOP) +VIR_A_ALU0(FLAFIRST) +VIR_A_ALU0(FLNAFIRST) VIR_A_ALU1(SETMSF) VIR_A_ALU1(SETREVF) VIR_A_ALU0(TIDX) @@ -1345,30 +1415,6 @@ vir_TLB_COLOR_READ(struct v3d_compile *c) return vir_emit_def(c, ldtlb); } -/* -static inline struct qreg -vir_LOAD_IMM(struct v3d_compile *c, uint32_t val) -{ - return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef, - vir_reg(QFILE_LOAD_IMM, val), c->undef)); -} - -static inline struct qreg -vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val) -{ - return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef, - vir_reg(QFILE_LOAD_IMM, val), - c->undef)); -} -static inline struct qreg -vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val) -{ - return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef, - vir_reg(QFILE_LOAD_IMM, val), - c->undef)); -} -*/ - static inline struct qinst * vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond) { diff --git a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c index 655f74fd4..895b1a391 100644 --- a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c @@ -24,6 +24,8 @@ #include "compiler/v3d_compiler.h" #include "compiler/nir/nir_builder.h" +#include "util/u_helpers.h" + /** * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io * intrinsics into something amenable to the V3D architecture. 
@@ -325,6 +327,59 @@ v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
         nir_intrinsic_set_component(instr, (comp + 2) % 4);
 }
 
+/* Sometimes the origin of gl_PointCoord is in the upper left rather than the
+ * lower left, so we need to flip it.
+ *
+ * This is needed for Vulkan; Gallium uses lower_wpos_pntc instead.
+ */
+static void
+v3d_nir_lower_fragment_input(struct v3d_compile *c, nir_builder *b,
+                             nir_intrinsic_instr *intr)
+{
+        assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
+
+        /* Gallium uses lower_wpos_pntc */
+        if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
+                return;
+
+        b->cursor = nir_after_instr(&intr->instr);
+
+        int comp = nir_intrinsic_component(intr);
+
+        nir_variable *input_var =
+                nir_find_variable_with_driver_location(c->s,
+                                                       nir_var_shader_in,
+                                                       nir_intrinsic_base(intr));
+
+        if (input_var && util_varying_is_point_coord(input_var->data.location,
+                                                     c->fs_key->point_sprite_mask)) {
+                assert(intr->num_components == 1);
+
+                nir_ssa_def *result = &intr->dest.ssa;
+
+                switch (comp) {
+                case 0:
+                case 1:
+                        if (!c->fs_key->is_points)
+                                result = nir_imm_float(b, 0.0);
+                        break;
+                case 2:
+                        result = nir_imm_float(b, 0.0);
+                        break;
+                case 3:
+                        result = nir_imm_float(b, 1.0);
+                        break;
+                }
+                if (c->fs_key->point_coord_upper_left && comp == 1)
+                        result = nir_fsub(b, nir_imm_float(b, 1.0), result);
+                if (result != &intr->dest.ssa) {
+                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+                                                       result,
+                                                       result->parent_instr);
+                }
+        }
+}
+
 static void
 v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                        struct nir_instr *instr,
@@ -338,6 +393,8 @@ v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
         case nir_intrinsic_load_input:
                 if (c->s->info.stage == MESA_SHADER_VERTEX)
                         v3d_nir_lower_vertex_input(c, b, intr);
+                else if (c->s->info.stage == MESA_SHADER_FRAGMENT)
+                        v3d_nir_lower_fragment_input(c, b, intr);
                 break;
 
         case nir_intrinsic_load_uniform:
diff --git a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
index 2cd613b26..11782c734 100644
--- a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
+++ b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
@@ -202,12 +202,23 @@ v3d_get_format_swizzle_for_rt(struct v3d_compile *c, int rt)
 }
 
 static nir_ssa_def *
-v3d_nir_get_tlb_color(nir_builder *b, int rt, int sample)
+v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample)
 {
-        nir_ssa_def *color[4];
-        for (int i = 0; i < 4; i++)
-                color[i] = nir_load_tlb_color_v3d(b, 1, 32, nir_imm_int(b, rt),
-                                                  .base = sample, .component = i);
+        uint32_t num_components =
+                util_format_get_nr_components(c->fs_key->color_fmt[rt].format);
+        nir_ssa_def *color[4];
+        for (int i = 0; i < 4; i++) {
+                if (i < num_components) {
+                        color[i] =
+                                nir_load_tlb_color_v3d(b, 1, 32, nir_imm_int(b, rt),
+                                                       .base = sample,
+                                                       .component = i);
+                } else {
+                        /* These will be DCEd */
+                        color[i] = nir_imm_int(b, 0);
+                }
+        }
         return nir_vec4(b, color[0], color[1], color[2], color[3]);
 }
 
@@ -224,6 +235,22 @@ v3d_emit_logic_op_raw(struct v3d_compile *c, nir_builder *b,
                 nir_ssa_def *dst = v3d_nir_get_swizzled_channel(b, dst_chans, fmt_swz[i]);
 
                 op_res[i] = v3d_logicop(b, c->fs_key->logicop_func, src, dst);
+
+                /* In Vulkan we configure our integer RTs to clamp, so we need
+                 * to ignore result bits that don't fit in the destination RT
+                 * component size.
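+                 *
+                 * For example, with an 8-bit component the mask below is
+                 * (1u << 8) - 1 = 0xff, so a logic op result of 0x1ff is
+                 * stored as 0xff instead of wrapping around.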
+ */ + if (c->key->environment == V3D_ENVIRONMENT_VULKAN) { + uint32_t bits = + util_format_get_component_bits( + c->fs_key->color_fmt[rt].format, + UTIL_FORMAT_COLORSPACE_RGB, i); + if (bits > 0 && bits < 32) { + nir_ssa_def *mask = + nir_imm_int(b, (1u << bits) - 1); + op_res[i] = nir_iand(b, op_res[i], mask); + } + } } nir_ssa_def *r[4]; @@ -257,7 +284,7 @@ static nir_ssa_def * v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b, nir_ssa_def *src, int rt, int sample) { - nir_ssa_def *dst = v3d_nir_get_tlb_color(b, rt, sample); + nir_ssa_def *dst = v3d_nir_get_tlb_color(b, c, rt, sample); nir_ssa_def *src_chans[4], *dst_chans[4]; for (unsigned i = 0; i < 4; i++) { diff --git a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c index e6a226b03..40f1cc23b 100644 --- a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c +++ b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c @@ -56,7 +56,7 @@ lower_load(struct v3d_compile *c, nir_builder *b, nir_intrinsic_instr *instr) { - uint32_t index = nir_src_as_uint(instr->src[0]); + uint32_t index = nir_src_comp_as_uint(instr->src[0], 0); nir_intrinsic_op op; if (instr->intrinsic == nir_intrinsic_load_ubo) { @@ -75,7 +75,7 @@ lower_store(struct v3d_compile *c, nir_builder *b, nir_intrinsic_instr *instr) { - uint32_t index = nir_src_as_uint(instr->src[1]); + uint32_t index = nir_src_comp_as_uint(instr->src[1], 0); rewrite_offset(b, instr, index, 2, nir_intrinsic_get_ssbo_size); } @@ -84,7 +84,7 @@ lower_atomic(struct v3d_compile *c, nir_builder *b, nir_intrinsic_instr *instr) { - uint32_t index = nir_src_as_uint(instr->src[0]); + uint32_t index = nir_src_comp_as_uint(instr->src[0], 0); rewrite_offset(b, instr, index, 1, nir_intrinsic_get_ssbo_size); } diff --git a/lib/mesa/src/broadcom/compiler/vir.c b/lib/mesa/src/broadcom/compiler/vir.c index 335a5a8e3..bf75a4da1 100644 --- a/lib/mesa/src/broadcom/compiler/vir.c +++ b/lib/mesa/src/broadcom/compiler/vir.c @@ -25,6 +25,7 @@ #include "v3d_compiler.h" #include "util/u_prim.h" #include "compiler/nir/nir_schedule.h" +#include "compiler/nir/nir_builder.h" int vir_get_nsrc(struct qinst *inst) @@ -242,6 +243,19 @@ vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond) } } +enum v3d_qpu_cond +vir_get_cond(struct qinst *inst) +{ + assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU); + + if (vir_is_add(inst)) + return inst->qpu.flags.ac; + else if (vir_is_mul(inst)) + return inst->qpu.flags.mc; + else /* NOP */ + return V3D_QPU_COND_NONE; +} + void vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf) { @@ -525,7 +539,10 @@ vir_compile_init(const struct v3d_compiler *compiler, void *debug_output_data), void *debug_output_data, int program_id, int variant_id, + uint32_t max_threads, uint32_t min_threads_for_reg_alloc, + bool tmu_spilling_allowed, + bool disable_loop_unrolling, bool disable_constant_ubo_load_sorting, bool disable_tmu_pipelining, bool fallback_scheduler) @@ -537,14 +554,17 @@ vir_compile_init(const struct v3d_compiler *compiler, c->key = key; c->program_id = program_id; c->variant_id = variant_id; - c->threads = 4; + c->threads = max_threads; c->debug_output = debug_output; c->debug_output_data = debug_output_data; c->compilation_result = V3D_COMPILATION_SUCCEEDED; c->min_threads_for_reg_alloc = min_threads_for_reg_alloc; + c->tmu_spilling_allowed = tmu_spilling_allowed; c->fallback_scheduler = fallback_scheduler; c->disable_tmu_pipelining = 
disable_tmu_pipelining; c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting; + c->disable_loop_unrolling = V3D_DEBUG & V3D_DEBUG_NO_LOOP_UNROLL + ? true : disable_loop_unrolling; s = nir_shader_clone(c, s); c->s = s; @@ -754,6 +774,9 @@ v3d_gs_set_prog_data(struct v3d_compile *c, prog_data->out_prim_type = c->s->info.gs.output_primitive; prog_data->num_invocations = c->s->info.gs.invocations; + + prog_data->writes_psiz = + c->s->info.outputs_written & (1 << VARYING_SLOT_PSIZ); } static void @@ -791,6 +814,7 @@ v3d_fs_set_prog_data(struct v3d_compile *c, prog_data->lock_scoreboard_on_first_thrsw = c->lock_scoreboard_on_first_thrsw; prog_data->force_per_sample_msaa = c->force_per_sample_msaa; + prog_data->uses_pid = c->fs_uses_primitive_id; } static void @@ -799,9 +823,11 @@ v3d_cs_set_prog_data(struct v3d_compile *c, { prog_data->shared_size = c->s->info.shared_size; - prog_data->local_size[0] = c->s->info.cs.local_size[0]; - prog_data->local_size[1] = c->s->info.cs.local_size[1]; - prog_data->local_size[2] = c->s->info.cs.local_size[2]; + prog_data->local_size[0] = c->s->info.workgroup_size[0]; + prog_data->local_size[1] = c->s->info.workgroup_size[1]; + prog_data->local_size[2] = c->s->info.workgroup_size[2]; + + prog_data->has_subgroups = c->has_subgroups; } static void @@ -812,6 +838,7 @@ v3d_set_prog_data(struct v3d_compile *c, prog_data->single_seg = !c->last_thrsw; prog_data->spill_size = c->spill_size; prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl; + prog_data->has_control_barrier = c->s->info.uses_control_barrier; v3d_set_prog_data_uniforms(c, prog_data); @@ -866,7 +893,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c) NIR_PASS_V(c->s, nir_remove_unused_io_vars, nir_var_shader_out, used_outputs, NULL); /* demotes to globals */ NIR_PASS_V(c->s, nir_lower_global_vars_to_local); - v3d_optimize_nir(c->s); + v3d_optimize_nir(c, c->s); NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL); /* This must go before nir_lower_io */ @@ -900,7 +927,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c) NIR_PASS_V(c->s, nir_remove_unused_io_vars, nir_var_shader_out, used_outputs, NULL); /* demotes to globals */ NIR_PASS_V(c->s, nir_lower_global_vars_to_local); - v3d_optimize_nir(c->s); + v3d_optimize_nir(c, c->s); NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL); /* This must go before nir_lower_io */ @@ -958,14 +985,6 @@ v3d_nir_lower_fs_early(struct v3d_compile *c) /* The lowering pass can introduce new sysval reads */ nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s)); } - - /* If the shader has no non-TLB side effects, we can promote it to - * enabling early_fragment_tests even if the user didn't. 
- */ - if (!(c->s->info.num_images || - c->s->info.num_ssbos)) { - c->s->info.fs.early_fragment_tests = true; - } } static void @@ -1330,11 +1349,10 @@ v3d_nir_sort_constant_ubo_loads_block(struct v3d_compile *c, static bool v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c) { - bool progress = false; nir_foreach_function(function, s) { if (function->impl) { nir_foreach_block(block, function->impl) { - progress |= + c->sorted_any_ubo_loads |= v3d_nir_sort_constant_ubo_loads_block(c, block); } nir_metadata_preserve(function->impl, @@ -1342,6 +1360,77 @@ v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c) nir_metadata_dominance); } } + return c->sorted_any_ubo_loads; +} + +static void +lower_load_num_subgroups(struct v3d_compile *c, + nir_builder *b, + nir_intrinsic_instr *intr) +{ + assert(c->s->info.stage == MESA_SHADER_COMPUTE); + assert(intr->intrinsic == nir_intrinsic_load_num_subgroups); + + b->cursor = nir_after_instr(&intr->instr); + uint32_t num_subgroups = + DIV_ROUND_UP(c->s->info.workgroup_size[0] * + c->s->info.workgroup_size[1] * + c->s->info.workgroup_size[2], V3D_CHANNELS); + nir_ssa_def *result = nir_imm_int(b, num_subgroups); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, result); + nir_instr_remove(&intr->instr); +} + +static bool +lower_subgroup_intrinsics(struct v3d_compile *c, + nir_block *block, nir_builder *b) +{ + bool progress = false; + nir_foreach_instr_safe(inst, block) { + if (inst->type != nir_instr_type_intrinsic) + continue;; + + nir_intrinsic_instr *intr = + nir_instr_as_intrinsic(inst); + if (!intr) + continue; + + switch (intr->intrinsic) { + case nir_intrinsic_load_num_subgroups: + lower_load_num_subgroups(c, b, intr); + progress = true; + FALLTHROUGH; + case nir_intrinsic_load_subgroup_id: + case nir_intrinsic_load_subgroup_size: + case nir_intrinsic_load_subgroup_invocation: + case nir_intrinsic_elect: + c->has_subgroups = true; + break; + default: + break; + } + } + + return progress; +} + +static bool +v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c) +{ + bool progress = false; + nir_foreach_function(function, s) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) + progress |= lower_subgroup_intrinsics(c, block, &b); + + nir_metadata_preserve(function->impl, + nir_metadata_block_index | + nir_metadata_dominance); + } + } return progress; } @@ -1405,18 +1494,21 @@ v3d_attempt_compile(struct v3d_compile *c) if (c->key->robust_buffer_access) { /* v3d_nir_lower_robust_buffer_access assumes constant buffer - * indices on ubo/ssbo intrinsics so run a copy propagation pass - * before we run the lowering to warrant this. We also want to run - * the lowering before v3d_optimize to clean-up redundant - * get_buffer_size calls produced in the pass. + * indices on ubo/ssbo intrinsics so run copy propagation and + * constant folding passes before we run the lowering to warrant + * this. We also want to run the lowering before v3d_optimize to + * clean-up redundant get_buffer_size calls produced in the pass. 
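A worked example of the constant folding performed by lower_load_num_subgroups above; the workgroup dimensions here are hypothetical, and V3D_CHANNELS is the 16-lane QPU width used throughout these files:

/* For a hypothetical 8x8x1 compute workgroup:
 *   invocations   = 8 * 8 * 1 = 64
 *   num_subgroups = DIV_ROUND_UP(64, V3D_CHANNELS)
 *                 = DIV_ROUND_UP(64, 16) = 4
 * Every load_num_subgroups intrinsic is rewritten to the immediate 4
 * and removed, so no per-invocation computation survives to codegen.
 */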
*/ NIR_PASS_V(c->s, nir_copy_prop); + NIR_PASS_V(c->s, nir_opt_constant_folding); NIR_PASS_V(c->s, v3d_nir_lower_robust_buffer_access, c); } NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s); - v3d_optimize_nir(c->s); + NIR_PASS_V(c->s, v3d_nir_lower_subgroup_intrinsics, c); + + v3d_optimize_nir(c, c->s); /* Do late algebraic optimization to turn add(a, neg(b)) back into * subs, then the mandatory cleanup after algebraic. Note that it may @@ -1505,6 +1597,83 @@ int v3d_shaderdb_dump(struct v3d_compile *c, c->nop_count); } +/* This is a list of incremental changes to the compilation strategy + * that will be used to try to compile the shader successfully. The + * default strategy is to enable all optimizations which will have + * the highest register pressure but is expected to produce most + * optimal code. Following strategies incrementally disable specific + * optimizations that are known to contribute to register pressure + * in order to be able to compile the shader successfully while meeting + * thread count requirements. + * + * V3D 4.1+ has a min thread count of 2, but we can use 1 here to also + * cover previous hardware as well (meaning that we are not limiting + * register allocation to any particular thread count). This is fine + * because v3d_nir_to_vir will cap this to the actual minimum. + */ +struct v3d_compiler_strategy { + const char *name; + uint32_t max_threads; + uint32_t min_threads; + bool disable_loop_unrolling; + bool disable_ubo_load_sorting; + bool disable_tmu_pipelining; + bool tmu_spilling_allowed; +} static const strategies[] = { + /*0*/ { "default", 4, 4, false, false, false, false }, + /*1*/ { "disable loop unrolling", 4, 4, true, false, false, false }, + /*2*/ { "disable UBO load sorting", 4, 4, true, true, false, false }, + /*3*/ { "disable TMU pipelining", 4, 4, true, true, true, false }, + /*4*/ { "lower thread count", 2, 1, false, false, false, false }, + /*5*/ { "disable loop unrolling (ltc)", 2, 1, true, false, false, false }, + /*6*/ { "disable UBO load sorting (ltc)", 2, 1, true, true, false, false }, + /*7*/ { "disable TMU pipelining (ltc)", 2, 1, true, true, true, true }, + /*8*/ { "fallback scheduler", 2, 1, true, true, true, true } +}; + +/** + * If a particular optimization didn't make any progress during a compile + * attempt disabling it alone won't allow us to compile the shader successfuly, + * since we'll end up with the same code. Detect these scenarios so we can + * avoid wasting time with useless compiles. We should also consider if the + * strategy changes other aspects of the compilation process though, like + * spilling, and not skip it in that case. + */ +static bool +skip_compile_strategy(struct v3d_compile *c, uint32_t idx) +{ + /* We decide if we can skip a strategy based on the optimizations that + * were active in the previous strategy, so we should only be calling this + * for strategies after the first. 
+ */ + assert(idx > 0); + + /* Don't skip a strategy that changes spilling behavior */ + if (strategies[idx].tmu_spilling_allowed != + strategies[idx - 1].tmu_spilling_allowed) { + return false; + } + + switch (idx) { + /* Loop unrolling: skip if we didn't unroll any loops */ + case 1: + case 5: + return !c->unrolled_any_loops; + /* UBO load sorting: skip if we didn't sort any loads */ + case 2: + case 6: + return !c->sorted_any_ubo_loads; + /* TMU pipelining: skip if we didn't pipeline any TMU ops */ + case 3: + case 7: + return !c->pipelined_any_tmu; + /* Lower thread count: skip if we already tried less that 4 threads */ + case 4: + return c->threads < 4; + default: + return false; + }; +} uint64_t *v3d_compile(const struct v3d_compiler *compiler, struct v3d_key *key, struct v3d_prog_data **out_prog_data, @@ -1515,40 +1684,41 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler, int program_id, int variant_id, uint32_t *final_assembly_size) { - struct v3d_compile *c; + struct v3d_compile *c = NULL; + for (int i = 0; i < ARRAY_SIZE(strategies); i++) { + /* Fallback strategy */ + if (i > 0) { + assert(c); + if (skip_compile_strategy(c, i)) + continue; - /* This is a list of incremental changes to the compilation strategy - * that will be used to try to compile the shader successfully. The - * default strategy is to enable all optimizations which will have - * the highest register pressure but is expected to produce most - * optimal code. Following strategies incrementally disable specific - * optimizations that are known to contribute to register pressure - * in order to be able to compile the shader successfully while meeting - * thread count requirements. - * - * V3D 4.1+ has a min thread count of 2, but we can use 1 here to also - * cover previous hardware as well (meaning that we are not limiting - * register allocation to any particular thread count). This is fine - * because v3d_nir_to_vir will cap this to the actual minimum. 
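A sketch of how skip_compile_strategy above plays out, for a hypothetical shader that keeps failing register allocation and whose first compile neither unrolled loops, sorted UBO loads, nor pipelined TMU operations:

/*
 *  0 "default"                        -> compiles, fails RA at 4 threads
 *  1 "disable loop unrolling"         -> skipped: !c->unrolled_any_loops
 *  2 "disable UBO load sorting"       -> skipped: !c->sorted_any_ubo_loads
 *  3 "disable TMU pipelining"         -> skipped: !c->pipelined_any_tmu
 *  4 "lower thread count"             -> retried at 2 threads, fails RA
 *  5 "disable loop unrolling (ltc)"   -> skipped, as strategy 1
 *  6 "disable UBO load sorting (ltc)" -> skipped, as strategy 2
 *  7 "disable TMU pipelining (ltc)"   -> runs anyway: it also enables TMU
 *                                        spilling, and strategies that
 *                                        change spilling are never skipped
 *  8 "fallback scheduler"             -> last resort
 */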
- */ - struct v3d_compiler_strategy { - const char *name; - uint32_t min_threads_for_reg_alloc; - } static const strategies[] = { - { "default", 4 }, - { "disable UBO load sorting", 1 }, - { "disable TMU pipelining", 1 }, - { "fallback scheduler", 1 } - }; + char *debug_msg; + int ret = asprintf(&debug_msg, + "Falling back to strategy '%s' for %s", + strategies[i].name, + vir_get_stage_name(c)); + + if (ret >= 0) { + if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) + fprintf(stderr, "%s\n", debug_msg); + + c->debug_output(debug_msg, c->debug_output_data); + free(debug_msg); + } + + vir_compile_destroy(c); + } - for (int i = 0; i < ARRAY_SIZE(strategies); i++) { c = vir_compile_init(compiler, key, s, debug_output, debug_output_data, program_id, variant_id, - strategies[i].min_threads_for_reg_alloc, - i > 0, /* Disable UBO load sorting */ - i > 1, /* Disable TMU pipelining */ - i > 2 /* Fallback_scheduler */); + strategies[i].max_threads, + strategies[i].min_threads, + strategies[i].tmu_spilling_allowed, + strategies[i].disable_loop_unrolling, + strategies[i].disable_ubo_load_sorting, + strategies[i].disable_tmu_pipelining, + i == ARRAY_SIZE(strategies) - 1); v3d_attempt_compile(c); @@ -1557,23 +1727,6 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler, V3D_COMPILATION_FAILED_REGISTER_ALLOCATION) { break; } - - /* Fallback strategy */ - char *debug_msg; - int ret = asprintf(&debug_msg, - "Falling back to strategy '%s' for %s", - strategies[i + 1].name, - vir_get_stage_name(c)); - - if (ret >= 0) { - if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) - fprintf(stderr, "%s\n", debug_msg); - - c->debug_output(debug_msg, c->debug_output_data); - free(debug_msg); - } - - vir_compile_destroy(c); } if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && @@ -1717,6 +1870,24 @@ try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif) { uint32_t count = 20; struct qinst *prev_inst = NULL; + assert(c->cur_block); + +#ifdef DEBUG + /* We can only reuse a uniform if it was emitted in the same block, + * so callers must make sure the current instruction is being emitted + * in the current block. + */ + bool found = false; + vir_for_each_inst(inst, c->cur_block) { + if (&inst->link == c->cursor.link) { + found = true; + break; + } + } + + assert(found || &c->cur_block->instructions == c->cursor.link); +#endif + list_for_each_entry_from_rev(struct qinst, inst, c->cursor.link->prev, &c->cur_block->instructions, link) { if ((inst->qpu.sig.ldunif || inst->qpu.sig.ldunifrf) && @@ -1817,3 +1988,174 @@ vir_get_stage_name(struct v3d_compile *c) else return gl_shader_stage_name(c->s->info.stage); } + +static inline uint32_t +compute_vpm_size_in_sectors(const struct v3d_device_info *devinfo) +{ + assert(devinfo->vpm_size > 0); + const uint32_t sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8; + return devinfo->vpm_size / sector_size; +} + +/* Computes various parameters affecting VPM memory configuration for programs + * involving geometry shaders to ensure the program fits in memory and honors + * requirements described in section "VPM usage" of the programming manual. + */ +static bool +compute_vpm_config_gs(struct v3d_device_info *devinfo, + struct v3d_vs_prog_data *vs, + struct v3d_gs_prog_data *gs, + struct vpm_config *vpm_cfg_out) +{ + const uint32_t A = vs->separate_segments ? 
1 : 0; + const uint32_t Ad = vs->vpm_input_size; + const uint32_t Vd = vs->vpm_output_size; + + const uint32_t vpm_size = compute_vpm_size_in_sectors(devinfo); + + /* Try to fit program into our VPM memory budget by adjusting + * configurable parameters iteratively. We do this in two phases: + * the first phase tries to fit the program into the total available + * VPM memory. If we succeed at that, then the second phase attempts + * to fit the program into half of that budget so we can run bin and + * render programs in parallel. + */ + struct vpm_config vpm_cfg[2]; + struct vpm_config *final_vpm_cfg = NULL; + uint32_t phase = 0; + + vpm_cfg[phase].As = 1; + vpm_cfg[phase].Gs = 1; + vpm_cfg[phase].Gd = gs->vpm_output_size; + vpm_cfg[phase].gs_width = gs->simd_width; + + /* While there is a requirement that Vc >= [Vn / 16], this is + * always the case when tessellation is not present because in that + * case Vn can only be 6 at most (when input primitive is triangles + * with adjacency). + * + * We always choose Vc=2. We can't go lower than this due to GFXH-1744, + * and Broadcom has not found it worth it to increase it beyond this + * in general. Increasing Vc also increases VPM memory pressure which + * can turn up being detrimental for performance in some scenarios. + */ + vpm_cfg[phase].Vc = 2; + + /* Gv is a constraint on the hardware to not exceed the + * specified number of vertex segments per GS batch. If adding a + * new primitive to a GS batch would result in a range of more + * than Gv vertex segments being referenced by the batch, then + * the hardware will flush the batch and start a new one. This + * means that we can choose any value we want, we just need to + * be aware that larger values improve GS batch utilization + * at the expense of more VPM memory pressure (which can affect + * other performance aspects, such as GS dispatch width). + * We start with the largest value, and will reduce it if we + * find that total memory pressure is too high. + */ + vpm_cfg[phase].Gv = 3; + do { + /* When GS is present in absence of TES, then we need to satisfy + * that Ve >= Gv. We go with the smallest value of Ve to avoid + * increasing memory pressure. + */ + vpm_cfg[phase].Ve = vpm_cfg[phase].Gv; + + uint32_t vpm_sectors = + A * vpm_cfg[phase].As * Ad + + (vpm_cfg[phase].Vc + vpm_cfg[phase].Ve) * Vd + + vpm_cfg[phase].Gs * vpm_cfg[phase].Gd; + + /* Ideally we want to use no more than half of the available + * memory so we can execute a bin and render program in parallel + * without stalls. If we achieved that then we are done. + */ + if (vpm_sectors <= vpm_size / 2) { + final_vpm_cfg = &vpm_cfg[phase]; + break; + } + + /* At the very least, we should not allocate more than the + * total available VPM memory. If we have a configuration that + * succeeds at this we save it and continue to see if we can + * meet the half-memory-use criteria too. + */ + if (phase == 0 && vpm_sectors <= vpm_size) { + vpm_cfg[1] = vpm_cfg[0]; + phase = 1; + } + + /* Try lowering Gv */ + if (vpm_cfg[phase].Gv > 0) { + vpm_cfg[phase].Gv--; + continue; + } + + /* Try lowering GS dispatch width */ + if (vpm_cfg[phase].gs_width > 1) { + do { + vpm_cfg[phase].gs_width >>= 1; + vpm_cfg[phase].Gd = align(vpm_cfg[phase].Gd, 2) / 2; + } while (vpm_cfg[phase].gs_width == 2); + + /* Reset Gv to max after dropping dispatch width */ + vpm_cfg[phase].Gv = 3; + continue; + } + + /* We ran out of options to reduce memory pressure. If we + * are at phase 1 we have at least a valid configuration, so we + * we use that. 
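A worked instance of the vpm_sectors bookkeeping in the loop above, assuming a hypothetical 16 KiB VPM and made-up shader sizes for Ad, Vd and Gd:

/* sector size = V3D_CHANNELS * sizeof(uint32_t) * 8
 *             = 16 * 4 * 8 = 512 bytes
 * vpm_size    = 16384 / 512 = 32 sectors
 *
 * With A = 1, Ad = 2, Vd = 4, As = Gs = 1, Gd = 8, Vc = 2, Gv = Ve = 3:
 *   vpm_sectors = 1*1*2 + (2 + 3)*4 + 1*8 = 30
 *
 * 30 <= 32, so phase 0 saves this configuration and moves on to phase 1,
 * which tries to fit under vpm_size / 2 = 16 sectors by first lowering
 * Gv (and Ve with it) and then halving the GS dispatch width.
 */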
+ */ + if (phase == 1) + final_vpm_cfg = &vpm_cfg[0]; + break; + } while (true); + + if (!final_vpm_cfg) + return false; + + assert(final_vpm_cfg); + assert(final_vpm_cfg->Gd <= 16); + assert(final_vpm_cfg->Gv < 4); + assert(final_vpm_cfg->Ve < 4); + assert(final_vpm_cfg->Vc >= 2 && final_vpm_cfg->Vc <= 4); + assert(final_vpm_cfg->gs_width == 1 || + final_vpm_cfg->gs_width == 4 || + final_vpm_cfg->gs_width == 8 || + final_vpm_cfg->gs_width == 16); + + *vpm_cfg_out = *final_vpm_cfg; + return true; +} + +bool +v3d_compute_vpm_config(struct v3d_device_info *devinfo, + struct v3d_vs_prog_data *vs_bin, + struct v3d_vs_prog_data *vs, + struct v3d_gs_prog_data *gs_bin, + struct v3d_gs_prog_data *gs, + struct vpm_config *vpm_cfg_bin, + struct vpm_config *vpm_cfg) +{ + assert(vs && vs_bin); + assert((gs != NULL) == (gs_bin != NULL)); + + if (!gs) { + vpm_cfg_bin->As = 1; + vpm_cfg_bin->Ve = 0; + vpm_cfg_bin->Vc = vs_bin->vcm_cache_size; + + vpm_cfg->As = 1; + vpm_cfg->Ve = 0; + vpm_cfg->Vc = vs->vcm_cache_size; + } else { + if (!compute_vpm_config_gs(devinfo, vs_bin, gs_bin, vpm_cfg_bin)) + return false; + + if (!compute_vpm_config_gs(devinfo, vs, gs, vpm_cfg)) + return false; + } + + return true; +} diff --git a/lib/mesa/src/broadcom/compiler/vir_live_variables.c b/lib/mesa/src/broadcom/compiler/vir_live_variables.c index 48d0201dc..2fd6430a0 100644 --- a/lib/mesa/src/broadcom/compiler/vir_live_variables.c +++ b/lib/mesa/src/broadcom/compiler/vir_live_variables.c @@ -28,9 +28,12 @@ #include "util/register_allocate.h" #include "v3d_compiler.h" +/* Keeps track of conditional / partial writes in a block */ struct partial_update_state { - struct qinst *insts[4]; - uint8_t channels; + /* Instruction doing a conditional or partial write */ + struct qinst *inst; + /* Instruction that set the flags for the conditional write */ + struct qinst *flags_inst; }; static int @@ -44,7 +47,8 @@ vir_reg_to_var(struct qreg reg) static void vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip, - struct qreg src) + struct partial_update_state *partial_update_ht, struct qinst *inst, + struct qreg src, struct qinst *flags_inst) { int var = vir_reg_to_var(src); if (var == -1) @@ -57,39 +61,39 @@ vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip, * use of a variable without having completely * defined that variable within the block. */ - if (!BITSET_TEST(block->def, var)) - BITSET_SET(block->use, var); -} - -static struct partial_update_state * -get_partial_update_state(struct hash_table *partial_update_ht, - struct qinst *inst) -{ - struct hash_entry *entry = - _mesa_hash_table_search(partial_update_ht, - &inst->dst.index); - if (entry) - return entry->data; - - struct partial_update_state *state = - rzalloc(partial_update_ht, struct partial_update_state); - - _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state); + if (!BITSET_TEST(block->def, var)) { + /* If this use of var is conditional and the condition + * and flags match those of a previous instruction + * in the same block partially defining var then we + * consider var completely defined within the block. 
+ */ + if (BITSET_TEST(block->defout, var)) { + struct partial_update_state *state = + &partial_update_ht[var]; + if (state->inst) { + if (vir_get_cond(inst) == vir_get_cond(state->inst) && + flags_inst == state->flags_inst) { + return; + } + } + } - return state; + BITSET_SET(block->use, var); + } } +/* The def[] bitset marks when an initialization in a + * block completely screens off previous updates of + * that variable. + */ static void vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip, - struct hash_table *partial_update_ht, struct qinst *inst) + struct partial_update_state *partial_update, struct qinst *inst, + struct qinst *flags_inst) { if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU) return; - /* The def[] bitset marks when an initialization in a - * block completely screens off previous updates of - * that variable. - */ int var = vir_reg_to_var(inst->dst); if (var == -1) return; @@ -115,62 +119,22 @@ vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip, return; } - /* Finally, look at the condition code and packing and mark it as a - * def. We need to make sure that we understand sequences - * instructions like: - * - * mov.zs t0, t1 - * mov.zc t0, t2 + /* Keep track of conditional writes. * - * or: + * Notice that the dst's live range for a conditional or partial writes + * will get extended up the control flow to the top of the program until + * we find a full write, making register allocation more difficult, so + * we should try our best to keep track of these and figure out if a + * combination of them actually writes the entire register so we can + * stop that process early and reduce liveness. * - * mmov t0.8a, t1 - * mmov t0.8b, t2 - * mmov t0.8c, t3 - * mmov t0.8d, t4 - * - * as defining the temp within the block, because otherwise dst's live - * range will get extended up the control flow to the top of the - * program. + * FIXME: Track partial updates via pack/unpack. 
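An illustration, in VIR-style pseudocode rather than real compiler output, of the case the conditional-write tracking above is targeting:

/*   nop.pushz  t1           ; sets flags           -> flags_inst
 *   mov.ifa    t0, t2       ; partial def of t0    -> state->inst
 *   add.ifa    t3, t0, t4   ; same cond and same flags_inst, so t0
 *                           ; counts as defined within the block and
 *                           ; is not added to block->use
 *
 * Any instruction that rewrites the flags in between resets flags_inst,
 * so the match fails and the use falls back to the conservative path.
 */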
*/ - struct partial_update_state *state = - get_partial_update_state(partial_update_ht, inst); - uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */ - - if (inst->qpu.flags.ac == V3D_QPU_COND_NONE && - inst->qpu.flags.mc == V3D_QPU_COND_NONE) { - state->channels |= mask; - } else { - for (int i = 0; i < 4; i++) { - if (!(mask & (1 << i))) - continue; - - /* XXXif (state->insts[i] && - state->insts[i]->cond == - qpu_cond_complement(inst->cond)) - state->channels |= 1 << i; - else - */ - state->insts[i] = inst; - } - } - - if (state->channels == 0xf) - BITSET_SET(block->def, var); -} - -static void -sf_state_clear(struct hash_table *partial_update_ht) -{ - hash_table_foreach(partial_update_ht, entry) { - struct partial_update_state *state = entry->data; - - for (int i = 0; i < 4; i++) { - if (state->insts[i] && - (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE || - state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE)) - state->insts[i] = NULL; - } + struct partial_update_state *state = &partial_update[var]; + if (inst->qpu.flags.ac != V3D_QPU_COND_NONE || + inst->qpu.flags.mc != V3D_QPU_COND_NONE) { + state->inst = inst; + state->flags_inst = flags_inst; } } @@ -184,23 +148,36 @@ sf_state_clear(struct hash_table *partial_update_ht) static void vir_setup_def_use(struct v3d_compile *c) { - struct hash_table *partial_update_ht = - _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal); + struct partial_update_state *partial_update = + rzalloc_array(c, struct partial_update_state, c->num_temps); int ip = 0; vir_for_each_block(block, c) { block->start_ip = ip; - _mesa_hash_table_clear(partial_update_ht, NULL); + memset(partial_update, 0, + sizeof(struct partial_update_state) * c->num_temps); + + struct qinst *flags_inst = NULL; vir_for_each_inst(inst, block) { - for (int i = 0; i < vir_get_nsrc(inst); i++) - vir_setup_use(c, block, ip, inst->src[i]); + for (int i = 0; i < vir_get_nsrc(inst); i++) { + vir_setup_use(c, block, ip, partial_update, + inst, inst->src[i], flags_inst); + } - vir_setup_def(c, block, ip, partial_update_ht, inst); + vir_setup_def(c, block, ip, partial_update, + inst, flags_inst); - if (false /* XXX inst->uf */) - sf_state_clear(partial_update_ht); + if (inst->qpu.flags.apf != V3D_QPU_PF_NONE || + inst->qpu.flags.mpf != V3D_QPU_PF_NONE) { + flags_inst = inst; + } + + if (inst->qpu.flags.auf != V3D_QPU_UF_NONE || + inst->qpu.flags.muf != V3D_QPU_UF_NONE) { + flags_inst = NULL; + } /* Payload registers: r0/1/2 contain W, centroid W, * and Z at program start. 
Register allocation will @@ -221,7 +198,7 @@ vir_setup_def_use(struct v3d_compile *c) block->end_ip = ip; } - _mesa_hash_table_destroy(partial_update_ht, NULL); + ralloc_free(partial_update); } static bool diff --git a/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c b/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c index 55469402e..64c762c88 100644 --- a/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c +++ b/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c @@ -149,25 +149,30 @@ check_first_ldunifa(struct v3d_compile *c, } static bool -increment_unifa_address(struct v3d_compile *c, struct qinst *unifa) +increment_unifa_address(struct v3d_compile *c, struct qblock *block, struct qinst *unifa) { + struct qblock *current_block = c->cur_block; if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU && unifa->qpu.alu.mul.op == V3D_QPU_M_MOV) { c->cursor = vir_after_inst(unifa); + c->cur_block = block; struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); vir_ADD_dest(c, unifa_reg, unifa->src[0], vir_uniform_ui(c, 4u)); vir_remove_instruction(c, unifa); + c->cur_block = current_block; return true; } if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU && unifa->qpu.alu.add.op == V3D_QPU_A_ADD) { c->cursor = vir_after_inst(unifa); + c->cur_block = block; struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); struct qreg tmp = vir_ADD(c, unifa->src[1], vir_uniform_ui(c, 4u)); vir_ADD_dest(c, unifa_reg, unifa->src[0], tmp); vir_remove_instruction(c, unifa); + c->cur_block = current_block; return true; } @@ -271,7 +276,7 @@ vir_opt_dead_code(struct v3d_compile *c) */ if (is_first_ldunifa) { assert(unifa); - if (!increment_unifa_address(c, unifa)) + if (!increment_unifa_address(c, block, unifa)) continue; } diff --git a/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c b/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c index 8749f3cd6..4609ef9c3 100644 --- a/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c +++ b/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c @@ -107,9 +107,14 @@ vir_opt_redundant_flags_block(struct v3d_compile *c, struct qblock *block) continue; } - /* Flags aren't preserved across a thrsw. */ - if (inst->qpu.sig.thrsw) - last_flags = NULL; + /* Flags aren't preserved across a thrsw. + * + * In V3D 4.2+ flags are preserved across thread switches. + */ + if (c->devinfo->ver < 42) { + if (inst->qpu.sig.thrsw) + last_flags = NULL; + } if (inst->qpu.flags.apf != V3D_QPU_PF_NONE || inst->qpu.flags.mpf != V3D_QPU_PF_NONE) { diff --git a/lib/mesa/src/broadcom/compiler/vir_register_allocate.c b/lib/mesa/src/broadcom/compiler/vir_register_allocate.c index 41fc25729..08698b4ec 100644 --- a/lib/mesa/src/broadcom/compiler/vir_register_allocate.c +++ b/lib/mesa/src/broadcom/compiler/vir_register_allocate.c @@ -164,10 +164,8 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, } for (unsigned i = 0; i < c->num_temps; i++) { - int node = temp_to_node[i]; - if (BITSET_TEST(c->spillable, i)) - ra_set_node_spill_cost(g, node, spill_costs[i]); + ra_set_node_spill_cost(g, temp_to_node[i], spill_costs[i]); } return ra_get_best_spill_node(g); @@ -179,7 +177,12 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g, void v3d_setup_spill_base(struct v3d_compile *c) { - c->cursor = vir_before_block(vir_entry_block(c)); + /* Setting up the spill base is done in the entry block; so change + * both the current block to emit and the cursor. 
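The block/cursor pairing fixed here recurs in increment_unifa_address and v3d_setup_spill_base: whenever code is emitted into a block other than the one currently being visited, c->cur_block must be switched alongside c->cursor and restored afterwards. A hypothetical helper capturing the pattern (not part of this diff):

static void
emit_in_block(struct v3d_compile *c, struct qblock *block,
              struct vir_cursor cursor,
              void (*emit)(struct v3d_compile *c, void *data), void *data)
{
        struct qblock *saved_block = c->cur_block;
        struct vir_cursor saved_cursor = c->cursor;

        /* Keep the block and the cursor in sync while emitting. */
        c->cur_block = block;
        c->cursor = cursor;
        emit(c, data);

        c->cur_block = saved_block;
        c->cursor = saved_cursor;
}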
+ */ + struct qblock *current_block = c->cur_block; + c->cur_block = vir_entry_block(c); + c->cursor = vir_before_block(c->cur_block); int start_num_temps = c->num_temps; @@ -206,16 +209,16 @@ v3d_setup_spill_base(struct v3d_compile *c) for (int i = start_num_temps; i < c->num_temps; i++) BITSET_CLEAR(c->spillable, i); + /* Restore the current block. */ + c->cur_block = current_block; c->cursor = vir_after_block(c->cur_block); } -static void +static struct qinst * v3d_emit_spill_tmua(struct v3d_compile *c, uint32_t spill_offset) { - vir_ADD_dest(c, vir_reg(QFILE_MAGIC, - V3D_QPU_WADDR_TMUA), - c->spill_base, - vir_uniform_ui(c, spill_offset)); + return vir_ADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUA), + c->spill_base, vir_uniform_ui(c, spill_offset)); } @@ -223,12 +226,17 @@ static void v3d_emit_tmu_spill(struct v3d_compile *c, struct qinst *inst, struct qinst *position, uint32_t spill_offset) { + assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU); + c->cursor = vir_after_inst(position); - inst->dst.index = c->num_temps++; - vir_MOV_dest(c, vir_reg(QFILE_MAGIC, - V3D_QPU_WADDR_TMUD), - inst->dst); - v3d_emit_spill_tmua(c, spill_offset); + inst->dst = vir_get_temp(c); + enum v3d_qpu_cond cond = vir_get_cond(inst); + struct qinst *tmp = + vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD), + inst->dst); + tmp->qpu.flags.mc = cond; + tmp = v3d_emit_spill_tmua(c, spill_offset); + tmp->qpu.flags.ac = cond; vir_emit_thrsw(c); vir_TMUWT(c); c->spills++; @@ -253,7 +261,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp) } struct qinst *last_thrsw = c->last_thrsw; - assert(!last_thrsw || last_thrsw->is_last_thrsw); + assert(last_thrsw && last_thrsw->is_last_thrsw); int start_num_temps = c->num_temps; @@ -339,29 +347,13 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp) spill_offset); } } - - /* If we didn't have a last-thrsw inserted by nir_to_vir and - * we've been inserting thrsws, then insert a new last_thrsw - * right before we start the vpm/tlb sequence for the last - * thread segment. - */ - if (!is_uniform && !last_thrsw && c->last_thrsw && - (v3d_qpu_writes_vpm(&inst->qpu) || - v3d_qpu_uses_tlb(&inst->qpu))) { - c->cursor = vir_before_inst(inst); - vir_emit_thrsw(c); - - last_thrsw = c->last_thrsw; - last_thrsw->is_last_thrsw = true; - } } } /* Make sure c->last_thrsw is the actual last thrsw, not just one we * inserted in our most recent unspill. */ - if (last_thrsw) - c->last_thrsw = last_thrsw; + c->last_thrsw = last_thrsw; /* Don't allow spilling of our spilling instructions. There's no way * they can help get things colored. @@ -372,27 +364,63 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp) c->disable_ldunif_opt = had_disable_ldunif_opt; } +struct node_to_temp_map { + uint32_t temp; + uint32_t priority; +}; + struct v3d_ra_select_callback_data { uint32_t next_acc; uint32_t next_phys; + struct node_to_temp_map *map; }; -static unsigned int -v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data) +/* Choosing accumulators improves chances of merging QPU instructions + * due to these merges requiring that at most 2 rf registers are used + * by the add and mul instructions. + */ +static bool +v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra, + BITSET_WORD *regs, + int priority) { - struct v3d_ra_select_callback_data *v3d_ra = data; - int r5 = ACC_INDEX + 5; + /* Favor accumulators if we have less that this number of physical + * registers. 
Accumulators have more restrictions (like being + * invalidated through thrsw), so running out of physical registers + * even if we have accumulators available can lead to register + * allocation failures. + */ + static const int available_rf_threshold = 5; + int available_rf = 0 ; + for (int i = 0; i < PHYS_COUNT; i++) { + if (BITSET_TEST(regs, PHYS_INDEX + i)) + available_rf++; + if (available_rf >= available_rf_threshold) + break; + } + if (available_rf < available_rf_threshold) + return true; - /* Choose r5 for our ldunifs if possible (nobody else can load to that - * reg, and it keeps the QPU cond field free from being occupied by - * ldunifrf). + /* Favor accumulators for short-lived temps (our priority represents + * liveness), to prevent long-lived temps from grabbing accumulators + * and preventing follow-up instructions from using them, potentially + * leading to large portions of the shader being unable to use + * accumulators and therefore merge instructions successfully. */ - if (BITSET_TEST(regs, r5)) - return r5; + static const int priority_threshold = 20; + if (priority <= priority_threshold) + return true; + + return false; +} - /* Choose an accumulator if possible (I think it's lower power than - * phys regs), but round-robin through them to give post-RA - * instruction selection more options. +static bool +v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra, + BITSET_WORD *regs, + unsigned int *out) +{ + /* Round-robin through our accumulators to give post-RA instruction + * selection more options. */ for (int i = 0; i < ACC_COUNT; i++) { int acc_off = (v3d_ra->next_acc + i) % ACC_COUNT; @@ -400,20 +428,61 @@ v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data) if (BITSET_TEST(regs, acc)) { v3d_ra->next_acc = acc_off + 1; - return acc; + *out = acc; + return true; } } + return false; +} + +static bool +v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, + BITSET_WORD *regs, + unsigned int *out) +{ for (int i = 0; i < PHYS_COUNT; i++) { int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT; int phys = PHYS_INDEX + phys_off; if (BITSET_TEST(regs, phys)) { v3d_ra->next_phys = phys_off + 1; - return phys; + *out = phys; + return true; } } + return false; +} + +static unsigned int +v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data) +{ + struct v3d_ra_select_callback_data *v3d_ra = data; + int r5 = ACC_INDEX + 5; + + /* Choose r5 for our ldunifs if possible (nobody else can load to that + * reg, and it keeps the QPU cond field free from being occupied by + * ldunifrf). + */ + if (BITSET_TEST(regs, r5)) + return r5; + + unsigned int reg; + if (v3d_ra_favor_accum(v3d_ra, regs, v3d_ra->map[n].priority) && + v3d_ra_select_accum(v3d_ra, regs, ®)) { + return reg; + } + + if (v3d_ra_select_rf(v3d_ra, regs, ®)) + return reg; + + /* If we ran out of physical registers try to assign an accumulator + * if we didn't favor that option earlier. + */ + if (v3d_ra_select_accum(v3d_ra, regs, ®)) + return reg; + unreachable("RA must pass us at least one possible reg."); } @@ -426,44 +495,37 @@ vir_init_reg_sets(struct v3d_compiler *compiler) int max_thread_index = (compiler->devinfo->ver >= 40 ? 
2 : 3); compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT, - true); + false); if (!compiler->regs) return false; for (int threads = 0; threads < max_thread_index; threads++) { compiler->reg_class_any[threads] = - ra_alloc_reg_class(compiler->regs); + ra_alloc_contig_reg_class(compiler->regs, 1); compiler->reg_class_r5[threads] = - ra_alloc_reg_class(compiler->regs); + ra_alloc_contig_reg_class(compiler->regs, 1); compiler->reg_class_phys_or_acc[threads] = - ra_alloc_reg_class(compiler->regs); + ra_alloc_contig_reg_class(compiler->regs, 1); compiler->reg_class_phys[threads] = - ra_alloc_reg_class(compiler->regs); + ra_alloc_contig_reg_class(compiler->regs, 1); for (int i = PHYS_INDEX; i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) { - ra_class_add_reg(compiler->regs, - compiler->reg_class_phys_or_acc[threads], i); - ra_class_add_reg(compiler->regs, - compiler->reg_class_phys[threads], i); - ra_class_add_reg(compiler->regs, - compiler->reg_class_any[threads], i); + ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); + ra_class_add_reg(compiler->reg_class_phys[threads], i); + ra_class_add_reg(compiler->reg_class_any[threads], i); } for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) { - ra_class_add_reg(compiler->regs, - compiler->reg_class_phys_or_acc[threads], i); - ra_class_add_reg(compiler->regs, - compiler->reg_class_any[threads], i); + ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i); + ra_class_add_reg(compiler->reg_class_any[threads], i); } /* r5 can only store a single 32-bit value, so not much can * use it. */ - ra_class_add_reg(compiler->regs, - compiler->reg_class_r5[threads], + ra_class_add_reg(compiler->reg_class_r5[threads], ACC_INDEX + 5); - ra_class_add_reg(compiler->regs, - compiler->reg_class_any[threads], + ra_class_add_reg(compiler->reg_class_any[threads], ACC_INDEX + 5); } @@ -472,11 +534,6 @@ vir_init_reg_sets(struct v3d_compiler *compiler) return true; } -struct node_to_temp_map { - uint32_t temp; - uint32_t priority; -}; - static int node_to_temp_priority(const void *in_a, const void *in_b) { @@ -506,15 +563,15 @@ get_spill_batch_size(struct v3d_compile *c) return 20; } -/* Don't emit spills using the TMU until we've dropped thread count first. Also, - * don't spill if we have enabled any other optimization that can lead to - * higher register pressure, such as TMU pipelining, we rather recompile without - * the optimization in that case. +/* Don't emit spills using the TMU until we've dropped thread count first. We, + * may also disable spilling when certain optimizations that are known to + * increase register pressure are active so we favor recompiling with + * optimizations disabled instead of spilling. */ static inline bool tmu_spilling_allowed(struct v3d_compile *c, int thread_index) { - return thread_index == 0 && c->disable_tmu_pipelining; + return thread_index == 0 && c->tmu_spilling_allowed; } #define CLASS_BIT_PHYS (1 << 0) @@ -532,6 +589,7 @@ tmu_spilling_allowed(struct v3d_compile *c, int thread_index) struct qpu_reg * v3d_register_allocate(struct v3d_compile *c, bool *spilled) { + uint32_t UNUSED start_num_temps = c->num_temps; struct node_to_temp_map map[c->num_temps]; uint32_t temp_to_node[c->num_temps]; uint8_t class_bits[c->num_temps]; @@ -542,6 +600,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled) * RF0-2. 
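Taken together, the helpers above give v3d_ra_select_callback a fixed preference order: r5 first (keeping ldunif results out of ldunifrf), then an accumulator when v3d_ra_favor_accum approves (short-lived temp, or physical registers nearly exhausted), then a physical register round-robin from next_phys, and an accumulator only as a last resort. The callback is installed on the interference graph roughly as below (names follow the hunk above; g is the ra_graph built later in v3d_register_allocate):

struct v3d_ra_select_callback_data callback_data = {
        .next_acc = 0,
        .next_phys = 3,   /* stay clear of payload registers RF0-2 */
        .map = map,
};
ra_set_select_reg_callback(g, v3d_ra_select_callback, &callback_data);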
*/ .next_phys = 3, + .map = map, }; *spilled = false; @@ -782,6 +841,12 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled) return NULL; } + /* Ensure that we are not accessing temp_to_node out of bounds. We + * should never trigger this assertion because `c->num_temps` only + * grows when we spill, in which case we return early and don't get + * here. + */ + assert(start_num_temps == c->num_temps); struct qpu_reg *temp_registers = calloc(c->num_temps, sizeof(*temp_registers)); diff --git a/lib/mesa/src/broadcom/compiler/vir_to_qpu.c b/lib/mesa/src/broadcom/compiler/vir_to_qpu.c index aa3354542..634b8961b 100644 --- a/lib/mesa/src/broadcom/compiler/vir_to_qpu.c +++ b/lib/mesa/src/broadcom/compiler/vir_to_qpu.c @@ -45,12 +45,6 @@ qpu_magic(enum v3d_qpu_waddr waddr) return reg; } -static inline struct qpu_reg -qpu_acc(int acc) -{ - return qpu_magic(V3D_QPU_WADDR_R0 + acc); -} - struct v3d_qpu_instr v3d_qpu_nop(void) { @@ -219,8 +213,13 @@ v3d_generate_code_block(struct v3d_compile *c, src[i] = qpu_magic(qinst->src[i].index); break; case QFILE_NULL: + /* QFILE_NULL is an undef, so we can load + * anything. Using reg 0 + */ + src[i] = qpu_reg(0); + break; case QFILE_LOAD_IMM: - src[i] = qpu_acc(0); + assert(!"not reached"); break; case QFILE_TEMP: src[i] = temp_registers[index]; @@ -238,7 +237,7 @@ v3d_generate_code_block(struct v3d_compile *c, temp = new_qpu_nop_before(qinst); temp->qpu.sig.ldvpm = true; - src[i] = qpu_acc(3); + src[i] = qpu_magic(V3D_QPU_WADDR_R3); break; } } diff --git a/lib/mesa/src/broadcom/meson.build b/lib/mesa/src/broadcom/meson.build index f558aaca4..2e1145dd0 100644 --- a/lib/mesa/src/broadcom/meson.build +++ b/lib/mesa/src/broadcom/meson.build @@ -50,16 +50,35 @@ foreach ver : v3d_versions ) endforeach +v3d_args = ['-DV3D_BUILD_NEON'] + +v3d_neon_c_args = [] +if host_machine.cpu_family() == 'arm' + v3d_neon_c_args = '-mfpu=neon' +endif + +libv3d_neon = static_library( + 'v3d_neon', + 'common/v3d_tiling.c', + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + ], + c_args : [v3d_args, v3d_neon_c_args], + gnu_symbol_visibility : 'hidden', + dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], +) + libbroadcom_v3d = static_library( 'libbroadcom_v3d', [ - files('common/v3d_debug.c', 'common/v3d_device_info.c', 'clif/clif_dump.c'), + files('common/v3d_debug.c', 'common/v3d_device_info.c', 'clif/clif_dump.c', 'common/v3d_util.c'), v3d_xml_pack, ], include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom], c_args : [no_override_init_args], gnu_symbol_visibility : 'hidden', link_whole : v3d_libs + per_version_libs, + link_with: [libv3d_neon], build_by_default : false, dependencies: [dep_valgrind, dep_thread], ) diff --git a/lib/mesa/src/broadcom/qpu/qpu_disasm.h b/lib/mesa/src/broadcom/qpu/qpu_disasm.h index efdf8ddb5..b02ec91d7 100644 --- a/lib/mesa/src/broadcom/qpu/qpu_disasm.h +++ b/lib/mesa/src/broadcom/qpu/qpu_disasm.h @@ -21,8 +21,8 @@ * IN THE SOFTWARE. 
*/ -#ifndef VC5_QPU_DISASM_H -#define VC5_QPU_DISASM_H +#ifndef QPU_DISASM_H +#define QPU_DISASM_H #include "broadcom/common/v3d_device_info.h" @@ -36,4 +36,4 @@ const char *v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst) void v3d_qpu_dump(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr); -#endif /* VC5_QPU_DISASM_H */ +#endif /* QPU_DISASM_H */ diff --git a/lib/mesa/src/broadcom/qpu/qpu_instr.c b/lib/mesa/src/broadcom/qpu/qpu_instr.c index 0bda9a42c..569c5fc40 100644 --- a/lib/mesa/src/broadcom/qpu/qpu_instr.c +++ b/lib/mesa/src/broadcom/qpu/qpu_instr.c @@ -137,6 +137,8 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op) [V3D_QPU_A_TMUWT] = "tmuwt", [V3D_QPU_A_VPMSETUP] = "vpmsetup", [V3D_QPU_A_VPMWT] = "vpmwt", + [V3D_QPU_A_FLAFIRST] = "flafirst", + [V3D_QPU_A_FLNAFIRST] = "flnafirst", [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in", [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out", [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in", @@ -406,6 +408,8 @@ static const uint8_t add_op_args[] = { [V3D_QPU_A_BARRIERID] = D, [V3D_QPU_A_TMUWT] = D, [V3D_QPU_A_VPMWT] = D, + [V3D_QPU_A_FLAFIRST] = D, + [V3D_QPU_A_FLNAFIRST] = D, [V3D_QPU_A_VPMSETUP] = D | A, @@ -930,6 +934,8 @@ v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) case V3D_QPU_A_VFLNB: case V3D_QPU_A_FLAPUSH: case V3D_QPU_A_FLBPUSH: + case V3D_QPU_A_FLAFIRST: + case V3D_QPU_A_FLNAFIRST: return true; default: break; diff --git a/lib/mesa/src/broadcom/qpu/qpu_instr.h b/lib/mesa/src/broadcom/qpu/qpu_instr.h index a87ed9ff3..4f165e939 100644 --- a/lib/mesa/src/broadcom/qpu/qpu_instr.h +++ b/lib/mesa/src/broadcom/qpu/qpu_instr.h @@ -94,7 +94,6 @@ enum v3d_qpu_waddr { V3D_QPU_WADDR_R3 = 3, V3D_QPU_WADDR_R4 = 4, V3D_QPU_WADDR_R5 = 5, - /* 6 is reserved, but note 3.2.2.8: "Result Writes" */ V3D_QPU_WADDR_NOP = 6, V3D_QPU_WADDR_TLB = 7, V3D_QPU_WADDR_TLBU = 8, @@ -191,6 +190,8 @@ enum v3d_qpu_add_op { V3D_QPU_A_TMUWT, V3D_QPU_A_VPMSETUP, V3D_QPU_A_VPMWT, + V3D_QPU_A_FLAFIRST, + V3D_QPU_A_FLNAFIRST, V3D_QPU_A_LDVPMV_IN, V3D_QPU_A_LDVPMV_OUT, V3D_QPU_A_LDVPMD_IN, diff --git a/lib/mesa/src/broadcom/qpu/qpu_pack.c b/lib/mesa/src/broadcom/qpu/qpu_pack.c index 7502bbfb9..eee1e9f95 100644 --- a/lib/mesa/src/broadcom/qpu/qpu_pack.c +++ b/lib/mesa/src/broadcom/qpu/qpu_pack.c @@ -44,65 +44,65 @@ (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) #endif /* QPU_MASK */ -#define VC5_QPU_OP_MUL_SHIFT 58 -#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58) +#define V3D_QPU_OP_MUL_SHIFT 58 +#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58) -#define VC5_QPU_SIG_SHIFT 53 -#define VC5_QPU_SIG_MASK QPU_MASK(57, 53) +#define V3D_QPU_SIG_SHIFT 53 +#define V3D_QPU_SIG_MASK QPU_MASK(57, 53) -#define VC5_QPU_COND_SHIFT 46 -#define VC5_QPU_COND_MASK QPU_MASK(52, 46) -#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6) +#define V3D_QPU_COND_SHIFT 46 +#define V3D_QPU_COND_MASK QPU_MASK(52, 46) +#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6) -#define VC5_QPU_MM QPU_MASK(45, 45) -#define VC5_QPU_MA QPU_MASK(44, 44) +#define V3D_QPU_MM QPU_MASK(45, 45) +#define V3D_QPU_MA QPU_MASK(44, 44) #define V3D_QPU_WADDR_M_SHIFT 38 #define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) -#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35 -#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) +#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35 +#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) #define V3D_QPU_WADDR_A_SHIFT 32 #define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) -#define VC5_QPU_BRANCH_COND_SHIFT 32 -#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) +#define V3D_QPU_BRANCH_COND_SHIFT 32 +#define 
V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) -#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24 -#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) +#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24 +#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) -#define VC5_QPU_OP_ADD_SHIFT 24 -#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24) +#define V3D_QPU_OP_ADD_SHIFT 24 +#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24) -#define VC5_QPU_MUL_B_SHIFT 21 -#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21) +#define V3D_QPU_MUL_B_SHIFT 21 +#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21) -#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21 -#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) +#define V3D_QPU_BRANCH_MSFIGN_SHIFT 21 +#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) -#define VC5_QPU_MUL_A_SHIFT 18 -#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18) +#define V3D_QPU_MUL_A_SHIFT 18 +#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18) -#define VC5_QPU_ADD_B_SHIFT 15 -#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15) +#define V3D_QPU_ADD_B_SHIFT 15 +#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15) -#define VC5_QPU_BRANCH_BDU_SHIFT 15 -#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) +#define V3D_QPU_BRANCH_BDU_SHIFT 15 +#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) -#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14) +#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14) -#define VC5_QPU_ADD_A_SHIFT 12 -#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12) +#define V3D_QPU_ADD_A_SHIFT 12 +#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12) -#define VC5_QPU_BRANCH_BDI_SHIFT 12 -#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) +#define V3D_QPU_BRANCH_BDI_SHIFT 12 +#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) -#define VC5_QPU_RADDR_A_SHIFT 6 -#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6) +#define V3D_QPU_RADDR_A_SHIFT 6 +#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6) -#define VC5_QPU_RADDR_B_SHIFT 0 -#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0) +#define V3D_QPU_RADDR_B_SHIFT 0 +#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0) #define THRSW .thrsw = true #define LDUNIF .ldunif = true @@ -207,9 +207,9 @@ static const struct v3d_qpu_sig v41_sig_map[] = { [21] = { THRSW, LDVARY, WRTMUC }, [22] = { UCB, }, [23] = { ROT, }, - /* 24-30 reserved */ [24] = { LDUNIFA}, [25] = { LDUNIFARF }, + /* 26-30 reserved */ [31] = { SMIMM, LDTMU, }, }; @@ -456,8 +456,15 @@ struct opcode_desc { uint8_t mux_b_mask; uint8_t mux_a_mask; uint8_t op; - /* 0 if it's the same across V3D versions, or a specific V3D version. */ - uint8_t ver; + + /* first_ver == 0 if it's the same across all V3D versions. 
+ * first_ver == X, last_ver == 0 if it's the same for all V3D versions + * starting from X + * first_ver == X, last_ver == Y if it's the same for all V3D versions + * on the range X through Y + */ + uint8_t first_ver; + uint8_t last_ver; }; static const struct opcode_desc add_ops[] = { @@ -519,8 +526,10 @@ static const struct opcode_desc add_ops[] = { { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 }, { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, - + { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 }, + { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 }, { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 }, + { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 }, { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 }, { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 }, @@ -576,9 +585,23 @@ static const struct opcode_desc mul_ops[] = { { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, }; +/* Returns true if op_desc should be filtered out based on devinfo->ver + * against op_desc->first_ver and op_desc->last_ver. Check notes about + * first_ver/last_ver on struct opcode_desc comments. + */ +static bool +opcode_invalid_in_version(const struct v3d_device_info *devinfo, + const struct opcode_desc *op_desc) +{ + return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) || + (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver); +} + static const struct opcode_desc * -lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes, - uint32_t opcode, uint32_t mux_a, uint32_t mux_b) +lookup_opcode_from_packed(const struct v3d_device_info *devinfo, + const struct opcode_desc *opcodes, + size_t num_opcodes, uint32_t opcode, + uint32_t mux_a, uint32_t mux_b) { for (int i = 0; i < num_opcodes; i++) { const struct opcode_desc *op_desc = &opcodes[i]; @@ -587,6 +610,9 @@ lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes, opcode > op_desc->opcode_last) continue; + if (opcode_invalid_in_version(devinfo, op_desc)) + continue; + if (!(op_desc->mux_b_mask & (1 << mux_b))) continue; @@ -716,9 +742,9 @@ static bool v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, struct v3d_qpu_instr *instr) { - uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD); - uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A); - uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B); + uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD); + uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A); + uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B); uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); uint32_t map_op = op; @@ -731,8 +757,9 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, map_op = (map_op - 253 + 245); const struct opcode_desc *desc = - lookup_opcode(add_ops, ARRAY_SIZE(add_ops), - map_op, mux_a, mux_b); + lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops), + map_op, mux_a, mux_b); + if (!desc) return false; @@ -846,7 +873,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); instr->alu.add.magic_write = false; - if (packed_inst & VC5_QPU_MA) { + if (packed_inst & V3D_QPU_MA) { switch (instr->alu.add.op) { case V3D_QPU_A_LDVPMV_IN: instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT; @@ -870,14 +897,15 @@ static bool v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, struct v3d_qpu_instr *instr) 
{ - uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL); - uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A); - uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B); + uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL); + uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A); + uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B); { const struct opcode_desc *desc = - lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops), - op, mux_a, mux_b); + lookup_opcode_from_packed(devinfo, mul_ops, + ARRAY_SIZE(mul_ops), + op, mux_a, mux_b); if (!desc) return false; @@ -933,11 +961,31 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, instr->alu.mul.a = mux_a; instr->alu.mul.b = mux_b; instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); - instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM; + instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM; return true; } +static const struct opcode_desc * +lookup_opcode_from_instr(const struct v3d_device_info *devinfo, + const struct opcode_desc *opcodes, size_t num_opcodes, + uint8_t op) +{ + for (int i = 0; i < num_opcodes; i++) { + const struct opcode_desc *op_desc = &opcodes[i]; + + if (op_desc->op != op) + continue; + + if (opcode_invalid_in_version(devinfo, op_desc)) + continue; + + return op_desc; + } + + return NULL; +} + static bool v3d_qpu_add_pack(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr, uint64_t *packed_instr) @@ -946,18 +994,14 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, uint32_t mux_a = instr->alu.add.a; uint32_t mux_b = instr->alu.add.b; int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); - const struct opcode_desc *desc; + const struct opcode_desc *desc = + lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops), + instr->alu.add.op); - int opcode; - for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)]; - desc++) { - if (desc->op == instr->alu.add.op) - break; - } - if (desc == &add_ops[ARRAY_SIZE(add_ops)]) + if (!desc) return false; - opcode = desc->opcode_first; + uint32_t opcode = desc->opcode_first; /* If an operation doesn't use an arg, its mux values may be used to * identify the operation type. 
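How the first_ver/last_ver filtering behaves for the opcode tables above, using the FLAFIRST row as the example (the devinfo values are hypothetical):

/* { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 }
 *   first_ver = 41, last_ver = 0 (open-ended)
 *
 * opcode_invalid_in_version(devinfo, desc) with devinfo->ver == 33
 *   -> true  (33 < 41): the entry is skipped by both lookup paths
 * with devinfo->ver == 42
 *   -> false (42 >= 41, no upper bound): the entry matches
 *
 * An entry written as { ..., 33, 40 } would match V3D 3.3 through
 * 4.0 only.
 */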
@@ -995,7 +1039,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, case V3D_QPU_A_LDVPMD_OUT: case V3D_QPU_A_LDVPMG_OUT: assert(!instr->alu.add.magic_write); - *packed_instr |= VC5_QPU_MA; + *packed_instr |= V3D_QPU_MA; break; default: @@ -1145,12 +1189,12 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo, break; } - *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A); - *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); - *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD); + *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A); + *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B); + *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD); *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A); if (instr->alu.add.magic_write && !no_magic_write) - *packed_instr |= VC5_QPU_MA; + *packed_instr |= V3D_QPU_MA; return true; } @@ -1162,14 +1206,12 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, uint32_t mux_a = instr->alu.mul.a; uint32_t mux_b = instr->alu.mul.b; int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op); - const struct opcode_desc *desc; - for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)]; - desc++) { - if (desc->op == instr->alu.mul.op) - break; - } - if (desc == &mul_ops[ARRAY_SIZE(mul_ops)]) + const struct opcode_desc *desc = + lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops), + instr->alu.mul.op); + + if (!desc) return false; uint32_t opcode = desc->opcode_first; @@ -1253,13 +1295,13 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo, break; } - *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A); - *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B); + *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A); + *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B); - *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL); + *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL); *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M); if (instr->alu.mul.magic_write) - *packed_instr |= VC5_QPU_MM; + *packed_instr |= V3D_QPU_MM; return true; } @@ -1272,14 +1314,14 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, instr->type = V3D_QPU_INSTR_TYPE_ALU; if (!v3d_qpu_sig_unpack(devinfo, - QPU_GET_FIELD(packed_instr, VC5_QPU_SIG), + QPU_GET_FIELD(packed_instr, V3D_QPU_SIG), &instr->sig)) return false; - uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND); + uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND); if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) { - instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR; - instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR; + instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR; + instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR; instr->flags.ac = V3D_QPU_COND_NONE; instr->flags.mc = V3D_QPU_COND_NONE; @@ -1292,8 +1334,8 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo, return false; } - instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A); - instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B); + instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A); + instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B); if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr)) return false; @@ -1311,7 +1353,7 @@ v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo, { instr->type = V3D_QPU_INSTR_TYPE_BRANCH; - uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND); + uint32_t cond = QPU_GET_FIELD(packed_instr, 
V3D_QPU_BRANCH_COND); if (cond == 0) instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS; else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <= @@ -1320,31 +1362,31 @@ v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo, else return false; - uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN); + uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN); if (msfign == 3) return false; instr->branch.msfign = msfign; - instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI); + instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI); - instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB; + instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB; if (instr->branch.ub) { instr->branch.bdu = QPU_GET_FIELD(packed_instr, - VC5_QPU_BRANCH_BDU); + V3D_QPU_BRANCH_BDU); } instr->branch.raddr_a = QPU_GET_FIELD(packed_instr, - VC5_QPU_RADDR_A); + V3D_QPU_RADDR_A); instr->branch.offset = 0; instr->branch.offset += QPU_GET_FIELD(packed_instr, - VC5_QPU_BRANCH_ADDR_LOW) << 3; + V3D_QPU_BRANCH_ADDR_LOW) << 3; instr->branch.offset += QPU_GET_FIELD(packed_instr, - VC5_QPU_BRANCH_ADDR_HIGH) << 24; + V3D_QPU_BRANCH_ADDR_HIGH) << 24; return true; } @@ -1354,10 +1396,10 @@ v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, uint64_t packed_instr, struct v3d_qpu_instr *instr) { - if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) { + if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) { return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr); } else { - uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG); + uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG); if ((sig & 24) == 16) { return v3d_qpu_instr_unpack_branch(devinfo, packed_instr, @@ -1376,11 +1418,11 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, uint32_t sig; if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig)) return false; - *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG); + *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG); if (instr->type == V3D_QPU_INSTR_TYPE_ALU) { - *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A); - *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B); + *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A); + *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B); if (!v3d_qpu_add_pack(devinfo, instr, packed_instr)) return false; @@ -1400,13 +1442,13 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo, flags = instr->sig_addr; if (instr->sig_magic) - flags |= VC5_QPU_COND_SIG_MAGIC_ADDR; + flags |= V3D_QPU_COND_SIG_MAGIC_ADDR; } else { if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags)) return false; } - *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND); + *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND); } else { if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) return false; @@ -1420,38 +1462,39 @@ v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr, uint64_t *packed_instr) { - *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG); + *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG); if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) { *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond - V3D_QPU_BRANCH_COND_A0), - VC5_QPU_BRANCH_COND); + V3D_QPU_BRANCH_COND); } *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, - VC5_QPU_BRANCH_MSFIGN); + V3D_QPU_BRANCH_MSFIGN); *packed_instr |= QPU_SET_FIELD(instr->branch.bdi, - VC5_QPU_BRANCH_BDI); + V3D_QPU_BRANCH_BDI); if (instr->branch.ub) { - *packed_instr |= VC5_QPU_BRANCH_UB; 
+ *packed_instr |= V3D_QPU_BRANCH_UB; *packed_instr |= QPU_SET_FIELD(instr->branch.bdu, - VC5_QPU_BRANCH_BDU); + V3D_QPU_BRANCH_BDU); } switch (instr->branch.bdi) { case V3D_QPU_BRANCH_DEST_ABS: case V3D_QPU_BRANCH_DEST_REL: *packed_instr |= QPU_SET_FIELD(instr->branch.msfign, - VC5_QPU_BRANCH_MSFIGN); + V3D_QPU_BRANCH_MSFIGN); *packed_instr |= QPU_SET_FIELD((instr->branch.offset & ~0xff000000) >> 3, - VC5_QPU_BRANCH_ADDR_LOW); + V3D_QPU_BRANCH_ADDR_LOW); *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24, - VC5_QPU_BRANCH_ADDR_HIGH); + V3D_QPU_BRANCH_ADDR_HIGH); + break; default: break; } @@ -1459,7 +1502,7 @@ v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo, if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE || instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) { *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a, - VC5_QPU_RADDR_A); + V3D_QPU_RADDR_A); } return true; diff --git a/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c b/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c index 5922b409a..e6b1918b8 100644 --- a/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c +++ b/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c @@ -162,6 +162,7 @@ main(int argc, char **argv) &instr.alu.add.b); swap_pack(&instr.alu.add.a_unpack, &instr.alu.add.b_unpack); + break; default: break; } diff --git a/lib/mesa/src/broadcom/simulator/v3d_simulator.c b/lib/mesa/src/broadcom/simulator/v3d_simulator.c index 8d43bf6d5..494e5bb44 100644 --- a/lib/mesa/src/broadcom/simulator/v3d_simulator.c +++ b/lib/mesa/src/broadcom/simulator/v3d_simulator.c @@ -24,10 +24,10 @@ /** * @file v3d_simulator.c * - * Implements VC5 simulation on top of a non-VC5 GEM fd. + * Implements V3D simulation on top of a non-V3D GEM fd. * - * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on - * top of the simpenrose software simulator. Generally, VC5 driver BOs have a + * This file's goal is to emulate the V3D ioctls' behavior in the kernel on + * top of the simpenrose software simulator. Generally, V3D driver BOs have a * GEM-side copy of their contents and a simulator-side memory area that the * GEM contents get copied into during simulation. Once simulation is done, * the simulator's data is copied back out to the GEM BOs, so that rendering @@ -40,8 +40,8 @@ * outside of this file still call ioctls directly on the fd). * * Another limitation is that BO import doesn't work unless the underlying - * window system's BO size matches what VC5 is going to use, which of course - * doesn't work out in practice. This means that for now, only DRI3 (VC5 + * window system's BO size matches what V3D is going to use, which of course + * doesn't work out in practice. This means that for now, only DRI3 (V3D * makes the winsys BOs) is supported, not DRI2 (window system makes the winys * BOs). */ @@ -79,7 +79,7 @@ static struct v3d_simulator_state { /* Base hardware address of the heap. */ uint32_t mem_base; /* Size of the heap. */ - size_t mem_size; + uint32_t mem_size; struct mem_block *heap; struct mem_block *overflow; @@ -87,6 +87,9 @@ static struct v3d_simulator_state { /** Mapping from GEM fd to struct v3d_simulator_file * */ struct hash_table *fd_map; + /** Last performance monitor ID. 
*/ + uint32_t last_perfid; + struct util_dynarray bin_oom; int refcount; } sim_state = { @@ -100,6 +103,11 @@ struct v3d_simulator_file { /** Mapping from GEM handle to struct v3d_simulator_bo * */ struct hash_table *bo_map; + /** Dynamic array with performance monitors */ + struct v3d_simulator_perfmon **perfmons; + uint32_t perfmons_size; + uint32_t active_perfid; + struct mem_block *gmp; void *gmp_vaddr; @@ -121,12 +129,34 @@ struct v3d_simulator_bo { int handle; }; +struct v3d_simulator_perfmon { + uint32_t ncounters; + uint8_t counters[DRM_V3D_MAX_PERF_COUNTERS]; + uint64_t values[DRM_V3D_MAX_PERF_COUNTERS]; +}; + static void * int_to_key(int key) { return (void *)(uintptr_t)key; } +#define PERFMONS_ALLOC_SIZE 100 + +static uint32_t +perfmons_next_id(struct v3d_simulator_file *sim_file) { + sim_state.last_perfid++; + if (sim_state.last_perfid > sim_file->perfmons_size) { + sim_file->perfmons_size += PERFMONS_ALLOC_SIZE; + sim_file->perfmons = reralloc(sim_file, + sim_file->perfmons, + struct v3d_simulator_perfmon *, + sim_file->perfmons_size); + } + + return sim_state.last_perfid; +} + static struct v3d_simulator_file * v3d_get_simulator_file_for_fd(int fd) { @@ -357,6 +387,46 @@ v3d_simulator_unpin_bos(struct v3d_simulator_file *file, return 0; } +static struct v3d_simulator_perfmon * +v3d_get_simulator_perfmon(int fd, uint32_t perfid) +{ + if (!perfid || perfid > sim_state.last_perfid) + return NULL; + + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + + mtx_lock(&sim_state.mutex); + assert(perfid <= file->perfmons_size); + struct v3d_simulator_perfmon *perfmon = file->perfmons[perfid - 1]; + mtx_unlock(&sim_state.mutex); + + return perfmon; +} + +static void +v3d_simulator_perfmon_switch(int fd, uint32_t perfid) +{ + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + struct v3d_simulator_perfmon *perfmon; + + if (perfid == file->active_perfid) + return; + + perfmon = v3d_get_simulator_perfmon(fd, file->active_perfid); + if (perfmon) + v3d41_simulator_perfmon_stop(sim_state.v3d, + perfmon->ncounters, + perfmon->values); + + perfmon = v3d_get_simulator_perfmon(fd, perfid); + if (perfmon) + v3d41_simulator_perfmon_start(sim_state.v3d, + perfmon->ncounters, + perfmon->counters); + + file->active_perfid = perfid; +} + static int v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit) { @@ -369,6 +439,9 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit) mtx_lock(&sim_state.submit_lock); bin_fd = fd; + + v3d_simulator_perfmon_switch(fd, submit->perfmon_id); + if (sim_state.ver >= 41) v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs); else @@ -402,9 +475,9 @@ void v3d_simulator_open_from_handle(int fd, int handle, uint32_t size) } /** - * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation. + * Simulated ioctl(fd, DRM_V3D_CREATE_BO) implementation. * - * Making a VC5 BO is just a matter of making a corresponding BO on the host. + * Making a V3D BO is just a matter of making a corresponding BO on the host. */ static int v3d_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) @@ -447,7 +520,7 @@ v3d_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args) } /** - * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation. + * Simulated ioctl(fd, DRM_V3D_MMAP_BO) implementation. * * We've already grabbed the mmap offset when we created the sim bo, so just * return it. 
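The perfmon bookkeeping added in this hunk grows the per-file perfmon array in fixed chunks (PERFMONS_ALLOC_SIZE) so repeated DRM_IOCTL_V3D_PERFMON_CREATE calls do not reallocate every time. A minimal standalone sketch of the same scheme, assuming 1-based, never-reused IDs and plain realloc in place of Mesa's reralloc; note the real code keeps last_perfid global in sim_state while the slot array is per file, whereas this sketch collapses both into one struct for brevity:

#include <stdint.h>
#include <stdlib.h>

#define CHUNK 100  /* mirrors PERFMONS_ALLOC_SIZE in the hunk above */

struct perfmon_table {
        void **slots;      /* slot i holds the perfmon with ID i + 1 */
        uint32_t size;     /* allocated slot count */
        uint32_t last_id;  /* last ID handed out; IDs are never reused */
};

/* Return a fresh 1-based ID, growing the table a chunk at a time. */
static uint32_t
perfmon_next_id(struct perfmon_table *t)
{
        t->last_id++;
        if (t->last_id > t->size) {
                t->size += CHUNK;
                t->slots = realloc(t->slots, t->size * sizeof(*t->slots));
        }
        return t->last_id;
}

Because IDs are never reused, a destroyed perfmon simply leaves a NULL slot behind, which is why the lookup helper above checks both the ID range and the slot contents.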
@@ -530,6 +603,8 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args) for (int i = 0; i < args->bo_handle_count; i++) v3d_simulator_copy_in_handle(file, bo_handles[i]); + v3d_simulator_perfmon_switch(fd, args->perfmon_id); + if (sim_state.ver >= 41) ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args, file->gmp->ofs); @@ -542,6 +617,79 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args) return ret; } +static int +v3d_simulator_perfmon_create_ioctl(int fd, struct drm_v3d_perfmon_create *args) +{ + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + + if (args->ncounters == 0 || + args->ncounters > DRM_V3D_MAX_PERF_COUNTERS) + return -EINVAL; + + struct v3d_simulator_perfmon *perfmon = rzalloc(file, + struct v3d_simulator_perfmon); + + perfmon->ncounters = args->ncounters; + for (int i = 0; i < args->ncounters; i++) { + if (args->counters[i] >= V3D_PERFCNT_NUM) { + ralloc_free(perfmon); + return -EINVAL; + } else { + perfmon->counters[i] = args->counters[i]; + } + } + + mtx_lock(&sim_state.mutex); + args->id = perfmons_next_id(file); + file->perfmons[args->id - 1] = perfmon; + mtx_unlock(&sim_state.mutex); + + return 0; +} + +static int +v3d_simulator_perfmon_destroy_ioctl(int fd, struct drm_v3d_perfmon_destroy *args) +{ + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + struct v3d_simulator_perfmon *perfmon = + v3d_get_simulator_perfmon(fd, args->id); + + if (!perfmon) + return -EINVAL; + + mtx_lock(&sim_state.mutex); + file->perfmons[args->id - 1] = NULL; + mtx_unlock(&sim_state.mutex); + + ralloc_free(perfmon); + + return 0; +} + +static int +v3d_simulator_perfmon_get_values_ioctl(int fd, struct drm_v3d_perfmon_get_values *args) +{ + struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd); + + mtx_lock(&sim_state.submit_lock); + + /* Stop the perfmon if it is still active */ + if (args->id == file->active_perfid) + v3d_simulator_perfmon_switch(fd, 0); + + mtx_unlock(&sim_state.submit_lock); + + struct v3d_simulator_perfmon *perfmon = + v3d_get_simulator_perfmon(fd, args->id); + + if (!perfmon) + return -EINVAL; + + memcpy((void *)args->values_ptr, perfmon->values, perfmon->ncounters * sizeof(uint64_t)); + + return 0; +} + int v3d_simulator_ioctl(int fd, unsigned long request, void *args) { @@ -575,6 +723,15 @@ v3d_simulator_ioctl(int fd, unsigned long request, void *args) case DRM_IOCTL_V3D_SUBMIT_CSD: return v3d_simulator_submit_csd_ioctl(fd, args); + case DRM_IOCTL_V3D_PERFMON_CREATE: + return v3d_simulator_perfmon_create_ioctl(fd, args); + + case DRM_IOCTL_V3D_PERFMON_DESTROY: + return v3d_simulator_perfmon_destroy_ioctl(fd, args); + + case DRM_IOCTL_V3D_PERFMON_GET_VALUES: + return v3d_simulator_perfmon_get_values_ioctl(fd, args); + case DRM_IOCTL_GEM_OPEN: case DRM_IOCTL_GEM_FLINK: return drmIoctl(fd, request, args); diff --git a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp index 15db767d5..88e439255 100644 --- a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp +++ b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp @@ -46,7 +46,7 @@ struct v3d_hw *v3d_hw_auto_new(void *in_params) } -uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p) +uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, uint32_t *size, void **p) { return hw->get_mem(size, p); } @@ -56,11 +56,6 @@ bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size) return hw->alloc_mem(min_size) == V3D_HW_ALLOC_SUCCESS; 
} -bool v3d_hw_has_gca(struct v3d_hw *hw) -{ - return hw->has_gca(); -} - uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg) { return hw->read_reg(reg); @@ -89,5 +84,10 @@ v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status)) hw->set_isr(isr); } +uint32_t v3d_hw_get_hub_core() +{ + return V3D_HW_HUB_CORE; +} + } #endif /* USE_V3D_SIMULATOR */ diff --git a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h index b20ea2484..05b2a3361 100644 --- a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h +++ b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h @@ -31,14 +31,14 @@ extern "C" { #endif struct v3d_hw *v3d_hw_auto_new(void *params); -uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p); +uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, uint32_t *size, void **p); bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size); -bool v3d_hw_has_gca(struct v3d_hw *hw); uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg); void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val); void v3d_hw_tick(struct v3d_hw *hw); int v3d_hw_get_version(struct v3d_hw *hw); void v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status)); +uint32_t v3d_hw_get_hub_core(); #ifdef __cplusplus } diff --git a/lib/mesa/src/broadcom/simulator/v3dx_simulator.c b/lib/mesa/src/broadcom/simulator/v3dx_simulator.c index cbf257859..07bbbe2f8 100644 --- a/lib/mesa/src/broadcom/simulator/v3dx_simulator.c +++ b/lib/mesa/src/broadcom/simulator/v3dx_simulator.c @@ -24,7 +24,7 @@ /** * @file v3dx_simulator.c * - * Implements the actual HW interaction betweeh the GL driver's VC5 simulator and the simulator. + * Implements the actual HW interaction betweeh the GL driver's V3D simulator and the simulator. * * The register headers between V3D versions will have conflicting defines, so * all register interactions appear in this file and are compiled per V3D version @@ -41,6 +41,7 @@ #include "v3d_simulator_wrapper.h" #include "util/macros.h" +#include "util/bitscan.h" #include "drm-uapi/v3d_drm.h" #define HW_REGISTER_RO(x) (x) @@ -57,9 +58,6 @@ static void v3d_invalidate_l3(struct v3d_hw *v3d) { - if (!v3d_hw_has_gca(v3d)) - return; - #if V3D_VERSION < 40 uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); @@ -80,6 +78,12 @@ v3d_invalidate_l2c(struct v3d_hw *v3d) V3D_CTL_0_L2CACTL_L2CENA_SET); } +enum v3d_l2t_cache_flush_mode { + V3D_CACHE_FLUSH_MODE_FLUSH, + V3D_CACHE_FLUSH_MODE_CLEAR, + V3D_CACHE_FLUSH_MODE_CLEAN, +}; + /* Invalidates texture L2 cachelines */ static void v3d_invalidate_l2t(struct v3d_hw *v3d) @@ -88,7 +92,23 @@ v3d_invalidate_l2t(struct v3d_hw *v3d) V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); V3D_WRITE(V3D_CTL_0_L2TCACTL, V3D_CTL_0_L2TCACTL_L2TFLS_SET | - (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); + (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); +} + +/* + * Wait for l2tcactl, used for flushes. + * + * FIXME: for a multicore scenario we should pass here the core. All wrapper + * assumes just one core, so would be better to handle that on that case. 
+ */ +static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d, + uint32_t ctrl) +{ + assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET))); + + while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) { + v3d_hw_tick(v3d); + } } /* Flushes dirty texture cachelines from the L1 write combiner */ @@ -98,7 +118,13 @@ v3d_flush_l1td(struct v3d_hw *v3d) V3D_WRITE(V3D_CTL_0_L2TCACTL, V3D_CTL_0_L2TCACTL_TMUWCF_SET); - assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET)); + /* Note: here the kernel (and previous versions of the simulator + * wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We + * understand that it makes more sense to do like this. We need to + * confirm which one is doing it correctly. So far things work fine on + * the simulator this way. + */ + v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET); } /* Flushes dirty texture L2 cachelines */ @@ -109,9 +135,9 @@ v3d_flush_l2t(struct v3d_hw *v3d) V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0); V3D_WRITE(V3D_CTL_0_L2TCACTL, V3D_CTL_0_L2TCACTL_L2TFLS_SET | - (2 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); + (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB)); - assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET)); + v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET); } /* Invalidates the slice caches. These are read-only caches. */ @@ -184,6 +210,8 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, struct drm_v3d_submit_csd *args, uint32_t gmp_ofs) { + int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) & + V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET); g_gmp_ofs = gmp_ofs; v3d_reload_gmp(v3d); @@ -198,9 +226,13 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, /* CFG0 kicks off the job */ V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]); - while (V3D_READ(V3D_CSD_0_STATUS) & - (V3D_CSD_0_STATUS_HAVE_CURRENT_DISPATCH_SET | - V3D_CSD_0_STATUS_HAVE_QUEUED_DISPATCH_SET)) { + /* Now we wait for the dispatch to finish. The safest way is to check + * if NUM_COMPLETED_JOBS has increased. Note that in spite of that + * name that register field is about the number of completed + * dispatches. + */ + while ((V3D_READ(V3D_CSD_0_STATUS) & + V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) { v3d_hw_tick(v3d); } @@ -234,6 +266,9 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH: args->value = 1; return 0; + case DRM_V3D_PARAM_SUPPORTS_PERFMON: + args->value = V3D_VERSION >= 41; + return 0; } if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) { @@ -241,44 +276,139 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, return 0; } - fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n", + fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n", (long long)args->value); abort(); } static struct v3d_hw *v3d_isr_hw; + +static void +v3d_isr_core(struct v3d_hw *v3d, + unsigned core) +{ + /* FIXME: so far we are assuming just one core, and using only the _0_ + * registers. If we add multiple-core on the simulator, we would need + * to pass core as a parameter, and chose the proper registers. 
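Both the CSD change in this hunk and the CL submit change further below replace condition polling with the same snapshot-and-compare pattern on a hardware completion counter (NUM_COMPLETED_JOBS here, the BFC/RFC frame counters for binning and rendering). A generic sketch of the pattern, with hypothetical reg_read/hw_tick stand-ins for the simulator's register accessors:

#include <stdint.h>

/* Hypothetical stand-ins for the simulator wrapper's accessors. */
uint32_t reg_read(uint32_t reg);
void hw_tick(void);

/* Wait for a masked counter field to advance past a value sampled
 * before submission. Comparing against a snapshot (rather than polling
 * a have-queued/have-current busy bit) tolerates the counter wrapping,
 * as long as at most one job completes per comparison. */
static void
wait_for_counter_change(uint32_t status_reg, uint32_t field_mask,
                        uint32_t before)
{
        while ((reg_read(status_reg) & field_mask) == before)
                hw_tick();
}

The caller samples the counter first, writes the kick register (V3D_CSD_0_QUEUED_CFG0 in the hunk above), and only then spins on the masked field changing.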
+ */ + assert(core == 0); + uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS); + V3D_WRITE(V3D_CTL_0_INT_CLR, core_status); + + if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) { + uint32_t size = 256 * 1024; + uint32_t offset = v3d_simulator_get_spill(size); + + v3d_reload_gmp(v3d); + + V3D_WRITE(V3D_PTB_0_BPOA, offset); + V3D_WRITE(V3D_PTB_0_BPOS, size); + return; + } + + if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) { + fprintf(stderr, "GMP violation at 0x%08x\n", + V3D_READ(V3D_GMP_VIO_ADDR)); + abort(); + } else { + fprintf(stderr, + "Unexpected ISR with core status 0x%08x\n", + core_status); + } + abort(); +} + +static void +handle_mmu_interruptions(struct v3d_hw *v3d, + uint32_t hub_status) +{ + bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET; + bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET; + bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET; + + if (!(pti || cap || wrv)) + return; + + const char *client = "?"; + uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID); + uint32_t va_width = 30; + +#if V3D_VERSION >= 41 + static const char *const v3d41_axi_ids[] = { + "L2T", + "PTB", + "PSE", + "TLB", + "CLE", + "TFU", + "MMU", + "GMP", + }; + + axi_id = axi_id >> 5; + if (axi_id < ARRAY_SIZE(v3d41_axi_ids)) + client = v3d41_axi_ids[axi_id]; + + uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); + + va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET) + >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB); +#endif + /* Only the top bits (final number depends on the gen) of the virtual + * address are reported in the MMU VIO_ADDR register. + */ + uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) << + (va_width - 32)); + + /* Difference with the kernal: here were are going to abort after + * logging, so we don't bother with some stuff that the kernel does, + * like restoring the MMU ctrl bits + */ + + fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n", + client, axi_id, (long long) vio_addr, + wrv ? ", write violation" : "", + pti ? ", pte invalid" : "", + cap ? ", cap exceeded" : ""); + + abort(); +} + +static void +v3d_isr_hub(struct v3d_hw *v3d) +{ + uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS); + + /* Acknowledge the interrupts we're handling here */ + V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status); + + if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) { + /* FIXME: we were not able to raise this exception. We let the + * unreachable here, so we could get one if it is raised on + * the future. In any case, note that for this case we would + * only be doing debugging log. 
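The MMU violation decode above reconstructs a usable address from V3D_MMU_VIO_ADDR, which holds only the top bits of the faulting virtual address; how many depends on the generation's VA width (30 bits baseline, plus the VA_WIDTH field read from V3D_MMU_DEBUG_INFO on 4.1+). A worked restatement of that shift, with the register value and width passed in directly:

#include <stdint.h>

static uint64_t
reconstruct_vio_addr(uint32_t vio_reg, uint32_t va_width)
{
        /* e.g. with va_width == 34 the 32-bit register holds
         * addr[33:2], so shifting left by va_width - 32 == 2 restores
         * the address magnitude; the dropped low bits are
         * unrecoverable and read back as zero. */
        return (uint64_t)vio_reg << (va_width - 32);
}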
+ */ + unreachable("TFU Conversion Complete interrupt not handled"); + } + + handle_mmu_interruptions(v3d, hub_status); +} + static void v3d_isr(uint32_t hub_status) { struct v3d_hw *v3d = v3d_isr_hw; + uint32_t mask = hub_status; - /* Check the per-core bits */ - if (hub_status & (1 << 0)) { - uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS); - V3D_WRITE(V3D_CTL_0_INT_CLR, core_status); - - if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) { - uint32_t size = 256 * 1024; - uint32_t offset = v3d_simulator_get_spill(size); - - v3d_reload_gmp(v3d); - - V3D_WRITE(V3D_PTB_0_BPOA, offset); - V3D_WRITE(V3D_PTB_0_BPOS, size); - return; - } - - if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) { - fprintf(stderr, "GMP violation at 0x%08x\n", - V3D_READ(V3D_GMP_VIO_ADDR)); - abort(); - } else { - fprintf(stderr, - "Unexpected ISR with core status 0x%08x\n", - core_status); - } - abort(); + /* Check the hub_status bits */ + while (mask) { + unsigned core = u_bit_scan(&mask); + + if (core == v3d_hw_get_hub_core()) + v3d_isr_hub(v3d); + else + v3d_isr_core(v3d, core); } return; @@ -299,11 +429,24 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d) V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); #endif + /* FIXME: the kernel captures some additional core interrupts here, + * for tracing. Perhaps we should evaluate to do the same here and add + * some debug options. + */ uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET | V3D_CTL_0_INT_STS_INT_OUTOMEM_SET); V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts); V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts); + uint32_t hub_interrupts = + (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET | /* write violation */ + V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET | /* page table invalid */ + V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */ + V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */ + + V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts); + V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts); + v3d_isr_hw = v3d; v3d_hw_set_isr(v3d, v3d_isr); } @@ -313,6 +456,12 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, struct drm_v3d_submit_cl *submit, uint32_t gmp_ofs) { + int last_bfc = (V3D_READ(V3D_CLE_0_BFC) & + V3D_CLE_0_BFC_BMFCT_SET); + + int last_rfc = (V3D_READ(V3D_CLE_0_RFC) & + V3D_CLE_0_RFC_RMFCT_SET); + g_gmp_ofs = gmp_ofs; v3d_reload_gmp(v3d); @@ -336,8 +485,8 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, * scheduler implements this using the GPU scheduler blocking on the * bin fence completing. (We don't use HW semaphores). 
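The reworked v3d_isr() above treats the incoming status word as a bitmask and dispatches one handler per set bit, with one bit reserved for the hub (v3d_hw_get_hub_core()) and the rest treated as per-core interrupts. A self-contained sketch of that dispatch loop; pop_lowest_bit stands in for Mesa's u_bit_scan() and assumes a GCC/Clang builtin, and both handlers are hypothetical:

#include <stdint.h>

/* Pop the lowest set bit of *mask and return its index. */
static unsigned
pop_lowest_bit(uint32_t *mask)
{
        unsigned i = (unsigned)__builtin_ctz(*mask);
        *mask &= *mask - 1;
        return i;
}

static void
isr_dispatch(uint32_t status, unsigned hub_bit,
             void (*hub_handler)(void),
             void (*core_handler)(unsigned core))
{
        uint32_t mask = status;
        while (mask) {
                unsigned bit = pop_lowest_bit(&mask);
                if (bit == hub_bit)
                        hub_handler();
                else
                        core_handler(bit);
        }
}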
*/ - while (V3D_READ(V3D_CLE_0_CT0CA) != - V3D_READ(V3D_CLE_0_CT0EA)) { + while ((V3D_READ(V3D_CLE_0_BFC) & + V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) { v3d_hw_tick(v3d); } @@ -346,12 +495,55 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start); V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end); - while (V3D_READ(V3D_CLE_0_CT1CA) != - V3D_READ(V3D_CLE_0_CT1EA) || - V3D_READ(V3D_CLE_1_CT1CA) != - V3D_READ(V3D_CLE_1_CT1EA)) { + while ((V3D_READ(V3D_CLE_0_RFC) & + V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) { v3d_hw_tick(v3d); } } +#if V3D_VERSION >= 41 +#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x)) +#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x)) +#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8) +#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \ + V3D_PCTR_0_SRC_N_SHIFT(x) + 6)) +#endif + +void +v3dX(simulator_perfmon_start)(struct v3d_hw *v3d, + uint32_t ncounters, + uint8_t *events) +{ +#if V3D_VERSION >= 41 + int i, j; + uint32_t source; + uint32_t mask = BITFIELD_RANGE(0, ncounters); + + for (i = 0; i < ncounters; i+=4) { + source = i / 4; + uint32_t channels = 0; + for (j = 0; j < 4 && (i + j) < ncounters; j++) + channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j); + V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels); + } + V3D_WRITE(V3D_PCTR_0_CLR, mask); + V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask); + V3D_WRITE(V3D_PCTR_0_EN, mask); +#endif +} + +void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d, + uint32_t ncounters, + uint64_t *values) +{ +#if V3D_VERSION >= 41 + int i; + + for (i = 0; i < ncounters; i++) + values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i)); + + V3D_WRITE(V3D_PCTR_0_EN, 0); +#endif +} + #endif /* USE_V3D_SIMULATOR */ diff --git a/lib/mesa/src/broadcom/simulator/v3dx_simulator.h b/lib/mesa/src/broadcom/simulator/v3dx_simulator.h index 2c623d79a..145ae59c2 100644 --- a/lib/mesa/src/broadcom/simulator/v3dx_simulator.h +++ b/lib/mesa/src/broadcom/simulator/v3dx_simulator.h @@ -44,3 +44,9 @@ int v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d, int v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, struct drm_v3d_submit_csd *args, uint32_t gmp_offset); +void v3dX(simulator_perfmon_start)(struct v3d_hw *v3d, + uint32_t ncounters, + uint8_t *events); +void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d, + uint32_t ncounters, + uint64_t *values); diff --git a/lib/mesa/src/broadcom/vulkan/meson.build b/lib/mesa/src/broadcom/vulkan/meson.build index 88bee8c13..a1cc58637 100644 --- a/lib/mesa/src/broadcom/vulkan/meson.build +++ b/lib/mesa/src/broadcom/vulkan/meson.build @@ -25,32 +25,11 @@ v3dv_entrypoints = custom_target( command : [ prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'v3dv', + '--device-prefix', 'ver42', ], depend_files : vk_entrypoints_gen_depend_files, ) -v3dv_extensions_c = custom_target( - 'v3dv_extensions.c', - input : ['v3dv_extensions_gen.py', vk_api_xml], - output : 'v3dv_extensions.c', - command : [ - prog_python, '@INPUT0@', '--xml', '@INPUT1@', - '--out-c', '@OUTPUT@', - ], - depend_files : [files('v3dv_extensions.py'), vk_extensions_gen], -) - -v3dv_extensions_h = custom_target( - 'v3dv_extensions.h', - input : ['v3dv_extensions_gen.py', vk_api_xml], - output : 'v3dv_extensions.h', - command : [ - prog_python, '@INPUT0@', '--xml', '@INPUT1@', - '--out-h', '@OUTPUT@', - ], - depend_files : [files('v3dv_extensions.py'), vk_extensions_gen], -) - libv3dv_files = files( 'v3dv_bo.c', 'v3dv_cl.c', @@ 
-71,15 +50,27 @@ libv3dv_files = files( 'v3dv_query.c', 'v3dv_queue.c', 'v3dv_uniforms.c', - 'v3dv_util.c', 'v3dv_wsi.c', - 'v3d_tiling.c', +) + +files_per_version = files( + 'v3dvx_cmd_buffer.c', + 'v3dvx_descriptor_set.c', + 'v3dvx_device.c', + 'v3dvx_formats.c', + 'v3dvx_image.c', + 'v3dvx_pipeline.c', + 'v3dvx_meta_common.c', + 'v3dvx_pipeline.c', + 'v3dvx_queue.c', ) # The vulkan driver only supports version >= 42, which is the version present in # Rpi4. We need to explicitly set it as we are reusing pieces from the GL v3d # driver. -v3dv_flags = ['-DV3D_VERSION=42'] +v3d_versions = ['42'] + +v3dv_flags = [] dep_v3dv3 = dependency('v3dv3', required : false) if dep_v3dv3.found() @@ -94,39 +85,43 @@ v3dv_deps = [ idep_nir, idep_nir_headers, idep_vulkan_util, + idep_vulkan_wsi, ] if with_platform_x11 v3dv_deps += dep_xcb_dri3 - v3dv_flags += [ - '-DVK_USE_PLATFORM_XCB_KHR', - '-DVK_USE_PLATFORM_XLIB_KHR', - ] - libv3dv_files += files('v3dv_wsi_x11.c') endif if with_platform_wayland v3dv_deps += [dep_wayland_client, dep_wl_protocols] - v3dv_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR' - libv3dv_files += files('v3dv_wsi_wayland.c') libv3dv_files += [wayland_drm_client_protocol_h, wayland_drm_protocol_c] endif -if system_has_kms_drm and not with_platform_android - v3dv_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR' - libv3dv_files += files('v3dv_wsi_display.c') -endif +per_version_libs = [] +foreach ver : v3d_versions + per_version_libs += static_library( + 'v3dv-v' + ver, + [files_per_version, v3d_xml_pack, v3dv_entrypoints[0]], + include_directories : [ + inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom, + inc_compiler, inc_util, + ], + c_args : [v3dv_flags, '-DV3D_VERSION=' + ver], + gnu_symbol_visibility : 'hidden', + dependencies : [v3dv_deps], +) +endforeach libvulkan_broadcom = shared_library( 'vulkan_broadcom', - [libv3dv_files, v3dv_entrypoints, v3dv_extensions_c, v3dv_extensions_h, sha1_h], + [libv3dv_files, v3dv_entrypoints, sha1_h], include_directories : [ - inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom, inc_compiler, inc_util, inc_vulkan_wsi, + inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom, inc_compiler, inc_util, ], link_with : [ libbroadcom_cle, libbroadcom_v3d, - libvulkan_wsi, + per_version_libs, ], dependencies : v3dv_deps, c_args : v3dv_flags, @@ -150,14 +145,15 @@ endif broadcom_icd = custom_target( 'broadcom_icd', - input : 'v3dv_icd.py', + input : [vk_icd_gen, vk_api_xml], output : 'broadcom_icd.@0@.json'.format(host_machine.cpu()), command : [ - prog_python, '@INPUT@', - '--lib-path', join_paths(get_option('prefix'), get_option('libdir')), + prog_python, '@INPUT0@', + '--api-version', '1.0', '--xml', '@INPUT1@', + '--lib-path', join_paths(get_option('prefix'), get_option('libdir'), + 'libvulkan_broadcom.so'), '--out', '@OUTPUT@', ], - depend_files : files('v3dv_extensions.py'), build_by_default : true, install_dir : with_vulkan_icd_dir, install : true, diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_bo.c b/lib/mesa/src/broadcom/vulkan/v3dv_bo.c index 459032990..71679ceec 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_bo.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_bo.c @@ -184,6 +184,7 @@ v3dv_bo_init(struct v3dv_bo *bo, bool private) { bo->handle = handle; + bo->handle_bit = 1ull << (handle % 64); bo->size = size; bo->offset = offset; bo->map = NULL; diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_bo.h b/lib/mesa/src/broadcom/vulkan/v3dv_bo.h index fd6754c48..ab2b8c735 100644 --- 
a/lib/mesa/src/broadcom/vulkan/v3dv_bo.h +++ b/lib/mesa/src/broadcom/vulkan/v3dv_bo.h @@ -30,6 +30,7 @@ struct v3dv_bo { struct list_head list_link; uint32_t handle; + uint64_t handle_bit; uint32_t size; uint32_t offset; diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c index e9674b6c5..ed11f53c4 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c @@ -22,6 +22,13 @@ */ #include "v3dv_private.h" + +/* We don't expect that the packets we use in this file change across hw + * versions, so we just explicitly set the V3D_VERSION and include v3dx_pack + * here + */ +#define V3D_VERSION 33 +#include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" void @@ -72,10 +79,10 @@ cl_alloc_bo(struct v3dv_cl *cl, uint32_t space, bool use_branch) cl_emit(cl, BRANCH, branch) { branch.address = v3dv_cl_address(bo, 0); } + } else { + v3dv_job_add_bo_unchecked(cl->job, bo); } - v3dv_job_add_bo(cl->job, bo); - cl->bo = bo; cl->base = cl->bo->map; cl->size = cl->bo->size; diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.h b/lib/mesa/src/broadcom/vulkan/v3dv_cl.h index a6a38b4aa..68d5acd45 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.h +++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.h @@ -46,6 +46,16 @@ struct v3dv_cl_reloc { uint32_t offset; }; +static inline void +pack_emit_reloc(void *cl, const void *reloc) {} + +#define __gen_user_data struct v3dv_cl +#define __gen_address_type struct v3dv_cl_reloc +#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \ + (reloc)->offset) +#define __gen_emit_reloc cl_pack_emit_reloc +#define __gen_unpack_address(cl, s, e) __unpack_address(cl, s, e) + struct v3dv_cl { void *base; struct v3dv_job *job; @@ -194,7 +204,7 @@ void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space); * Helper function called by the XML-generated pack functions for filling in * an address field in shader records. * - * Since we have a private address space as of VC5, our BOs can have lifelong + * Since we have a private address space as of V3D, our BOs can have lifelong * offsets, and all the kernel needs to know is which BOs need to be paged in * for this exec. 
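The new handle_bit/bo_handle_mask pair introduced in these BO hunks, and used by the v3dv_job_add_bo change further below, is a one-word Bloom filter over GEM handles: a clear bit at position (handle % 64) proves the BO was never added, so the hash-set search can be skipped; a set bit may be a collision and must still be confirmed in the set. A sketch of the membership test with the job struct pared down to what the filter needs:

#include <stdbool.h>
#include <stdint.h>

struct job_bos {
        uint64_t handle_mask;  /* bit (handle % 64) set per added BO */
        /* ... hash set of BOs elided ... */
};

/* False means definitely absent; true means "check the hash set". */
static bool
job_may_contain(const struct job_bos *job, uint32_t handle)
{
        return (job->handle_mask & (1ull << (handle % 64))) != 0;
}

The unchecked add variant in the hunk below exploits the other direction: when the caller has just created the BO it cannot already be in the job, so both the filter test and the set search are skipped.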
*/ @@ -213,7 +223,7 @@ cl_pack_emit_reloc(struct v3dv_cl *cl, const struct v3dv_cl_reloc *reloc) #define cl_emit_prepacked(cl, packet) \ cl_emit_prepacked_sized(cl, packet, sizeof(*(packet))) -#define v3dv_pack(packed, packet, name) \ +#define v3dvx_pack(packed, packet, name) \ for (struct cl_packet_struct(packet) name = { \ cl_packet_header(packet) \ }, \ diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c index 6cb9de28a..ff914e048 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -22,8 +22,6 @@ */ #include "v3dv_private.h" -#include "broadcom/cle/v3dx_pack.h" -#include "util/half_float.h" #include "util/u_pack_color.h" #include "vk_format_info.h" #include "vk_util.h" @@ -57,6 +55,7 @@ const struct v3dv_dynamic_state default_dynamic_state = { .slope_factor = 0.0f, }, .line_width = 1.0f, + .color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1, }; void @@ -65,17 +64,26 @@ v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo) if (!bo) return; - if (_mesa_set_search(job->bos, bo)) - return; + if (job->bo_handle_mask & bo->handle_bit) { + if (_mesa_set_search(job->bos, bo)) + return; + } _mesa_set_add(job->bos, bo); job->bo_count++; + job->bo_handle_mask |= bo->handle_bit; } -static void -cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer); +void +v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo) +{ + assert(bo); + _mesa_set_add(job->bos, bo); + job->bo_count++; + job->bo_handle_mask |= bo->handle_bit; +} -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -90,7 +98,7 @@ v3dv_CreateCommandPool(VkDevice _device, pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool), VK_OBJECT_TYPE_COMMAND_POOL); if (pool == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); if (pAllocator) pool->alloc = *pAllocator; @@ -114,7 +122,7 @@ cmd_buffer_init(struct v3dv_cmd_buffer *cmd_buffer, * buffer reset that would reset the loader's dispatch table for the * command buffer, and any other relevant info from vk_object_base */ - const uint32_t base_size = sizeof(struct vk_object_base); + const uint32_t base_size = sizeof(struct vk_command_buffer); uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size; memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size); @@ -142,12 +150,20 @@ cmd_buffer_create(struct v3dv_device *device, VkCommandBuffer *pCommandBuffer) { struct v3dv_cmd_buffer *cmd_buffer; - cmd_buffer = vk_object_zalloc(&device->vk, - &pool->alloc, - sizeof(*cmd_buffer), - VK_OBJECT_TYPE_COMMAND_BUFFER); + cmd_buffer = vk_zalloc2(&device->vk.alloc, + &pool->alloc, + sizeof(*cmd_buffer), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (cmd_buffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result; + result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer); + return result; + } cmd_buffer_init(cmd_buffer, device, pool, level); @@ -332,18 +348,9 @@ cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer) { list_del(&cmd_buffer->pool_link); cmd_buffer_free_resources(cmd_buffer); - vk_object_free(&cmd_buffer->device->vk, 
&cmd_buffer->pool->alloc, cmd_buffer); -} - -void -v3dv_job_emit_binning_flush(struct v3dv_job *job) -{ - assert(job); - - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH)); - v3dv_return_if_oom(NULL, job); - - cl_emit(&job->bcl, FLUSH, flush); + vk_command_buffer_finish(&cmd_buffer->vk); + vk_free2(&cmd_buffer->device->vk.alloc, &cmd_buffer->pool->alloc, + cmd_buffer); } static bool @@ -402,6 +409,13 @@ cmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx]; struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx]; + /* Don't merge if the subpasses have different view masks, since in that + * case the framebuffer setup is different and we need to emit different + * RCLs. + */ + if (subpass->view_mask != prev_subpass->view_mask) + return false; + /* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED, * we need to check that for each subpass all its used attachments are * used by the other subpass. @@ -517,6 +531,7 @@ v3dv_job_start_frame(struct v3dv_job *job, uint32_t width, uint32_t height, uint32_t layers, + bool allocate_tile_state_for_all_layers, uint32_t render_target_count, uint8_t max_internal_bpp, bool msaa) @@ -532,6 +547,16 @@ v3dv_job_start_frame(struct v3dv_job *job, v3dv_cl_ensure_space_with_branch(&job->bcl, 256); v3dv_return_if_oom(NULL, job); + /* We only need to allocate tile state for all layers if the binner + * writes primitives to layers other than the first. This can only be + * done using layered rendering (writing gl_Layer from a geometry shader), + * so for other cases of multilayered framebuffers (typically with + * meta copy/clear operations) that won't use layered rendering, we only + * need one layer worth of of tile state for the binner. + */ + if (!allocate_tile_state_for_all_layers) + layers = 1; + /* The PTB will request the tile alloc initial size per tile at start * of tile binning. */ @@ -561,7 +586,7 @@ v3dv_job_start_frame(struct v3dv_job *job, return; } - v3dv_job_add_bo(job, job->tile_alloc); + v3dv_job_add_bo_unchecked(job, job->tile_alloc); const uint32_t tsda_per_tile_size = 256; const uint32_t tile_state_size = tiling->layers * @@ -574,33 +599,12 @@ v3dv_job_start_frame(struct v3dv_job *job, return; } - v3dv_job_add_bo(job, job->tile_state); + v3dv_job_add_bo_unchecked(job, job->tile_state); - /* This must go before the binning mode configuration. It is - * required for layered framebuffers to work. - */ - cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) { - config.number_of_layers = layers; - } - - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { - config.width_in_pixels = tiling->width; - config.height_in_pixels = tiling->height; - config.number_of_render_targets = MAX2(tiling->render_target_count, 1); - config.multisample_mode_4x = tiling->msaa; - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; - } - - /* There's definitely nothing in the VCD cache we want. */ - cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); - - /* "Binning mode lists must have a Start Tile Binning item (6) after - * any prefix state data before the binning list proper starts." 
- */ - cl_emit(&job->bcl, START_TILE_BINNING, bin); + v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers); - job->ez_state = VC5_EZ_UNDECIDED; - job->first_ez_state = VC5_EZ_UNDECIDED; + job->ez_state = V3D_EZ_UNDECIDED; + job->first_ez_state = V3D_EZ_UNDECIDED; } static void @@ -617,19 +621,9 @@ cmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer) * any RCL commands of its own. */ if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0) - cmd_buffer_emit_render_pass_rcl(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_render_pass_rcl)(cmd_buffer); - v3dv_job_emit_binning_flush(cmd_buffer->state.job); -} - -static void -cmd_buffer_end_render_pass_secondary(struct v3dv_cmd_buffer *cmd_buffer) -{ - assert(cmd_buffer->state.job); - v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl, - cl_packet_length(RETURN_FROM_SUB_LIST)); - v3dv_return_if_oom(cmd_buffer, NULL); - cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret); + v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job); } struct v3dv_job * @@ -716,7 +710,7 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer) cmd_buffer_end_render_pass_frame(cmd_buffer); } else { assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY); - cmd_buffer_end_render_pass_secondary(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_end_render_pass_secondary)(cmd_buffer); } } @@ -803,7 +797,7 @@ v3dv_job_init(struct v3dv_job *job, v3dv_cl_init(job, &job->indirect); - if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH) + if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)) job->always_flush = true; } @@ -821,6 +815,7 @@ v3dv_job_init(struct v3dv_job *job, * bits. */ cmd_buffer->state.dirty = ~0; + cmd_buffer->state.dirty_descriptor_stages = ~0; /* Honor inheritance of occlussion queries in secondaries if requested */ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && @@ -879,6 +874,7 @@ static VkResult cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer, VkCommandBufferResetFlags flags) { + vk_command_buffer_reset(&cmd_buffer->vk); if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) { struct v3dv_device *device = cmd_buffer->device; struct v3dv_cmd_pool *pool = cmd_buffer->pool; @@ -902,7 +898,7 @@ cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo, VkCommandBuffer *pCommandBuffers) @@ -930,7 +926,7 @@ v3dv_AllocateCommandBuffers(VkDevice _device, return result; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, @@ -946,7 +942,7 @@ v3dv_FreeCommandBuffers(VkDevice device, } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool, const VkAllocationCallbacks *pAllocator) @@ -965,7 +961,7 @@ v3dv_DestroyCommandPool(VkDevice _device, vk_object_free(&device->vk, pAllocator, pool); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_TrimCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags) @@ -1026,34 +1022,37 @@ cmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_image_view *src_iview = fb->attachments[src_attachment_idx]; struct v3dv_image_view *dst_iview = fb->attachments[dst_attachment_idx]; - VkImageResolve region = { + VkImageResolve2KHR region = { + .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR, .srcSubresource = { 
VK_IMAGE_ASPECT_COLOR_BIT, - src_iview->base_level, - src_iview->first_layer, - src_iview->last_layer - src_iview->first_layer + 1, + src_iview->vk.base_mip_level, + src_iview->vk.base_array_layer, + src_iview->vk.layer_count, }, .srcOffset = { 0, 0, 0 }, .dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, - dst_iview->base_level, - dst_iview->first_layer, - dst_iview->last_layer - dst_iview->first_layer + 1, + dst_iview->vk.base_mip_level, + dst_iview->vk.base_array_layer, + dst_iview->vk.layer_count, }, .dstOffset = { 0, 0, 0 }, - .extent = src_iview->image->extent, + .extent = src_iview->vk.image->extent, }; - VkImage src_image_handle = - v3dv_image_to_handle((struct v3dv_image *) src_iview->image); - VkImage dst_image_handle = - v3dv_image_to_handle((struct v3dv_image *) dst_iview->image); - v3dv_CmdResolveImage(cmd_buffer_handle, - src_image_handle, - VK_IMAGE_LAYOUT_GENERAL, - dst_image_handle, - VK_IMAGE_LAYOUT_GENERAL, - 1, ®ion); + struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image; + struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image; + VkResolveImageInfo2KHR resolve_info = { + .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR, + .srcImage = v3dv_image_to_handle(src_image), + .srcImageLayout = VK_IMAGE_LAYOUT_GENERAL, + .dstImage = v3dv_image_to_handle(dst_image), + .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL, + .regionCount = 1, + .pRegions = ®ion, + }; + v3dv_CmdResolveImage2KHR(cmd_buffer_handle, &resolve_info); } cmd_buffer->state.framebuffer = restore_fb; @@ -1116,7 +1115,7 @@ cmd_buffer_begin_render_pass_secondary( return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) { @@ -1149,7 +1148,7 @@ v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags) { @@ -1157,7 +1156,7 @@ v3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer, return cmd_buffer_reset(cmd_buffer, flags); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_ResetCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags) @@ -1176,22 +1175,6 @@ v3dv_ResetCommandPool(VkDevice device, } static void -emit_clip_window(struct v3dv_job *job, const VkRect2D *rect) -{ - assert(job); - - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW)); - v3dv_return_if_oom(NULL, job); - - cl_emit(&job->bcl, CLIP_WINDOW, clip) { - clip.clip_window_left_pixel_coordinate = rect->offset.x; - clip.clip_window_bottom_pixel_coordinate = rect->offset.y; - clip.clip_window_width_in_pixels = rect->extent.width; - clip.clip_window_height_in_pixels = rect->extent.height; - } -} - -static void cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer) { /* Render areas and scissor/viewport are only relevant inside render passes, @@ -1206,7 +1189,7 @@ cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer) */ assert(cmd_buffer->state.framebuffer); cmd_buffer->state.tile_aligned_render_area = - v3dv_subpass_area_is_tile_aligned(rect, + v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, rect, cmd_buffer->state.framebuffer, cmd_buffer->state.pass, cmd_buffer->state.subpass_idx); @@ -1218,42 +1201,6 @@ cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer) } } -void -v3dv_get_hw_clear_color(const VkClearColorValue *color, - uint32_t internal_type, - uint32_t internal_size, - 
uint32_t *hw_color) -{ - union util_color uc; - switch (internal_type) { - case V3D_INTERNAL_TYPE_8: - util_pack_color(color->float32, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); - memcpy(hw_color, uc.ui, internal_size); - break; - case V3D_INTERNAL_TYPE_8I: - case V3D_INTERNAL_TYPE_8UI: - hw_color[0] = ((color->uint32[0] & 0xff) | - (color->uint32[1] & 0xff) << 8 | - (color->uint32[2] & 0xff) << 16 | - (color->uint32[3] & 0xff) << 24); - break; - case V3D_INTERNAL_TYPE_16F: - util_pack_color(color->float32, PIPE_FORMAT_R16G16B16A16_FLOAT, &uc); - memcpy(hw_color, uc.ui, internal_size); - break; - case V3D_INTERNAL_TYPE_16I: - case V3D_INTERNAL_TYPE_16UI: - hw_color[0] = ((color->uint32[0] & 0xffff) | color->uint32[1] << 16); - hw_color[1] = ((color->uint32[2] & 0xffff) | color->uint32[3] << 16); - break; - case V3D_INTERNAL_TYPE_32F: - case V3D_INTERNAL_TYPE_32I: - case V3D_INTERNAL_TYPE_32UI: - memcpy(hw_color, color->uint32, internal_size); - break; - } -} - static void cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer, uint32_t attachment_idx, @@ -1265,18 +1212,19 @@ cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer, &cmd_buffer->state.pass->attachments[attachment_idx]; uint32_t internal_type, internal_bpp; - const struct v3dv_format *format = v3dv_get_format(attachment->desc.format); - v3dv_get_internal_type_bpp_for_output_format(format->rt_type, - &internal_type, - &internal_bpp); + const struct v3dv_format *format = + v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format); + + v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format) + (format->rt_type, &internal_type, &internal_bpp); uint32_t internal_size = 4 << internal_bpp; struct v3dv_cmd_buffer_attachment_state *attachment_state = &cmd_buffer->state.attachments[attachment_idx]; - v3dv_get_hw_clear_color(color, internal_type, internal_size, - &attachment_state->clear_value.color[0]); + v3dv_X(cmd_buffer->device, get_hw_clear_color) + (color, internal_type, internal_size, &attachment_state->clear_value.color[0]); attachment_state->vk_clear_value.color = *color; } @@ -1370,7 +1318,7 @@ cmd_buffer_ensure_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffe assert(state->attachment_alloc_count >= pass->attachment_count); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo *pRenderPassBegin, VkSubpassContents contents) @@ -1394,7 +1342,7 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer, * to emit a new clip window to constraint it to the render area. 
*/ uint32_t min_render_x = state->render_area.offset.x; - uint32_t min_render_y = state->render_area.offset.x; + uint32_t min_render_y = state->render_area.offset.y; uint32_t max_render_x = min_render_x + state->render_area.extent.width - 1; uint32_t max_render_y = min_render_y + state->render_area.extent.height - 1; uint32_t min_clip_x = state->clip_window.offset.x; @@ -1410,7 +1358,7 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer, v3dv_cmd_buffer_subpass_start(cmd_buffer, 0); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); @@ -1426,884 +1374,6 @@ v3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) v3dv_cmd_buffer_subpass_start(cmd_buffer, state->subpass_idx + 1); } -void -v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer, - int rt, - uint32_t *rt_bpp, - uint32_t *rt_type, - uint32_t *rt_clamp) -{ - const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - - assert(state->subpass_idx < state->pass->subpass_count); - const struct v3dv_subpass *subpass = - &state->pass->subpasses[state->subpass_idx]; - - if (rt >= subpass->color_count) - return; - - struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt]; - const uint32_t attachment_idx = attachment->attachment; - if (attachment_idx == VK_ATTACHMENT_UNUSED) - return; - - const struct v3dv_framebuffer *framebuffer = state->framebuffer; - assert(attachment_idx < framebuffer->attachment_count); - struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; - assert(iview->aspects & VK_IMAGE_ASPECT_COLOR_BIT); - - *rt_bpp = iview->internal_bpp; - *rt_type = iview->internal_type; - *rt_clamp =vk_format_is_int(iview->vk_format) ? - V3D_RENDER_TARGET_CLAMP_INT : V3D_RENDER_TARGET_CLAMP_NONE; -} - -static void -cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl *cl, - struct v3dv_image_view *iview, - uint32_t layer, - uint32_t buffer) -{ - const struct v3dv_image *image = iview->image; - const struct v3d_resource_slice *slice = &image->slices[iview->base_level]; - uint32_t layer_offset = v3dv_layer_offset(image, - iview->base_level, - iview->first_layer + layer); - - cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { - load.buffer_to_load = buffer; - load.address = v3dv_cl_address(image->mem->bo, layer_offset); - - load.input_image_format = iview->format->rt_type; - load.r_b_swap = iview->swap_rb; - load.memory_format = slice->tiling; - - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - load.height_in_ub_or_stride = - slice->padded_height_of_output_image_in_uif_blocks; - } else if (slice->tiling == VC5_TILING_RASTER) { - load.height_in_ub_or_stride = slice->stride; - } - - if (image->samples > VK_SAMPLE_COUNT_1_BIT) - load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else - load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - } -} - -static bool -check_needs_load(const struct v3dv_cmd_buffer_state *state, - VkImageAspectFlags aspect, - uint32_t att_first_subpass_idx, - VkAttachmentLoadOp load_op) -{ - /* We call this with image->aspects & aspect, so 0 means the aspect we are - * testing does not exist in the image. - */ - if (!aspect) - return false; - - /* Attachment load operations apply on the first subpass that uses the - * attachment, otherwise we always need to load. 
- */ - if (state->job->first_subpass > att_first_subpass_idx) - return true; - - /* If the job is continuing a subpass started in another job, we always - * need to load. - */ - if (state->job->is_subpass_continue) - return true; - - /* If the area is not aligned to tile boundaries, we always need to load */ - if (!state->tile_aligned_render_area) - return true; - - /* The attachment load operations must be LOAD */ - return load_op == VK_ATTACHMENT_LOAD_OP_LOAD; -} - -static bool -check_needs_clear(const struct v3dv_cmd_buffer_state *state, - VkImageAspectFlags aspect, - uint32_t att_first_subpass_idx, - VkAttachmentLoadOp load_op, - bool do_clear_with_draw) -{ - /* We call this with image->aspects & aspect, so 0 means the aspect we are - * testing does not exist in the image. - */ - if (!aspect) - return false; - - /* If the aspect needs to be cleared with a draw call then we won't emit - * the clear here. - */ - if (do_clear_with_draw) - return false; - - /* If this is resuming a subpass started with another job, then attachment - * load operations don't apply. - */ - if (state->job->is_subpass_continue) - return false; - - /* If the render area is not aligned to tile boudaries we can't use the - * TLB for a clear. - */ - if (!state->tile_aligned_render_area) - return false; - - /* If this job is running in a subpass other than the first subpass in - * which this attachment is used then attachment load operations don't apply. - */ - if (state->job->first_subpass != att_first_subpass_idx) - return false; - - /* The attachment load operation must be CLEAR */ - return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR; -} - -static bool -check_needs_store(const struct v3dv_cmd_buffer_state *state, - VkImageAspectFlags aspect, - uint32_t att_last_subpass_idx, - VkAttachmentStoreOp store_op) -{ - /* We call this with image->aspects & aspect, so 0 means the aspect we are - * testing does not exist in the image. - */ - if (!aspect) - return false; - - /* Attachment store operations only apply on the last subpass where the - * attachment is used, in other subpasses we always need to store. - */ - if (state->subpass_idx < att_last_subpass_idx) - return true; - - /* Attachment store operations only apply on the last job we emit on the the - * last subpass where the attachment is used, otherwise we always need to - * store. - */ - if (!state->job->is_subpass_finish) - return true; - - /* The attachment store operation must be STORE */ - return store_op == VK_ATTACHMENT_STORE_OP_STORE; -} - -static void -cmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl *cl, - uint32_t layer) -{ - const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - const struct v3dv_framebuffer *framebuffer = state->framebuffer; - const struct v3dv_render_pass *pass = state->pass; - const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; - - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t attachment_idx = subpass->color_attachments[i].attachment; - - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - - const struct v3dv_render_pass_attachment *attachment = - &state->pass->attachments[attachment_idx]; - - /* According to the Vulkan spec: - * - * "The load operation for each sample in an attachment happens before - * any recorded command which accesses the sample in the first subpass - * where the attachment is used." 
- * - * If the load operation is CLEAR, we must only clear once on the first - * subpass that uses the attachment (and in that case we don't LOAD). - * After that, we always want to load so we don't lose any rendering done - * by a previous subpass to the same attachment. We also want to load - * if the current job is continuing subpass work started by a previous - * job, for the same reason. - * - * If the render area is not aligned to tile boundaries then we have - * tiles which are partially covered by it. In this case, we need to - * load the tiles so we can preserve the pixels that are outside the - * render area for any such tiles. - */ - bool needs_load = check_needs_load(state, - VK_IMAGE_ASPECT_COLOR_BIT, - attachment->first_subpass, - attachment->desc.loadOp); - if (needs_load) { - struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; - cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview, - layer, RENDER_TARGET_0 + i); - } - } - - uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { - const struct v3dv_render_pass_attachment *ds_attachment = - &state->pass->attachments[ds_attachment_idx]; - - const VkImageAspectFlags ds_aspects = - vk_format_aspects(ds_attachment->desc.format); - - const bool needs_depth_load = - check_needs_load(state, - ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.loadOp); - - const bool needs_stencil_load = - check_needs_load(state, - ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.stencilLoadOp); - - if (needs_depth_load || needs_stencil_load) { - struct v3dv_image_view *iview = - framebuffer->attachments[ds_attachment_idx]; - /* From the Vulkan spec: - * - * "When an image view of a depth/stencil image is used as a - * depth/stencil framebuffer attachment, the aspectMask is ignored - * and both depth and stencil image subresources are used." - * - * So we ignore the aspects from the subresource range of the image - * view for the depth/stencil attachment, but we still need to restrict - * the to aspects compatible with the render pass and the image. 
- */ - const uint32_t zs_buffer = - v3dv_zs_buffer(needs_depth_load, needs_stencil_load); - cmd_buffer_render_pass_emit_load(cmd_buffer, cl, - iview, layer, zs_buffer); - } - } - - cl_emit(cl, END_OF_LOADS, end); -} - -static void -cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl *cl, - uint32_t attachment_idx, - uint32_t layer, - uint32_t buffer, - bool clear, - bool is_multisample_resolve) -{ - const struct v3dv_image_view *iview = - cmd_buffer->state.framebuffer->attachments[attachment_idx]; - const struct v3dv_image *image = iview->image; - const struct v3d_resource_slice *slice = &image->slices[iview->base_level]; - uint32_t layer_offset = v3dv_layer_offset(image, - iview->base_level, - iview->first_layer + layer); - - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = buffer; - store.address = v3dv_cl_address(image->mem->bo, layer_offset); - store.clear_buffer_being_stored = clear; - - store.output_image_format = iview->format->rt_type; - store.r_b_swap = iview->swap_rb; - store.memory_format = slice->tiling; - - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - store.height_in_ub_or_stride = - slice->padded_height_of_output_image_in_uif_blocks; - } else if (slice->tiling == VC5_TILING_RASTER) { - store.height_in_ub_or_stride = slice->stride; - } - - if (image->samples > VK_SAMPLE_COUNT_1_BIT) - store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else if (is_multisample_resolve) - store.decimate_mode = V3D_DECIMATE_MODE_4X; - else - store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - } -} - -static void -cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl *cl, - uint32_t layer) -{ - struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - const struct v3dv_subpass *subpass = - &state->pass->subpasses[state->subpass_idx]; - - bool has_stores = false; - bool use_global_zs_clear = false; - bool use_global_rt_clear = false; - - /* FIXME: separate stencil */ - uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { - const struct v3dv_render_pass_attachment *ds_attachment = - &state->pass->attachments[ds_attachment_idx]; - - assert(state->job->first_subpass >= ds_attachment->first_subpass); - assert(state->subpass_idx >= ds_attachment->first_subpass); - assert(state->subpass_idx <= ds_attachment->last_subpass); - - /* From the Vulkan spec, VkImageSubresourceRange: - * - * "When an image view of a depth/stencil image is used as a - * depth/stencil framebuffer attachment, the aspectMask is ignored - * and both depth and stencil image subresources are used." - * - * So we ignore the aspects from the subresource range of the image - * view for the depth/stencil attachment, but we still need to restrict - * the to aspects compatible with the render pass and the image. 
- */ - const VkImageAspectFlags aspects = - vk_format_aspects(ds_attachment->desc.format); - - /* Only clear once on the first subpass that uses the attachment */ - bool needs_depth_clear = - check_needs_clear(state, - aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.loadOp, - subpass->do_depth_clear_with_draw); - - bool needs_stencil_clear = - check_needs_clear(state, - aspects & VK_IMAGE_ASPECT_STENCIL_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.stencilLoadOp, - subpass->do_stencil_clear_with_draw); - - /* Skip the last store if it is not required */ - bool needs_depth_store = - check_needs_store(state, - aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->last_subpass, - ds_attachment->desc.storeOp); - - bool needs_stencil_store = - check_needs_store(state, - aspects & VK_IMAGE_ASPECT_STENCIL_BIT, - ds_attachment->last_subpass, - ds_attachment->desc.stencilStoreOp); - - /* GFXH-1689: The per-buffer store command's clear buffer bit is broken - * for depth/stencil. - * - * There used to be some confusion regarding the Clear Tile Buffers - * Z/S bit also being broken, but we confirmed with Broadcom that this - * is not the case, it was just that some other hardware bugs (that we - * need to work around, such as GFXH-1461) could cause this bit to behave - * incorrectly. - * - * There used to be another issue where the RTs bit in the Clear Tile - * Buffers packet also cleared Z/S, but Broadcom confirmed this is - * fixed since V3D 4.1. - * - * So if we have to emit a clear of depth or stencil we don't use - * the per-buffer store clear bit, even if we need to store the buffers, - * instead we always have to use the Clear Tile Buffers Z/S bit. - * If we have configured the job to do early Z/S clearing, then we - * don't want to emit any Clear Tile Buffers command at all here. - * - * Note that GFXH-1689 is not reproduced in the simulator, where - * using the clear buffer bit in depth/stencil stores works fine. - */ - use_global_zs_clear = !state->job->early_zs_clear && - (needs_depth_clear || needs_stencil_clear); - if (needs_depth_store || needs_stencil_store) { - const uint32_t zs_buffer = - v3dv_zs_buffer(needs_depth_store, needs_stencil_store); - cmd_buffer_render_pass_emit_store(cmd_buffer, cl, - ds_attachment_idx, layer, - zs_buffer, false, false); - has_stores = true; - } - } - - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t attachment_idx = subpass->color_attachments[i].attachment; - - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - - const struct v3dv_render_pass_attachment *attachment = - &state->pass->attachments[attachment_idx]; - - assert(state->job->first_subpass >= attachment->first_subpass); - assert(state->subpass_idx >= attachment->first_subpass); - assert(state->subpass_idx <= attachment->last_subpass); - - /* Only clear once on the first subpass that uses the attachment */ - bool needs_clear = - check_needs_clear(state, - VK_IMAGE_ASPECT_COLOR_BIT, - attachment->first_subpass, - attachment->desc.loadOp, - false); - - /* Skip the last store if it is not required */ - bool needs_store = - check_needs_store(state, - VK_IMAGE_ASPECT_COLOR_BIT, - attachment->last_subpass, - attachment->desc.storeOp); - - /* If we need to resolve this attachment emit that store first. 
Notice - * that we must not request a tile buffer clear here in that case, since - * that would clear the tile buffer before we get to emit the actual - * color attachment store below, since the clear happens after the - * store is completed. - * - * If the attachment doesn't support TLB resolves then we will have to - * fallback to doing the resolve in a shader separately after this - * job, so we will need to store the multisampled sttachment even if that - * wansn't requested by the client. - */ - const bool needs_resolve = - subpass->resolve_attachments && - subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED; - if (needs_resolve && attachment->use_tlb_resolve) { - const uint32_t resolve_attachment_idx = - subpass->resolve_attachments[i].attachment; - cmd_buffer_render_pass_emit_store(cmd_buffer, cl, - resolve_attachment_idx, layer, - RENDER_TARGET_0 + i, - false, true); - has_stores = true; - } else if (needs_resolve) { - needs_store = true; - } - - /* Emit the color attachment store if needed */ - if (needs_store) { - cmd_buffer_render_pass_emit_store(cmd_buffer, cl, - attachment_idx, layer, - RENDER_TARGET_0 + i, - needs_clear && !use_global_rt_clear, - false); - has_stores = true; - } else if (needs_clear) { - use_global_rt_clear = true; - } - } - - /* We always need to emit at least one dummy store */ - if (!has_stores) { - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } - } - - /* If we have any depth/stencil clears we can't use the per-buffer clear - * bit and instead we have to emit a single clear of all tile buffers. - */ - if (use_global_zs_clear || use_global_rt_clear) { - cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = use_global_zs_clear; - clear.clear_all_render_targets = use_global_rt_clear; - } - } -} - -static void -cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t layer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - /* Emit the generic list in our indirect state -- the rcl will just - * have pointers into it. - */ - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(cmd_buffer, NULL); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer); - - /* The binner starts out writing tiles assuming that the initial mode - * is triangles, so make sure that's the case. - */ - cl_emit(cl, PRIM_LIST_FORMAT, fmt) { - fmt.primitive_type = LIST_TRIANGLES; - } - - /* PTB assumes that value to be 0, but hw will not set it. */ - cl_emit(cl, SET_INSTANCEID, set) { - set.instance_id = 0; - } - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } -} - -static void -cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t layer) -{ - const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - - struct v3dv_job *job = cmd_buffer->state.job; - struct v3dv_cl *rcl = &job->rcl; - - /* If doing multicore binning, we would need to initialize each - * core's tile list here. 
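
The per-layer offset into the tile allocation BO computed just below is purely linear: each tile owns a fixed 64-byte slot, so layer N's tile lists start after N full layers' worth of tiles. A one-function sketch (the 64-byte slot size is taken from the code that follows):

#include <stdint.h>

static uint32_t
tile_alloc_offset(uint32_t layer,
                  uint32_t draw_tiles_x, uint32_t draw_tiles_y)
{
   /* 64 bytes of tile-list storage per tile, with
    * draw_tiles_x * draw_tiles_y tiles per layer. */
   return 64 * layer * draw_tiles_x * draw_tiles_y;
}
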
- */
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
- const uint32_t tile_alloc_offset =
- 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
- cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
- config.number_of_bin_tile_lists = 1;
- config.total_frame_width_in_tiles = tiling->draw_tiles_x;
- config.total_frame_height_in_tiles = tiling->draw_tiles_y;
-
- config.supertile_width_in_tiles = tiling->supertile_width;
- config.supertile_height_in_tiles = tiling->supertile_height;
-
- config.total_frame_width_in_supertiles =
- tiling->frame_width_in_supertiles;
- config.total_frame_height_in_supertiles =
- tiling->frame_height_in_supertiles;
- }
-
- /* Start by clearing the tile buffer. */
- cl_emit(rcl, TILE_COORDINATES, coords) {
- coords.tile_column_number = 0;
- coords.tile_row_number = 0;
- }
-
- /* Emit an initial clear of the tile buffers. This is necessary
- * for any buffers that should be cleared (since clearing
- * normally happens at the *end* of the generic tile list), but
- * it's also nice to clear everything so the first tile doesn't
- * inherit any contents from some previous frame.
- *
- * Also, implement the GFXH-1742 workaround. There's a race in
- * the HW between the RCL updating the TLB's internal type/size
- * and the spawning of the QPU instances using the TLB's current
- * internal type/size. To make sure the QPUs get the right
- * state, we need 1 dummy store in between internal type/size
- * changes on V3D 3.x, and 2 dummy stores on 4.x.
- */
- for (int i = 0; i < 2; i++) {
- if (i > 0)
- cl_emit(rcl, TILE_COORDINATES, coords);
- cl_emit(rcl, END_OF_LOADS, end);
- cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
- cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = !job->early_zs_clear;
- clear.clear_all_render_targets = true;
- }
- }
- cl_emit(rcl, END_OF_TILE_MARKER, end);
- }
-
- cl_emit(rcl, FLUSH_VCD_CACHE, flush);
-
- cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer);
-
- uint32_t supertile_w_in_pixels =
- tiling->tile_width * tiling->supertile_width;
- uint32_t supertile_h_in_pixels =
- tiling->tile_height * tiling->supertile_height;
- const uint32_t min_x_supertile =
- state->render_area.offset.x / supertile_w_in_pixels;
- const uint32_t min_y_supertile =
- state->render_area.offset.y / supertile_h_in_pixels;
-
- uint32_t max_render_x = state->render_area.offset.x;
- if (state->render_area.extent.width > 0)
- max_render_x += state->render_area.extent.width - 1;
- uint32_t max_render_y = state->render_area.offset.y;
- if (state->render_area.extent.height > 0)
- max_render_y += state->render_area.extent.height - 1;
- const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
- const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
-
- for (int y = min_y_supertile; y <= max_y_supertile; y++) {
- for (int x = min_x_supertile; x <= max_x_supertile; x++) {
- cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
- coords.column_number_in_supertiles = x;
- coords.row_number_in_supertiles = y;
- }
- }
- }
-}
-
-static void
-set_rcl_early_z_config(struct v3dv_job *job,
- bool *early_z_disable,
- uint32_t *early_z_test_and_update_direction)
-{
- /* If this is true then we have not emitted any draw calls in this job
- * and we don't get any benefits from
early Z.
- */
- if (!job->decided_global_ez_enable) {
- assert(job->draw_count == 0);
- *early_z_disable = true;
- return;
- }
-
- switch (job->first_ez_state) {
- case VC5_EZ_UNDECIDED:
- case VC5_EZ_LT_LE:
- *early_z_disable = false;
- *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE;
- break;
- case VC5_EZ_GT_GE:
- *early_z_disable = false;
- *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE;
- break;
- case VC5_EZ_DISABLED:
- *early_z_disable = true;
- break;
- }
-}
-
-static void
-cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
-
- /* We can't emit the RCL until we have a framebuffer, which we may not have
- * if we are recording a secondary command buffer. In that case, we will
- * have to wait until vkCmdExecuteCommands is called from a primary command
- * buffer.
- */
- if (!framebuffer) {
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
- return;
- }
-
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
-
- const uint32_t fb_layers = framebuffer->layers;
- v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
- MAX2(fb_layers, 1) * 256 *
- cl_packet_length(SUPERTILE_COORDINATES));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- assert(state->subpass_idx < state->pass->subpass_count);
- const struct v3dv_render_pass *pass = state->pass;
- const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
- struct v3dv_cl *rcl = &job->rcl;
-
- /* Common config must be the first TILE_RENDERING_MODE_CFG and
- * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
- * updates to the previous HW state.
- */
- bool do_early_zs_clear = false;
- const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
- config.image_width_pixels = framebuffer->width;
- config.image_height_pixels = framebuffer->height;
- config.number_of_render_targets = MAX2(subpass->color_count, 1);
- config.multisample_mode_4x = tiling->msaa;
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_image_view *iview =
- framebuffer->attachments[ds_attachment_idx];
- config.internal_depth_type = iview->internal_type;
-
- set_rcl_early_z_config(job,
- &config.early_z_disable,
- &config.early_z_test_and_update_direction);
-
- /* Early-Z/S clear can be enabled if the job is clearing and not
- * storing (or loading) depth. If a stencil aspect is also present
- * we have the same requirements for it, however, in this case we
- * can accept stencil loadOp DONT_CARE as well, so instead of
- * checking that stencil is cleared we check that it is not loaded.
- *
- * Early-Z/S clearing is independent of Early Z/S testing, so it is
- * possible to enable one but not the other so long as their
- * respective requirements are met.
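
Condensed, the requirement is: depth must be cleared and never stored, and a stencil aspect, if present, must be neither loaded nor stored. A standalone sketch of that condition (the booleans stand in for the check_needs_clear/load/store helpers used by the code below):

#include <stdbool.h>

static bool
can_do_early_zs_clear(bool depth_clear, bool depth_store,
                      bool has_stencil,
                      bool stencil_load, bool stencil_store)
{
   bool ok = depth_clear && !depth_store;
   if (ok && has_stencil)
      ok = !stencil_load && !stencil_store;  /* DONT_CARE loadOp is acceptable */
   return ok;
}
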
- */ - struct v3dv_render_pass_attachment *ds_attachment = - &pass->attachments[ds_attachment_idx]; - - const VkImageAspectFlags ds_aspects = - vk_format_aspects(ds_attachment->desc.format); - - bool needs_depth_clear = - check_needs_clear(state, - ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.loadOp, - subpass->do_depth_clear_with_draw); - - bool needs_depth_store = - check_needs_store(state, - ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->last_subpass, - ds_attachment->desc.storeOp); - - do_early_zs_clear = needs_depth_clear && !needs_depth_store; - if (do_early_zs_clear && - vk_format_has_stencil(ds_attachment->desc.format)) { - bool needs_stencil_load = - check_needs_load(state, - ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.stencilLoadOp); - - bool needs_stencil_store = - check_needs_store(state, - ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, - ds_attachment->last_subpass, - ds_attachment->desc.stencilStoreOp); - - do_early_zs_clear = !needs_stencil_load && !needs_stencil_store; - } - - config.early_depth_stencil_clear = do_early_zs_clear; - } else { - config.early_z_disable = true; - } - } - - /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers" - * commands with the Z/S bit set, so keep track of whether we enabled this - * in the job so we can skip these later. - */ - job->early_zs_clear = do_early_zs_clear; - - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t attachment_idx = subpass->color_attachments[i].attachment; - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - - struct v3dv_image_view *iview = - state->framebuffer->attachments[attachment_idx]; - - const struct v3dv_image *image = iview->image; - const struct v3d_resource_slice *slice = &image->slices[iview->base_level]; - - const uint32_t *clear_color = - &state->attachments[attachment_idx].clear_value.color[0]; - - uint32_t clear_pad = 0; - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - int uif_block_height = v3d_utile_height(image->cpp) * 2; - - uint32_t implicit_padded_height = - align(framebuffer->height, uif_block_height) / uif_block_height; - - if (slice->padded_height_of_output_image_in_uif_blocks - - implicit_padded_height >= 15) { - clear_pad = slice->padded_height_of_output_image_in_uif_blocks; - } - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { - clear.clear_color_low_32_bits = clear_color[0]; - clear.clear_color_next_24_bits = clear_color[1] & 0xffffff; - clear.render_target_number = i; - }; - - if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((clear_color[1] >> 24) | (clear_color[2] << 8)); - clear.clear_color_mid_high_24_bits = - ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8)); - clear.render_target_number = i; - }; - } - - if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = clear_color[3] >> 16; - clear.render_target_number = i; - }; - } - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - v3dv_render_pass_setup_render_target(cmd_buffer, 0, - &rt.render_target_0_internal_bpp, - &rt.render_target_0_internal_type, - &rt.render_target_0_clamp); - v3dv_render_pass_setup_render_target(cmd_buffer, 1, - 
&rt.render_target_1_internal_bpp, - &rt.render_target_1_internal_type, - &rt.render_target_1_clamp); - v3dv_render_pass_setup_render_target(cmd_buffer, 2, - &rt.render_target_2_internal_bpp, - &rt.render_target_2_internal_type, - &rt.render_target_2_clamp); - v3dv_render_pass_setup_render_target(cmd_buffer, 3, - &rt.render_target_3_internal_bpp, - &rt.render_target_3_internal_type, - &rt.render_target_3_clamp); - } - - /* Ends rendering mode config. */ - if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = - state->attachments[ds_attachment_idx].clear_value.z; - clear.stencil_clear_value = - state->attachments[ds_attachment_idx].clear_value.s; - }; - } else { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = 1.0f; - clear.stencil_clear_value = 0; - }; - } - - /* Always set initial block size before the first branch, which needs - * to match the value from binning mode config. - */ - cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { - init.use_auto_chained_tile_lists = true; - init.size_of_first_block_in_chained_tile_lists = - TILE_ALLOCATION_BLOCK_SIZE_64B; - } - - for (int layer = 0; layer < MAX2(1, fb_layers); layer++) - cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer); - - cl_emit(rcl, END_OF_RENDERING, end); -} - static void cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer) { @@ -2445,13 +1515,30 @@ cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer, uint8_t internal_bpp; bool msaa; - v3dv_framebuffer_compute_internal_bpp_msaa(framebuffer, subpass, - &internal_bpp, &msaa); + v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa) + (framebuffer, subpass, &internal_bpp, &msaa); + + /* From the Vulkan spec: + * + * "If the render pass uses multiview, then layers must be one and + * each attachment requires a number of layers that is greater than + * the maximum bit index set in the view mask in the subpasses in + * which it is used." + * + * So when multiview is enabled, we take the number of layers from the + * last bit set in the view mask. 
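
util_last_bit() returns one plus the index of the most significant set bit, so a view mask of 0b101 yields 3 layers. A hedged standalone equivalent using a GCC/Clang builtin (Mesa's real helper lives in util/bitscan.h):

#include <stdint.h>

static uint32_t
last_bit(uint32_t mask)
{
   /* 1 + index of the highest set bit; 0 for an empty mask. */
   return mask == 0 ? 0 : 32 - (uint32_t)__builtin_clz(mask);
}

/* last_bit(0x1) == 1, last_bit(0x5) == 3, last_bit(0x80000000) == 32 */
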
+ */ + uint32_t layers = framebuffer->layers; + if (subpass->view_mask != 0) { + assert(framebuffer->layers == 1); + layers = util_last_bit(subpass->view_mask); + } v3dv_job_start_frame(job, framebuffer->width, framebuffer->height, - framebuffer->layers, + layers, + true, subpass->color_count, internal_bpp, msaa); @@ -2534,7 +1621,7 @@ v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer) job->is_subpass_finish = true; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); @@ -2553,7 +1640,7 @@ v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer) state->subpass_idx = -1; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); @@ -2580,44 +1667,6 @@ v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer) } static void -emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer); - -static void -ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t slot_size, - uint32_t used_count, - uint32_t *alloc_count, - void **ptr); - -static void -cmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary, - struct v3dv_cmd_buffer *secondary) -{ - struct v3dv_cmd_buffer_state *p_state = &primary->state; - struct v3dv_cmd_buffer_state *s_state = &secondary->state; - - const uint32_t total_state_count = - p_state->query.end.used_count + s_state->query.end.used_count; - ensure_array_state(primary, - sizeof(struct v3dv_end_query_cpu_job_info), - total_state_count, - &p_state->query.end.alloc_count, - (void **) &p_state->query.end.states); - v3dv_return_if_oom(primary, NULL); - - for (uint32_t i = 0; i < s_state->query.end.used_count; i++) { - const struct v3dv_end_query_cpu_job_info *s_qstate = - &secondary->state.query.end.states[i]; - - struct v3dv_end_query_cpu_job_info *p_qstate = - &p_state->query.end.states[p_state->query.end.used_count++]; - - p_qstate->pool = s_qstate->pool; - p_qstate->query = s_qstate->query; - } -} - -static void clone_bo_list(struct v3dv_cmd_buffer *cmd_buffer, struct list_head *dst, struct list_head *src) @@ -2645,9 +1694,9 @@ clone_bo_list(struct v3dv_cmd_buffer *cmd_buffer, * for jobs recorded in secondary command buffers when we want to execute * them in primaries. */ -static struct v3dv_job * -job_clone_in_cmd_buffer(struct v3dv_job *job, - struct v3dv_cmd_buffer *cmd_buffer) +struct v3dv_job * +v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job, + struct v3dv_cmd_buffer *cmd_buffer) { struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc, sizeof(struct v3dv_job), 8, @@ -2676,163 +1725,6 @@ job_clone_in_cmd_buffer(struct v3dv_job *job, return clone_job; } -static struct v3dv_job * -cmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer, - bool is_bcl_barrier) -{ - assert(cmd_buffer->state.subpass_idx != -1); - v3dv_cmd_buffer_finish_job(cmd_buffer); - struct v3dv_job *job = - v3dv_cmd_buffer_subpass_resume(cmd_buffer, - cmd_buffer->state.subpass_idx); - if (!job) - return NULL; - - job->serialize = true; - job->needs_bcl_sync = is_bcl_barrier; - return job; -} - -static void -cmd_buffer_execute_inside_pass(struct v3dv_cmd_buffer *primary, - uint32_t cmd_buffer_count, - const VkCommandBuffer *cmd_buffers) -{ - assert(primary->state.job); - - /* Emit occlusion query state if needed so the draw calls inside our - * secondaries update the counters. 
- */
- bool has_occlusion_query =
- primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY;
- if (has_occlusion_query)
- emit_occlusion_query(primary);
-
- /* FIXME: if our primary job tiling doesn't enable MSAA but any of the
- * pipelines used by the secondaries do, we need to re-start the primary
- * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed.
- */
- bool pending_barrier = false;
- bool pending_bcl_barrier = false;
- for (uint32_t i = 0; i < cmd_buffer_count; i++) {
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
-
- assert(secondary->usage_flags &
- VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
-
- list_for_each_entry(struct v3dv_job, secondary_job,
- &secondary->jobs, list_link) {
- if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
- /* If the job is a CL, then we branch to it from the primary BCL.
- * In this case the secondary's BCL is finished with a
- * RETURN_FROM_SUB_LIST command to return to the primary BCL
- * once we are done executing it.
- */
- assert(v3dv_cl_offset(&secondary_job->rcl) == 0);
- assert(secondary_job->bcl.bo);
-
- /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */
- STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1);
- assert(v3dv_cl_offset(&secondary_job->bcl) >= 1);
- assert(*(((uint8_t *)secondary_job->bcl.next) - 1) ==
- V3D42_RETURN_FROM_SUB_LIST_opcode);
-
- /* If this secondary has any barriers (or we had any pending barrier
- * to apply), then we can't just branch to it from the primary, we
- * need to split the primary to create a new job that can consume
- * the barriers first.
- *
- * FIXME: in this case, maybe just copy the secondary BCL without
- * the RETURN_FROM_SUB_LIST into the primary job to skip the
- * branch?
- */
- struct v3dv_job *primary_job = primary->state.job;
- if (!primary_job || secondary_job->serialize || pending_barrier) {
- const bool needs_bcl_barrier =
- secondary_job->needs_bcl_sync || pending_bcl_barrier;
- primary_job =
- cmd_buffer_subpass_split_for_barrier(primary,
- needs_bcl_barrier);
- v3dv_return_if_oom(primary, NULL);
-
- /* Since we have created a new primary we need to re-emit
- * occlusion query state.
- */
- if (has_occlusion_query)
- emit_occlusion_query(primary);
- }
-
- /* Make sure our primary job has all required BO references */
- set_foreach(secondary_job->bos, entry) {
- struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
- v3dv_job_add_bo(primary_job, bo);
- }
-
- /* Emit required branch instructions. We expect each of these
- * to end with a corresponding 'return from sub list' item.
- */ - list_for_each_entry(struct v3dv_bo, bcl_bo, - &secondary_job->bcl.bo_list, list_link) { - v3dv_cl_ensure_space_with_branch(&primary_job->bcl, - cl_packet_length(BRANCH_TO_SUB_LIST)); - v3dv_return_if_oom(primary, NULL); - cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) { - branch.address = v3dv_cl_address(bcl_bo, 0); - } - } - - primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl; - } else if (secondary_job->type == V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS) { - if (pending_barrier) { - cmd_buffer_subpass_split_for_barrier(primary, pending_bcl_barrier); - v3dv_return_if_oom(primary, NULL); - } - - const struct v3dv_clear_attachments_cpu_job_info *info = - &secondary_job->cpu.clear_attachments; - v3dv_CmdClearAttachments(v3dv_cmd_buffer_to_handle(primary), - info->attachment_count, - info->attachments, - info->rect_count, - info->rects); - } else { - /* This is a regular job (CPU or GPU), so just finish the current - * primary job (if any) and then add the secondary job to the - * primary's job list right after it. - */ - v3dv_cmd_buffer_finish_job(primary); - job_clone_in_cmd_buffer(secondary_job, primary); - if (pending_barrier) { - secondary_job->serialize = true; - if (pending_bcl_barrier) - secondary_job->needs_bcl_sync = true; - } - } - - pending_barrier = false; - pending_bcl_barrier = false; - } - - /* If the secondary has recorded any vkCmdEndQuery commands, we need to - * copy this state to the primary so it is processed properly when the - * current primary job is finished. - */ - cmd_buffer_copy_secondary_end_query_state(primary, secondary); - - /* If this secondary had any pending barrier state we will need that - * barrier state consumed with whatever comes next in the primary. - */ - assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier); - pending_barrier = secondary->state.has_barrier; - pending_bcl_barrier = secondary->state.has_bcl_barrier; - } - - if (pending_barrier) { - primary->state.has_barrier = true; - primary->state.has_bcl_barrier |= pending_bcl_barrier; - } -} - static void cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary, uint32_t cmd_buffer_count, @@ -2862,9 +1754,8 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary, list_for_each_entry(struct v3dv_job, secondary_job, &secondary->jobs, list_link) { /* These can only happen inside a render pass */ - assert(secondary_job->type != V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS); assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY); - struct v3dv_job *job = job_clone_in_cmd_buffer(secondary_job, primary); + struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary); if (!job) return; @@ -2892,7 +1783,7 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary, } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCommandBuffers) @@ -2900,8 +1791,8 @@ v3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_cmd_buffer, primary, commandBuffer); if (primary->state.pass != NULL) { - cmd_buffer_execute_inside_pass(primary, - commandBufferCount, pCommandBuffers); + v3dv_X(primary->device, cmd_buffer_execute_inside_pass) + (primary, commandBufferCount, pCommandBuffers); } else { cmd_buffer_execute_outside_pass(primary, commandBufferCount, pCommandBuffers); @@ -2993,131 +1884,15 @@ cmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer, } } - cmd_buffer->state.dynamic.mask = dynamic_mask; - 
cmd_buffer->state.dirty |= dirty; -} - -static void -job_update_ez_state(struct v3dv_job *job, - struct v3dv_pipeline *pipeline, - struct v3dv_cmd_buffer *cmd_buffer) -{ - /* If first_ez_state is VC5_EZ_DISABLED it means that we have already - * determined that we should disable EZ completely for all draw calls in - * this job. This will cause us to disable EZ for the entire job in the - * Tile Rendering Mode RCL packet and when we do that we need to make sure - * we never emit a draw call in the job with EZ enabled in the CFG_BITS - * packet, so ez_state must also be VC5_EZ_DISABLED; - */ - if (job->first_ez_state == VC5_EZ_DISABLED) { - assert(job->ez_state == VC5_EZ_DISABLED); - return; - } - - /* This is part of the pre draw call handling, so we should be inside a - * render pass. - */ - assert(cmd_buffer->state.pass); - - /* If this is the first time we update EZ state for this job we first check - * if there is anything that requires disabling it completely for the entire - * job (based on state that is not related to the current draw call and - * pipeline state). - */ - if (!job->decided_global_ez_enable) { - job->decided_global_ez_enable = true; - - struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - assert(state->subpass_idx < state->pass->subpass_count); - struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx]; - if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) { - job->first_ez_state = VC5_EZ_DISABLED; - job->ez_state = VC5_EZ_DISABLED; - return; - } - - /* GFXH-1918: the early-z buffer may load incorrect depth values - * if the frame has odd width or height. - * - * So we need to disable EZ in this case. - */ - const struct v3dv_render_pass_attachment *ds_attachment = - &state->pass->attachments[subpass->ds_attachment.attachment]; - - const VkImageAspectFlags ds_aspects = - vk_format_aspects(ds_attachment->desc.format); - - bool needs_depth_load = - check_needs_load(state, - ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, - ds_attachment->first_subpass, - ds_attachment->desc.loadOp); - - if (needs_depth_load) { - struct v3dv_framebuffer *fb = state->framebuffer; - - if (!fb) { - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); - perf_debug("Loading depth aspect in a secondary command buffer " - "without framebuffer info disables early-z tests.\n"); - job->first_ez_state = VC5_EZ_DISABLED; - job->ez_state = VC5_EZ_DISABLED; - return; - } - - if (((fb->width % 2) != 0 || (fb->height % 2) != 0)) { - perf_debug("Loading depth aspect for framebuffer with odd width " - "or height disables early-Z tests.\n"); - job->first_ez_state = VC5_EZ_DISABLED; - job->ez_state = VC5_EZ_DISABLED; - return; - } + if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { + if (dest->color_write_enable != src->color_write_enable) { + dest->color_write_enable = src->color_write_enable; + dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; } } - /* Otherwise, we can decide to selectively enable or disable EZ for draw - * calls using the CFG_BITS packet based on the bound pipeline state. - */ - - /* If the FS writes Z, then it may update against the chosen EZ direction */ - struct v3dv_shader_variant *fs_variant = - pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; - if (fs_variant->prog_data.fs->writes_z) { - job->ez_state = VC5_EZ_DISABLED; - return; - } - - switch (pipeline->ez_state) { - case VC5_EZ_UNDECIDED: - /* If the pipeline didn't pick a direction but didn't disable, then go - * along with the current EZ state. 
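
Across draw calls the per-job EZ state behaves like a small lattice: an undecided job adopts the pipeline's direction, matching directions are kept, and any conflict or explicit disable is sticky for the rest of the job. A compact sketch of the merge that the switch below implements (enum names shortened from the VC5_EZ_* ones in this file):

enum ez_state { EZ_UNDECIDED, EZ_LT_LE, EZ_GT_GE, EZ_DISABLED };

static enum ez_state
merge_ez_state(enum ez_state job, enum ez_state pipeline)
{
   if (job == EZ_DISABLED || pipeline == EZ_DISABLED)
      return EZ_DISABLED;              /* disable is sticky for the job */
   if (pipeline == EZ_UNDECIDED)
      return job;                      /* e.g. Z func EQUAL or NEVER */
   if (job == EZ_UNDECIDED)
      return pipeline;                 /* adopt the first direction seen */
   return job == pipeline ? job : EZ_DISABLED;  /* directions must agree */
}
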
This allows EZ optimization for Z - * func == EQUAL or NEVER. - */ - break; - - case VC5_EZ_LT_LE: - case VC5_EZ_GT_GE: - /* If the pipeline picked a direction, then it needs to match the current - * direction if we've decided on one. - */ - if (job->ez_state == VC5_EZ_UNDECIDED) - job->ez_state = pipeline->ez_state; - else if (job->ez_state != pipeline->ez_state) - job->ez_state = VC5_EZ_DISABLED; - break; - - case VC5_EZ_DISABLED: - /* If the pipeline disables EZ because of a bad Z func or stencil - * operation, then we can't do any more EZ in this frame. - */ - job->ez_state = VC5_EZ_DISABLED; - break; - } - - if (job->first_ez_state == VC5_EZ_UNDECIDED && - job->ez_state != VC5_EZ_DISABLED) { - job->first_ez_state = job->ez_state; - } + cmd_buffer->state.dynamic.mask = dynamic_mask; + cmd_buffer->state.dirty |= dirty; } static void @@ -3128,25 +1903,6 @@ bind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer, if (cmd_buffer->state.gfx.pipeline == pipeline) return; - /* Enable always flush if we are blending to sRGB render targets. This - * fixes test failures in: - * dEQP-VK.pipeline.blend.format.r8g8b8a8_srgb.* - * - * FIXME: not sure why we need this. The tile buffer is always linear, with - * conversion from/to sRGB happening on tile load/store operations. This - * means that when we enable flushing the only difference is that we convert - * to sRGB on the store after each draw call and we convert from sRGB on the - * load before each draw call, but the blend happens in linear format in the - * tile buffer anyway, which is the same scenario as if we didn't flush. - */ - assert(pipeline->subpass); - if (pipeline->subpass->has_srgb_rt && pipeline->blend.enables) { - assert(cmd_buffer->state.job); - cmd_buffer->state.job->always_flush = true; - perf_debug("flushing draw calls for subpass %d because bound pipeline " - "uses sRGB blending\n", cmd_buffer->state.subpass_idx); - } - cmd_buffer->state.gfx.pipeline = pipeline; cmd_buffer_bind_pipeline_static_state(cmd_buffer, &pipeline->dynamic_state); @@ -3167,7 +1923,7 @@ bind_compute_pipeline(struct v3dv_cmd_buffer *cmd_buffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_PIPELINE; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline _pipeline) @@ -3223,7 +1979,7 @@ v3dv_viewport_compute_xform(const VkViewport *viewport, scale[2] = min_abs_scale * (scale[2] < 0 ? -1.0f : 1.0f); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, @@ -3256,7 +2012,7 @@ v3dv_CmdSetViewport(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, @@ -3360,379 +2116,13 @@ emit_scissor(struct v3dv_cmd_buffer *cmd_buffer) cmd_buffer->state.clip_window.extent.width = maxx - minx; cmd_buffer->state.clip_window.extent.height = maxy - miny; - emit_clip_window(cmd_buffer->state.job, &cmd_buffer->state.clip_window); + v3dv_X(cmd_buffer->device, job_emit_clip_window) + (cmd_buffer->state.job, &cmd_buffer->state.clip_window); cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_SCISSOR; } static void -emit_viewport(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; - /* FIXME: right now we only support one viewport. 
viewports[0] would work
- * now, would need to change if we allow multiple viewports
- */
- float *vptranslate = dynamic->viewport.translate[0];
- float *vpscale = dynamic->viewport.scale[0];
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const uint32_t required_cl_size =
- cl_packet_length(CLIPPER_XY_SCALING) +
- cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) +
- cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) +
- cl_packet_length(VIEWPORT_OFFSET);
- v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
- clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
- clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
- }
-
- cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
- clip.viewport_z_offset_zc_to_zs = vptranslate[2];
- clip.viewport_z_scale_zc_to_zs = vpscale[2];
- }
- cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
- /* Vulkan's Z NDC is [0..1], unlike OpenGL which is [-1, 1] */
- float z1 = vptranslate[2];
- float z2 = vptranslate[2] + vpscale[2];
- clip.minimum_zw = MIN2(z1, z2);
- clip.maximum_zw = MAX2(z1, z2);
- }
-
- cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
- vp.viewport_centre_x_coordinate = vptranslate[0];
- vp.viewport_centre_y_coordinate = vptranslate[1];
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
-}
-
-static void
-emit_stencil(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic;
-
- const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
- V3DV_DYNAMIC_STENCIL_WRITE_MASK |
- V3DV_DYNAMIC_STENCIL_REFERENCE;
-
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- 2 * cl_packet_length(STENCIL_CFG));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- bool emitted_stencil = false;
- for (uint32_t i = 0; i < 2; i++) {
- if (pipeline->emit_stencil_cfg[i]) {
- if (dynamic_state->mask & dynamic_stencil_states) {
- cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
- pipeline->stencil_cfg[i], config) {
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) {
- config.stencil_test_mask =
- i == 0 ? dynamic_state->stencil_compare_mask.front :
- dynamic_state->stencil_compare_mask.back;
- }
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) {
- config.stencil_write_mask =
- i == 0 ? dynamic_state->stencil_write_mask.front :
- dynamic_state->stencil_write_mask.back;
- }
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) {
- config.stencil_ref_value =
- i == 0 ?
dynamic_state->stencil_reference.front : - dynamic_state->stencil_reference.back; - } - } - } else { - cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]); - } - - emitted_stencil = true; - } - } - - if (emitted_stencil) { - const uint32_t dynamic_stencil_dirty_flags = - V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK | - V3DV_CMD_DIRTY_STENCIL_WRITE_MASK | - V3DV_CMD_DIRTY_STENCIL_REFERENCE; - cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags; - } -} - -static void -emit_depth_bias(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - assert(pipeline); - - if (!pipeline->depth_bias.enabled) - return; - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET)); - v3dv_return_if_oom(cmd_buffer, NULL); - - struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; - cl_emit(&job->bcl, DEPTH_OFFSET, bias) { - bias.depth_offset_factor = dynamic->depth_bias.slope_factor; - bias.depth_offset_units = dynamic->depth_bias.constant_factor; - if (pipeline->depth_bias.is_z16) - bias.depth_offset_units *= 256.0f; - bias.limit = dynamic->depth_bias.depth_bias_clamp; - } - - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS; -} - -static void -emit_line_width(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, LINE_WIDTH, line) { - line.line_width = cmd_buffer->state.dynamic.line_width; - } - - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH; -} - -static void -emit_sample_state(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - assert(pipeline); - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, SAMPLE_STATE, state) { - state.coverage = 1.0f; - state.mask = pipeline->sample_mask; - } -} - -static void -emit_blend(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - assert(pipeline); - - const uint32_t blend_packets_size = - cl_packet_length(BLEND_ENABLES) + - cl_packet_length(BLEND_CONSTANT_COLOR) + - cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS + - cl_packet_length(COLOR_WRITE_MASKS); - - v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size); - v3dv_return_if_oom(cmd_buffer, NULL); - - if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { - if (pipeline->blend.enables) { - cl_emit(&job->bcl, BLEND_ENABLES, enables) { - enables.mask = pipeline->blend.enables; - } - } - - for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { - if (pipeline->blend.enables & (1 << i)) - cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]); - } - - cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { - mask.mask = pipeline->blend.color_write_masks; - } - } - - if (pipeline->blend.needs_color_constants && - cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) { - struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; - cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { - color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]); - color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]); - color.blue_f16 = 
_mesa_float_to_half(dynamic->blend_constants[2]); - color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]); - } - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS; - } -} - -static void -emit_flat_shade_flags(struct v3dv_job *job, - int varying_offset, - uint32_t varyings, - enum V3DX(Varying_Flags_Action) lower, - enum V3DX(Varying_Flags_Action) higher) -{ - v3dv_cl_ensure_space_with_branch(&job->bcl, - cl_packet_length(FLAT_SHADE_FLAGS)); - v3dv_return_if_oom(NULL, job); - - cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { - flags.varying_offset_v0 = varying_offset; - flags.flat_shade_flags_for_varyings_v024 = varyings; - flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower; - flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher; - } -} - -static void -emit_noperspective_flags(struct v3dv_job *job, - int varying_offset, - uint32_t varyings, - enum V3DX(Varying_Flags_Action) lower, - enum V3DX(Varying_Flags_Action) higher) -{ - v3dv_cl_ensure_space_with_branch(&job->bcl, - cl_packet_length(NON_PERSPECTIVE_FLAGS)); - v3dv_return_if_oom(NULL, job); - - cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) { - flags.varying_offset_v0 = varying_offset; - flags.non_perspective_flags_for_varyings_v024 = varyings; - flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower; - flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher; - } -} - -static void -emit_centroid_flags(struct v3dv_job *job, - int varying_offset, - uint32_t varyings, - enum V3DX(Varying_Flags_Action) lower, - enum V3DX(Varying_Flags_Action) higher) -{ - v3dv_cl_ensure_space_with_branch(&job->bcl, - cl_packet_length(CENTROID_FLAGS)); - v3dv_return_if_oom(NULL, job); - - cl_emit(&job->bcl, CENTROID_FLAGS, flags) { - flags.varying_offset_v0 = varying_offset; - flags.centroid_flags_for_varyings_v024 = varyings; - flags.action_for_centroid_flags_of_lower_numbered_varyings = lower; - flags.action_for_centroid_flags_of_higher_numbered_varyings = higher; - } -} - -static bool -emit_varying_flags(struct v3dv_job *job, - uint32_t num_flags, - const uint32_t *flags, - void (*flag_emit_callback)(struct v3dv_job *job, - int varying_offset, - uint32_t flags, - enum V3DX(Varying_Flags_Action) lower, - enum V3DX(Varying_Flags_Action) higher)) -{ - bool emitted_any = false; - for (int i = 0; i < num_flags; i++) { - if (!flags[i]) - continue; - - if (emitted_any) { - flag_emit_callback(job, i, flags[i], - V3D_VARYING_FLAGS_ACTION_UNCHANGED, - V3D_VARYING_FLAGS_ACTION_UNCHANGED); - } else if (i == 0) { - flag_emit_callback(job, i, flags[i], - V3D_VARYING_FLAGS_ACTION_UNCHANGED, - V3D_VARYING_FLAGS_ACTION_ZEROED); - } else { - flag_emit_callback(job, i, flags[i], - V3D_VARYING_FLAGS_ACTION_ZEROED, - V3D_VARYING_FLAGS_ACTION_ZEROED); - } - - emitted_any = true; - } - - return emitted_any; -} - -static void -emit_varyings_state(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - - struct v3d_fs_prog_data *prog_data_fs = - pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; - - const uint32_t num_flags = - ARRAY_SIZE(prog_data_fs->flat_shade_flags); - const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags; - const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags; - const uint32_t *centroid_flags = prog_data_fs->centroid_flags; - - if (!emit_varying_flags(job, num_flags, flat_shade_flags, - emit_flat_shade_flags)) { 
- v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); - } - - if (!emit_varying_flags(job, num_flags, noperspective_flags, - emit_noperspective_flags)) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags); - } - - if (!emit_varying_flags(job, num_flags, centroid_flags, - emit_centroid_flags)) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); - } -} - -static void -emit_configuration_bits(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - assert(pipeline); - - job_update_ez_state(job, pipeline, cmd_buffer); - - v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) { - config.early_z_enable = job->ez_state != VC5_EZ_DISABLED; - config.early_z_updates_enable = config.early_z_enable && - pipeline->z_updates_enable; - } -} - -static void update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, uint32_t dirty_uniform_state) { @@ -3746,13 +2136,26 @@ update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; assert(pipeline); - const bool dirty_descriptors_only = - (cmd_buffer->state.dirty & dirty_uniform_state) == - V3DV_CMD_DIRTY_DESCRIPTOR_SETS; + const bool has_new_pipeline = dirty_uniform_state & V3DV_CMD_DIRTY_PIPELINE; + const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT; + const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS; + const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS; + const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX; + + /* VK_SHADER_STAGE_FRAGMENT_BIT */ + const bool has_new_descriptors_fs = + has_new_descriptors && + (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_FRAGMENT_BIT); - const bool needs_fs_update = - !dirty_descriptors_only || - (pipeline->layout->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT); + const bool has_new_push_constants_fs = + has_new_push_constants && + (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT); + + const bool needs_fs_update = has_new_pipeline || + has_new_view_index || + has_new_push_constants_fs || + has_new_descriptors_fs || + has_new_view_index; if (needs_fs_update) { struct v3dv_shader_variant *fs_variant = @@ -3762,221 +2165,69 @@ update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant); } - const bool needs_vs_update = - !dirty_descriptors_only || - (pipeline->layout->shader_stages & VK_SHADER_STAGE_VERTEX_BIT); - - if (needs_vs_update) { - struct v3dv_shader_variant *vs_variant = - pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; - - struct v3dv_shader_variant *vs_bin_variant = - pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; - - cmd_buffer->state.uniforms.vs = - v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant); - - 
cmd_buffer->state.uniforms.vs_bin =
- v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant);
- }
-}
-
-static void
-emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- struct v3dv_pipeline *pipeline = state->gfx.pipeline;
- assert(pipeline);
-
- struct v3d_vs_prog_data *prog_data_vs =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
- struct v3d_vs_prog_data *prog_data_vs_bin =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
- struct v3d_fs_prog_data *prog_data_fs =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
-
- /* Update the cache dirty flag based on the shader progs data */
- job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl;
- job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl;
- job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl;
-
- /* See GFXH-930 workaround below */
- uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);
-
- uint32_t shader_rec_offset =
- v3dv_cl_ensure_space(&job->indirect,
- cl_packet_length(GL_SHADER_STATE_RECORD) +
- num_elements_to_emit *
- cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
- 32);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_shader_variant *vs_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
- struct v3dv_shader_variant *vs_bin_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
- struct v3dv_shader_variant *fs_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
- struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo;
-
- struct v3dv_bo *default_attribute_values =
- pipeline->default_attribute_values != NULL ?
- pipeline->default_attribute_values :
- pipeline->device->default_attribute_float;
-
- cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
- pipeline->shader_state_record, shader) {
-
- /* FIXME: we are setting these values here and during the
- * prepacking. This is because both cl_emit_with_prepacked and v3dv_pack
- * assert for minimum values of these.
It would be good to get - * v3dv_pack to assert on the final value if possible - */ - shader.min_coord_shader_input_segments_required_in_play = - pipeline->vpm_cfg_bin.As; - shader.min_vertex_shader_input_segments_required_in_play = - pipeline->vpm_cfg.As; - - shader.coordinate_shader_code_address = - v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset); - shader.vertex_shader_code_address = - v3dv_cl_address(assembly_bo, vs_variant->assembly_offset); - shader.fragment_shader_code_address = - v3dv_cl_address(assembly_bo, fs_variant->assembly_offset); - - shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin; - shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs; - shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs; - - shader.address_of_default_attribute_values = - v3dv_cl_address(default_attribute_values, 0); - } - - /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */ - bool cs_loaded_any = false; - const bool cs_uses_builtins = prog_data_vs_bin->uses_iid || - prog_data_vs_bin->uses_biid || - prog_data_vs_bin->uses_vid; - const uint32_t packet_length = - cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); - - uint32_t emitted_va_count = 0; - for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) { - assert(i < MAX_VERTEX_ATTRIBS); - - if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED) - continue; - - const uint32_t binding = pipeline->va[i].binding; + /* VK_SHADER_STAGE_GEOMETRY_BIT */ + if (pipeline->has_gs) { + const bool has_new_descriptors_gs = + has_new_descriptors && + (cmd_buffer->state.dirty_descriptor_stages & + VK_SHADER_STAGE_GEOMETRY_BIT); - /* We store each vertex attribute in the array using its driver location - * as index. - */ - const uint32_t location = i; - - struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding]; - - cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, - &pipeline->vertex_attrs[i * packet_length], attr) { - - assert(c_vb->buffer->mem->bo); - attr.address = v3dv_cl_address(c_vb->buffer->mem->bo, - c_vb->buffer->mem_offset + - pipeline->va[i].offset + - c_vb->offset); - - attr.number_of_values_read_by_coordinate_shader = - prog_data_vs_bin->vattr_sizes[location]; - attr.number_of_values_read_by_vertex_shader = - prog_data_vs->vattr_sizes[location]; - - /* GFXH-930: At least one attribute must be enabled and read by CS - * and VS. If we have attributes being consumed by the VS but not - * the CS, then set up a dummy load of the last attribute into the - * CS's VPM inputs. (Since CS is just dead-code-elimination compared - * to VS, we can't have CS loading but not VS). - * - * GFXH-1602: first attribute must be active if using builtins. 
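
Boiled down, the two workarounds mean: after walking the attributes, if the coordinate shader still reads nothing, force a one-value read, on the first attribute when builtins are in use (GFXH-1602), otherwise on the last one (GFXH-930). An illustrative sketch of just that selection (a simplification; the real code patches the records as it emits them):

#include <stdbool.h>
#include <stdint.h>

/* Returns the attribute index that should get a dummy one-value read
 * by the coordinate shader, or -1 if no fixup is needed. */
static int
dummy_cs_attr_fixup(const uint32_t *cs_reads, int va_count,
                    bool cs_uses_builtins)
{
   for (int i = 0; i < va_count; i++) {
      if (cs_reads[i])
         return -1;                     /* CS already loads something */
   }
   if (va_count == 0)
      return 0;                         /* emit a fully dummy record */
   return cs_uses_builtins ? 0 : va_count - 1;
}
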
- */ - if (prog_data_vs_bin->vattr_sizes[location]) - cs_loaded_any = true; - - if (i == 0 && cs_uses_builtins && !cs_loaded_any) { - attr.number_of_values_read_by_coordinate_shader = 1; - cs_loaded_any = true; - } else if (i == pipeline->va_count - 1 && !cs_loaded_any) { - attr.number_of_values_read_by_coordinate_shader = 1; - cs_loaded_any = true; - } + const bool has_new_push_constants_gs = + has_new_push_constants && + (cmd_buffer->state.dirty_push_constants_stages & + VK_SHADER_STAGE_GEOMETRY_BIT); - attr.maximum_index = 0xffffff; - } + const bool needs_gs_update = has_new_viewport || + has_new_view_index || + has_new_pipeline || + has_new_push_constants_gs || + has_new_descriptors_gs; - emitted_va_count++; - } + if (needs_gs_update) { + struct v3dv_shader_variant *gs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; - if (pipeline->va_count == 0) { - /* GFXH-930: At least one attribute must be enabled and read - * by CS and VS. If we have no attributes being consumed by - * the shader, set up a dummy to be loaded into the VPM. - */ - cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { - /* Valid address of data whose value will be unused. */ - attr.address = v3dv_cl_address(job->indirect.bo, 0); + struct v3dv_shader_variant *gs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; - attr.type = ATTRIBUTE_FLOAT; - attr.stride = 0; - attr.vec_size = 1; + cmd_buffer->state.uniforms.gs = + v3dv_write_uniforms(cmd_buffer, pipeline, gs_variant); - attr.number_of_values_read_by_coordinate_shader = 1; - attr.number_of_values_read_by_vertex_shader = 1; + cmd_buffer->state.uniforms.gs_bin = + v3dv_write_uniforms(cmd_buffer, pipeline, gs_bin_variant); } } - if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { - v3dv_cl_ensure_space_with_branch(&job->bcl, - sizeof(pipeline->vcm_cache_size)); - v3dv_return_if_oom(cmd_buffer, NULL); + /* VK_SHADER_STAGE_VERTEX_BIT */ + const bool has_new_descriptors_vs = + has_new_descriptors && + (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_VERTEX_BIT); - cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size); - } + const bool has_new_push_constants_vs = + has_new_push_constants && + (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT); - v3dv_cl_ensure_space_with_branch(&job->bcl, - cl_packet_length(GL_SHADER_STATE)); - v3dv_return_if_oom(cmd_buffer, NULL); + const bool needs_vs_update = has_new_viewport || + has_new_view_index || + has_new_pipeline || + has_new_push_constants_vs || + has_new_descriptors_vs; - cl_emit(&job->bcl, GL_SHADER_STATE, state) { - state.address = v3dv_cl_address(job->indirect.bo, - shader_rec_offset); - state.number_of_attribute_arrays = num_elements_to_emit; - } - - cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER | - V3DV_CMD_DIRTY_DESCRIPTOR_SETS | - V3DV_CMD_DIRTY_PUSH_CONSTANTS); -} + if (needs_vs_update) { + struct v3dv_shader_variant *vs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; -static void -emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); + struct v3dv_shader_variant *vs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; - v3dv_cl_ensure_space_with_branch(&job->bcl, - cl_packet_length(OCCLUSION_QUERY_COUNTER)); - v3dv_return_if_oom(cmd_buffer, NULL); + cmd_buffer->state.uniforms.vs = + v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant); - cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, 
counter) { - if (cmd_buffer->state.query.active_query) { - counter.address = - v3dv_cl_address(cmd_buffer->state.query.active_query, 0); - } + cmd_buffer->state.uniforms.vs_bin = + v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant); } - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY; + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX; } /* This stores command buffer state that we might be about to stomp for @@ -4115,86 +2366,6 @@ v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer, state->meta.has_descriptor_state = false; } -/* FIXME: C&P from v3dx_draw. Refactor to common place? */ -static uint32_t -v3d_hw_prim_type(enum pipe_prim_type prim_type) -{ - switch (prim_type) { - case PIPE_PRIM_POINTS: - case PIPE_PRIM_LINES: - case PIPE_PRIM_LINE_LOOP: - case PIPE_PRIM_LINE_STRIP: - case PIPE_PRIM_TRIANGLES: - case PIPE_PRIM_TRIANGLE_STRIP: - case PIPE_PRIM_TRIANGLE_FAN: - return prim_type; - - case PIPE_PRIM_LINES_ADJACENCY: - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - case PIPE_PRIM_TRIANGLES_ADJACENCY: - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY); - - default: - unreachable("Unsupported primitive type"); - } -} - -struct v3dv_draw_info { - uint32_t vertex_count; - uint32_t instance_count; - uint32_t first_vertex; - uint32_t first_instance; -}; - -static void -cmd_buffer_emit_draw(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_draw_info *info) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - struct v3dv_pipeline *pipeline = state->gfx.pipeline; - - assert(pipeline); - - uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); - - if (info->first_instance > 0) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { - base.base_instance = info->first_instance; - base.base_vertex = 0; - } - } - - if (info->instance_count > 1) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) { - prim.mode = hw_prim_type; - prim.index_of_first_vertex = info->first_vertex; - prim.number_of_instances = info->instance_count; - prim.instance_length = info->vertex_count; - } - } else { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS)); - v3dv_return_if_oom(cmd_buffer, NULL); - cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) { - prim.mode = hw_prim_type; - prim.length = info->vertex_count; - prim.index_of_first_vertex = info->first_vertex; - } - } -} - static struct v3dv_job * cmd_buffer_pre_draw_split_job(struct v3dv_cmd_buffer *cmd_buffer) { @@ -4297,6 +2468,7 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer) old_job->frame_tiling.width, old_job->frame_tiling.height, old_job->frame_tiling.layers, + true, old_job->frame_tiling.render_target_count, old_job->frame_tiling.internal_bpp, true /* msaa */); @@ -4304,35 +2476,8 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer) v3dv_job_destroy(old_job); } -static void -emit_index_buffer(struct v3dv_cmd_buffer *cmd_buffer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - /* We flag all state as dirty when we create a new job so make sure we - * have a valid index buffer before attempting to emit state for 
it. - */ - struct v3dv_buffer *ibuffer = - v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer); - if (ibuffer) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP)); - v3dv_return_if_oom(cmd_buffer, NULL); - - const uint32_t offset = cmd_buffer->state.index_buffer.offset; - cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { - ib.address = v3dv_cl_address(ibuffer->mem->bo, - ibuffer->mem_offset + offset); - ib.size = ibuffer->mem->bo->size; - } - } - - cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER; -} - -static void -cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer) +void +v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer) { assert(cmd_buffer->state.gfx.pipeline); assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT)); @@ -4368,17 +2513,20 @@ cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer) *dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_PUSH_CONSTANTS | V3DV_CMD_DIRTY_DESCRIPTOR_SETS | - V3DV_CMD_DIRTY_VIEWPORT); + V3DV_CMD_DIRTY_VIEWPORT | + V3DV_CMD_DIRTY_VIEW_INDEX); if (dirty_uniform_state) update_gfx_uniform_state(cmd_buffer, dirty_uniform_state); + struct v3dv_device *device = cmd_buffer->device; + if (dirty_uniform_state || (*dirty & V3DV_CMD_DIRTY_VERTEX_BUFFER)) - emit_gl_shader_state(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_gl_shader_state)(cmd_buffer); if (*dirty & (V3DV_CMD_DIRTY_PIPELINE)) { - emit_configuration_bits(cmd_buffer); - emit_varyings_state(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_configuration_bits)(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_varyings_state)(cmd_buffer); } if (*dirty & (V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR)) { @@ -4386,46 +2534,69 @@ cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer) } if (*dirty & V3DV_CMD_DIRTY_VIEWPORT) { - emit_viewport(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_viewport)(cmd_buffer); } if (*dirty & V3DV_CMD_DIRTY_INDEX_BUFFER) - emit_index_buffer(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_index_buffer)(cmd_buffer); const uint32_t dynamic_stencil_dirty_flags = V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK | V3DV_CMD_DIRTY_STENCIL_WRITE_MASK | V3DV_CMD_DIRTY_STENCIL_REFERENCE; if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | dynamic_stencil_dirty_flags)) - emit_stencil(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_stencil)(cmd_buffer); if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS)) - emit_depth_bias(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer); if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS)) - emit_blend(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer); if (*dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY) - emit_occlusion_query(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_occlusion_query)(cmd_buffer); if (*dirty & V3DV_CMD_DIRTY_LINE_WIDTH) - emit_line_width(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_line_width)(cmd_buffer); if (*dirty & V3DV_CMD_DIRTY_PIPELINE) - emit_sample_state(cmd_buffer); + v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer); + + if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE)) + v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer); cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE; } +static inline void +cmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t view_index) +{ + cmd_buffer->state.view_index = view_index; + cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX; +} + static void cmd_buffer_draw(struct 
v3dv_cmd_buffer *cmd_buffer, struct v3dv_draw_info *info) { - cmd_buffer_emit_pre_draw(cmd_buffer); - cmd_buffer_emit_draw(cmd_buffer, info); + + struct v3dv_render_pass *pass = cmd_buffer->state.pass; + if (likely(!pass->multiview_enabled)) { + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info); + return; + } + + uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; + while (view_mask) { + cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info); + } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, @@ -4445,7 +2616,7 @@ v3dv_CmdDraw(VkCommandBuffer commandBuffer, cmd_buffer_draw(cmd_buffer, &info); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, @@ -4458,56 +2629,26 @@ v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - cmd_buffer_emit_pre_draw(cmd_buffer); - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); - uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; - uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size; - - if (vertexOffset != 0 || firstInstance != 0) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { - base.base_instance = firstInstance; - base.base_vertex = vertexOffset; - } + struct v3dv_render_pass *pass = cmd_buffer->state.pass; + if (likely(!pass->multiview_enabled)) { + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed) + (cmd_buffer, indexCount, instanceCount, + firstIndex, vertexOffset, firstInstance); + return; } - if (instanceCount == 1) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(INDEXED_PRIM_LIST)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) { - prim.index_type = index_type; - prim.length = indexCount; - prim.index_offset = index_offset; - prim.mode = hw_prim_type; - prim.enable_primitive_restarts = pipeline->primitive_restart; - } - } else if (instanceCount > 1) { - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST)); - v3dv_return_if_oom(cmd_buffer, NULL); - - cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { - prim.index_type = index_type; - prim.index_offset = index_offset; - prim.mode = hw_prim_type; - prim.enable_primitive_restarts = pipeline->primitive_restart; - prim.number_of_instances = instanceCount; - prim.instance_length = indexCount; - } + uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; + while (view_mask) { + cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed) + (cmd_buffer, indexCount, instanceCount, + firstIndex, vertexOffset, firstInstance); } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -4521,28 
+2662,24 @@ v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); - cmd_buffer_emit_pre_draw(cmd_buffer); - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); - - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS)); - v3dv_return_if_oom(cmd_buffer, NULL); + struct v3dv_render_pass *pass = cmd_buffer->state.pass; + if (likely(!pass->multiview_enabled)) { + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect) + (cmd_buffer, buffer, offset, drawCount, stride); + return; + } - cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) { - prim.mode = hw_prim_type; - prim.number_of_draw_indirect_array_records = drawCount; - prim.stride_in_multiples_of_4_bytes = stride >> 2; - prim.address = v3dv_cl_address(buffer->mem->bo, - buffer->mem_offset + offset); + uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; + while (view_mask) { + cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect) + (cmd_buffer, buffer, offset, drawCount, stride); } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, @@ -4556,31 +2693,24 @@ v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); - cmd_buffer_emit_pre_draw(cmd_buffer); - - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); - uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; - - v3dv_cl_ensure_space_with_branch( - &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST)); - v3dv_return_if_oom(cmd_buffer, NULL); + struct v3dv_render_pass *pass = cmd_buffer->state.pass; + if (likely(!pass->multiview_enabled)) { + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect) + (cmd_buffer, buffer, offset, drawCount, stride); + return; + } - cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { - prim.index_type = index_type; - prim.mode = hw_prim_type; - prim.enable_primitive_restarts = pipeline->primitive_restart; - prim.number_of_draw_indirect_indexed_records = drawCount; - prim.stride_in_multiples_of_4_bytes = stride >> 2; - prim.address = v3dv_cl_address(buffer->mem->bo, - buffer->mem_offset + offset); + uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; + while (view_mask) { + cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); + v3dv_cmd_buffer_emit_pre_draw(cmd_buffer); + v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect) + (cmd_buffer, buffer, offset, drawCount, stride); } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -4616,7 +2746,7 @@ v3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdBindVertexBuffers(VkCommandBuffer 
commandBuffer, uint32_t firstBinding, uint32_t bindingCount, @@ -4651,6 +2781,9 @@ static uint32_t get_index_size(VkIndexType index_type) { switch (index_type) { + case VK_INDEX_TYPE_UINT8_EXT: + return 1; + break; case VK_INDEX_TYPE_UINT16: return 2; break; @@ -4662,7 +2795,7 @@ get_index_size(VkIndexType index_type) } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, @@ -4683,7 +2816,7 @@ v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_INDEX_BUFFER; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask) @@ -4698,7 +2831,7 @@ v3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask) @@ -4713,7 +2846,7 @@ v3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference) @@ -4728,7 +2861,7 @@ v3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, @@ -4742,7 +2875,7 @@ v3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds) @@ -4752,7 +2885,7 @@ v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, */ } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) { @@ -4762,7 +2895,7 @@ v3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, @@ -4784,18 +2917,16 @@ v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, &cmd_buffer->state.compute.descriptor_state : &cmd_buffer->state.gfx.descriptor_state; + VkShaderStageFlags dirty_stages = 0; bool descriptor_state_changed = false; for (uint32_t i = 0; i < descriptorSetCount; i++) { V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]); uint32_t index = firstSet + i; + descriptor_state->valid |= (1u << index); if (descriptor_state->descriptor_sets[index] != set) { descriptor_state->descriptor_sets[index] = set; - descriptor_state_changed = true; - } - - if (!(descriptor_state->valid & (1u << index))) { - descriptor_state->valid |= (1u << index); + dirty_stages |= set->layout->shader_stages; descriptor_state_changed = true; } @@ -4804,20 +2935,24 @@ v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, if (descriptor_state->dynamic_offsets[idx] != pDynamicOffsets[dyn_index]) { descriptor_state->dynamic_offsets[idx] = pDynamicOffsets[dyn_index]; + dirty_stages |= set->layout->shader_stages; descriptor_state_changed = true; } } } if (descriptor_state_changed) { - if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) + if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { 
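/* [Editor's note] The rework around this point narrows invalidation:
 * instead of re-emitting uniforms for every stage whenever any descriptor
 * set changes, the command buffer accumulates only the shader stages
 * declared by each bound set layout, and the per-stage update checks in
 * update_gfx_uniform_state test those bits. A minimal sketch of the
 * bookkeeping, with local constants mirroring VkShaderStageFlagBits: */
#include <stdbool.h>
#include <stdint.h>

#define STAGE_VERTEX_BIT   0x01u   /* VK_SHADER_STAGE_VERTEX_BIT */
#define STAGE_GEOMETRY_BIT 0x08u   /* VK_SHADER_STAGE_GEOMETRY_BIT */
#define STAGE_FRAGMENT_BIT 0x10u   /* VK_SHADER_STAGE_FRAGMENT_BIT */

static uint32_t dirty_descriptor_stages;

/* On vkCmdBindDescriptorSets: remember which stages the set can affect. */
static void
note_set_bound(uint32_t set_layout_stages)
{
   dirty_descriptor_stages |= set_layout_stages;
}

/* At draw time: a stage re-uploads its uniforms only if its bit is set. */
static bool
stage_has_new_descriptors(uint32_t stage_bit)
{
   return (dirty_descriptor_stages & stage_bit) != 0;
}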
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS; - else + cmd_buffer->state.dirty_descriptor_stages |= dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS; + } else { cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS; + cmd_buffer->state.dirty_descriptor_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + } } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, @@ -4833,9 +2968,10 @@ v3dv_CmdPushConstants(VkCommandBuffer commandBuffer, memcpy((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size); cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS; + cmd_buffer->state.dirty_push_constants_stages |= stageFlags; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4]) { @@ -4853,6 +2989,26 @@ v3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS; } +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkBool32 *pColorWriteEnables) +{ + V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); + struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + uint32_t color_write_enable = 0; + + for (uint32_t i = 0; i < attachmentCount; i++) + color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; + + if (state->dynamic.color_write_enable == color_write_enable) + return; + + state->dynamic.color_write_enable = color_write_enable; + + state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; +} + void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_query_pool *pool, @@ -4881,12 +3037,12 @@ v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer, list_addtail(&job->list_link, &cmd_buffer->jobs); } -static void -ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t slot_size, - uint32_t used_count, - uint32_t *alloc_count, - void **ptr) +void +v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t slot_size, + uint32_t used_count, + uint32_t *alloc_count, + void **ptr) { if (used_count >= *alloc_count) { const uint32_t prev_slot_count = *alloc_count; @@ -4915,10 +3071,11 @@ v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer, VkQueryControlFlags flags) { /* FIXME: we only support one active query for now */ - assert(cmd_buffer->state.query.active_query == NULL); + assert(cmd_buffer->state.query.active_query.bo == NULL); assert(query < pool->query_count); - cmd_buffer->state.query.active_query = pool->queries[query].bo; + cmd_buffer->state.query.active_query.bo = pool->queries[query].bo; + cmd_buffer->state.query.active_query.offset = pool->queries[query].offset; cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; } @@ -4928,7 +3085,7 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, uint32_t query) { assert(query < pool->query_count); - assert(cmd_buffer->state.query.active_query != NULL); + assert(cmd_buffer->state.query.active_query.bo != NULL); if (cmd_buffer->state.pass) { /* Queue the EndQuery in the command buffer state, we will create a CPU @@ -4936,11 +3093,11 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, * render pass job in which they have been recorded. 
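/* [Editor's note] v3dv_CmdSetColorWriteEnableEXT above folds the
 * per-attachment VkBool32 array into a single bitmask, one 4-bit RGBA
 * nibble per color attachment. Standalone sketch, with uint32_t standing
 * in for VkBool32: */
#include <stdint.h>

static uint32_t
pack_color_write_enable(uint32_t attachment_count, const uint32_t *enables)
{
   uint32_t mask = 0;
   for (uint32_t i = 0; i < attachment_count; i++)
      mask |= enables[i] ? (0xfu << (i * 4)) : 0;
   return mask;   /* e.g. {on, off, on} -> 0xf0f */
}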
*/ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - ensure_array_state(cmd_buffer, - sizeof(struct v3dv_end_query_cpu_job_info), - state->query.end.used_count, - &state->query.end.alloc_count, - (void **) &state->query.end.states); + v3dv_cmd_buffer_ensure_array_state(cmd_buffer, + sizeof(struct v3dv_end_query_cpu_job_info), + state->query.end.used_count, + &state->query.end.alloc_count, + (void **) &state->query.end.states); v3dv_return_if_oom(cmd_buffer, NULL); struct v3dv_end_query_cpu_job_info *info = @@ -4948,6 +3105,27 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, info->pool = pool; info->query = query; + + /* From the Vulkan spec: + * + * "If queries are used while executing a render pass instance that has + * multiview enabled, the query uses N consecutive query indices in + * the query pool (starting at query) where N is the number of bits set + * in the view mask in the subpass the query is used in. How the + * numerical results of the query are distributed among the queries is + * implementation-dependent." + * + * In our case, only the first query is used but this means we still need + * to flag the other queries as available so we don't emit errors when + * the applications attempt to retrive values from them. + */ + struct v3dv_render_pass *pass = cmd_buffer->state.pass; + if (!pass->multiview_enabled) { + info->count = 1; + } else { + struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; + info->count = util_bitcount(subpass->view_mask); + } } else { /* Otherwise, schedule the CPU job immediately */ struct v3dv_job *job = @@ -4958,10 +3136,14 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, job->cpu.query_end.pool = pool; job->cpu.query_end.query = query; + + /* Multiview queries cannot cross subpass boundaries */ + job->cpu.query_end.count = 1; + list_addtail(&job->list_link, &cmd_buffer->jobs); } - cmd_buffer->state.query.active_query = NULL; + cmd_buffer->state.query.active_query.bo = NULL; cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; } @@ -5019,7 +3201,7 @@ v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, list_addtail(&job->list_link, &cmd_buffer->jobs); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdSetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask) @@ -5045,7 +3227,7 @@ v3dv_CmdSetEvent(VkCommandBuffer commandBuffer, list_addtail(&job->list_link, &cmd_buffer->jobs); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdResetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask) @@ -5071,7 +3253,7 @@ v3dv_CmdResetEvent(VkCommandBuffer commandBuffer, list_addtail(&job->list_link, &cmd_buffer->jobs); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, @@ -5124,7 +3306,7 @@ v3dv_CmdWaitEvents(VkCommandBuffer commandBuffer, list_addtail(&job->list_link, &cmd_buffer->jobs); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, @@ -5136,7 +3318,8 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, /* If this is called inside a render pass we need to finish the current * job here... 
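/* [Editor's note] Two multiview helpers sketched standalone: the draw
 * paths in this file re-emit once per bit set in the subpass view mask,
 * and, per the spec text quoted above, one begin/end pair consumes one
 * query index per view bit. Local bit-twiddling stands in for Mesa's
 * u_bit_scan()/util_bitcount(); __builtin_ctz and __builtin_popcount
 * assume GCC or Clang. */
#include <stdint.h>

/* Pop the lowest set bit from *mask and return its index (u_bit_scan). */
static int
bit_scan(uint32_t *mask)
{
   int i = __builtin_ctz(*mask);
   *mask &= *mask - 1;
   return i;
}

static void
draw_per_view(uint32_t view_mask)
{
   while (view_mask) {
      int view_index = bit_scan(&view_mask);
      (void)view_index;   /* set view index, re-emit dirty state, draw */
   }
}

/* Query indices consumed by one occlusion query (util_bitcount). */
static uint32_t
queries_consumed(int multiview_enabled, uint32_t view_mask)
{
   return multiview_enabled ? (uint32_t)__builtin_popcount(view_mask) : 1;
}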
*/ - if (cmd_buffer->state.pass) + struct v3dv_render_pass *pass = cmd_buffer->state.pass; + if (pass) v3dv_cmd_buffer_finish_job(cmd_buffer); struct v3dv_job *job = @@ -5148,6 +3331,14 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, job->cpu.query_timestamp.pool = query_pool; job->cpu.query_timestamp.query = query; + if (!pass || !pass->multiview_enabled) { + job->cpu.query_timestamp.count = 1; + } else { + struct v3dv_subpass *subpass = + &pass->subpasses[cmd_buffer->state.subpass_idx]; + job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask); + } + list_addtail(&job->list_link, &cmd_buffer->jobs); cmd_buffer->state.job = NULL; @@ -5163,9 +3354,10 @@ cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer) assert(cmd_buffer->state.compute.pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - uint32_t *dirty = &cmd_buffer->state.dirty; - *dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE | - V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS); + cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE | + V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS); + cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; + cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; } #define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 @@ -5230,6 +3422,9 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job( static struct v3dv_job * cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t base_offset_x, + uint32_t base_offset_y, + uint32_t base_offset_z, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z, @@ -5258,6 +3453,10 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, job->csd.wg_count[1] = group_count_y; job->csd.wg_count[2] = group_count_z; + job->csd.wg_base[0] = base_offset_x; + job->csd.wg_base[1] = base_offset_y; + job->csd.wg_base[2] = base_offset_z; + submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT; submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT; submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT; @@ -5265,20 +3464,32 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, const struct v3d_compute_prog_data *cpd = cs_variant->prog_data.cs; - const uint32_t wgs_per_sg = 1; /* FIXME */ + const uint32_t num_wgs = group_count_x * group_count_y * group_count_z; const uint32_t wg_size = cpd->local_size[0] * cpd->local_size[1] * cpd->local_size[2]; - submit->cfg[3] |= wgs_per_sg << V3D_CSD_CFG3_WGS_PER_SG_SHIFT; - submit->cfg[3] |= ((DIV_ROUND_UP(wgs_per_sg * wg_size, 16) - 1) << - V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT); + + uint32_t wgs_per_sg = + v3d_csd_choose_workgroups_per_supergroup( + &cmd_buffer->device->devinfo, + cs_variant->prog_data.cs->has_subgroups, + cs_variant->prog_data.cs->base.has_control_barrier, + cs_variant->prog_data.cs->base.threads, + num_wgs, wg_size); + + uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16); + uint32_t whole_sgs = num_wgs / wgs_per_sg; + uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg; + uint32_t num_batches = batches_per_sg * whole_sgs + + DIV_ROUND_UP(rem_wgs * wg_size, 16); + + submit->cfg[3] |= (wgs_per_sg & 0xf) << V3D_CSD_CFG3_WGS_PER_SG_SHIFT; + submit->cfg[3] |= (batches_per_sg - 1) << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT; submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT; if (wg_size_out) *wg_size_out = wg_size; - uint32_t batches_per_wg = DIV_ROUND_UP(wg_size, 16); - submit->cfg[4] = batches_per_wg * - (group_count_x * group_count_y * group_count_z) - 1; + submit->cfg[4] = 
num_batches - 1; assert(submit->cfg[4] != ~0); assert(pipeline->shared_data->assembly_bo); @@ -5302,7 +3513,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, } } - v3dv_job_add_bo(job, cs_assembly_bo); + v3dv_job_add_bo_unchecked(job, cs_assembly_bo); struct v3dv_cl_reloc uniforms = v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, cs_variant, @@ -5316,6 +3527,9 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, static void cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t base_offset_x, + uint32_t base_offset_y, + uint32_t base_offset_z, uint32_t group_count_x, uint32_t group_count_y, uint32_t group_count_z) @@ -5325,6 +3539,9 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job = cmd_buffer_create_csd_job(cmd_buffer, + base_offset_x, + base_offset_y, + base_offset_z, group_count_x, group_count_y, group_count_z, @@ -5334,7 +3551,7 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer, cmd_buffer->state.job = NULL; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, @@ -5343,9 +3560,28 @@ v3dv_CmdDispatch(VkCommandBuffer commandBuffer, V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); cmd_buffer_emit_pre_dispatch(cmd_buffer); - cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ); + cmd_buffer_dispatch(cmd_buffer, 0, 0, 0, + groupCountX, groupCountY, groupCountZ); +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer, + uint32_t baseGroupX, + uint32_t baseGroupY, + uint32_t baseGroupZ, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ) +{ + V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer_emit_pre_dispatch(cmd_buffer); + cmd_buffer_dispatch(cmd_buffer, + baseGroupX, baseGroupY, baseGroupZ, + groupCountX, groupCountY, groupCountZ); } + static void cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_buffer *buffer, @@ -5370,6 +3606,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer, */ struct v3dv_job *csd_job = cmd_buffer_create_csd_job(cmd_buffer, + 0, 0, 0, 1, 1, 1, &job->cpu.csd_indirect.wg_uniform_offsets[0], &job->cpu.csd_indirect.wg_size); @@ -5392,7 +3629,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer, cmd_buffer->state.job = NULL; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset) @@ -5405,3 +3642,10 @@ v3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, cmd_buffer_emit_pre_dispatch(cmd_buffer); cmd_buffer_dispatch_indirect(cmd_buffer, buffer, offset); } + +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) +{ + /* Nothing to do here since we only support a single device */ + assert(deviceMask == 0x1); +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c b/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c index 3487d701a..14a93cea4 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c @@ -27,42 +27,20 @@ #include "v3dv_private.h" /* - * Returns how much space a given descriptor type needs on a bo (GPU - * memory). 
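/* [Editor's note] Worked sketch of the compute-batch accounting introduced
 * in cmd_buffer_create_csd_job above: a batch is a group of 16 threads,
 * workgroups are packed wgs_per_sg at a time into supergroups, and a final
 * partial supergroup covers the remainder. DIV_ROUND_UP is redefined
 * locally so the snippet stands alone. */
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static uint32_t
csd_num_batches(uint32_t num_wgs, uint32_t wg_size, uint32_t wgs_per_sg)
{
   uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16);
   uint32_t whole_sgs = num_wgs / wgs_per_sg;
   uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg;
   return batches_per_sg * whole_sgs + DIV_ROUND_UP(rem_wgs * wg_size, 16);
}

/* Example: 10 workgroups of 48 threads, 3 workgroups per supergroup:
 * batches_per_sg = ceil(144/16) = 9, 3 whole supergroups, 1 leftover
 * workgroup -> 9*3 + ceil(48/16) = 30 batches; cfg[4] stores 30 - 1. */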
- */ -static uint32_t -descriptor_bo_size(VkDescriptorType type) -{ - switch(type) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - return sizeof(struct v3dv_sampler_descriptor); - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - return sizeof(struct v3dv_combined_image_sampler_descriptor); - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - return sizeof(struct v3dv_sampled_image_descriptor); - default: - return 0; - } -} - -/* * For a given descriptor defined by the descriptor_set it belongs, its * binding layout, and array_index, it returns the map region assigned to it * from the descriptor pool bo. */ static void* -descriptor_bo_map(struct v3dv_descriptor_set *set, +descriptor_bo_map(struct v3dv_device *device, + struct v3dv_descriptor_set *set, const struct v3dv_descriptor_set_binding_layout *binding_layout, uint32_t array_index) { - assert(descriptor_bo_size(binding_layout->type) > 0); + assert(v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0); return set->pool->bo->map + set->base_offset + binding_layout->descriptor_offset + - array_index * descriptor_bo_size(binding_layout->type); + array_index * v3dv_X(device, descriptor_bo_size)(binding_layout->type); } static bool @@ -125,7 +103,8 @@ v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_stat * validation or adding extra offsets if the bo contains more that one field. */ static struct v3dv_cl_reloc -v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_state, +v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device, + struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index, @@ -146,7 +125,7 @@ v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_s const struct v3dv_descriptor_set_binding_layout *binding_layout = &set->layout->binding[binding_number]; - assert(descriptor_bo_size(binding_layout->type) > 0); + assert(v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0); *out_type = binding_layout->type; uint32_t array_index = map->array_index[index]; @@ -155,7 +134,7 @@ v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_s struct v3dv_cl_reloc reloc = { .bo = set->pool->bo, .offset = set->base_offset + binding_layout->descriptor_offset + - array_index * descriptor_bo_size(binding_layout->type), + array_index * v3dv_X(device, descriptor_bo_size)(binding_layout->type), }; return reloc; @@ -218,24 +197,23 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state, struct v3dv_cl_reloc -v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state, +v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device, + struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index) { VkDescriptorType type; struct v3dv_cl_reloc reloc = - v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map, + v3dv_descriptor_map_get_descriptor_bo(device, descriptor_state, map, pipeline_layout, index, &type); assert(type == VK_DESCRIPTOR_TYPE_SAMPLER || type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); - if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor, - sampler_state); - } 
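/* [Editor's note] descriptor_bo_map() above computes a plain strided
 * address into the pool's BO. Equivalent standalone arithmetic, with
 * hypothetical parameter names: */
#include <stddef.h>
#include <stdint.h>

static void *
descriptor_cpu_addr(void *pool_bo_map, uint32_t set_base_offset,
                    uint32_t binding_descriptor_offset,
                    uint32_t array_index, uint32_t desc_size)
{
   /* pool BO base + set base + binding base + element index * stride */
   return (uint8_t *)pool_bo_map + set_base_offset +
          binding_descriptor_offset + (size_t)array_index * desc_size;
}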
+ if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + reloc.offset += v3dv_X(device, combined_image_sampler_sampler_state_offset)(); return reloc; } @@ -262,7 +240,7 @@ v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: assert(descriptor->image_view); - *out_vk_format = descriptor->image_view->vk_format; + *out_vk_format = descriptor->image_view->vk.format; return descriptor->image_view->format; default: unreachable("descriptor type doesn't has a texture format"); @@ -288,23 +266,28 @@ v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_stat case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { assert(descriptor->image_view); - return descriptor->image_view->image->mem->bo; + struct v3dv_image *image = + (struct v3dv_image *) descriptor->image_view->vk.image; + return image->mem->bo; + } default: unreachable("descriptor type doesn't has a texture bo"); } } struct v3dv_cl_reloc -v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state, +v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device, + struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index) { VkDescriptorType type; struct v3dv_cl_reloc reloc = - v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map, + v3dv_descriptor_map_get_descriptor_bo(device, + descriptor_state, map, pipeline_layout, index, &type); @@ -315,10 +298,8 @@ v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descr type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER || type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); - if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor, - texture_state); - } + if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + reloc.offset += v3dv_X(device, combined_image_sampler_texture_state_offset)(); return reloc; } @@ -330,7 +311,7 @@ v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descr * just multiple descriptor set layouts pasted together." 
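/* [Editor's note] The v3dv_X(device, name)(...) calls that replace direct
 * helpers throughout this import are Mesa's per-hardware-version dispatch:
 * the macro selects a version-prefixed symbol from devinfo.ver, so
 * descriptor sizes and packed-state offsets can differ per V3D generation.
 * A hedged sketch of the pattern, with invented names and a stub
 * implementation standing in for the real v4.2 sizing table: */
#include <stdint.h>

static uint32_t
v3d42_descriptor_bo_size(int descriptor_type)
{
   (void)descriptor_type;
   return 32;   /* stub; the real table varies by descriptor type */
}

static uint32_t
dispatch_descriptor_bo_size(int hw_ver, int descriptor_type)
{
   switch (hw_ver) {
   case 42:
      return v3d42_descriptor_bo_size(descriptor_type);
   default:
      return 0;   /* unreachable on supported hardware */
   }
}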
*/ -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -345,7 +326,7 @@ v3dv_CreatePipelineLayout(VkDevice _device, layout = vk_object_zalloc(&device->vk, pAllocator, sizeof(*layout), VK_OBJECT_TYPE_PIPELINE_LAYOUT); if (layout == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); layout->num_sets = pCreateInfo->setLayoutCount; @@ -380,7 +361,7 @@ v3dv_CreatePipelineLayout(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout, const VkAllocationCallbacks *pAllocator) @@ -393,7 +374,7 @@ v3dv_DestroyPipelineLayout(VkDevice _device, vk_object_free(&device->vk, pAllocator, pipeline_layout); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -435,7 +416,7 @@ v3dv_CreateDescriptorPool(VkDevice _device, assert(pCreateInfo->pPoolSizes[i].descriptorCount > 0); descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount; - bo_size += descriptor_bo_size(pCreateInfo->pPoolSizes[i].type) * + bo_size += v3dv_X(device, descriptor_bo_size)(pCreateInfo->pPoolSizes[i].type) * pCreateInfo->pPoolSizes[i].descriptorCount; } @@ -452,7 +433,7 @@ v3dv_CreateDescriptorPool(VkDevice _device, VK_OBJECT_TYPE_DESCRIPTOR_POOL); if (!pool) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) { pool->host_memory_base = (uint8_t*)pool + sizeof(struct v3dv_descriptor_pool); @@ -482,7 +463,7 @@ v3dv_CreateDescriptorPool(VkDevice _device, out_of_device_memory: vk_object_free(&device->vk, pAllocator, pool); - return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); } static void @@ -506,7 +487,7 @@ descriptor_set_destroy(struct v3dv_device *device, vk_object_free(&device->vk, NULL, set); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool, const VkAllocationCallbacks *pAllocator) @@ -531,7 +512,7 @@ v3dv_DestroyDescriptorPool(VkDevice _device, vk_object_free(&device->vk, pAllocator, pool); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags) @@ -558,7 +539,7 @@ v3dv_ResetDescriptorPool(VkDevice _device, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -602,7 +583,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); if (!set_layout) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); /* We just allocate all the immutable samplers at the end of the struct */ struct v3dv_sampler *samplers = (void*) &set_layout->binding[num_bindings]; @@ -614,7 +595,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, pCreateInfo->bindingCount, &bindings); if (result != VK_SUCCESS) { vk_object_free(&device->vk, pAllocator, set_layout); - return vk_error(device->instance, result); + return 
vk_error(device, result); } memset(set_layout->binding, 0, @@ -680,7 +661,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, set_layout->binding[binding_number].descriptor_offset = set_layout->bo_size; set_layout->bo_size += - descriptor_bo_size(set_layout->binding[binding_number].type) * + v3dv_X(device, descriptor_bo_size)(set_layout->binding[binding_number].type) * binding->descriptorCount; } @@ -694,7 +675,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyDescriptorSetLayout(VkDevice _device, VkDescriptorSetLayout _set_layout, const VkAllocationCallbacks *pAllocator) @@ -716,7 +697,7 @@ out_of_pool_memory(const struct v3dv_device *device, * by allocating a new pool, so they don't point to real issues. */ if (!pool->is_driver_internal) - return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY) + return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY); else return VK_ERROR_OUT_OF_POOL_MEMORY; } @@ -745,7 +726,7 @@ descriptor_set_create(struct v3dv_device *device, VK_OBJECT_TYPE_DESCRIPTOR_SET); if (!set) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } set->pool = pool; @@ -818,15 +799,14 @@ descriptor_set_create(struct v3dv_device *device, for (uint32_t i = 0; i < layout->binding[b].array_size; i++) { uint32_t combined_offset = layout->binding[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ? - offsetof(struct v3dv_combined_image_sampler_descriptor, sampler_state) : - 0; + v3dv_X(device, combined_image_sampler_sampler_state_offset)() : 0; - void *desc_map = descriptor_bo_map(set, &layout->binding[b], i); + void *desc_map = descriptor_bo_map(device, set, &layout->binding[b], i); desc_map += combined_offset; memcpy(desc_map, samplers[i].sampler_state, - cl_packet_length(SAMPLER_STATE)); + sizeof(samplers[i].sampler_state)); } } @@ -835,7 +815,7 @@ descriptor_set_create(struct v3dv_device *device, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_AllocateDescriptorSets(VkDevice _device, const VkDescriptorSetAllocateInfo *pAllocateInfo, VkDescriptorSet *pDescriptorSets) @@ -869,7 +849,7 @@ v3dv_AllocateDescriptorSets(VkDevice _device, return result; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint32_t count, @@ -888,7 +868,8 @@ v3dv_FreeDescriptorSets(VkDevice _device, } static void -descriptor_bo_copy(struct v3dv_descriptor_set *dst_set, +descriptor_bo_copy(struct v3dv_device *device, + struct v3dv_descriptor_set *dst_set, const struct v3dv_descriptor_set_binding_layout *dst_binding_layout, uint32_t dst_array_index, struct v3dv_descriptor_set *src_set, @@ -897,31 +878,55 @@ descriptor_bo_copy(struct v3dv_descriptor_set *dst_set, { assert(dst_binding_layout->type == src_binding_layout->type); - void *dst_map = descriptor_bo_map(dst_set, dst_binding_layout, dst_array_index); - void *src_map = descriptor_bo_map(src_set, src_binding_layout, src_array_index); + void *dst_map = descriptor_bo_map(device, dst_set, dst_binding_layout, dst_array_index); + void *src_map = descriptor_bo_map(device, src_set, src_binding_layout, src_array_index); + + memcpy(dst_map, src_map, v3dv_X(device, descriptor_bo_size)(src_binding_layout->type)); +} + +static void +write_buffer_descriptor(struct v3dv_descriptor *descriptor, + VkDescriptorType desc_type, + const VkDescriptorBufferInfo *buffer_info) +{ + V3DV_FROM_HANDLE(v3dv_buffer, buffer, buffer_info->buffer); - 
memcpy(dst_map, src_map, descriptor_bo_size(src_binding_layout->type)); + descriptor->type = desc_type; + descriptor->buffer = buffer; + descriptor->offset = buffer_info->offset; + if (buffer_info->range == VK_WHOLE_SIZE) { + descriptor->range = buffer->size - buffer_info->offset; + } else { + assert(descriptor->range <= UINT32_MAX); + descriptor->range = buffer_info->range; + } } static void -write_image_descriptor(VkDescriptorType desc_type, +write_image_descriptor(struct v3dv_device *device, + struct v3dv_descriptor *descriptor, + VkDescriptorType desc_type, struct v3dv_descriptor_set *set, const struct v3dv_descriptor_set_binding_layout *binding_layout, struct v3dv_image_view *iview, struct v3dv_sampler *sampler, uint32_t array_index) { - void *desc_map = descriptor_bo_map(set, binding_layout, array_index); + descriptor->type = desc_type; + descriptor->sampler = sampler; + descriptor->image_view = iview; + + void *desc_map = descriptor_bo_map(device, set, + binding_layout, array_index); if (iview) { const uint32_t tex_state_index = - iview->type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY || + iview->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY || desc_type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ? 0 : 1; memcpy(desc_map, iview->texture_shader_state[tex_state_index], sizeof(iview->texture_shader_state[0])); - desc_map += offsetof(struct v3dv_combined_image_sampler_descriptor, - sampler_state); + desc_map += v3dv_X(device, combined_image_sampler_sampler_state_offset)(); } if (sampler && !binding_layout->immutable_samplers_offset) { @@ -936,28 +941,33 @@ write_image_descriptor(VkDescriptorType desc_type, static void -write_buffer_view_descriptor(VkDescriptorType desc_type, +write_buffer_view_descriptor(struct v3dv_device *device, + struct v3dv_descriptor *descriptor, + VkDescriptorType desc_type, struct v3dv_descriptor_set *set, const struct v3dv_descriptor_set_binding_layout *binding_layout, struct v3dv_buffer_view *bview, uint32_t array_index) { - void *desc_map = descriptor_bo_map(set, binding_layout, array_index); - assert(bview); + descriptor->type = desc_type; + descriptor->buffer_view = bview; + + void *desc_map = descriptor_bo_map(device, set, binding_layout, array_index); memcpy(desc_map, bview->texture_shader_state, sizeof(bview->texture_shader_state)); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies) { + V3DV_FROM_HANDLE(v3dv_device, device, _device); for (uint32_t i = 0; i < descriptorWriteCount; i++) { const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; V3DV_FROM_HANDLE(v3dv_descriptor_set, set, writeset->dstSet); @@ -971,8 +981,6 @@ v3dv_UpdateDescriptorSets(VkDevice _device, descriptor += writeset->dstArrayElement; for (uint32_t j = 0; j < writeset->descriptorCount; ++j) { - descriptor->type = writeset->descriptorType; - switch(writeset->descriptorType) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: @@ -980,16 +988,8 @@ v3dv_UpdateDescriptorSets(VkDevice _device, case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { const VkDescriptorBufferInfo *buffer_info = writeset->pBufferInfo + j; - V3DV_FROM_HANDLE(v3dv_buffer, buffer, buffer_info->buffer); - - descriptor->buffer = buffer; - descriptor->offset = buffer_info->offset; - if (buffer_info->range == VK_WHOLE_SIZE) { - descriptor->range = buffer->size - buffer_info->offset; - } else { - assert(descriptor->range <= 
UINT32_MAX); - descriptor->range = buffer_info->range; - } + write_buffer_descriptor(descriptor, writeset->descriptorType, + buffer_info); break; } case VK_DESCRIPTOR_TYPE_SAMPLER: { @@ -999,10 +999,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device, */ const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler); - - descriptor->sampler = sampler; - - write_image_descriptor(writeset->descriptorType, + write_image_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, NULL, sampler, writeset->dstArrayElement + j); @@ -1013,10 +1010,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device, case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView); - - descriptor->image_view = iview; - - write_image_descriptor(writeset->descriptorType, + write_image_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, iview, NULL, writeset->dstArrayElement + j); @@ -1026,11 +1020,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device, const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j; V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView); V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler); - - descriptor->image_view = iview; - descriptor->sampler = sampler; - - write_image_descriptor(writeset->descriptorType, + write_image_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, iview, sampler, writeset->dstArrayElement + j); @@ -1040,12 +1030,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device, case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { V3DV_FROM_HANDLE(v3dv_buffer_view, buffer_view, writeset->pTexelBufferView[j]); - - assert(buffer_view); - - descriptor->buffer_view = buffer_view; - - write_buffer_view_descriptor(writeset->descriptorType, + write_buffer_view_descriptor(device, descriptor, writeset->descriptorType, set, binding_layout, buffer_view, writeset->dstArrayElement + j); break; @@ -1086,8 +1071,9 @@ v3dv_UpdateDescriptorSets(VkDevice _device, dst_descriptor++; src_descriptor++; - if (descriptor_bo_size(src_binding_layout->type) > 0) { - descriptor_bo_copy(dst_set, dst_binding_layout, + if (v3dv_X(device, descriptor_bo_size)(src_binding_layout->type) > 0) { + descriptor_bo_copy(device, + dst_set, dst_binding_layout, j + copyset->dstArrayElement, src_set, src_binding_layout, j + copyset->srcArrayElement); @@ -1096,3 +1082,197 @@ v3dv_UpdateDescriptorSets(VkDevice _device, } } } + +VKAPI_ATTR void VKAPI_CALL +v3dv_GetDescriptorSetLayoutSupport( + VkDevice _device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + VkDescriptorSetLayoutSupport *pSupport) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + VkDescriptorSetLayoutBinding *bindings = NULL; + VkResult result = vk_create_sorted_bindings( + pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); + if (result != VK_SUCCESS) { + pSupport->supported = false; + return; + } + + bool supported = true; + + uint32_t desc_host_size = sizeof(struct v3dv_descriptor); + uint32_t host_size = sizeof(struct v3dv_descriptor_set); + uint32_t bo_size = 0; + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + const VkDescriptorSetLayoutBinding *binding = bindings + i; + + if ((UINT32_MAX - host_size) / desc_host_size < binding->descriptorCount) { + supported = false; + break; + } + + uint32_t desc_bo_size = v3dv_X(device, descriptor_bo_size)(binding->descriptorType); + if 
(desc_bo_size > 0 && + (UINT32_MAX - bo_size) / desc_bo_size < binding->descriptorCount) { + supported = false; + break; + } + + host_size += binding->descriptorCount * desc_host_size; + bo_size += binding->descriptorCount * desc_bo_size; + } + + free(bindings); + + pSupport->supported = supported; +} + +VkResult +v3dv_CreateDescriptorUpdateTemplate( + VkDevice _device, + const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + struct v3dv_descriptor_update_template *template; + + size_t size = sizeof(*template) + + pCreateInfo->descriptorUpdateEntryCount * sizeof(template->entries[0]); + template = vk_object_alloc(&device->vk, pAllocator, size, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE); + if (template == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + template->bind_point = pCreateInfo->pipelineBindPoint; + + assert(pCreateInfo->templateType == + VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET); + template->set = pCreateInfo->set; + + template->entry_count = pCreateInfo->descriptorUpdateEntryCount; + for (uint32_t i = 0; i < template->entry_count; i++) { + const VkDescriptorUpdateTemplateEntry *pEntry = + &pCreateInfo->pDescriptorUpdateEntries[i]; + + template->entries[i] = (struct v3dv_descriptor_template_entry) { + .type = pEntry->descriptorType, + .binding = pEntry->dstBinding, + .array_element = pEntry->dstArrayElement, + .array_count = pEntry->descriptorCount, + .offset = pEntry->offset, + .stride = pEntry->stride, + }; + } + + *pDescriptorUpdateTemplate = + v3dv_descriptor_update_template_to_handle(template); + + return VK_SUCCESS; +} + +void +v3dv_DestroyDescriptorUpdateTemplate( + VkDevice _device, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const VkAllocationCallbacks *pAllocator) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_update_template, template, + descriptorUpdateTemplate); + + if (!template) + return; + + vk_object_free(&device->vk, pAllocator, template); +} + +void +v3dv_UpdateDescriptorSetWithTemplate( + VkDevice _device, + VkDescriptorSet descriptorSet, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + const void *pData) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_descriptor_set, set, descriptorSet); + V3DV_FROM_HANDLE(v3dv_descriptor_update_template, template, + descriptorUpdateTemplate); + + for (int i = 0; i < template->entry_count; i++) { + const struct v3dv_descriptor_template_entry *entry = + &template->entries[i]; + + const struct v3dv_descriptor_set_binding_layout *binding_layout = + set->layout->binding + entry->binding; + + struct v3dv_descriptor *descriptor = + set->descriptors + + binding_layout->descriptor_index + + entry->array_element; + + switch (entry->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkDescriptorBufferInfo *info = + pData + entry->offset + j * entry->stride; + write_buffer_descriptor(descriptor + j, entry->type, info); + } + break; + + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (uint32_t j = 0; j < 
entry->array_count; j++) { + const VkDescriptorImageInfo *info = + pData + entry->offset + j * entry->stride; + V3DV_FROM_HANDLE(v3dv_image_view, iview, info->imageView); + V3DV_FROM_HANDLE(v3dv_sampler, sampler, info->sampler); + write_image_descriptor(device, descriptor + j, entry->type, + set, binding_layout, iview, sampler, + entry->array_element + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkBufferView *_bview = + pData + entry->offset + j * entry->stride; + V3DV_FROM_HANDLE(v3dv_buffer_view, bview, *_bview); + write_buffer_view_descriptor(device, descriptor + j, entry->type, + set, binding_layout, bview, + entry->array_element + j); + } + break; + + default: + unreachable("Unsupported descriptor type"); + } + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_CreateSamplerYcbcrConversion( + VkDevice _device, + const VkSamplerYcbcrConversionCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSamplerYcbcrConversion *pYcbcrConversion) +{ + unreachable("Ycbcr sampler conversion is not supported"); + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_DestroySamplerYcbcrConversion( + VkDevice _device, + VkSamplerYcbcrConversion YcbcrConversion, + const VkAllocationCallbacks *pAllocator) +{ + unreachable("Ycbcr sampler conversion is not supported"); +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_device.c b/lib/mesa/src/broadcom/vulkan/v3dv_device.c index 496f93e28..de085bf09 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_device.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_device.c @@ -30,12 +30,17 @@ #include <unistd.h> #include <xf86drm.h> +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif + #include "v3dv_private.h" #include "common/v3d_debug.h" -#include "broadcom/cle/v3dx_pack.h" - #include "compiler/v3d_compiler.h" #include "drm-uapi/v3d_drm.h" @@ -61,34 +66,96 @@ #include "drm-uapi/i915_drm.h" #endif -static void * -default_alloc_func(void *pUserData, size_t size, size_t align, - VkSystemAllocationScope allocationScope) -{ - return malloc(size); -} +#define V3DV_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION) -static void * -default_realloc_func(void *pUserData, void *pOriginal, size_t size, - size_t align, VkSystemAllocationScope allocationScope) +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion) { - return realloc(pOriginal, size); + *pApiVersion = V3DV_API_VERSION; + return VK_SUCCESS; } -static void -default_free_func(void *pUserData, void *pMemory) -{ - free(pMemory); -} +#if defined(VK_USE_PLATFORM_WIN32_KHR) || \ + defined(VK_USE_PLATFORM_WAYLAND_KHR) || \ + defined(VK_USE_PLATFORM_XCB_KHR) || \ + defined(VK_USE_PLATFORM_XLIB_KHR) || \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) +#define V3DV_USE_WSI_PLATFORM +#endif -static const VkAllocationCallbacks default_alloc = { - .pUserData = NULL, - .pfnAllocation = default_alloc_func, - .pfnReallocation = default_realloc_func, - .pfnFree = default_free_func, +static const struct vk_instance_extension_table instance_extensions = { + .KHR_device_group_creation = true, +#ifdef VK_USE_PLATFORM_DISPLAY_KHR + .KHR_display = true, + .KHR_get_display_properties2 = true, +#endif + .KHR_external_fence_capabilities = true, + .KHR_external_memory_capabilities = true, + .KHR_external_semaphore_capabilities = true, + .KHR_get_physical_device_properties2 = true, +#ifdef V3DV_USE_WSI_PLATFORM + 
.KHR_get_surface_capabilities2 = true, + .KHR_surface = true, + .KHR_surface_protected_capabilities = true, +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + .KHR_wayland_surface = true, +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + .KHR_xcb_surface = true, +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + .KHR_xlib_surface = true, +#endif + .EXT_debug_report = true, }; -VkResult +static void +get_device_extensions(const struct v3dv_physical_device *device, + struct vk_device_extension_table *ext) +{ + *ext = (struct vk_device_extension_table) { + .KHR_bind_memory2 = true, + .KHR_copy_commands2 = true, + .KHR_dedicated_allocation = true, + .KHR_device_group = true, + .KHR_descriptor_update_template = true, + .KHR_external_fence = true, + .KHR_external_fence_fd = true, + .KHR_external_memory = true, + .KHR_external_memory_fd = true, + .KHR_external_semaphore = true, + .KHR_external_semaphore_fd = true, + .KHR_get_memory_requirements2 = true, + .KHR_image_format_list = true, + .KHR_relaxed_block_layout = true, + .KHR_maintenance1 = true, + .KHR_maintenance2 = true, + .KHR_maintenance3 = true, + .KHR_multiview = true, + .KHR_shader_non_semantic_info = true, + .KHR_sampler_mirror_clamp_to_edge = true, + .KHR_storage_buffer_storage_class = true, + .KHR_uniform_buffer_standard_layout = true, +#ifdef V3DV_USE_WSI_PLATFORM + .KHR_swapchain = true, + .KHR_incremental_present = true, +#endif + .KHR_variable_pointers = true, + .EXT_color_write_enable = true, + .EXT_custom_border_color = true, + .EXT_external_memory_dma_buf = true, + .EXT_index_type_uint8 = true, + .EXT_physical_device_drm = true, + .EXT_pipeline_creation_cache_control = true, + .EXT_pipeline_creation_feedback = true, + .EXT_private_data = true, + .EXT_provoking_vertex = true, + .EXT_vertex_attribute_divisor = true, + }; +} + +VKAPI_ATTR VkResult VKAPI_CALL v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties) @@ -98,10 +165,10 @@ v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName, return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); return vk_enumerate_instance_extension_properties( - &v3dv_instance_extensions_supported, pPropertyCount, pProperties); + &instance_extensions, pPropertyCount, pProperties); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkInstance *pInstance) @@ -112,25 +179,27 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); if (pAllocator == NULL) - pAllocator = &default_alloc; + pAllocator = vk_default_allocator(); - instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + instance = vk_alloc(pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!instance) return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); struct vk_instance_dispatch_table dispatch_table; vk_instance_dispatch_table_from_entrypoints( &dispatch_table, &v3dv_instance_entrypoints, true); + vk_instance_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_instance_entrypoints, false); result = vk_instance_init(&instance->vk, - &v3dv_instance_extensions_supported, + &instance_extensions, &dispatch_table, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { vk_free(pAllocator, instance); - return vk_error(instance, result); + return vk_error(NULL, result); } v3d_process_debug_variable(); @@ -208,7 +277,7 @@ 
physical_device_finish(struct v3dv_physical_device *device) mtx_destroy(&device->mutex); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator) { @@ -550,14 +619,14 @@ init_uuids(struct v3dv_physical_device *device) const struct build_id_note *note = build_id_find_nhdr_for_addr(init_uuids); if (!note) { - return vk_errorf((struct v3dv_instance*) device->vk.instance, + return vk_errorf(device->vk.instance, VK_ERROR_INITIALIZATION_FAILED, "Failed to find build-id"); } unsigned build_id_len = build_id_length(note); if (build_id_len < 20) { - return vk_errorf((struct v3dv_instance*) device->vk.instance, + return vk_errorf(device->vk.instance, VK_ERROR_INITIALIZATION_FAILED, "build-id too short. It needs to be a SHA"); } @@ -627,6 +696,8 @@ physical_device_init(struct v3dv_physical_device *device, struct vk_physical_device_dispatch_table dispatch_table; vk_physical_device_dispatch_table_from_entrypoints (&dispatch_table, &v3dv_physical_device_entrypoints, true); + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_physical_device_entrypoints, false); result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table); @@ -648,17 +719,48 @@ physical_device_init(struct v3dv_physical_device *device, * we postpone that until a swapchain is created. */ + const char *primary_path; +#if !using_v3d_simulator + if (drm_primary_device) + primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY]; + else + primary_path = NULL; +#else + primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY]; +#endif + + struct stat primary_stat = {0}, render_stat = {0}; + + device->has_primary = primary_path; + if (device->has_primary) { + if (stat(primary_path, &primary_stat) != 0) { + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "failed to stat DRM primary node %s", + primary_path); + goto fail; + } + + device->primary_devid = primary_stat.st_rdev; + } + + if (fstat(render_fd, &render_stat) != 0) { + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "failed to stat DRM render node %s", + path); + goto fail; + } + device->has_render = true; + device->render_devid = render_stat.st_rdev; + if (instance->vk.enabled_extensions.KHR_display) { #if !using_v3d_simulator /* Open the primary node on the vc4 display device */ assert(drm_primary_device); - const char *primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY]; master_fd = open(primary_path, O_RDWR | O_CLOEXEC); #else /* There is only one device with primary and render nodes. * Open its primary node. 
*/ - const char *primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY]; master_fd = open(primary_path, O_RDWR | O_CLOEXEC); #endif } @@ -722,8 +824,7 @@ physical_device_init(struct v3dv_physical_device *device, goto fail; } - v3dv_physical_device_get_supported_extensions(device, - &device->vk.supported_extensions); + get_device_extensions(device, &device->vk.supported_extensions); pthread_mutex_init(&device->mutex, NULL); @@ -835,7 +936,7 @@ instance_ensure_physical_device(struct v3dv_instance *instance) return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount, VkPhysicalDevice *pPhysicalDevices) @@ -858,7 +959,37 @@ v3dv_EnumeratePhysicalDevices(VkInstance _instance, return vk_outarray_status(&out); } -void +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_EnumeratePhysicalDeviceGroups( + VkInstance _instance, + uint32_t *pPhysicalDeviceGroupCount, + VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties) +{ + V3DV_FROM_HANDLE(v3dv_instance, instance, _instance); + VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties, + pPhysicalDeviceGroupCount); + + VkResult result = instance_ensure_physical_device(instance); + if (result != VK_SUCCESS) + return result; + + assert(instance->physicalDeviceCount == 1); + + vk_outarray_append(&out, p) { + p->physicalDeviceCount = 1; + memset(p->physicalDevices, 0, sizeof(p->physicalDevices)); + p->physicalDevices[0] = + v3dv_physical_device_to_handle(&instance->physicalDevice); + p->subsetAllocation = false; + + vk_foreach_struct(ext, p->pNext) + v3dv_debug_ignored_stype(ext->sType); + } + + return vk_outarray_status(&out); +} + +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures) { @@ -869,7 +1000,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, .fullDrawIndexUint32 = false, /* Only available since V3D 4.4.9.1 */ .imageCubeArray = true, .independentBlend = true, - .geometryShader = false, + .geometryShader = true, .tessellationShader = false, .sampleRateShading = true, .dualSrcBlend = false, @@ -886,7 +1017,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, .multiViewport = false, .samplerAnisotropy = true, .textureCompressionETC2 = true, - .textureCompressionASTC_LDR = false, + .textureCompressionASTC_LDR = true, /* Note that textureCompressionBC requires that the driver support all * the BC formats. V3D 4.2 only supports BC1-3, so we can't claim * that we support it.
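The two hunks just above flip the geometryShader and textureCompressionASTC_LDR bits of VkPhysicalDeviceFeatures from false to true for V3D 4.2. As a hedged illustration (not part of this patch): an application would confirm these newly advertised features through the core Vulkan 1.0 query before relying on them; the helper name below is hypothetical.

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Check the two feature bits enabled by the hunks above on a
 * physical device obtained from vkEnumeratePhysicalDevices(). */
static bool
has_v3dv_21_3_features(VkPhysicalDevice physical_device)
{
   VkPhysicalDeviceFeatures features;
   vkGetPhysicalDeviceFeatures(physical_device, &features);
   return features.geometryShader &&
          features.textureCompressionASTC_LDR;
}

Features confirmed this way must still be enabled at device creation, either through pEnabledFeatures or a VkPhysicalDeviceFeatures2 chained into VkDeviceCreateInfo, which is the path handled by the v3dv_CreateDevice changes further below.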
@@ -896,7 +1027,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, .pipelineStatisticsQuery = false, .vertexPipelineStoresAndAtomics = true, .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = false, + .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = false, .shaderStorageImageExtendedFormats = true, .shaderStorageImageMultisample = false, @@ -927,14 +1058,45 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, }; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2 *pFeatures) { v3dv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); + VkPhysicalDeviceVulkan11Features vk11 = { + .storageBuffer16BitAccess = false, + .uniformAndStorageBuffer16BitAccess = false, + .storagePushConstant16 = false, + .storageInputOutput16 = false, + .multiview = true, + .multiviewGeometryShader = false, + .multiviewTessellationShader = false, + .variablePointersStorageBuffer = true, + /* FIXME: this needs support for non-constant index on UBO/SSBO */ + .variablePointers = false, + .protectedMemory = false, + .samplerYcbcrConversion = false, + .shaderDrawParameters = false, + }; + vk_foreach_struct(ext, pFeatures->pNext) { switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { + VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = + (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext; + features->customBorderColors = true; + features->customBorderColorWithoutFormat = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: { + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features = + (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext; + features->uniformBufferStandardLayout = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: { VkPhysicalDevicePrivateDataFeaturesEXT *features = (VkPhysicalDevicePrivateDataFeaturesEXT *)ext; @@ -942,6 +1104,87 @@ v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { + VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features = + (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext; + features->indexTypeUint8 = true; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: { + VkPhysicalDeviceColorWriteEnableFeaturesEXT *features = (void *) ext; + features->colorWriteEnable = true; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: { + VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features = (void *) ext; + features->pipelineCreationCacheControl = true; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: { + VkPhysicalDeviceProvokingVertexFeaturesEXT *features = (void *) ext; + features->provokingVertexLast = true; + /* FIXME: update when supporting EXT_transform_feedback */ + features->transformFeedbackPreservesProvokingVertex = false; + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = + (void *) ext; + features->vertexAttributeInstanceRateDivisor = true; + features->vertexAttributeInstanceRateZeroDivisor = false; + break; + } + + /* Vulkan 1.1 */ + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: { + 
VkPhysicalDeviceVulkan11Features *features = + (VkPhysicalDeviceVulkan11Features *)ext; + memcpy(features, &vk11, sizeof(VkPhysicalDeviceVulkan11Features)); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { + VkPhysicalDevice16BitStorageFeatures *features = (void *) ext; + features->storageBuffer16BitAccess = vk11.storageBuffer16BitAccess; + features->uniformAndStorageBuffer16BitAccess = + vk11.uniformAndStorageBuffer16BitAccess; + features->storagePushConstant16 = vk11.storagePushConstant16; + features->storageInputOutput16 = vk11.storageInputOutput16; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { + VkPhysicalDeviceMultiviewFeatures *features = (void *) ext; + features->multiview = vk11.multiview; + features->multiviewGeometryShader = vk11.multiviewGeometryShader; + features->multiviewTessellationShader = vk11.multiviewTessellationShader; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { + VkPhysicalDeviceProtectedMemoryFeatures *features = (void *) ext; + features->protectedMemory = vk11.protectedMemory; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { + VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = (void *) ext; + features->samplerYcbcrConversion = vk11.samplerYcbcrConversion; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { + VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *) ext; + features->shaderDrawParameters = vk11.shaderDrawParameters; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { + VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext; + features->variablePointersStorageBuffer = + vk11.variablePointersStorageBuffer; + features->variablePointers = vk11.variablePointers; + break; + } + default: v3dv_debug_ignored_stype(ext->sType); break; @@ -949,6 +1192,20 @@ v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, } } +VKAPI_ATTR void VKAPI_CALL +v3dv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, + uint32_t heapIndex, + uint32_t localDeviceIndex, + uint32_t remoteDeviceIndex, + VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) +{ + assert(localDeviceIndex == 0 && remoteDeviceIndex == 0); + *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | + VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; +} + uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev) { @@ -987,11 +1244,16 @@ v3dv_physical_device_device_id(struct v3dv_physical_device *dev) return devid; #else - return dev->devinfo.ver; + switch (dev->devinfo.ver) { + case 42: + return 0xBE485FD3; /* Broadcom deviceID for 2711 */ + default: + unreachable("Unsupported V3D version"); + } #endif } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties *pProperties) { @@ -1009,7 +1271,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, const uint32_t v3d_coord_shift = 6; - const uint32_t v3d_point_line_granularity = 2.0f / (1 << v3d_coord_shift); + const float v3d_point_line_granularity = 2.0f / (1 << v3d_coord_shift); const uint32_t max_fb_size = 4096; const VkSampleCountFlags supported_sample_counts = @@ -1028,8 +1290,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxImageDimensionCube = 4096, .maxImageArrayLayers = 2048, .maxTexelBufferElements = (1ul << 28), - 
.maxUniformBufferRange = (1ul << 27), - .maxStorageBufferRange = (1ul << 27), + .maxUniformBufferRange = V3D_MAX_BUFFER_RANGE, + .maxStorageBufferRange = V3D_MAX_BUFFER_RANGE, .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, .maxMemoryAllocationCount = mem_size / page_size, .maxSamplerAllocationCount = 64 * 1024, @@ -1075,11 +1337,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, .maxTessellationEvaluationOutputComponents = 0, /* Geometry limits */ - .maxGeometryShaderInvocations = 0, - .maxGeometryInputComponents = 0, - .maxGeometryOutputComponents = 0, - .maxGeometryOutputVertices = 0, - .maxGeometryTotalOutputComponents = 0, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 64, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, /* Fragment limits */ .maxFragmentInputComponents = max_varying_components, @@ -1108,7 +1370,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, 2.0 * max_fb_size - 1 }, .viewportSubPixelBits = 0, .minMemoryMapAlignment = page_size, - .minTexelBufferOffsetAlignment = VC5_UIFBLOCK_SIZE, + .minTexelBufferOffsetAlignment = V3D_UIFBLOCK_SIZE, .minUniformBufferOffsetAlignment = 32, .minStorageBufferOffsetAlignment = 32, .minTexelOffset = -8, @@ -1151,7 +1413,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, }; *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = v3dv_physical_device_api_version(pdevice), + .apiVersion = V3DV_API_VERSION, .driverVersion = vk_get_driver_version(), .vendorID = v3dv_physical_device_vendor_id(pdevice), .deviceID = v3dv_physical_device_device_id(pdevice), @@ -1166,7 +1428,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, pdevice->pipeline_cache_uuid, VK_UUID_SIZE); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2 *pProperties) { @@ -1176,6 +1438,26 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, vk_foreach_struct(ext, pProperties->pNext) { switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: { + VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = + (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext; + props->maxCustomBorderColorSamplers = V3D_MAX_TEXTURE_SAMPLERS; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: { + VkPhysicalDeviceProvokingVertexPropertiesEXT *props = + (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext; + props->provokingVertexModePerPipeline = true; + /* FIXME: update when supporting EXT_transform_feedback */ + props->transformFeedbackPreservesTriangleFanProvokingVertex = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props = + (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext; + props->maxVertexAttribDivisor = 0xffff; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { VkPhysicalDeviceIDProperties *id_props = (VkPhysicalDeviceIDProperties *)ext; @@ -1185,11 +1467,78 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, id_props->deviceLUIDValid = false; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: { + VkPhysicalDeviceDrmPropertiesEXT *props = + (VkPhysicalDeviceDrmPropertiesEXT *)ext; + props->hasPrimary = pdevice->has_primary; + if (props->hasPrimary) { + props->primaryMajor = 
(int64_t) major(pdevice->primary_devid); + props->primaryMinor = (int64_t) minor(pdevice->primary_devid); + } + props->hasRender = pdevice->has_render; + if (props->hasRender) { + props->renderMajor = (int64_t) major(pdevice->render_devid); + props->renderMinor = (int64_t) minor(pdevice->render_devid); + } + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { + VkPhysicalDeviceMaintenance3Properties *props = + (VkPhysicalDeviceMaintenance3Properties *)ext; + /* We don't really have special restrictions for the maximum + * descriptors per set, other than maybe not exceeding the limits + * of addressable memory in a single allocation on either the host + * or the GPU. This will be a much larger limit than any of the + * per-stage limits already available in Vulkan though, so in practice, + * it is not expected to limit anything beyond what is already + * constrained through per-stage limits. + */ + uint32_t max_host_descriptors = + (UINT32_MAX - sizeof(struct v3dv_descriptor_set)) / + sizeof(struct v3dv_descriptor); + uint32_t max_gpu_descriptors = + (UINT32_MAX / v3dv_X(pdevice, max_descriptor_bo_size)()); + props->maxPerSetDescriptors = + MIN2(max_host_descriptors, max_gpu_descriptors); + + /* Minimum required by the spec */ + props->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { + VkPhysicalDeviceMultiviewProperties *props = + (VkPhysicalDeviceMultiviewProperties *)ext; + props->maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT; + props->maxMultiviewInstanceIndex = UINT32_MAX - 1; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: /* Do nothing, not even logging. This is a non-PCI device, so we will * never provide this extension. 
*/ break; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { + VkPhysicalDevicePointClippingProperties *props = + (VkPhysicalDevicePointClippingProperties *)ext; + props->pointClippingBehavior = + VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { + VkPhysicalDeviceProtectedMemoryProperties *props = + (VkPhysicalDeviceProtectedMemoryProperties *)ext; + props->protectedNoFault = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: { + VkPhysicalDeviceSubgroupProperties *props = + (VkPhysicalDeviceSubgroupProperties *)ext; + props->subgroupSize = V3D_CHANNELS; + props->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT; + props->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT; + props->quadOperationsInAllStages = false; + break; + } default: v3dv_debug_ignored_stype(ext->sType); break; @@ -1208,7 +1557,7 @@ v3dv_queue_family_properties = { .minImageTransferGranularity = { 1, 1, 1 }, }; -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount, VkQueueFamilyProperties *pQueueFamilyProperties) @@ -1220,7 +1569,7 @@ v3dv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties2 *pQueueFamilyProperties) @@ -1236,7 +1585,7 @@ v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties *pMemoryProperties) { @@ -1244,7 +1593,7 @@ v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, *pMemoryProperties = device->memory; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { @@ -1260,7 +1609,7 @@ v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, } } -PFN_vkVoidFunction +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL v3dv_GetInstanceProcAddr(VkInstance _instance, const char *pName) { @@ -1303,7 +1652,7 @@ vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, return vk_instance_get_physical_device_proc_addr(&instance->vk, pName); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties) { @@ -1315,7 +1664,7 @@ v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkLayerProperties *pProperties) @@ -1327,16 +1676,19 @@ v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, return VK_SUCCESS; } - return vk_error((struct v3dv_instance*) physical_device->vk.instance, - VK_ERROR_LAYER_NOT_PRESENT); + return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT); } static VkResult -queue_init(struct v3dv_device *device, struct v3dv_queue *queue) +queue_init(struct v3dv_device *device, struct v3dv_queue *queue, + const VkDeviceQueueCreateInfo *create_info, + uint32_t index_in_family) { - vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE); + VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, + 
index_in_family); + if (result != VK_SUCCESS) + return result; queue->device = device; - queue->flags = 0; queue->noop_job = NULL; list_inithead(&queue->submit_wait_list); pthread_mutex_init(&queue->mutex, NULL); @@ -1346,7 +1698,7 @@ queue_init(struct v3dv_device *device, struct v3dv_queue *queue) static void queue_finish(struct v3dv_queue *queue) { - vk_object_base_finish(&queue->base); + vk_queue_finish(&queue->vk); assert(list_is_empty(&queue->submit_wait_list)); if (queue->noop_job) v3dv_job_destroy(queue->noop_job); @@ -1371,7 +1723,7 @@ destroy_device_meta(struct v3dv_device *device) v3dv_meta_texel_buffer_copy_finish(device); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -1384,19 +1736,6 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); - /* Check enabled features */ - if (pCreateInfo->pEnabledFeatures) { - VkPhysicalDeviceFeatures supported_features; - v3dv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); - VkBool32 *supported_feature = (VkBool32 *)&supported_features; - VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures; - unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); - for (uint32_t i = 0; i < num_features; i++) { - if (enabled_feature[i] && !supported_feature[i]) - return vk_error(instance, VK_ERROR_FEATURE_NOT_PRESENT); - } - } - /* Check requested queues (we only expose one queue) */ assert(pCreateInfo->queueCreateInfoCount == 1); for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { @@ -1415,11 +1754,13 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, struct vk_device_dispatch_table dispatch_table; vk_device_dispatch_table_from_entrypoints(&dispatch_table, &v3dv_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_device_entrypoints, false); result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { vk_free(&device->vk.alloc, device); - return vk_error(instance, result); + return vk_error(NULL, result); } device->instance = instance; @@ -1432,20 +1773,31 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, pthread_mutex_init(&device->mutex, NULL); - result = queue_init(device, &device->queue); + result = queue_init(device, &device->queue, + pCreateInfo->pQueueCreateInfos, 0); if (result != VK_SUCCESS) goto fail; device->devinfo = physical_device->devinfo; - if (pCreateInfo->pEnabledFeatures) { + /* Vulkan 1.1 and VK_KHR_get_physical_device_properties2 added + * VkPhysicalDeviceFeatures2 which can be used in the pNext chain of + * VkDeviceCreateInfo, in which case it should be used instead of + * pEnabledFeatures.
+ */ + const VkPhysicalDeviceFeatures2 *features2 = + vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_FEATURES_2); + if (features2) { + memcpy(&device->features, &features2->features, + sizeof(device->features)); + } else if (pCreateInfo->pEnabledFeatures) { memcpy(&device->features, pCreateInfo->pEnabledFeatures, sizeof(device->features)); - - if (device->features.robustBufferAccess) - perf_debug("Device created with Robust Buffer Access enabled.\n"); } + if (device->features.robustBufferAccess) + perf_debug("Device created with Robust Buffer Access enabled.\n"); + int ret = drmSyncobjCreate(physical_device->render_fd, DRM_SYNCOBJ_CREATE_SIGNALED, &device->last_job_sync); @@ -1454,9 +1806,12 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice, goto fail; } +#ifdef DEBUG + v3dv_X(device, device_check_prepacked_sizes)(); +#endif init_device_meta(device); v3dv_bo_cache_init(device); - v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, + v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0, device->instance->default_pipeline_cache_enabled); device->default_attribute_float = v3dv_pipeline_create_default_attribute_values(device, NULL); @@ -1472,7 +1827,7 @@ fail: return result; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) { @@ -1496,24 +1851,10 @@ v3dv_DestroyDevice(VkDevice _device, v3dv_bo_cache_destroy(device); vk_device_finish(&device->vk); - vk_free2(&default_alloc, pAllocator, device); + vk_free2(&device->vk.alloc, pAllocator, device); } -void -v3dv_GetDeviceQueue(VkDevice _device, - uint32_t queueFamilyIndex, - uint32_t queueIndex, - VkQueue *pQueue) -{ - V3DV_FROM_HANDLE(v3dv_device, device, _device); - - assert(queueIndex == 0); - assert(queueFamilyIndex == 0); - - *pQueue = v3dv_queue_to_handle(&device->queue); -} - -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_DeviceWaitIdle(VkDevice _device) { V3DV_FROM_HANDLE(v3dv_device, device, _device); @@ -1526,8 +1867,7 @@ device_alloc(struct v3dv_device *device, VkDeviceSize size) { /* Our kernel interface is 32-bit */ - if (size > UINT32_MAX) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; + assert(size <= UINT32_MAX); mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false); if (!mem->bo) @@ -1546,7 +1886,9 @@ device_free_wsi_dumb(int32_t display_fd, int32_t dumb_handle) struct drm_mode_destroy_dumb destroy_dumb = { .handle = dumb_handle, }; - v3dv_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb); + if (v3dv_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb)) { + fprintf(stderr, "destroy dumb object %d: %s\n", dumb_handle, strerror(errno)); + } } static void @@ -1724,7 +2066,7 @@ fail_create: #endif } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, @@ -1759,6 +2101,22 @@ v3dv_AllocateMemory(VkDevice _device, case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR: fd_info = (void *)ext; break; + case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: + /* We don't support VK_KHR_buffer_device_address or multiple + * devices per device group, so we can ignore this. + */ + break; + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR: + /* We don't have particular optimizations associated with memory + * allocations that won't be suballocated to multiple resources. 
+ */ + break; + case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR: + /* The mask of handle types specified here must be supported + * according to VkExternalImageFormatProperties, so it must be + * fd or dmabuf, which don't have special requirements for us. + */ + break; default: v3dv_debug_ignored_stype(ext->sType); break; @@ -1766,32 +2124,40 @@ v3dv_AllocateMemory(VkDevice _device, } VkResult result = VK_SUCCESS; - if (wsi_info) { - result = device_alloc_for_wsi(device, pAllocator, mem, - pAllocateInfo->allocationSize); - } else if (fd_info && fd_info->handleType) { - assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || - fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); - result = device_import_bo(device, pAllocator, - fd_info->fd, pAllocateInfo->allocationSize, - &mem->bo); - mem->has_bo_ownership = false; - if (result == VK_SUCCESS) - close(fd_info->fd); + + /* We always allocate device memory in multiples of a page, so round up + * requested size to that. + */ + VkDeviceSize alloc_size = ALIGN(pAllocateInfo->allocationSize, 4096); + + if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE)) { + result = VK_ERROR_OUT_OF_DEVICE_MEMORY; } else { - result = device_alloc(device, mem, pAllocateInfo->allocationSize); + if (wsi_info) { + result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size); + } else if (fd_info && fd_info->handleType) { + assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + result = device_import_bo(device, pAllocator, + fd_info->fd, alloc_size, &mem->bo); + mem->has_bo_ownership = false; + if (result == VK_SUCCESS) + close(fd_info->fd); + } else { + result = device_alloc(device, mem, alloc_size); + } } if (result != VK_SUCCESS) { vk_object_free(&device->vk, pAllocator, mem); - return vk_error(device->instance, result); + return vk_error(device, result); } *pMem = v3dv_device_memory_to_handle(mem); return result; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator) @@ -1810,7 +2176,7 @@ v3dv_FreeMemory(VkDevice _device, vk_object_free(&device->vk, pAllocator, mem); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, @@ -1835,13 +2201,13 @@ v3dv_MapMemory(VkDevice _device, */ VkResult result = device_map(device, mem); if (result != VK_SUCCESS) - return vk_error(device->instance, result); + return vk_error(device, result); *ppData = ((uint8_t *) mem->bo->map) + offset; return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory) { @@ -1854,7 +2220,7 @@ v3dv_UnmapMemory(VkDevice _device, device_unmap(device, mem); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) @@ -1862,7 +2228,7 @@ v3dv_FlushMappedMemoryRanges(VkDevice _device, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) @@ -1870,28 +2236,40 @@ v3dv_InvalidateMappedMemoryRanges(VkDevice _device, return VK_SUCCESS; } -void -v3dv_GetImageMemoryRequirements(VkDevice _device, - VkImage _image, - VkMemoryRequirements *pMemoryRequirements) +VKAPI_ATTR void VKAPI_CALL +v3dv_GetImageMemoryRequirements2(VkDevice device, + const 
VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { - V3DV_FROM_HANDLE(v3dv_image, image, _image); + V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image); - assert(image->size > 0); + pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { + .memoryTypeBits = 0x1, + .alignment = image->alignment, + .size = image->size + }; - pMemoryRequirements->size = image->size; - pMemoryRequirements->alignment = image->alignment; - pMemoryRequirements->memoryTypeBits = 0x1; + vk_foreach_struct(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *req = + (VkMemoryDedicatedRequirements *) ext; + req->requiresDedicatedAllocation = image->vk.external_handle_types != 0; + req->prefersDedicatedAllocation = image->vk.external_handle_types != 0; + break; + } + default: + v3dv_debug_ignored_stype(ext->sType); + break; + } + } } -VkResult -v3dv_BindImageMemory(VkDevice _device, - VkImage _image, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) +static void +bind_image_memory(const VkBindImageMemoryInfo *info) { - V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory); - V3DV_FROM_HANDLE(v3dv_image, image, _image); + V3DV_FROM_HANDLE(v3dv_image, image, info->image); + V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory); /* Valid usage: * @@ -1899,36 +2277,75 @@ v3dv_BindImageMemory(VkDevice _device, * the VkMemoryRequirements structure returned from a call to * vkGetImageMemoryRequirements with image" */ - assert(memoryOffset % image->alignment == 0); - assert(memoryOffset < mem->bo->size); + assert(info->memoryOffset % image->alignment == 0); + assert(info->memoryOffset < mem->bo->size); image->mem = mem; - image->mem_offset = memoryOffset; + image->mem_offset = info->memoryOffset; +} + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_BindImageMemory2(VkDevice _device, + uint32_t bindInfoCount, + const VkBindImageMemoryInfo *pBindInfos) +{ + for (uint32_t i = 0; i < bindInfoCount; i++) { + const VkBindImageMemorySwapchainInfoKHR *swapchain_info = + vk_find_struct_const(pBindInfos->pNext, + BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR); + if (swapchain_info && swapchain_info->swapchain) { + struct v3dv_image *swapchain_image = + v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain, + swapchain_info->imageIndex); + VkBindImageMemoryInfo swapchain_bind = { + .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, + .image = pBindInfos[i].image, + .memory = v3dv_device_memory_to_handle(swapchain_image->mem), + .memoryOffset = swapchain_image->mem_offset, + }; + bind_image_memory(&swapchain_bind); + } else { + bind_image_memory(&pBindInfos[i]); + } + } return VK_SUCCESS; } -void -v3dv_GetBufferMemoryRequirements(VkDevice _device, - VkBuffer _buffer, - VkMemoryRequirements* pMemoryRequirements) +VKAPI_ATTR void VKAPI_CALL +v3dv_GetBufferMemoryRequirements2(VkDevice device, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) { - V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); + V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer); + + pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) { + .memoryTypeBits = 0x1, + .alignment = buffer->alignment, + .size = align64(buffer->size, buffer->alignment), + }; - pMemoryRequirements->memoryTypeBits = 0x1; - pMemoryRequirements->alignment = buffer->alignment; - pMemoryRequirements->size = - align64(buffer->size, pMemoryRequirements->alignment); + vk_foreach_struct(ext, pMemoryRequirements->pNext) { 
+ switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *req = + (VkMemoryDedicatedRequirements *) ext; + req->requiresDedicatedAllocation = false; + req->prefersDedicatedAllocation = false; + break; + } + default: + v3dv_debug_ignored_stype(ext->sType); + break; + } + } } -VkResult -v3dv_BindBufferMemory(VkDevice _device, - VkBuffer _buffer, - VkDeviceMemory _memory, - VkDeviceSize memoryOffset) +static void +bind_buffer_memory(const VkBindBufferMemoryInfo *info) { - V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory); - V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); + V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->buffer); + V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory); /* Valid usage: * @@ -1936,16 +2353,26 @@ v3dv_BindBufferMemory(VkDevice _device, * the VkMemoryRequirements structure returned from a call to * vkGetBufferMemoryRequirements with buffer" */ - assert(memoryOffset % buffer->alignment == 0); - assert(memoryOffset < mem->bo->size); + assert(info->memoryOffset % buffer->alignment == 0); + assert(info->memoryOffset < mem->bo->size); buffer->mem = mem; - buffer->mem_offset = memoryOffset; + buffer->mem_offset = info->memoryOffset; +} + + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_BindBufferMemory2(VkDevice device, + uint32_t bindInfoCount, + const VkBindBufferMemoryInfo *pBindInfos) +{ + for (uint32_t i = 0; i < bindInfoCount; i++) + bind_buffer_memory(&pBindInfos[i]); return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -1963,7 +2390,7 @@ v3dv_CreateBuffer(VkDevice _device, buffer = vk_object_zalloc(&device->vk, pAllocator, sizeof(*buffer), VK_OBJECT_TYPE_BUFFER); if (buffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); buffer->size = pCreateInfo->size; buffer->usage = pCreateInfo->usage; @@ -1979,7 +2406,7 @@ v3dv_CreateBuffer(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator) @@ -1993,67 +2420,7 @@ v3dv_DestroyBuffer(VkDevice _device, vk_object_free(&device->vk, pAllocator, buffer); } -/** - * This computes the maximum bpp used by any of the render targets used by - * a particular subpass and checks if any of those render targets are - * multisampled. If we don't have a subpass (when we are not inside a - * render pass), then we assume that all framebuffer attachments are used. 
- */ -void -v3dv_framebuffer_compute_internal_bpp_msaa( - const struct v3dv_framebuffer *framebuffer, - const struct v3dv_subpass *subpass, - uint8_t *max_bpp, - bool *msaa) -{ - STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0); - *max_bpp = RENDER_TARGET_MAXIMUM_32BPP; - *msaa = false; - - if (subpass) { - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t att_idx = subpass->color_attachments[i].attachment; - if (att_idx == VK_ATTACHMENT_UNUSED) - continue; - - const struct v3dv_image_view *att = framebuffer->attachments[att_idx]; - assert(att); - - if (att->aspects & VK_IMAGE_ASPECT_COLOR_BIT) - *max_bpp = MAX2(*max_bpp, att->internal_bpp); - - if (att->image->samples > VK_SAMPLE_COUNT_1_BIT) - *msaa = true; - } - - if (!*msaa && subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) { - const struct v3dv_image_view *att = - framebuffer->attachments[subpass->ds_attachment.attachment]; - assert(att); - - if (att->image->samples > VK_SAMPLE_COUNT_1_BIT) - *msaa = true; - } - - return; - } - - assert(framebuffer->attachment_count <= 4); - for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { - const struct v3dv_image_view *att = framebuffer->attachments[i]; - assert(att); - - if (att->aspects & VK_IMAGE_ASPECT_COLOR_BIT) - *max_bpp = MAX2(*max_bpp, att->internal_bpp); - - if (att->image->samples > VK_SAMPLE_COUNT_1_BIT) - *msaa = true; - } - - return; -} - -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -2069,7 +2436,7 @@ v3dv_CreateFramebuffer(VkDevice _device, framebuffer = vk_object_zalloc(&device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER); if (framebuffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; @@ -2081,7 +2448,7 @@ v3dv_CreateFramebuffer(VkDevice _device, for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { framebuffer->attachments[i] = v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]); - if (framebuffer->attachments[i]->aspects & VK_IMAGE_ASPECT_COLOR_BIT) + if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) framebuffer->color_attachment_count++; } @@ -2090,7 +2457,7 @@ v3dv_CreateFramebuffer(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb, const VkAllocationCallbacks *pAllocator) @@ -2104,7 +2471,7 @@ v3dv_DestroyFramebuffer(VkDevice _device, vk_object_free(&device->vk, pAllocator, fb); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, int fd, @@ -2119,11 +2486,11 @@ v3dv_GetMemoryFdPropertiesKHR(VkDevice _device, (1 << pdevice->memory.memoryTypeCount) - 1; return VK_SUCCESS; default: - return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFd) @@ -2140,14 +2507,14 @@ v3dv_GetMemoryFdKHR(VkDevice _device, mem->bo->handle, DRM_CLOEXEC, &fd); if (ret) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); *pFd = fd; return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL 
v3dv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -2158,7 +2525,7 @@ v3dv_CreateEvent(VkDevice _device, vk_object_zalloc(&device->vk, pAllocator, sizeof(*event), VK_OBJECT_TYPE_EVENT); if (!event) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); /* Events are created in the unsignaled state */ event->state = false; @@ -2167,7 +2534,7 @@ v3dv_CreateEvent(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator) @@ -2181,14 +2548,14 @@ v3dv_DestroyEvent(VkDevice _device, vk_object_free(&device->vk, pAllocator, event); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetEventStatus(VkDevice _device, VkEvent _event) { V3DV_FROM_HANDLE(v3dv_event, event, _event); return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_SetEvent(VkDevice _device, VkEvent _event) { V3DV_FROM_HANDLE(v3dv_event, event, _event); @@ -2196,7 +2563,7 @@ v3dv_SetEvent(VkDevice _device, VkEvent _event) return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_ResetEvent(VkDevice _device, VkEvent _event) { V3DV_FROM_HANDLE(v3dv_event, event, _event); @@ -2204,101 +2571,7 @@ v3dv_ResetEvent(VkDevice _device, VkEvent _event) return VK_SUCCESS; } -static const enum V3DX(Wrap_Mode) vk_to_v3d_wrap_mode[] = { - [VK_SAMPLER_ADDRESS_MODE_REPEAT] = V3D_WRAP_MODE_REPEAT, - [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = V3D_WRAP_MODE_MIRROR, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = V3D_WRAP_MODE_CLAMP, - [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = V3D_WRAP_MODE_MIRROR_ONCE, - [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = V3D_WRAP_MODE_BORDER, -}; - -static const enum V3DX(Compare_Function) -vk_to_v3d_compare_func[] = { - [VK_COMPARE_OP_NEVER] = V3D_COMPARE_FUNC_NEVER, - [VK_COMPARE_OP_LESS] = V3D_COMPARE_FUNC_LESS, - [VK_COMPARE_OP_EQUAL] = V3D_COMPARE_FUNC_EQUAL, - [VK_COMPARE_OP_LESS_OR_EQUAL] = V3D_COMPARE_FUNC_LEQUAL, - [VK_COMPARE_OP_GREATER] = V3D_COMPARE_FUNC_GREATER, - [VK_COMPARE_OP_NOT_EQUAL] = V3D_COMPARE_FUNC_NOTEQUAL, - [VK_COMPARE_OP_GREATER_OR_EQUAL] = V3D_COMPARE_FUNC_GEQUAL, - [VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS, -}; - -static void -pack_sampler_state(struct v3dv_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo) -{ - enum V3DX(Border_Color_Mode) border_color_mode; - - /* For now we only support the preset Vulkan border color modes. If we - * want to implement VK_EXT_custom_border_color in the future we would have - * to use V3D_BORDER_COLOR_FOLLOWS, and fill up border_color_word_[0/1/2/3] - * SAMPLER_STATE. 
- */ - switch (pCreateInfo->borderColor) { - case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: - case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: - border_color_mode = V3D_BORDER_COLOR_0000; - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: - case VK_BORDER_COLOR_INT_OPAQUE_BLACK: - border_color_mode = V3D_BORDER_COLOR_0001; - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: - case VK_BORDER_COLOR_INT_OPAQUE_WHITE: - border_color_mode = V3D_BORDER_COLOR_1111; - break; - default: - unreachable("Unknown border color"); - break; - } - - /* For some texture formats, when clamping to transparent black border the - * CTS expects alpha to be set to 1 instead of 0, but the border color mode - * will take priority over the texture state swizzle, so the only way to - * fix that is to apply a swizzle in the shader. Here we keep track of - * whether we are activating that mode and we will decide if we need to - * activate the texture swizzle lowering in the shader key at compile time - * depending on the actual texture format. - */ - if ((pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || - pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || - pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) && - border_color_mode == V3D_BORDER_COLOR_0000) { - sampler->clamp_to_transparent_black_border = true; - } - - v3dv_pack(sampler->sampler_state, SAMPLER_STATE, s) { - if (pCreateInfo->anisotropyEnable) { - s.anisotropy_enable = true; - if (pCreateInfo->maxAnisotropy > 8) - s.maximum_anisotropy = 3; - else if (pCreateInfo->maxAnisotropy > 4) - s.maximum_anisotropy = 2; - else if (pCreateInfo->maxAnisotropy > 2) - s.maximum_anisotropy = 1; - } - - s.border_color_mode = border_color_mode; - - s.wrap_i_border = false; /* Also hardcoded on v3d */ - s.wrap_s = vk_to_v3d_wrap_mode[pCreateInfo->addressModeU]; - s.wrap_t = vk_to_v3d_wrap_mode[pCreateInfo->addressModeV]; - s.wrap_r = vk_to_v3d_wrap_mode[pCreateInfo->addressModeW]; - s.fixed_bias = pCreateInfo->mipLodBias; - s.max_level_of_detail = MIN2(MAX2(0, pCreateInfo->maxLod), 15); - s.min_level_of_detail = MIN2(MAX2(0, pCreateInfo->minLod), 15); - s.srgb_disable = 0; /* Not even set by v3d */ - s.depth_compare_function = - vk_to_v3d_compare_func[pCreateInfo->compareEnable ? 
- pCreateInfo->compareOp : VK_COMPARE_OP_NEVER]; - s.mip_filter_nearest = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST; - s.min_filter_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; - s.mag_filter_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; - } -} - -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -2312,18 +2585,23 @@ v3dv_CreateSampler(VkDevice _device, sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler), VK_OBJECT_TYPE_SAMPLER); if (!sampler) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); sampler->compare_enable = pCreateInfo->compareEnable; sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates; - pack_sampler_state(sampler, pCreateInfo); + + const VkSamplerCustomBorderColorCreateInfoEXT *bc_info = + vk_find_struct_const(pCreateInfo->pNext, + SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + + v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info); *pSampler = v3dv_sampler_to_handle(sampler); return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator) @@ -2337,7 +2615,7 @@ v3dv_DestroySampler(VkDevice _device, vk_object_free(&device->vk, pAllocator, sampler); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory, VkDeviceSize *pCommittedMemoryInBytes) @@ -2345,17 +2623,17 @@ v3dv_GetDeviceMemoryCommitment(VkDevice device, *pCommittedMemoryInBytes = 0; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t *pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements *pSparseMemoryRequirements) + VkDevice device, + VkImage image, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements *pSparseMemoryRequirements) { *pSparseMemoryRequirementCount = 0; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetImageSparseMemoryRequirements2( VkDevice device, const VkImageSparseMemoryRequirementsInfo2 *pInfo, diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_formats.c b/lib/mesa/src/broadcom/vulkan/v3dv_formats.c index cefa1418b..6e32d341a 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_formats.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_formats.c @@ -25,300 +25,14 @@ #include "vk_util.h" #include "vk_format_info.h" -#include "broadcom/cle/v3dx_pack.h" #include "drm-uapi/drm_fourcc.h" #include "util/format/u_format.h" #include "vulkan/wsi/wsi_common.h" -#define SWIZ(x,y,z,w) { \ - PIPE_SWIZZLE_##x, \ - PIPE_SWIZZLE_##y, \ - PIPE_SWIZZLE_##z, \ - PIPE_SWIZZLE_##w \ -} - -#define FORMAT(vk, rt, tex, swiz, return_size, supports_filtering) \ - [VK_FORMAT_##vk] = { \ - true, \ - V3D_OUTPUT_IMAGE_FORMAT_##rt, \ - TEXTURE_DATA_FORMAT_##tex, \ - swiz, \ - return_size, \ - supports_filtering, \ - } - -#define SWIZ_X001 SWIZ(X, 0, 0, 1) -#define SWIZ_XY01 SWIZ(X, Y, 0, 1) -#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) -#define SWIZ_XYZW SWIZ(X, Y, Z, W) -#define SWIZ_YZWX SWIZ(Y, Z, W, X) -#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) -#define SWIZ_ZYXW SWIZ(Z, Y, X, W) -#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) -#define SWIZ_XXXY SWIZ(X, X, X, Y) -#define SWIZ_XXX1 SWIZ(X, X, X, 1) -#define SWIZ_XXXX SWIZ(X, X, X, X) -#define SWIZ_000X SWIZ(0, 0, 0, X) -#define SWIZ_WXYZ SWIZ(W, X, Y, Z) - -/* FIXME: expand format table to describe whether the format is 
supported - * for buffer surfaces (texel buffers, vertex buffers, etc). - */ -static const struct v3dv_format format_table[] = { - /* Color, 4 channels */ - FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, true), - FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, true), - - FORMAT(R8G8B8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), - FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, true), - FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true), - FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, false), - FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, false), - - FORMAT(R16G16B16A16_SFLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, true), - FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, true), - FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, true), - FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, false), - FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, false), - - FORMAT(R32G32B32A32_SFLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, false), - FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, false), - FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, false), - - /* Color, 3 channels */ - FORMAT(R32G32B32_SFLOAT, NO, NO, SWIZ_XYZ1, 0, false), - FORMAT(R32G32B32_UINT, NO, NO, SWIZ_XYZ1, 0, false), - FORMAT(R32G32B32_SINT, NO, NO, SWIZ_XYZ1, 0, false), - - /* Color, 2 channels */ - FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, true), - FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, true), - FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, false), - FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, false), - - FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, true), - FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, true), - FORMAT(R16G16_SFLOAT, RG16F, RG16F, SWIZ_XY01, 16, true), - FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, false), - FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, false), - - FORMAT(R32G32_SFLOAT, RG32F, RG32F, SWIZ_XY01, 32, false), - FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, false), - FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, false), - - /* Color, 1 channel */ - FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, true), - FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, true), - FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, false), - FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, false), - - FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, true), - FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, true), - FORMAT(R16_SFLOAT, R16F, R16F, SWIZ_X001, 16, true), - FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, false), - FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, false), - - FORMAT(R32_SFLOAT, R32F, R32F, SWIZ_X001, 32, false), - FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, false), - FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, false), - - /* Color, packed */ - FORMAT(B4G4R4A4_UNORM_PACK16, ABGR4444, RGBA4, SWIZ_ZYXW, 16, true), /* Swap RB */ - FORMAT(R5G6B5_UNORM_PACK16, BGR565, RGB565, SWIZ_XYZ1, 16, true), - FORMAT(R5G5B5A1_UNORM_PACK16, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, true), - FORMAT(A1R5G5B5_UNORM_PACK16, RGBA5551, A1_RGB5, SWIZ_ZYXW, 16, true), /* Swap RB */ - FORMAT(A8B8G8R8_UNORM_PACK32, RGBA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 UNORM */ - FORMAT(A8B8G8R8_SNORM_PACK32, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true), /* RGBA8 SNORM */ - FORMAT(A8B8G8R8_UINT_PACK32, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, true), /* RGBA8 UINT */ - FORMAT(A8B8G8R8_SINT_PACK32, RGBA8I, RGBA8I, SWIZ_XYZW, 16, true), /* RGBA8 SINT */ - FORMAT(A8B8G8R8_SRGB_PACK32, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 sRGB */ - 
FORMAT(A2B10G10R10_UNORM_PACK32,RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, true), - FORMAT(A2B10G10R10_UINT_PACK32, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, true), - FORMAT(E5B9G9R9_UFLOAT_PACK32, NO, RGB9_E5, SWIZ_XYZ1, 16, true), - FORMAT(B10G11R11_UFLOAT_PACK32, R11F_G11F_B10F,R11F_G11F_B10F, SWIZ_XYZ1, 16, true), - - /* Depth */ - FORMAT(D16_UNORM, D16, DEPTH_COMP16, SWIZ_X001, 32, false), - FORMAT(D32_SFLOAT, D32F, DEPTH_COMP32F, SWIZ_X001, 32, false), - FORMAT(X8_D24_UNORM_PACK32, D24S8, DEPTH24_X8, SWIZ_X001, 32, false), - - /* Depth + Stencil */ - FORMAT(D24_UNORM_S8_UINT, D24S8, DEPTH24_X8, SWIZ_X001, 32, false), - - /* Compressed: ETC2 / EAC */ - FORMAT(ETC2_R8G8B8_UNORM_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true), - FORMAT(ETC2_R8G8B8_SRGB_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true), - FORMAT(ETC2_R8G8B8A1_UNORM_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true), - FORMAT(ETC2_R8G8B8A1_SRGB_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true), - FORMAT(ETC2_R8G8B8A8_UNORM_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true), - FORMAT(ETC2_R8G8B8A8_SRGB_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true), - FORMAT(EAC_R11_UNORM_BLOCK, NO, R11_EAC, SWIZ_X001, 16, true), - FORMAT(EAC_R11_SNORM_BLOCK, NO, SIGNED_R11_EAC, SWIZ_X001, 16, true), - FORMAT(EAC_R11G11_UNORM_BLOCK, NO, RG11_EAC, SWIZ_XY01, 16, true), - FORMAT(EAC_R11G11_SNORM_BLOCK, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, true), - - /* Compressed: BC1-3 */ - FORMAT(BC1_RGB_UNORM_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true), - FORMAT(BC1_RGB_SRGB_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true), - FORMAT(BC1_RGBA_UNORM_BLOCK, NO, BC1, SWIZ_XYZW, 16, true), - FORMAT(BC1_RGBA_SRGB_BLOCK, NO, BC1, SWIZ_XYZW, 16, true), - FORMAT(BC2_UNORM_BLOCK, NO, BC2, SWIZ_XYZW, 16, true), - FORMAT(BC2_SRGB_BLOCK, NO, BC2, SWIZ_XYZW, 16, true), - FORMAT(BC3_UNORM_BLOCK, NO, BC3, SWIZ_XYZW, 16, true), - FORMAT(BC3_SRGB_BLOCK, NO, BC3, SWIZ_XYZW, 16, true), -}; - -const struct v3dv_format * -v3dv_get_format(VkFormat format) -{ - if (format < ARRAY_SIZE(format_table) && format_table[format].supported) - return &format_table[format]; - else - return NULL; -} - -void -v3dv_get_internal_type_bpp_for_output_format(uint32_t format, - uint32_t *type, - uint32_t *bpp) -{ - switch (format) { - case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: - case V3D_OUTPUT_IMAGE_FORMAT_RGB8: - case V3D_OUTPUT_IMAGE_FORMAT_RG8: - case V3D_OUTPUT_IMAGE_FORMAT_R8: - case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444: - case V3D_OUTPUT_IMAGE_FORMAT_BGR565: - case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555: - *type = V3D_INTERNAL_TYPE_8; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I: - case V3D_OUTPUT_IMAGE_FORMAT_RG8I: - case V3D_OUTPUT_IMAGE_FORMAT_R8I: - *type = V3D_INTERNAL_TYPE_8I; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI: - case V3D_OUTPUT_IMAGE_FORMAT_RG8UI: - case V3D_OUTPUT_IMAGE_FORMAT_R8UI: - *type = V3D_INTERNAL_TYPE_8UI; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: - case V3D_OUTPUT_IMAGE_FORMAT_SRGB: - case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: - case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: - case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: - /* Note that sRGB RTs are stored in the tile buffer at 16F, - * and the conversion to sRGB happens at tilebuffer load/store. 
- */ - *type = V3D_INTERNAL_TYPE_16F; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG16F: - case V3D_OUTPUT_IMAGE_FORMAT_R16F: - *type = V3D_INTERNAL_TYPE_16F; - /* Use 64bpp to make sure the TLB doesn't throw away the alpha - * channel before alpha test happens. - */ - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I: - *type = V3D_INTERNAL_TYPE_16I; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG16I: - case V3D_OUTPUT_IMAGE_FORMAT_R16I: - *type = V3D_INTERNAL_TYPE_16I; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI: - case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI: - *type = V3D_INTERNAL_TYPE_16UI; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG16UI: - case V3D_OUTPUT_IMAGE_FORMAT_R16UI: - *type = V3D_INTERNAL_TYPE_16UI; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I: - *type = V3D_INTERNAL_TYPE_32I; - *bpp = V3D_INTERNAL_BPP_128; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG32I: - *type = V3D_INTERNAL_TYPE_32I; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_R32I: - *type = V3D_INTERNAL_TYPE_32I; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI: - *type = V3D_INTERNAL_TYPE_32UI; - *bpp = V3D_INTERNAL_BPP_128; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG32UI: - *type = V3D_INTERNAL_TYPE_32UI; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_R32UI: - *type = V3D_INTERNAL_TYPE_32UI; - *bpp = V3D_INTERNAL_BPP_32; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F: - *type = V3D_INTERNAL_TYPE_32F; - *bpp = V3D_INTERNAL_BPP_128; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_RG32F: - *type = V3D_INTERNAL_TYPE_32F; - *bpp = V3D_INTERNAL_BPP_64; - break; - - case V3D_OUTPUT_IMAGE_FORMAT_R32F: - *type = V3D_INTERNAL_TYPE_32F; - *bpp = V3D_INTERNAL_BPP_32; - break; - - default: - /* Provide some default values, as we'll be called at RB - * creation time, even if an RB with this format isn't supported. 
- */ - *type = V3D_INTERNAL_TYPE_8; - *bpp = V3D_INTERNAL_BPP_32; - break; - } -} - -bool -v3dv_format_supports_tlb_resolve(const struct v3dv_format *format) -{ - uint32_t type, bpp; - v3dv_get_internal_type_bpp_for_output_format(format->rt_type, &type, &bpp); - return type == V3D_INTERNAL_TYPE_8 || type == V3D_INTERNAL_TYPE_16F; -} - const uint8_t * -v3dv_get_format_swizzle(VkFormat f) +v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f) { - const struct v3dv_format *vf = v3dv_get_format(f); + const struct v3dv_format *vf = v3dv_X(device, get_format)(f); static const uint8_t fallback[] = {0, 1, 2, 3}; if (!vf) @@ -331,57 +45,18 @@ uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable) { + if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT)) + return 16; + + if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT)) + return 32; + if (compare_enable) return 16; return vf->return_size; } -bool -v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo, - uint32_t tex_format) -{ - assert(devinfo->ver >= 42); - - switch (tex_format) { - case TEXTURE_DATA_FORMAT_R8: - case TEXTURE_DATA_FORMAT_R8_SNORM: - case TEXTURE_DATA_FORMAT_RG8: - case TEXTURE_DATA_FORMAT_RG8_SNORM: - case TEXTURE_DATA_FORMAT_RGBA8: - case TEXTURE_DATA_FORMAT_RGBA8_SNORM: - case TEXTURE_DATA_FORMAT_RGB565: - case TEXTURE_DATA_FORMAT_RGBA4: - case TEXTURE_DATA_FORMAT_RGB5_A1: - case TEXTURE_DATA_FORMAT_RGB10_A2: - case TEXTURE_DATA_FORMAT_R16: - case TEXTURE_DATA_FORMAT_R16_SNORM: - case TEXTURE_DATA_FORMAT_RG16: - case TEXTURE_DATA_FORMAT_RG16_SNORM: - case TEXTURE_DATA_FORMAT_RGBA16: - case TEXTURE_DATA_FORMAT_RGBA16_SNORM: - case TEXTURE_DATA_FORMAT_R16F: - case TEXTURE_DATA_FORMAT_RG16F: - case TEXTURE_DATA_FORMAT_RGBA16F: - case TEXTURE_DATA_FORMAT_R11F_G11F_B10F: - case TEXTURE_DATA_FORMAT_R4: - case TEXTURE_DATA_FORMAT_RGB9_E5: - case TEXTURE_DATA_FORMAT_R32F: - case TEXTURE_DATA_FORMAT_RG32F: - case TEXTURE_DATA_FORMAT_RGBA32F: - case TEXTURE_DATA_FORMAT_RGB8_ETC2: - case TEXTURE_DATA_FORMAT_RGB8_PUNCHTHROUGH_ALPHA1: - case TEXTURE_DATA_FORMAT_RGBA8_ETC2_EAC: - case TEXTURE_DATA_FORMAT_R11_EAC: - case TEXTURE_DATA_FORMAT_SIGNED_R11_EAC: - case TEXTURE_DATA_FORMAT_RG11_EAC: - case TEXTURE_DATA_FORMAT_SIGNED_RG11_EAC: - return true; - default: - return false; - } -} - /* Some cases of transfer operations are raw data copies that don't depend * on the semantics of the pixel format (no pixel format conversions are * involved). In these cases, it is safe to choose any format supported by @@ -389,7 +64,7 @@ v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo, * TFU paths with formats that are not TFU supported otherwise. */ const struct v3dv_format * -v3dv_get_compatible_tfu_format(const struct v3d_device_info *devinfo, +v3dv_get_compatible_tfu_format(struct v3dv_device *device, uint32_t bpp, VkFormat *out_vk_format) { @@ -406,32 +81,15 @@ v3dv_get_compatible_tfu_format(const struct v3d_device_info *devinfo, if (out_vk_format) *out_vk_format = vk_format; - const struct v3dv_format *format = v3dv_get_format(vk_format); - assert(v3dv_tfu_supports_tex_format(devinfo, format->tex_type)); + const struct v3dv_format *format = v3dv_X(device, get_format)(vk_format); + assert(v3dv_X(device, tfu_supports_tex_format)(format->tex_type)); return format; } -static bool -format_supports_blending(const struct v3dv_format *format) -{ - /* Hardware blending is only supported on render targets that are configured - * 4x8-bit unorm, 2x16-bit float or 4x16-bit float. 
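
/* Sketch of the return-size policy in v3dv_get_tex_return_size() above,
 * including the newly added debug overrides: a global debug mask can pin
 * every TMU lookup to 16- or 32-bit returns, shadow-compare lookups always
 * use 16 bits, and everything else uses the per-format value from the
 * table. debug_flags and the DBG_* bits are placeholders for V3D_DEBUG
 * and its flags.
 */
#include <stdbool.h>
#include <stdint.h>

enum { DBG_TMU_16BIT = 1u << 0, DBG_TMU_32BIT = 1u << 1 };

static uint32_t debug_flags; /* stands in for the global V3D_DEBUG mask */

static uint8_t
tex_return_size(uint8_t format_return_size, bool compare_enable)
{
   if (debug_flags & DBG_TMU_16BIT)
      return 16;
   if (debug_flags & DBG_TMU_32BIT)
      return 32;
   if (compare_enable)
      return 16; /* depth-compare results always come back as 16-bit */
   return format_return_size;
}
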
- */ - uint32_t type, bpp; - v3dv_get_internal_type_bpp_for_output_format(format->rt_type, &type, &bpp); - switch (type) { - case V3D_INTERNAL_TYPE_8: - return bpp == V3D_INTERNAL_BPP_32; - case V3D_INTERNAL_TYPE_16F: - return bpp == V3D_INTERNAL_BPP_32 || V3D_INTERNAL_BPP_64; - default: - return false; - } -} - static VkFormatFeatureFlags -image_format_features(VkFormat vk_format, +image_format_features(struct v3dv_physical_device *pdevice, + VkFormat vk_format, const struct v3dv_format *v3dv_format, VkImageTiling tiling) { @@ -476,7 +134,7 @@ image_format_features(VkFormat vk_format, if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; - if (format_supports_blending(v3dv_format)) + if (v3dv_X(pdevice, format_supports_blending)(v3dv_format)) flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; } else if (aspects & zs_aspects) { flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | @@ -556,33 +214,35 @@ buffer_format_features(VkFormat vk_format, const struct v3dv_format *v3dv_format } bool -v3dv_buffer_format_supports_features(VkFormat vk_format, +v3dv_buffer_format_supports_features(struct v3dv_device *device, + VkFormat vk_format, VkFormatFeatureFlags features) { - const struct v3dv_format *v3dv_format = v3dv_get_format(vk_format); + const struct v3dv_format *v3dv_format = v3dv_X(device, get_format)(vk_format); const VkFormatFeatureFlags supported = buffer_format_features(vk_format, v3dv_format); return (supported & features) == features; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties* pFormatProperties) { - const struct v3dv_format *v3dv_format = v3dv_get_format(format); + V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, physicalDevice); + const struct v3dv_format *v3dv_format = v3dv_X(pdevice, get_format)(format); *pFormatProperties = (VkFormatProperties) { .linearTilingFeatures = - image_format_features(format, v3dv_format, VK_IMAGE_TILING_LINEAR), + image_format_features(pdevice, format, v3dv_format, VK_IMAGE_TILING_LINEAR), .optimalTilingFeatures = - image_format_features(format, v3dv_format, VK_IMAGE_TILING_OPTIMAL), + image_format_features(pdevice, format, v3dv_format, VK_IMAGE_TILING_OPTIMAL), .bufferFeatures = buffer_format_features(format, v3dv_format), }; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2 *pFormatProperties) @@ -600,12 +260,16 @@ v3dv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, vk_outarray_append(&out, mod_props) { mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR; mod_props->drmFormatModifierPlaneCount = 1; + mod_props->drmFormatModifierTilingFeatures = + pFormatProperties->formatProperties.linearTilingFeatures; } } if (pFormatProperties->formatProperties.optimalTilingFeatures) { vk_outarray_append(&out, mod_props) { mod_props->drmFormatModifier = DRM_FORMAT_MOD_BROADCOM_UIF; mod_props->drmFormatModifierPlaneCount = 1; + mod_props->drmFormatModifierTilingFeatures = + pFormatProperties->formatProperties.optimalTilingFeatures; } } break; @@ -625,12 +289,24 @@ get_image_format_properties( VkImageFormatProperties *pImageFormatProperties, VkSamplerYcbcrConversionImageFormatProperties *pYcbcrImageFormatProperties) { - const struct v3dv_format *v3dv_format = v3dv_get_format(info->format); + const struct v3dv_format *v3dv_format = v3dv_X(physical_device, 
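
/* The removed format_supports_blending() classifies render targets by
 * their internal TLB layout: hardware blending only exists for 4x8-bit
 * unorm and 2x/4x16-bit float configurations. Note the original 16F case
 * reads "bpp == V3D_INTERNAL_BPP_32 || V3D_INTERNAL_BPP_64", i.e. the
 * second operand lost its "bpp =="; the sketch below spells out what the
 * comment says is intended. Enum values are illustrative.
 */
#include <stdbool.h>

enum internal_type { TYPE_8, TYPE_16F, TYPE_32F };
enum internal_bpp  { BPP_32, BPP_64, BPP_128 };

static bool
supports_hw_blending(enum internal_type type, enum internal_bpp bpp)
{
   switch (type) {
   case TYPE_8:
      return bpp == BPP_32;                  /* 4x 8-bit unorm */
   case TYPE_16F:
      return bpp == BPP_32 || bpp == BPP_64; /* 2x/4x 16-bit float */
   default:
      return false;
   }
}
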
get_format)(info->format); VkFormatFeatureFlags format_feature_flags = - image_format_features(info->format, v3dv_format, tiling); + image_format_features(physical_device, info->format, v3dv_format, tiling); if (!format_feature_flags) goto unsupported; + /* This allows users to create uncompressed views of compressed images, + * however this is not something the hardware supports naturally and requires + * the driver to lie when programming the texture state to make the hardware + * sample with the uncompressed view correctly, and even then, there are + * issues when running on real hardware. + * + * See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11336 + * for details. + */ + if (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) + goto unsupported; + if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) { if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) { goto unsupported; @@ -775,7 +451,7 @@ static const VkExternalMemoryProperties prime_fd_props = { VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, }; -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetPhysicalDeviceImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, @@ -801,7 +477,7 @@ v3dv_GetPhysicalDeviceImageFormatProperties( pImageFormatProperties, NULL); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2 *base_info, VkImageFormatProperties2 *base_props) @@ -875,7 +551,7 @@ done: return result; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceSparseImageFormatProperties( VkPhysicalDevice physicalDevice, VkFormat format, @@ -889,7 +565,7 @@ v3dv_GetPhysicalDeviceSparseImageFormatProperties( *pPropertyCount = 0; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceSparseImageFormatProperties2( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, @@ -899,7 +575,7 @@ v3dv_GetPhysicalDeviceSparseImageFormatProperties2( *pPropertyCount = 0; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_GetPhysicalDeviceExternalBufferProperties( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_image.c b/lib/mesa/src/broadcom/vulkan/v3dv_image.c index 2935d7e8b..d03814d98 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_image.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_image.c @@ -23,7 +23,6 @@ #include "v3dv_private.h" -#include "broadcom/cle/v3dx_pack.h" #include "drm-uapi/drm_fourcc.h" #include "util/format/u_format.h" #include "util/u_math.h" @@ -77,9 +76,9 @@ v3d_setup_slices(struct v3dv_image *image) { assert(image->cpp > 0); - uint32_t width = image->extent.width; - uint32_t height = image->extent.height; - uint32_t depth = image->extent.depth; + uint32_t width = image->vk.extent.width; + uint32_t height = image->vk.extent.height; + uint32_t depth = image->vk.extent.depth; /* Note that power-of-two padding is based on level 1. 
These are not * equivalent to just util_next_power_of_two(dimension), because at a @@ -95,21 +94,21 @@ v3d_setup_slices(struct v3dv_image *image) uint32_t uif_block_w = utile_w * 2; uint32_t uif_block_h = utile_h * 2; - uint32_t block_width = vk_format_get_blockwidth(image->vk_format); - uint32_t block_height = vk_format_get_blockheight(image->vk_format); + uint32_t block_width = vk_format_get_blockwidth(image->vk.format); + uint32_t block_height = vk_format_get_blockheight(image->vk.format); - assert(image->samples == VK_SAMPLE_COUNT_1_BIT || - image->samples == VK_SAMPLE_COUNT_4_BIT); - bool msaa = image->samples != VK_SAMPLE_COUNT_1_BIT; + assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT || + image->vk.samples == VK_SAMPLE_COUNT_4_BIT); + bool msaa = image->vk.samples != VK_SAMPLE_COUNT_1_BIT; bool uif_top = msaa; - assert(image->array_size > 0); + assert(image->vk.array_layers > 0); assert(depth > 0); - assert(image->levels >= 1); + assert(image->vk.mip_levels >= 1); uint32_t offset = 0; - for (int32_t i = image->levels - 1; i >= 0; i--) { + for (int32_t i = image->vk.mip_levels - 1; i >= 0; i--) { struct v3d_resource_slice *slice = &image->slices[i]; uint32_t level_width, level_height, level_depth; @@ -135,21 +134,21 @@ v3d_setup_slices(struct v3dv_image *image) level_height = DIV_ROUND_UP(level_height, block_height); if (!image->tiled) { - slice->tiling = VC5_TILING_RASTER; - if (image->type == VK_IMAGE_TYPE_1D) + slice->tiling = V3D_TILING_RASTER; + if (image->vk.image_type == VK_IMAGE_TYPE_1D) level_width = align(level_width, 64 / image->cpp); } else { if ((i != 0 || !uif_top) && (level_width <= utile_w || level_height <= utile_h)) { - slice->tiling = VC5_TILING_LINEARTILE; + slice->tiling = V3D_TILING_LINEARTILE; level_width = align(level_width, utile_w); level_height = align(level_height, utile_h); } else if ((i != 0 || !uif_top) && level_width <= uif_block_w) { - slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN; + slice->tiling = V3D_TILING_UBLINEAR_1_COLUMN; level_width = align(level_width, uif_block_w); level_height = align(level_height, uif_block_h); } else if ((i != 0 || !uif_top) && level_width <= 2 * uif_block_w) { - slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN; + slice->tiling = V3D_TILING_UBLINEAR_2_COLUMN; level_width = align(level_width, 2 * uif_block_w); level_height = align(level_height, uif_block_h); } else { @@ -167,10 +166,10 @@ v3d_setup_slices(struct v3dv_image *image) * perfectly misaligned. 
*/ if ((level_height / uif_block_h) % - (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) == 0) { - slice->tiling = VC5_TILING_UIF_XOR; + (V3D_PAGE_CACHE_SIZE / V3D_UIFBLOCK_ROW_SIZE) == 0) { + slice->tiling = V3D_TILING_UIF_XOR; } else { - slice->tiling = VC5_TILING_UIF_NO_XOR; + slice->tiling = V3D_TILING_UIF_NO_XOR; } } } @@ -178,8 +177,8 @@ v3d_setup_slices(struct v3dv_image *image) slice->offset = offset; slice->stride = level_width * image->cpp; slice->padded_height = level_height; - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { slice->padded_height_of_output_image_in_uif_blocks = slice->padded_height / (2 * v3d_utile_height(image->cpp)); } @@ -195,7 +194,7 @@ v3d_setup_slices(struct v3dv_image *image) if (i == 1 && level_width > 4 * uif_block_w && level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) { - slice_total_size = align(slice_total_size, VC5_UIFCFG_PAGE_SIZE); + slice_total_size = align(slice_total_size, V3D_UIFCFG_PAGE_SIZE); } offset += slice_total_size; @@ -211,13 +210,12 @@ v3d_setup_slices(struct v3dv_image *image) * * We additionally align to 4k, which improves UIF XOR performance. */ - image->alignment = - image->tiling == VK_IMAGE_TILING_LINEAR ? image->cpp : 4096; + image->alignment = image->tiled ? 4096 : image->cpp; uint32_t align_offset = align(image->slices[0].offset, image->alignment) - image->slices[0].offset; if (align_offset) { image->size += align_offset; - for (int i = 0; i < image->levels; i++) + for (int i = 0; i < image->vk.mip_levels; i++) image->slices[i].offset += align_offset; } @@ -225,10 +223,10 @@ v3d_setup_slices(struct v3dv_image *image) * one full mipmap tree to the next (64b aligned). For 3D textures, * we need to program the stride between slices of miplevel 0. 
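
/* Condensed sketch of the tiling-mode decision above: mip levels other
 * than a UIF-forced level 0 fall back to linear-tile or 1/2-column
 * UIF-linear layouts depending on how the level width compares to the
 * microtile and UIF-block widths, and full UIF levels enable the XOR
 * address swizzle exactly when rows of UIF blocks would alias the same
 * page-cache bank (height in UIF blocks divisible by page-cache rows).
 * Constants below are the values used in this file.
 */
#include <stdbool.h>
#include <stdint.h>

enum tiling { RASTER, LINEARTILE, UBLINEAR_1, UBLINEAR_2, UIF_NO_XOR, UIF_XOR };

#define PAGE_CACHE_SIZE   (4096 * 8)   /* V3D_UIFCFG_PAGE_SIZE * 8 banks */
#define UIFBLOCK_ROW_SIZE (4 * 4 * 64) /* 4 UIF blocks of 4 utiles of 64B */

static enum tiling
pick_tiling(bool uif_top, bool is_level0,
            uint32_t w, uint32_t h,         /* level size */
            uint32_t utile_w, uint32_t utile_h,
            uint32_t uif_w, uint32_t uif_h) /* UIF block = 2x2 utiles */
{
   bool small_ok = !is_level0 || !uif_top;

   if (small_ok && (w <= utile_w || h <= utile_h))
      return LINEARTILE;
   if (small_ok && w <= uif_w)
      return UBLINEAR_1;
   if (small_ok && w <= 2 * uif_w)
      return UBLINEAR_2;

   /* Full UIF: XOR banking only helps when block rows alias perfectly. */
   if ((h / uif_h) % (PAGE_CACHE_SIZE / UIFBLOCK_ROW_SIZE) == 0)
      return UIF_XOR;
   return UIF_NO_XOR;
}
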
*/ - if (image->type != VK_IMAGE_TYPE_3D) { + if (image->vk.image_type != VK_IMAGE_TYPE_3D) { image->cube_map_stride = align(image->slices[0].offset + image->slices[0].size, 64); - image->size += image->cube_map_stride * (image->array_size - 1); + image->size += image->cube_map_stride * (image->vk.array_layers - 1); } else { image->cube_map_stride = image->slices[0].size; } @@ -239,29 +237,23 @@ v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer { const struct v3d_resource_slice *slice = &image->slices[level]; - if (image->type == VK_IMAGE_TYPE_3D) + if (image->vk.image_type == VK_IMAGE_TYPE_3D) return image->mem_offset + slice->offset + layer * slice->size; else return image->mem_offset + slice->offset + layer * image->cube_map_stride; } -VkResult -v3dv_CreateImage(VkDevice _device, - const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImage *pImage) +static VkResult +create_image(struct v3dv_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) { - V3DV_FROM_HANDLE(v3dv_device, device, _device); struct v3dv_image *image = NULL; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - - v3dv_assert(pCreateInfo->mipLevels > 0); - v3dv_assert(pCreateInfo->arrayLayers > 0); - v3dv_assert(pCreateInfo->samples > 0); - v3dv_assert(pCreateInfo->extent.width > 0); - v3dv_assert(pCreateInfo->extent.height > 0); - v3dv_assert(pCreateInfo->extent.depth > 0); + image = vk_image_create(&device->vk, pCreateInfo, pAllocator, sizeof(*image)); + if (image == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); /* When using the simulator the WSI common code will see that our * driver wsi device doesn't match the display device and because of that @@ -272,68 +264,60 @@ v3dv_CreateImage(VkDevice _device, * As a result, on that path, swapchain images do not have any special * requirements and are not created with the pNext structs below. 
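
/* Sketch of v3dv_layer_offset() above: 3D images step between depth
 * slices of the selected mip level using the per-slice size, while
 * 1D/2D array (and cube) images step between whole mip chains using the
 * 64-byte-aligned cube_map_stride.
 */
#include <stdbool.h>
#include <stdint.h>

static uint64_t
layer_offset(uint64_t mem_offset, uint64_t slice_offset, uint64_t slice_size,
             uint64_t cube_map_stride, bool is_3d, uint32_t layer)
{
   if (is_3d)
      return mem_offset + slice_offset + (uint64_t)layer * slice_size;
   return mem_offset + slice_offset + (uint64_t)layer * cube_map_stride;
}
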
*/ + VkImageTiling tiling = pCreateInfo->tiling; uint64_t modifier = DRM_FORMAT_MOD_INVALID; - if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { const VkImageDrmFormatModifierListCreateInfoEXT *mod_info = vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); - assert(mod_info); - for (uint32_t i = 0; i < mod_info->drmFormatModifierCount; i++) { - switch (mod_info->pDrmFormatModifiers[i]) { - case DRM_FORMAT_MOD_LINEAR: - if (modifier == DRM_FORMAT_MOD_INVALID) - modifier = DRM_FORMAT_MOD_LINEAR; - break; - case DRM_FORMAT_MOD_BROADCOM_UIF: - modifier = DRM_FORMAT_MOD_BROADCOM_UIF; - break; + const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT); + assert(mod_info || explicit_mod_info); + + if (mod_info) { + for (uint32_t i = 0; i < mod_info->drmFormatModifierCount; i++) { + switch (mod_info->pDrmFormatModifiers[i]) { + case DRM_FORMAT_MOD_LINEAR: + if (modifier == DRM_FORMAT_MOD_INVALID) + modifier = DRM_FORMAT_MOD_LINEAR; + break; + case DRM_FORMAT_MOD_BROADCOM_UIF: + modifier = DRM_FORMAT_MOD_BROADCOM_UIF; + break; + } } + } else { + modifier = explicit_mod_info->drmFormatModifier; } - } else { - const struct wsi_image_create_info *wsi_info = - vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); - if (wsi_info) - modifier = DRM_FORMAT_MOD_LINEAR; - } - - /* 1D and 1D_ARRAY textures are always raster-order */ - VkImageTiling tiling; - if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D) - tiling = VK_IMAGE_TILING_LINEAR; - else if (modifier == DRM_FORMAT_MOD_INVALID) - tiling = pCreateInfo->tiling; - else if (modifier == DRM_FORMAT_MOD_BROADCOM_UIF) - tiling = VK_IMAGE_TILING_OPTIMAL; - else + assert(modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_BROADCOM_UIF); + } else if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || + image->vk.wsi_legacy_scanout) { tiling = VK_IMAGE_TILING_LINEAR; + } - const struct v3dv_format *format = v3dv_get_format(pCreateInfo->format); + const struct v3dv_format *format = + v3dv_X(device, get_format)(pCreateInfo->format); v3dv_assert(format != NULL && format->supported); - image = vk_object_zalloc(&device->vk, pAllocator, sizeof(*image), - VK_OBJECT_TYPE_IMAGE); - if (!image) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT || pCreateInfo->samples == VK_SAMPLE_COUNT_4_BIT); - image->type = pCreateInfo->imageType; - image->extent = pCreateInfo->extent; - image->vk_format = pCreateInfo->format; image->format = format; - image->aspects = vk_format_aspects(image->vk_format); - image->levels = pCreateInfo->mipLevels; - image->array_size = pCreateInfo->arrayLayers; - image->samples = pCreateInfo->samples; - image->usage = pCreateInfo->usage; - image->flags = pCreateInfo->flags; + image->cpp = vk_format_get_blocksize(image->vk.format); + image->tiled = tiling == VK_IMAGE_TILING_OPTIMAL || + (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && + modifier != DRM_FORMAT_MOD_LINEAR); - image->drm_format_mod = modifier; - image->tiling = tiling; - image->tiled = tiling == VK_IMAGE_TILING_OPTIMAL; + image->vk.tiling = tiling; + image->vk.drm_format_mod = modifier; - image->cpp = vk_format_get_blocksize(image->vk_format); + /* Our meta paths can create image views with compatible formats for any + * image, so always set this flag to keep the 
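
/* Sketch of the modifier negotiation added above: when a modifier *list*
 * is supplied the driver prefers DRM_FORMAT_MOD_BROADCOM_UIF whenever it
 * is offered, regardless of list order, and takes DRM_FORMAT_MOD_LINEAR
 * only as a fallback, while the *explicit* create info pins the modifier
 * directly. The modifier codes are passed in rather than redefined here,
 * since their values come from drm_fourcc.h.
 */
#include <stdint.h>

static uint64_t
choose_modifier(const uint64_t *mods, uint32_t count,
                uint64_t mod_linear, uint64_t mod_uif, uint64_t mod_invalid)
{
   uint64_t chosen = mod_invalid;
   for (uint32_t i = 0; i < count; i++) {
      if (mods[i] == mod_uif)
         chosen = mod_uif;            /* UIF always wins */
      else if (mods[i] == mod_linear && chosen == mod_invalid)
         chosen = mod_linear;         /* LINEAR only if nothing chosen yet */
   }
   return chosen;
}
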
common Vulkan image code + * happy. + */ + image->vk.create_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; v3d_setup_slices(image); @@ -342,7 +326,71 @@ v3dv_CreateImage(VkDevice _device, return VK_SUCCESS; } -void +static VkResult +create_image_from_swapchain(struct v3dv_device *device, + const VkImageCreateInfo *pCreateInfo, + const VkImageSwapchainCreateInfoKHR *swapchain_info, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + struct v3dv_image *swapchain_image = + v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain, 0); + assert(swapchain_image); + + VkImageCreateInfo local_create_info = *pCreateInfo; + local_create_info.pNext = NULL; + + /* Added by wsi code. */ + local_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + /* The spec requires TILING_OPTIMAL as input, but the swapchain image may + * privately use a different tiling. See spec anchor + * #swapchain-wsi-image-create-info . + */ + assert(local_create_info.tiling == VK_IMAGE_TILING_OPTIMAL); + local_create_info.tiling = swapchain_image->vk.tiling; + + VkImageDrmFormatModifierListCreateInfoEXT local_modifier_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT, + .drmFormatModifierCount = 1, + .pDrmFormatModifiers = &swapchain_image->vk.drm_format_mod, + }; + + if (swapchain_image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID) + __vk_append_struct(&local_create_info, &local_modifier_info); + + assert(swapchain_image->vk.image_type == local_create_info.imageType); + assert(swapchain_image->vk.format == local_create_info.format); + assert(swapchain_image->vk.extent.width == local_create_info.extent.width); + assert(swapchain_image->vk.extent.height == local_create_info.extent.height); + assert(swapchain_image->vk.extent.depth == local_create_info.extent.depth); + assert(swapchain_image->vk.array_layers == local_create_info.arrayLayers); + assert(swapchain_image->vk.samples == local_create_info.samples); + assert(swapchain_image->vk.tiling == local_create_info.tiling); + assert((swapchain_image->vk.usage & local_create_info.usage) == + local_create_info.usage); + + return create_image(device, &local_create_info, pAllocator, pImage); +} + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_CreateImage(VkDevice _device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + + const VkImageSwapchainCreateInfoKHR *swapchain_info = + vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); + if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) + return create_image_from_swapchain(device, pCreateInfo, swapchain_info, + pAllocator, pImage); + + return create_image(device, pCreateInfo, pAllocator, pImage); +} + +VKAPI_ATTR void VKAPI_CALL v3dv_GetImageSubresourceLayout(VkDevice device, VkImage _image, const VkImageSubresource *subresource, @@ -358,7 +406,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device, layout->depthPitch = image->cube_map_stride; layout->arrayPitch = image->cube_map_stride; - if (image->type != VK_IMAGE_TYPE_3D) { + if (image->vk.image_type != VK_IMAGE_TYPE_3D) { layout->size = slice->size; } else { /* For 3D images, the size of the slice represents the size of a 2D slice @@ -368,7 +416,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device, * arranged in memory from last to first). 
*/ if (subresource->mipLevel == 0) { - layout->size = slice->size * image->extent.depth; + layout->size = slice->size * image->vk.extent.depth; } else { const struct v3d_resource_slice *prev_slice = &image->slices[subresource->mipLevel - 1]; @@ -377,23 +425,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device, } } -VkResult -v3dv_GetImageDrmFormatModifierPropertiesEXT( - VkDevice device, - VkImage _image, - VkImageDrmFormatModifierPropertiesEXT *pProperties) -{ - V3DV_FROM_HANDLE(v3dv_image, image, _image); - - assert(pProperties->sType == - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT); - - pProperties->drmFormatModifier = image->drm_format_mod; - - return VK_SUCCESS; -} - -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks* pAllocator) @@ -404,7 +436,7 @@ v3dv_DestroyImage(VkDevice _device, if (image == NULL) return; - vk_object_free(&device->vk, pAllocator, image); + vk_image_destroy(&device->vk, pAllocator, &image->vk); } VkImageViewType @@ -419,138 +451,10 @@ v3dv_image_type_to_view_type(VkImageType type) } } -/* - * This method translates pipe_swizzle to the swizzle values used at the - * packet TEXTURE_SHADER_STATE - * - * FIXME: C&P from v3d, common place? - */ -static uint32_t -translate_swizzle(unsigned char pipe_swizzle) -{ - switch (pipe_swizzle) { - case PIPE_SWIZZLE_0: - return 0; - case PIPE_SWIZZLE_1: - return 1; - case PIPE_SWIZZLE_X: - case PIPE_SWIZZLE_Y: - case PIPE_SWIZZLE_Z: - case PIPE_SWIZZLE_W: - return 2 + pipe_swizzle; - default: - unreachable("unknown swizzle"); - } -} - -/* - * Packs and ensure bo for the shader state (the latter can be temporal). - */ -static void -pack_texture_shader_state_helper(struct v3dv_device *device, - struct v3dv_image_view *image_view, - bool for_cube_map_array_storage) -{ - assert(!for_cube_map_array_storage || - image_view->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY); - const uint32_t index = for_cube_map_array_storage ? 1 : 0; - - assert(image_view->image); - const struct v3dv_image *image = image_view->image; - - assert(image->samples == VK_SAMPLE_COUNT_1_BIT || - image->samples == VK_SAMPLE_COUNT_4_BIT); - const uint32_t msaa_scale = image->samples == VK_SAMPLE_COUNT_1_BIT ? 
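
/* Sketch of the 3D-image branch of GetImageSubresourceLayout above. A
 * slice records the size of a single 2D depth slice, and levels are laid
 * out in memory from last (smallest) to first, so the base level's full
 * size is slice_size * depth, while a non-base level can be bounded by
 * the offset of the previous (larger, later-in-memory) level. The exact
 * non-base expression falls outside this hunk, so the else branch here
 * is an assumption based on the surrounding comment.
 */
#include <stdint.h>

static uint64_t
level_3d_size(uint32_t level, uint64_t slice_size, uint32_t depth,
              uint64_t this_offset, uint64_t prev_level_offset)
{
   if (level == 0)
      return slice_size * depth;
   /* levels are stored last-to-first, so prev_level_offset > this_offset */
   return prev_level_offset - this_offset;
}
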
1 : 2; - - v3dv_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) { - - tex.level_0_is_strictly_uif = - (image->slices[0].tiling == VC5_TILING_UIF_XOR || - image->slices[0].tiling == VC5_TILING_UIF_NO_XOR); - - tex.level_0_xor_enable = (image->slices[0].tiling == VC5_TILING_UIF_XOR); - - if (tex.level_0_is_strictly_uif) - tex.level_0_ub_pad = image->slices[0].ub_pad; - - /* FIXME: v3d never sets uif_xor_disable, but uses it on the following - * check so let's set the default value - */ - tex.uif_xor_disable = false; - if (tex.uif_xor_disable || - tex.level_0_is_strictly_uif) { - tex.extended = true; - } - - tex.base_level = image_view->base_level; - tex.max_level = image_view->max_level; - - tex.swizzle_r = translate_swizzle(image_view->swizzle[0]); - tex.swizzle_g = translate_swizzle(image_view->swizzle[1]); - tex.swizzle_b = translate_swizzle(image_view->swizzle[2]); - tex.swizzle_a = translate_swizzle(image_view->swizzle[3]); - - tex.texture_type = image_view->format->tex_type; - - if (image->type == VK_IMAGE_TYPE_3D) { - tex.image_depth = image->extent.depth; - } else { - tex.image_depth = (image_view->last_layer - image_view->first_layer) + 1; - } - - /* Empirical testing with CTS shows that when we are sampling from cube - * arrays we want to set image depth to layers / 6, but not when doing - * image load/store. - */ - if (image_view->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && - !for_cube_map_array_storage) { - assert(tex.image_depth % 6 == 0); - tex.image_depth /= 6; - } - - tex.image_height = image->extent.height * msaa_scale; - tex.image_width = image->extent.width * msaa_scale; - - /* On 4.x, the height of a 1D texture is redefined to be the - * upper 14 bits of the width (which is only usable with txf). - */ - if (image->type == VK_IMAGE_TYPE_1D) { - tex.image_height = tex.image_width >> 14; - } - tex.image_width &= (1 << 14) - 1; - tex.image_height &= (1 << 14) - 1; - - tex.array_stride_64_byte_aligned = image->cube_map_stride / 64; - - tex.srgb = vk_format_is_srgb(image_view->vk_format); - - /* At this point we don't have the job. That's the reason the first - * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to - * add the bo to the job. This also means that we need to add manually - * the image bo to the job using the texture. 
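
/* The 1D/texel-buffer sizing trick used in the packing code above: the
 * hardware's 14-bit width and height fields are combined into one 28-bit
 * element count by storing the low 14 bits in "width" and the next 14
 * bits in "height" (a size only usable through txf-style fetches).
 */
#include <stdint.h>

static void
split_28bit_size(uint32_t num_elements, uint32_t *width, uint32_t *height)
{
   *width  = num_elements & ((1u << 14) - 1);
   *height = (num_elements >> 14) & ((1u << 14) - 1);
}
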
- */ - const uint32_t base_offset = - image->mem->bo->offset + - v3dv_layer_offset(image, 0, image_view->first_layer); - tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); - } -} - -static void -pack_texture_shader_state(struct v3dv_device *device, - struct v3dv_image_view *iview) -{ - pack_texture_shader_state_helper(device, iview, false); - if (iview->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) - pack_texture_shader_state_helper(device, iview, true); -} - static enum pipe_swizzle -vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle comp, - VkComponentSwizzle swz) +vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle swz) { - if (swz == VK_COMPONENT_SWIZZLE_IDENTITY) - swz = comp; + assert(swz != VK_COMPONENT_SWIZZLE_IDENTITY); switch (swz) { case VK_COMPONENT_SWIZZLE_ZERO: @@ -570,7 +474,7 @@ vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle comp, }; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -580,56 +484,15 @@ v3dv_CreateImageView(VkDevice _device, V3DV_FROM_HANDLE(v3dv_image, image, pCreateInfo->image); struct v3dv_image_view *iview; - iview = vk_object_zalloc(&device->vk, pAllocator, sizeof(*iview), - VK_OBJECT_TYPE_IMAGE_VIEW); + iview = vk_image_view_create(&device->vk, pCreateInfo, pAllocator, + sizeof(*iview)); if (iview == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - assert(range->layerCount > 0); - assert(range->baseMipLevel < image->levels); - -#ifdef DEBUG - switch (image->type) { - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + v3dv_layer_count(image, range) - 1 <= - image->array_size); - break; - case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + v3dv_layer_count(image, range) - 1 - <= u_minify(image->extent.depth, range->baseMipLevel)); - /* VK_KHR_maintenance1 */ - assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D || - ((image->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) && - range->levelCount == 1 && range->layerCount == 1)); - assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D_ARRAY || - ((image->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) && - range->levelCount == 1)); - break; - default: - unreachable("bad VkImageType"); - } -#endif - - iview->image = image; - iview->aspects = range->aspectMask; - iview->type = pCreateInfo->viewType; - - iview->base_level = range->baseMipLevel; - iview->max_level = iview->base_level + v3dv_level_count(image, range) - 1; - iview->extent = (VkExtent3D) { - .width = u_minify(image->extent.width , iview->base_level), - .height = u_minify(image->extent.height, iview->base_level), - .depth = u_minify(image->extent.depth , iview->base_level), - }; - - iview->first_layer = range->baseArrayLayer; - iview->last_layer = range->baseArrayLayer + - v3dv_layer_count(image, range) - 1; - iview->offset = - v3dv_layer_offset(image, iview->base_level, iview->first_layer); + iview->offset = v3dv_layer_offset(image, iview->vk.base_mip_level, + iview->vk.base_array_layer); /* If we have D24S8 format but the view only selects the stencil aspect * we want to re-interpret the format as RGBA8_UINT, then map our stencil @@ -653,44 +516,40 @@ v3dv_CreateImageView(VkDevice _device, * better to reimplement the latter using vk component */ image_view_swizzle[0] = - 
vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_R, - pCreateInfo->components.r); + vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.r); image_view_swizzle[1] = - vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_G, - pCreateInfo->components.g); + vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.g); image_view_swizzle[2] = - vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_B, - pCreateInfo->components.b); + vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.b); image_view_swizzle[3] = - vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_A, - pCreateInfo->components.a); + vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.a); } - iview->vk_format = format; - iview->format = v3dv_get_format(format); + iview->vk.format = format; + iview->format = v3dv_X(device, get_format)(format); assert(iview->format && iview->format->supported); - if (vk_format_is_depth_or_stencil(iview->vk_format)) { - iview->internal_type = v3dv_get_internal_depth_type(iview->vk_format); + if (vk_format_is_depth_or_stencil(iview->vk.format)) { + iview->internal_type = + v3dv_X(device, get_internal_depth_type)(iview->vk.format); } else { - v3dv_get_internal_type_bpp_for_output_format(iview->format->rt_type, - &iview->internal_type, - &iview->internal_bpp); + v3dv_X(device, get_internal_type_bpp_for_output_format) + (iview->format->rt_type, &iview->internal_type, &iview->internal_bpp); } - const uint8_t *format_swizzle = v3dv_get_format_swizzle(format); + const uint8_t *format_swizzle = v3dv_get_format_swizzle(device, format); util_format_compose_swizzles(format_swizzle, image_view_swizzle, iview->swizzle); iview->swap_rb = iview->swizzle[0] == PIPE_SWIZZLE_Z; - pack_texture_shader_state(device, iview); + v3dv_X(device, pack_texture_shader_state)(device, iview); *pView = v3dv_image_view_to_handle(iview); return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyImageView(VkDevice _device, VkImageView imageView, const VkAllocationCallbacks* pAllocator) @@ -701,52 +560,10 @@ v3dv_DestroyImageView(VkDevice _device, if (image_view == NULL) return; - vk_object_free(&device->vk, pAllocator, image_view); -} - -static void -pack_texture_shader_state_from_buffer_view(struct v3dv_device *device, - struct v3dv_buffer_view *buffer_view) -{ - assert(buffer_view->buffer); - const struct v3dv_buffer *buffer = buffer_view->buffer; - - v3dv_pack(buffer_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) { - tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); - tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); - tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); - tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); - - tex.image_depth = 1; - - /* On 4.x, the height of a 1D texture is redefined to be the upper 14 - * bits of the width (which is only usable with txf) (or in other words, - * we are providing a 28 bit field for size, but split on the usual - * 14bit height/width). - */ - tex.image_width = buffer_view->num_elements; - tex.image_height = tex.image_width >> 14; - tex.image_width &= (1 << 14) - 1; - tex.image_height &= (1 << 14) - 1; - - tex.texture_type = buffer_view->format->tex_type; - tex.srgb = vk_format_is_srgb(buffer_view->vk_format); - - /* At this point we don't have the job. That's the reason the first - * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to - * add the bo to the job. This also means that we need to add manually - * the image bo to the job using the texture. 
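
/* Sketch of the swizzle plumbing above: with the vk_image_view common
 * code now resolving IDENTITY up front, the view's component mapping is
 * translated to pipe swizzles and composed with the format's own channel
 * order; if the composed red channel ends up sourcing Z, the view needs
 * the red/blue swap. compose() mirrors util_format_compose_swizzles().
 */
#include <stdbool.h>
#include <stdint.h>

enum swz { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_0, SWZ_1 };

static void
compose(const uint8_t fmt[4], const uint8_t view[4], uint8_t out[4])
{
   for (int i = 0; i < 4; i++)
      out[i] = view[i] <= SWZ_W ? fmt[view[i]] : view[i];
}

static bool
needs_rb_swap(const uint8_t composed[4])
{
   return composed[0] == SWZ_Z; /* red channel reads from blue */
}
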
- */ - const uint32_t base_offset = - buffer->mem->bo->offset + - buffer->mem_offset + - buffer_view->offset; - - tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); - } + vk_image_view_destroy(&device->vk, pAllocator, &image_view->vk); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -754,14 +571,14 @@ v3dv_CreateBufferView(VkDevice _device, { V3DV_FROM_HANDLE(v3dv_device, device, _device); - const struct v3dv_buffer *buffer = + struct v3dv_buffer *buffer = v3dv_buffer_from_handle(pCreateInfo->buffer); struct v3dv_buffer_view *view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view), VK_OBJECT_TYPE_BUFFER_VIEW); if (!view) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); uint32_t range; if (pCreateInfo->range == VK_WHOLE_SIZE) @@ -777,22 +594,21 @@ v3dv_CreateBufferView(VkDevice _device, view->size = view->offset + range; view->num_elements = num_elements; view->vk_format = pCreateInfo->format; - view->format = v3dv_get_format(view->vk_format); + view->format = v3dv_X(device, get_format)(view->vk_format); - v3dv_get_internal_type_bpp_for_output_format(view->format->rt_type, - &view->internal_type, - &view->internal_bpp); + v3dv_X(device, get_internal_type_bpp_for_output_format) + (view->format->rt_type, &view->internal_type, &view->internal_bpp); if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT || buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) - pack_texture_shader_state_from_buffer_view(device, view); + v3dv_X(device, pack_texture_shader_state_from_buffer_view)(device, view); *pView = v3dv_buffer_view_to_handle(view); return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyBufferView(VkDevice _device, VkBufferView bufferView, const VkAllocationCallbacks *pAllocator) diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_limits.h b/lib/mesa/src/broadcom/vulkan/v3dv_limits.h index a5ddb66e4..aaab1ce03 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_limits.h +++ b/lib/mesa/src/broadcom/vulkan/v3dv_limits.h @@ -44,7 +44,7 @@ #define MAX_INPUT_ATTACHMENTS 4 #define MAX_UNIFORM_BUFFERS 12 -#define MAX_STORAGE_BUFFERS 4 +#define MAX_STORAGE_BUFFERS 8 #define MAX_DYNAMIC_UNIFORM_BUFFERS 8 #define MAX_DYNAMIC_STORAGE_BUFFERS 4 @@ -53,21 +53,22 @@ #define MAX_RENDER_TARGETS 4 +#define MAX_MULTIVIEW_VIEW_COUNT 16 + /* These are tunable parameters in the HW design, but all the V3D * implementations agree. 
*/ -#define VC5_UIFCFG_BANKS 8 -#define VC5_UIFCFG_PAGE_SIZE 4096 -#define VC5_UIFCFG_XOR_VALUE (1 << 4) -#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS) -#define VC5_UBLOCK_SIZE 64 -#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE) -#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE) +#define V3D_UIFCFG_BANKS 8 +#define V3D_UIFCFG_PAGE_SIZE 4096 +#define V3D_UIFCFG_XOR_VALUE (1 << 4) +#define V3D_PAGE_CACHE_SIZE (V3D_UIFCFG_PAGE_SIZE * V3D_UIFCFG_BANKS) +#define V3D_UBLOCK_SIZE 64 +#define V3D_UIFBLOCK_SIZE (4 * V3D_UBLOCK_SIZE) +#define V3D_UIFBLOCK_ROW_SIZE (4 * V3D_UIFBLOCK_SIZE) -#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_UB_ROWS (V3D_UIFCFG_PAGE_SIZE / V3D_UIFBLOCK_ROW_SIZE) #define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1) -#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) +#define PAGE_CACHE_UB_ROWS (V3D_PAGE_CACHE_SIZE / V3D_UIFBLOCK_ROW_SIZE) #define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5) - #endif /* V3DV_LIMITS_H */ diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c b/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c index 0a38edb21..5555c690b 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c @@ -22,13 +22,175 @@ */ #include "v3dv_private.h" +#include "v3dv_meta_common.h" -#include "broadcom/cle/v3dx_pack.h" #include "compiler/nir/nir_builder.h" #include "vk_format_info.h" #include "util/u_pack_color.h" static void +get_hw_clear_color(struct v3dv_device *device, + const VkClearColorValue *color, + VkFormat fb_format, + VkFormat image_format, + uint32_t internal_type, + uint32_t internal_bpp, + uint32_t *hw_color) +{ + const uint32_t internal_size = 4 << internal_bpp; + + /* If the image format doesn't match the framebuffer format, then we are + * trying to clear an unsupported tlb format using a compatible + * format for the framebuffer. In this case, we want to make sure that + * we pack the clear value according to the original format semantics, + * not the compatible format. + */ + if (fb_format == image_format) { + v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size, + hw_color); + } else { + union util_color uc; + enum pipe_format pipe_image_format = + vk_format_to_pipe_format(image_format); + util_pack_color(color->float32, pipe_image_format, &uc); + memcpy(hw_color, uc.ui, internal_size); + } +} + +/* Returns true if the implementation is able to handle the case, false + * otherwise. 
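
/* Worked values for the renamed UIF constants above, which all V3D
 * implementations share: a UIF block is four 64-byte microtile blocks, a
 * block row is four UIF blocks, and the page cache spans eight banks of
 * 4 KiB pages.
 */
#define UIFCFG_BANKS       8
#define UIFCFG_PAGE_SIZE   4096
#define UBLOCK_SIZE        64
#define UIFBLOCK_SIZE      (4 * UBLOCK_SIZE)                  /* 256 B  */
#define UIFBLOCK_ROW_SIZE  (4 * UIFBLOCK_SIZE)                /* 1024 B */
#define PAGE_CACHE_SIZE    (UIFCFG_PAGE_SIZE * UIFCFG_BANKS)  /* 32 KiB */

#define PAGE_UB_ROWS           (UIFCFG_PAGE_SIZE / UIFBLOCK_ROW_SIZE) /* 4 */
#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1)              /* 6 */
#define PAGE_CACHE_UB_ROWS     (PAGE_CACHE_SIZE / UIFBLOCK_ROW_SIZE)  /* 32 */
#define PAGE_CACHE_MINUS_1_5_UB_ROWS \
   (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5)                      /* 26 */
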
+*/ +static bool +clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_image *image, + const VkClearValue *clear_value, + const VkImageSubresourceRange *range) +{ + const VkOffset3D origin = { 0, 0, 0 }; + VkFormat fb_format; + if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format)) + return false; + + uint32_t internal_type, internal_bpp; + v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects) + (fb_format, range->aspectMask, + &internal_type, &internal_bpp); + + union v3dv_clear_value hw_clear_value = { 0 }; + if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { + get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format, + image->vk.format, internal_type, internal_bpp, + &hw_clear_value.color[0]); + } else { + assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) || + (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)); + hw_clear_value.z = clear_value->depthStencil.depth; + hw_clear_value.s = clear_value->depthStencil.stencil; + } + + uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); + uint32_t min_level = range->baseMipLevel; + uint32_t max_level = range->baseMipLevel + level_count; + + /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively. + * Instead, we need to consider the full depth dimension of the image, which + * goes from 0 up to the level's depth extent. + */ + uint32_t min_layer; + uint32_t max_layer; + if (image->vk.image_type != VK_IMAGE_TYPE_3D) { + min_layer = range->baseArrayLayer; + max_layer = range->baseArrayLayer + + vk_image_subresource_layer_count(&image->vk, range); + } else { + min_layer = 0; + max_layer = 0; + } + + for (uint32_t level = min_level; level < max_level; level++) { + if (image->vk.image_type == VK_IMAGE_TYPE_3D) + max_layer = u_minify(image->vk.extent.depth, level); + + uint32_t width = u_minify(image->vk.extent.width, level); + uint32_t height = u_minify(image->vk.extent.height, level); + + struct v3dv_job *job = + v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); + + if (!job) + return true; + + v3dv_job_start_frame(job, width, height, max_layer, false, + 1, internal_bpp, + image->vk.samples > VK_SAMPLE_COUNT_1_BIT); + + struct v3dv_meta_framebuffer framebuffer; + v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, + internal_type, + &job->frame_tiling); + + v3dv_X(job->device, job_emit_binning_flush)(job); + + /* If this triggers it is an application bug: the spec requires + * that any aspects to clear are present in the image. 
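
/* Sketch of the layer-range handling in clear_image_tlb() above: array
 * images clear the requested [baseArrayLayer, baseArrayLayer+layerCount)
 * range, while 3D images ignore those fields (the spec fixes them to 0
 * and 1) and instead clear every depth slice of each level, a count that
 * shrinks as the mip chain is minified.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t
minify(uint32_t v, uint32_t level)
{
   uint32_t r = v >> level;
   return r ? r : 1;
}

static void
clear_layer_range(bool is_3d, uint32_t depth, uint32_t level,
                  uint32_t base_layer, uint32_t layer_count,
                  uint32_t *min_layer, uint32_t *max_layer)
{
   if (is_3d) {
      *min_layer = 0;
      *max_layer = minify(depth, level); /* depth slices at this level */
   } else {
      *min_layer = base_layer;
      *max_layer = base_layer + layer_count;
   }
}
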
+ */ + assert(range->aspectMask & image->vk.aspects); + + v3dv_X(job->device, meta_emit_clear_image_rcl) + (job, image, &framebuffer, &hw_clear_value, + range->aspectMask, min_layer, max_layer, level); + + v3dv_cmd_buffer_finish_job(cmd_buffer); + } + + return true; +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer, + VkImage _image, + VkImageLayout imageLayout, + const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ + V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); + V3DV_FROM_HANDLE(v3dv_image, image, _image); + + const VkClearValue clear_value = { + .color = *pColor, + }; + + for (uint32_t i = 0; i < rangeCount; i++) { + if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i])) + continue; + unreachable("Unsupported color clear."); + } +} + +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, + VkImage _image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ + V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); + V3DV_FROM_HANDLE(v3dv_image, image, _image); + + const VkClearValue clear_value = { + .depthStencil = *pDepthStencil, + }; + + for (uint32_t i = 0; i < rangeCount; i++) { + if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i])) + continue; + unreachable("Unsupported depth/stencil clear."); + } +} + +static void destroy_color_clear_pipeline(VkDevice _device, uint64_t pipeline, VkAllocationCallbacks *alloc) @@ -54,12 +216,20 @@ static VkResult create_color_clear_pipeline_layout(struct v3dv_device *device, VkPipelineLayout *pipeline_layout) { + /* FIXME: this is abusing a bit the API, since not all of our clear + * pipelines have a geometry shader. We could create 2 different pipeline + * layouts, but this works for us for now. + */ + VkPushConstantRange ranges[2] = { + { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 }, + { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 }, + }; + VkPipelineLayoutCreateInfo info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 0, - .pushConstantRangeCount = 1, - .pPushConstantRanges = - &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 }, + .pushConstantRangeCount = 2, + .pPushConstantRanges = ranges, }; return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device), @@ -70,12 +240,20 @@ static VkResult create_depth_clear_pipeline_layout(struct v3dv_device *device, VkPipelineLayout *pipeline_layout) { + /* FIXME: this is abusing a bit the API, since not all of our clear + * pipelines have a geometry shader. We could create 2 different pipeline + * layouts, but this works for us for now. 
+ */ + VkPushConstantRange ranges[2] = { + { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 }, + { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 }, + }; + VkPipelineLayoutCreateInfo info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 0, - .pushConstantRangeCount = 1, - .pPushConstantRanges = - &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 }, + .pushConstantRangeCount = 2, + .pPushConstantRanges = ranges }; return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device), @@ -178,6 +356,70 @@ get_clear_rect_vs() } static nir_shader * +get_clear_rect_gs(uint32_t push_constant_layer_base) +{ + /* FIXME: this creates a geometry shader that takes the index of a single + * layer to clear from push constants, so we need to emit a draw call for + * each layer that we want to clear. We could actually do better and have it + * take a range of layers and then emit one triangle per layer to clear, + * however, if we were to do this we would need to be careful not to exceed + * the maximum number of output vertices allowed in a geometry shader. + */ + const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, + "meta clear gs"); + nir_shader *nir = b.shader; + nir->info.inputs_read = 1ull << VARYING_SLOT_POS; + nir->info.outputs_written = (1ull << VARYING_SLOT_POS) | + (1ull << VARYING_SLOT_LAYER); + nir->info.gs.input_primitive = GL_TRIANGLES; + nir->info.gs.output_primitive = GL_TRIANGLE_STRIP; + nir->info.gs.vertices_in = 3; + nir->info.gs.vertices_out = 3; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 0x1; + + /* in vec4 gl_Position[3] */ + nir_variable *gs_in_pos = + nir_variable_create(b.shader, nir_var_shader_in, + glsl_array_type(glsl_vec4_type(), 3, 0), + "in_gl_Position"); + gs_in_pos->data.location = VARYING_SLOT_POS; + + /* out vec4 gl_Position */ + nir_variable *gs_out_pos = + nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), + "out_gl_Position"); + gs_out_pos->data.location = VARYING_SLOT_POS; + + /* out float gl_Layer */ + nir_variable *gs_out_layer = + nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(), + "out_gl_Layer"); + gs_out_layer->data.location = VARYING_SLOT_LAYER; + + /* Emit output triangle */ + for (uint32_t i = 0; i < 3; i++) { + /* gl_Position from shader input */ + nir_deref_instr *in_pos_i = + nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i); + nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i); + + /* gl_Layer from push constants */ + nir_ssa_def *layer = + nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), + .base = push_constant_layer_base, .range = 4); + nir_store_var(&b, gs_out_layer, layer, 0x1); + + nir_emit_vertex(&b, 0); + } + + nir_end_primitive(&b, 0); + + return nir; +} + +static nir_shader * get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format) { const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); @@ -224,6 +466,7 @@ create_pipeline(struct v3dv_device *device, uint32_t subpass_idx, uint32_t samples, struct nir_shader *vs_nir, + struct nir_shader *gs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, @@ -231,32 +474,41 @@ create_pipeline(struct v3dv_device *device, const VkPipelineLayout layout, VkPipeline *pipeline) { + VkPipelineShaderStageCreateInfo stages[3] = { 0 }; struct vk_shader_module vs_m; + struct vk_shader_module gs_m; 
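
/* For reference, a hand-written GLSL equivalent of get_clear_rect_gs()
 * above (the driver builds the shader directly in NIR, so this string is
 * purely illustrative): a passthrough triangle geometry shader that
 * stamps every vertex with a layer index read from push constants, which
 * is what routes each per-layer clear draw to the right framebuffer
 * layer. The offset of 16 matches the color-clear layout; depth clears
 * pass 4 instead.
 */
static const char *clear_rect_gs_glsl =
   "#version 450\n"
   "layout(triangles) in;\n"
   "layout(triangle_strip, max_vertices = 3) out;\n"
   "layout(push_constant) uniform pc { layout(offset = 16) int layer; };\n"
   "void main() {\n"
   "   for (int i = 0; i < 3; i++) {\n"
   "      gl_Position = gl_in[i].gl_Position;\n"
   "      gl_Layer = layer;\n"
   "      EmitVertex();\n"
   "   }\n"
   "   EndPrimitive();\n"
   "}\n";
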
struct vk_shader_module fs_m; + uint32_t stage_count = 0; v3dv_shader_module_internal_init(device, &vs_m, vs_nir); - if (fs_nir) - v3dv_shader_module_internal_init(device, &fs_m, fs_nir); + stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT; + stages[stage_count].module = vk_shader_module_to_handle(&vs_m); + stages[stage_count].pName = "main"; + stage_count++; + + if (gs_nir) { + v3dv_shader_module_internal_init(device, &gs_m, gs_nir); + stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT; + stages[stage_count].module = vk_shader_module_to_handle(&gs_m); + stages[stage_count].pName = "main"; + stage_count++; + } - VkPipelineShaderStageCreateInfo stages[2] = { - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = vk_shader_module_to_handle(&vs_m), - .pName = "main", - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = fs_nir ? vk_shader_module_to_handle(&fs_m) : VK_NULL_HANDLE, - .pName = "main", - }, - }; + if (fs_nir) { + v3dv_shader_module_internal_init(device, &fs_m, fs_nir); + stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT; + stages[stage_count].module = vk_shader_module_to_handle(&fs_m); + stages[stage_count].pName = "main"; + stage_count++; + } VkGraphicsPipelineCreateInfo info = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = fs_nir ? 2 : 1, + .stageCount = stage_count, .pStages = stages, .pVertexInputState = vi_state, @@ -342,11 +594,13 @@ create_color_clear_pipeline(struct v3dv_device *device, VkFormat format, uint32_t samples, uint32_t components, + bool is_layered, VkPipelineLayout pipeline_layout, VkPipeline *pipeline) { nir_shader *vs_nir = get_clear_rect_vs(); nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format); + nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL; const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -384,7 +638,7 @@ create_color_clear_pipeline(struct v3dv_device *device, return create_pipeline(device, pass, subpass_idx, samples, - vs_nir, fs_nir, + vs_nir, gs_nir, fs_nir, &vi_state, &ds_state, &cb_state, @@ -398,6 +652,7 @@ create_depth_clear_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t samples, + bool is_layered, VkPipelineLayout pipeline_layout, VkPipeline *pipeline) { @@ -407,6 +662,7 @@ create_depth_clear_pipeline(struct v3dv_device *device, nir_shader *vs_nir = get_clear_rect_vs(); nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL; + nir_shader *gs_nir = is_layered ? 
get_clear_rect_gs(4) : NULL; const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -441,7 +697,7 @@ create_depth_clear_pipeline(struct v3dv_device *device, return create_pipeline(device, pass, subpass_idx, samples, - vs_nir, fs_nir, + vs_nir, gs_nir, fs_nir, &vi_state, &ds_state, &cb_state, @@ -499,7 +755,8 @@ static inline uint64_t get_color_clear_pipeline_cache_key(uint32_t rt_idx, VkFormat format, uint32_t samples, - uint32_t components) + uint32_t components, + bool is_layered) { assert(rt_idx < V3D_MAX_DRAW_BUFFERS); @@ -518,6 +775,9 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx, key |= ((uint64_t) components) << bit_offset; bit_offset += 4; + key |= (is_layered ? 1ull : 0ull) << bit_offset; + bit_offset += 1; + assert(bit_offset <= 64); return key; } @@ -525,7 +785,8 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx, static inline uint64_t get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects, VkFormat format, - uint32_t samples) + uint32_t samples, + bool is_layered) { uint64_t key = 0; uint32_t bit_offset = 0; @@ -544,6 +805,9 @@ get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects, key |= ((uint64_t) has_stencil) << bit_offset; bit_offset++;; + key |= (is_layered ? 1ull : 0ull) << bit_offset; + bit_offset += 1; + assert(bit_offset <= 64); return key; } @@ -557,6 +821,7 @@ get_color_clear_pipeline(struct v3dv_device *device, VkFormat format, uint32_t samples, uint32_t components, + bool is_layered, struct v3dv_meta_color_clear_pipeline **pipeline) { assert(vk_format_is_color(format)); @@ -580,8 +845,8 @@ get_color_clear_pipeline(struct v3dv_device *device, uint64_t key; if (can_cache_pipeline) { - key = - get_color_clear_pipeline_cache_key(rt_idx, format, samples, components); + key = get_color_clear_pipeline_cache_key(rt_idx, format, samples, + components, is_layered); mtx_lock(&device->meta.mtx); struct hash_entry *entry = _mesa_hash_table_search(device->meta.color_clear.cache, &key); @@ -621,6 +886,7 @@ get_color_clear_pipeline(struct v3dv_device *device, format, samples, components, + is_layered, device->meta.color_clear.p_layout, &(*pipeline)->pipeline); if (result != VK_SUCCESS) @@ -660,6 +926,7 @@ get_depth_clear_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, uint32_t subpass_idx, uint32_t attachment_idx, + bool is_layered, struct v3dv_meta_depth_clear_pipeline **pipeline) { assert(subpass_idx < pass->subpass_count); @@ -673,7 +940,7 @@ get_depth_clear_pipeline(struct v3dv_device *device, assert(vk_format_is_depth_or_stencil(format)); const uint64_t key = - get_depth_clear_pipeline_cache_key(aspects, format, samples); + get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered); mtx_lock(&device->meta.mtx); struct hash_entry *entry = _mesa_hash_table_search(device->meta.depth_clear.cache, &key); @@ -696,6 +963,7 @@ get_depth_clear_pipeline(struct v3dv_device *device, pass, subpass_idx, samples, + is_layered, device->meta.depth_clear.p_layout, &(*pipeline)->pipeline); if (result != VK_SUCCESS) @@ -722,272 +990,15 @@ fail: return result; } -static VkFormat -get_color_format_for_depth_stencil_format(VkFormat format) -{ - /* For single depth/stencil aspect formats, we just choose a compatible - * 1 channel format, but for combined depth/stencil we want an RGBA format - * so we can specify the channels we want to write. 
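
/* Sketch of the pipeline-cache keying above: each parameter that selects
 * a distinct clear pipeline is packed into a fixed bit range of a 64-bit
 * key (the new is_layered flag takes one extra bit), and the key indexes
 * a hash table guarded by the meta mutex. Field widths here are
 * illustrative; the driver asserts the running offset stays within 64.
 */
#include <stdbool.h>
#include <stdint.h>

static uint64_t
color_clear_key(uint32_t rt_idx, uint32_t vk_format, uint32_t samples,
                uint32_t components, bool is_layered)
{
   uint64_t key = 0;
   uint32_t off = 0;

   key |= (uint64_t)rt_idx << off;           off += 2;  /* up to 4 RTs   */
   key |= (uint64_t)vk_format << off;        off += 32;
   key |= (uint64_t)samples << off;          off += 4;
   key |= (uint64_t)components << off;       off += 4;  /* RGBA mask     */
   key |= (is_layered ? 1ull : 0ull) << off; off += 1;

   /* off <= 64 must hold, mirroring the driver's assert */
   return key;
}
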
- */ - switch (format) { - case VK_FORMAT_D16_UNORM: - return VK_FORMAT_R16_UINT; - case VK_FORMAT_D32_SFLOAT: - return VK_FORMAT_R32_SFLOAT; - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: - return VK_FORMAT_R8G8B8A8_UINT; - default: - unreachable("Unsupported depth/stencil format"); - }; -} - -/** - * Emits a scissored quad in the clear color, however, unlike the subpass - * versions, this creates its own framebuffer setup with a single color - * attachment, and therefore spanws new jobs, making it much slower than the - * subpass version. - * - * This path is only used when we have clears on layers other than the - * base layer in a framebuffer attachment, since we don't currently - * support any form of layered rendering that would allow us to implement - * this in the subpass version. - * - * Notice this can also handle depth/stencil formats by rendering to the - * depth/stencil target using a compatible color format. - */ -static void -emit_color_clear_rect(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t attachment_idx, - VkFormat rt_format, - uint32_t rt_samples, - uint32_t rt_components, - VkClearColorValue clear_color, - const VkClearRect *rect) -{ - assert(cmd_buffer->state.pass); - struct v3dv_device *device = cmd_buffer->device; - struct v3dv_render_pass *pass = cmd_buffer->state.pass; - - assert(attachment_idx != VK_ATTACHMENT_UNUSED && - attachment_idx < pass->attachment_count); - - struct v3dv_meta_color_clear_pipeline *pipeline = NULL; - VkResult result = - get_color_clear_pipeline(device, - NULL, 0, /* Not using current subpass */ - 0, attachment_idx, - rt_format, rt_samples, rt_components, - &pipeline); - if (result != VK_SUCCESS) { - if (result == VK_ERROR_OUT_OF_HOST_MEMORY) - v3dv_flag_oom(cmd_buffer, NULL); - return; - } - assert(pipeline && pipeline->pipeline && pipeline->pass); - - /* Since we are not emitting the draw call in the current subpass we should - * be caching the clear pipeline and we don't have to take care of destorying - * it below. - */ - assert(pipeline->cached); - - /* Store command buffer state for the current subpass before we interrupt - * it to emit the color clear pass and then finish the job for the - * interrupted subpass. - */ - v3dv_cmd_buffer_meta_state_push(cmd_buffer, false); - v3dv_cmd_buffer_finish_job(cmd_buffer); - - struct v3dv_framebuffer *subpass_fb = - v3dv_framebuffer_from_handle(cmd_buffer->state.meta.framebuffer); - VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer); - VkDevice device_handle = v3dv_device_to_handle(cmd_buffer->device); - - /* If we are clearing a depth/stencil attachment as a color attachment - * then we need to configure the framebuffer to the compatible color - * format. - */ - const struct v3dv_image_view *att_iview = - subpass_fb->attachments[attachment_idx]; - const bool is_depth_or_stencil = - vk_format_is_depth_or_stencil(att_iview->vk_format); - - /* Emit the pass for each attachment layer, which creates a framebuffer - * for each selected layer of the attachment and then renders a scissored - * quad in the clear color. - */ - uint32_t dirty_dynamic_state = 0; - for (uint32_t i = 0; i < rect->layerCount; i++) { - VkImageViewCreateInfo fb_layer_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = v3dv_image_to_handle((struct v3dv_image *)att_iview->image), - .viewType = - v3dv_image_type_to_view_type(att_iview->image->type), - .format = is_depth_or_stencil ? 
rt_format : att_iview->vk_format, - .subresourceRange = { - .aspectMask = is_depth_or_stencil ? VK_IMAGE_ASPECT_COLOR_BIT : - att_iview->aspects, - .baseMipLevel = att_iview->base_level, - .levelCount = att_iview->max_level - att_iview->base_level + 1, - .baseArrayLayer = att_iview->first_layer + rect->baseArrayLayer + i, - .layerCount = 1, - }, - }; - VkImageView fb_attachment; - result = v3dv_CreateImageView(v3dv_device_to_handle(device), - &fb_layer_view_info, - &device->vk.alloc, &fb_attachment); - if (result != VK_SUCCESS) - goto fail; - - v3dv_cmd_buffer_add_private_obj( - cmd_buffer, (uintptr_t)fb_attachment, - (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView); - - VkFramebufferCreateInfo fb_info = { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .renderPass = v3dv_render_pass_to_handle(pass), - .attachmentCount = 1, - .pAttachments = &fb_attachment, - .width = subpass_fb->width, - .height = subpass_fb->height, - .layers = 1, - }; - - VkFramebuffer fb; - result = v3dv_CreateFramebuffer(device_handle, &fb_info, - &cmd_buffer->device->vk.alloc, &fb); - if (result != VK_SUCCESS) - goto fail; - - v3dv_cmd_buffer_add_private_obj( - cmd_buffer, (uintptr_t)fb, - (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer); - - VkRenderPassBeginInfo rp_info = { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = pipeline->pass, - .framebuffer = fb, - .renderArea = { - .offset = { rect->rect.offset.x, rect->rect.offset.y }, - .extent = { rect->rect.extent.width, rect->rect.extent.height } }, - .clearValueCount = 0, - }; - - v3dv_CmdBeginRenderPass(cmd_buffer_handle, &rp_info, - VK_SUBPASS_CONTENTS_INLINE); - - struct v3dv_job *job = cmd_buffer->state.job; - if (!job) - goto fail; - job->is_subpass_continue = true; - - v3dv_CmdPushConstants(cmd_buffer_handle, - device->meta.color_clear.p_layout, - VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, - &clear_color); - - v3dv_CmdBindPipeline(cmd_buffer_handle, - VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline->pipeline); - - const VkViewport viewport = { - .x = rect->rect.offset.x, - .y = rect->rect.offset.y, - .width = rect->rect.extent.width, - .height = rect->rect.extent.height, - .minDepth = 0.0f, - .maxDepth = 1.0f - }; - v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport); - v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rect->rect); - - v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); - - v3dv_CmdEndRenderPass(cmd_buffer_handle); - } - - /* The clear pipeline sets viewport and scissor state, so we need - * to restore it - */ - dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR; - -fail: - v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true); -} - -static void -emit_ds_clear_rect(struct v3dv_cmd_buffer *cmd_buffer, - VkImageAspectFlags aspects, - uint32_t attachment_idx, - VkClearDepthStencilValue clear_ds, - const VkClearRect *rect) -{ - assert(cmd_buffer->state.pass); - assert(attachment_idx != VK_ATTACHMENT_UNUSED); - assert(attachment_idx < cmd_buffer->state.pass->attachment_count); - - VkFormat format = - cmd_buffer->state.pass->attachments[attachment_idx].desc.format; - assert ((aspects & ~vk_format_aspects(format)) == 0); - - uint32_t samples = - cmd_buffer->state.pass->attachments[attachment_idx].desc.samples; - - enum pipe_format pformat = vk_format_to_pipe_format(format); - VkClearColorValue clear_color; - uint32_t clear_zs = - util_pack_z_stencil(pformat, clear_ds.depth, clear_ds.stencil); - - /* We implement depth/stencil clears by turning them into color clears - * 
with a compatible color format. - */ - VkFormat color_format = get_color_format_for_depth_stencil_format(format); - - uint32_t comps; - if (color_format == VK_FORMAT_R8G8B8A8_UINT) { - /* We are clearing a D24 format so we need to select the channels that we - * are being asked to clear to avoid clearing aspects that should be - * preserved. Also, the hardware uses the MSB channels to store the D24 - * component, so we need to shift the components in the clear value to - * match that. - */ - comps = 0; - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - comps |= VK_COLOR_COMPONENT_R_BIT; - clear_color.uint32[0] = clear_zs >> 24; - } - if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) { - comps |= VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT; - clear_color.uint32[1] = (clear_zs >> 0) & 0xff; - clear_color.uint32[2] = (clear_zs >> 8) & 0xff; - clear_color.uint32[3] = (clear_zs >> 16) & 0xff; - } - } else { - /* For anything else we use a single component format */ - comps = VK_COLOR_COMPONENT_R_BIT; - clear_color.uint32[0] = clear_zs; - } - - emit_color_clear_rect(cmd_buffer, attachment_idx, - color_format, samples, comps, - clear_color, rect); -} - -/* Emits a scissored quad in the clear color. - * - * This path only works for clears to the base layer in the framebuffer, since - * we don't currently support any form of layered rendering. - */ +/* Emits a scissored quad in the clear color */ static void emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_render_pass *pass, struct v3dv_subpass *subpass, uint32_t rt_idx, const VkClearColorValue *clear_color, + bool is_layered, + bool all_rects_same_layers, uint32_t rect_count, const VkClearRect *rects) { @@ -1016,6 +1027,7 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, format, samples, components, + is_layered, &pipeline); if (result != VK_SUCCESS) { if (result == VK_ERROR_OUT_OF_HOST_MEMORY) @@ -1040,7 +1052,6 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR; for (uint32_t i = 0; i < rect_count; i++) { - assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1); const VkViewport viewport = { .x = rects[i].rect.offset.x, .y = rects[i].rect.offset.y, @@ -1051,7 +1062,20 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, }; v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport); v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect); - v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); + + if (is_layered) { + for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount; + layer_offset++) { + uint32_t layer = rects[i].baseArrayLayer + layer_offset; + v3dv_CmdPushConstants(cmd_buffer_handle, + cmd_buffer->device->meta.depth_clear.p_layout, + VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer); + v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); + } + } else { + assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1); + v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); + } } /* Subpass pipelines can't be cached because they include a reference to the @@ -1068,9 +1092,6 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth * and the stencil aspect by using stencil testing. - * - * This path only works for clears to the base layer in the framebuffer, since - * we don't currently support any form of layered rendering. 
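emit_ds_clear_rect above turns a depth/stencil clear into a color clear: it packs the clear values into a raw Z/S word with util_pack_z_stencil and then scatters the bytes across RGBA8 channels, masking the writes so only the requested aspect is touched. A standalone sketch of just that remapping, mirroring the shifts in the code (stencil byte in bits 24-31 of the packed word, depth in bits 0-23):

#include <stdbool.h>
#include <stdint.h>

/* Scatter a packed D24S8 clear word across RGBA8 channels. The hardware
 * keeps the 24 depth bits in the MSB channels, so the stencil byte lands
 * in R and the three depth bytes in G/B/A.
 */
static void
scatter_d24s8_clear(uint32_t clear_zs, bool depth, bool stencil,
                    uint32_t rgba[4], uint32_t *component_mask)
{
   *component_mask = 0;
   if (stencil) {
      *component_mask |= 0x1;             /* R */
      rgba[0] = clear_zs >> 24;
   }
   if (depth) {
      *component_mask |= 0x2 | 0x4 | 0x8; /* G, B, A */
      rgba[1] = (clear_zs >> 0) & 0xff;
      rgba[2] = (clear_zs >> 8) & 0xff;
      rgba[3] = (clear_zs >> 16) & 0xff;
   }
}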
*/ static void emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, @@ -1078,6 +1099,8 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_subpass *subpass, VkImageAspectFlags aspects, const VkClearDepthStencilValue *clear_ds, + bool is_layered, + bool all_rects_same_layers, uint32_t rect_count, const VkClearRect *rects) { @@ -1094,6 +1117,7 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, pass, cmd_buffer->state.subpass_idx, attachment_idx, + is_layered, &pipeline); if (result != VK_SUCCESS) { if (result == VK_ERROR_OUT_OF_HOST_MEMORY) @@ -1130,7 +1154,6 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, } for (uint32_t i = 0; i < rect_count; i++) { - assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1); const VkViewport viewport = { .x = rects[i].rect.offset.x, .y = rects[i].rect.offset.y, @@ -1141,485 +1164,46 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer, }; v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport); v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect); - v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); - } - - v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false); -} - -static void -emit_tlb_clear_store(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl *cl, - uint32_t attachment_idx, - uint32_t layer, - uint32_t buffer) -{ - const struct v3dv_image_view *iview = - cmd_buffer->state.framebuffer->attachments[attachment_idx]; - const struct v3dv_image *image = iview->image; - const struct v3d_resource_slice *slice = &image->slices[iview->base_level]; - uint32_t layer_offset = v3dv_layer_offset(image, - iview->base_level, - iview->first_layer + layer); - - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = buffer; - store.address = v3dv_cl_address(image->mem->bo, layer_offset); - store.clear_buffer_being_stored = false; - - store.output_image_format = iview->format->rt_type; - store.r_b_swap = iview->swap_rb; - store.memory_format = slice->tiling; - - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - store.height_in_ub_or_stride = - slice->padded_height_of_output_image_in_uif_blocks; - } else if (slice->tiling == VC5_TILING_RASTER) { - store.height_in_ub_or_stride = slice->stride; - } - - if (image->samples > VK_SAMPLE_COUNT_1_BIT) - store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else - store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - } -} - -static void -emit_tlb_clear_stores(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_cl *cl, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t layer) -{ - struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - const struct v3dv_subpass *subpass = - &state->pass->subpasses[state->subpass_idx]; - - bool has_stores = false; - for (uint32_t i = 0; i < attachment_count; i++) { - uint32_t attachment_idx; - uint32_t buffer; - if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)) { - attachment_idx = subpass->ds_attachment.attachment; - buffer = v3dv_zs_buffer_from_aspect_bits(attachments[i].aspectMask); + if (is_layered) { + for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount; + layer_offset++) { + uint32_t layer = rects[i].baseArrayLayer + layer_offset; + v3dv_CmdPushConstants(cmd_buffer_handle, + cmd_buffer->device->meta.depth_clear.p_layout, + VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer); + v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); + } } else { - uint32_t 
rt_idx = attachments[i].colorAttachment; - attachment_idx = subpass->color_attachments[rt_idx].attachment; - buffer = RENDER_TARGET_0 + rt_idx; + assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1); + v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0); } - - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - - has_stores = true; - emit_tlb_clear_store(cmd_buffer, cl, attachment_idx, layer, buffer); - } - - if (!has_stores) { - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } - } -} - -static void -emit_tlb_clear_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t layer) -{ - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(cmd_buffer, NULL); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - cl_emit(cl, END_OF_LOADS, end); /* Nothing to load */ - - cl_emit(cl, PRIM_LIST_FORMAT, fmt) { - fmt.primitive_type = LIST_TRIANGLES; } - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - emit_tlb_clear_stores(cmd_buffer, cl, attachment_count, attachments, layer); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } + v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false); } static void -emit_tlb_clear_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t layer) +gather_layering_info(uint32_t rect_count, const VkClearRect *rects, + bool *is_layered, bool *all_rects_same_layers) { - const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - const struct v3dv_framebuffer *framebuffer = state->framebuffer; - - struct v3dv_job *job = cmd_buffer->state.job; - struct v3dv_cl *rcl = &job->rcl; - - const struct v3dv_frame_tiling *tiling = &job->frame_tiling; - - const uint32_t tile_alloc_offset = - 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y; - cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { - list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); - } - - cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { - config.number_of_bin_tile_lists = 1; - config.total_frame_width_in_tiles = tiling->draw_tiles_x; - config.total_frame_height_in_tiles = tiling->draw_tiles_y; - - config.supertile_width_in_tiles = tiling->supertile_width; - config.supertile_height_in_tiles = tiling->supertile_height; - - config.total_frame_width_in_supertiles = - tiling->frame_width_in_supertiles; - config.total_frame_height_in_supertiles = - tiling->frame_height_in_supertiles; - } - - /* Emit the clear and also the workaround for GFXH-1742 */ - for (int i = 0; i < 2; i++) { - cl_emit(rcl, TILE_COORDINATES, coords); - cl_emit(rcl, END_OF_LOADS, end); - cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } - if (i == 0) { - cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = true; - clear.clear_all_render_targets = true; - } + *all_rects_same_layers = true; + + uint32_t min_layer = rects[0].baseArrayLayer; + uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1; + for (uint32_t i = 1; i < rect_count; i++) { + if (rects[i].baseArrayLayer != rects[i - 
1].baseArrayLayer || + rects[i].layerCount != rects[i - 1].layerCount) { + *all_rects_same_layers = false; + min_layer = MIN2(min_layer, rects[i].baseArrayLayer); + max_layer = MAX2(max_layer, rects[i].baseArrayLayer + + rects[i].layerCount - 1); } - cl_emit(rcl, END_OF_TILE_MARKER, end); } - cl_emit(rcl, FLUSH_VCD_CACHE, flush); - - emit_tlb_clear_per_tile_rcl(cmd_buffer, attachment_count, attachments, layer); - - uint32_t supertile_w_in_pixels = - tiling->tile_width * tiling->supertile_width; - uint32_t supertile_h_in_pixels = - tiling->tile_height * tiling->supertile_height; - - const uint32_t max_render_x = framebuffer->width - 1; - const uint32_t max_render_y = framebuffer->height - 1; - const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels; - const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels; - - for (int y = 0; y <= max_y_supertile; y++) { - for (int x = 0; x <= max_x_supertile; x++) { - cl_emit(rcl, SUPERTILE_COORDINATES, coords) { - coords.column_number_in_supertiles = x; - coords.row_number_in_supertiles = y; - } - } - } + *is_layered = !(min_layer == 0 && max_layer == 0); } -static void -emit_tlb_clear_job(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t base_layer, - uint32_t layer_count) -{ - const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; - const struct v3dv_framebuffer *framebuffer = state->framebuffer; - const struct v3dv_subpass *subpass = - &state->pass->subpasses[state->subpass_idx]; - struct v3dv_job *job = cmd_buffer->state.job; - assert(job); - - /* Check how many color attachments we have and also if we have a - * depth/stencil attachment. - */ - uint32_t color_attachment_count = 0; - VkClearAttachment color_attachments[4]; - const VkClearDepthStencilValue *ds_clear_value = NULL; - uint8_t internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F; - for (uint32_t i = 0; i < attachment_count; i++) { - if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)) { - assert(subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED); - ds_clear_value = &attachments[i].clearValue.depthStencil; - struct v3dv_render_pass_attachment *att = - &state->pass->attachments[subpass->ds_attachment.attachment]; - internal_depth_type = v3dv_get_internal_depth_type(att->desc.format); - } else if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - color_attachments[color_attachment_count++] = attachments[i]; - } - } - - uint8_t internal_bpp; - bool msaa; - v3dv_framebuffer_compute_internal_bpp_msaa(framebuffer, subpass, - &internal_bpp, &msaa); - - v3dv_job_start_frame(job, - framebuffer->width, - framebuffer->height, - framebuffer->layers, - color_attachment_count, - internal_bpp, msaa); - - struct v3dv_cl *rcl = &job->rcl; - v3dv_cl_ensure_space_with_branch(rcl, 200 + - layer_count * 256 * - cl_packet_length(SUPERTILE_COORDINATES)); - v3dv_return_if_oom(cmd_buffer, NULL); - - const struct v3dv_frame_tiling *tiling = &job->frame_tiling; - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { - config.early_z_disable = true; - config.image_width_pixels = framebuffer->width; - config.image_height_pixels = framebuffer->height; - config.number_of_render_targets = MAX2(color_attachment_count, 1); - config.multisample_mode_4x = false; /* FIXME */ - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; - config.internal_depth_type = internal_depth_type; - } - - for (uint32_t i = 0; i < color_attachment_count; i++) { - uint32_t 
rt_idx = color_attachments[i].colorAttachment; - uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment; - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - - const struct v3dv_render_pass_attachment *attachment = - &state->pass->attachments[attachment_idx]; - - uint32_t internal_type, internal_bpp, internal_size; - const struct v3dv_format *format = - v3dv_get_format(attachment->desc.format); - v3dv_get_internal_type_bpp_for_output_format(format->rt_type, - &internal_type, - &internal_bpp); - internal_size = 4 << internal_bpp; - - uint32_t clear_color[4] = { 0 }; - v3dv_get_hw_clear_color(&color_attachments[i].clearValue.color, - internal_type, - internal_size, - clear_color); - - struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; - const struct v3dv_image *image = iview->image; - const struct v3d_resource_slice *slice = &image->slices[iview->base_level]; - - uint32_t clear_pad = 0; - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - int uif_block_height = v3d_utile_height(image->cpp) * 2; - - uint32_t implicit_padded_height = - align(framebuffer->height, uif_block_height) / uif_block_height; - - if (slice->padded_height_of_output_image_in_uif_blocks - - implicit_padded_height >= 15) { - clear_pad = slice->padded_height_of_output_image_in_uif_blocks; - } - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { - clear.clear_color_low_32_bits = clear_color[0]; - clear.clear_color_next_24_bits = clear_color[1] & 0xffffff; - clear.render_target_number = i; - }; - - if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((clear_color[1] >> 24) | (clear_color[2] << 8)); - clear.clear_color_mid_high_24_bits = - ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8)); - clear.render_target_number = i; - }; - } - - if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = clear_color[3] >> 16; - clear.render_target_number = i; - }; - } - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - v3dv_render_pass_setup_render_target(cmd_buffer, 0, - &rt.render_target_0_internal_bpp, - &rt.render_target_0_internal_type, - &rt.render_target_0_clamp); - v3dv_render_pass_setup_render_target(cmd_buffer, 1, - &rt.render_target_1_internal_bpp, - &rt.render_target_1_internal_type, - &rt.render_target_1_clamp); - v3dv_render_pass_setup_render_target(cmd_buffer, 2, - &rt.render_target_2_internal_bpp, - &rt.render_target_2_internal_type, - &rt.render_target_2_clamp); - v3dv_render_pass_setup_render_target(cmd_buffer, 3, - &rt.render_target_3_internal_bpp, - &rt.render_target_3_internal_type, - &rt.render_target_3_clamp); - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = ds_clear_value ? ds_clear_value->depth : 1.0f; - clear.stencil_clear_value = ds_clear_value ? 
ds_clear_value->stencil : 0; - }; - - cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { - init.use_auto_chained_tile_lists = true; - init.size_of_first_block_in_chained_tile_lists = - TILE_ALLOCATION_BLOCK_SIZE_64B; - } - - for (int layer = base_layer; layer < base_layer + layer_count; layer++) { - emit_tlb_clear_layer_rcl(cmd_buffer, - attachment_count, - attachments, - layer); - } - - cl_emit(rcl, END_OF_RENDERING, end); -} - -static void -emit_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t base_layer, - uint32_t layer_count) -{ - struct v3dv_job *job = - v3dv_cmd_buffer_start_job(cmd_buffer, cmd_buffer->state.subpass_idx, - V3DV_JOB_TYPE_GPU_CL); - - /* vkCmdClearAttachments runs inside a render pass */ - job->is_subpass_continue = true; - - emit_tlb_clear_job(cmd_buffer, - attachment_count, - attachments, - base_layer, layer_count); - - v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx); -} - -static bool -is_subrect(const VkRect2D *r0, const VkRect2D *r1) -{ - return r0->offset.x <= r1->offset.x && - r0->offset.y <= r1->offset.y && - r0->offset.x + r0->extent.width >= r1->offset.x + r1->extent.width && - r0->offset.y + r0->extent.height >= r1->offset.y + r1->extent.height; -} - -static bool -can_use_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t rect_count, - const VkClearRect* rects) -{ - const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; - - const VkRect2D *render_area = &cmd_buffer->state.render_area; - - /* Check if we are clearing a single region covering the entire framebuffer - * and that we are not constrained by the current render area. - * - * From the Vulkan 1.0 spec: - * - * "The vkCmdClearAttachments command is not affected by the bound - * pipeline state." - * - * So we can ignore scissor and viewport state for this check. - */ - const VkRect2D fb_rect = { - { 0, 0 }, - { framebuffer->width, framebuffer->height } - }; - - return rect_count == 1 && - is_subrect(&rects[0].rect, &fb_rect) && - is_subrect(render_area, &fb_rect); -} - -static void -handle_deferred_clear_attachments(struct v3dv_cmd_buffer *cmd_buffer, - uint32_t attachmentCount, - const VkClearAttachment *pAttachments, - uint32_t rectCount, - const VkClearRect *pRects) -{ - /* Finish the current job */ - v3dv_cmd_buffer_finish_job(cmd_buffer); - - /* Add a deferred clear attachments job right after that we will process - * when we execute this secondary command buffer into a primary. 
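handle_deferred_clear_attachments records the vkCmdClearAttachments arguments into a CPU job for later replay. The deep copies matter: pAttachments and pRects are only guaranteed to be valid for the duration of the call, while the job executes when the secondary command buffer runs inside a primary. A reduced sketch of the record step, using simplified stand-in types rather than the driver's own job structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>

struct deferred_clear_job {
   uint32_t attachment_count;
   VkClearAttachment attachments[5]; /* 4 color + depth/stencil */
   uint32_t rect_count;
   VkClearRect *rects;               /* heap copy, freed with the job */
};

static bool
record_deferred_clear(struct deferred_clear_job *job,
                      uint32_t attachment_count,
                      const VkClearAttachment *attachments,
                      uint32_t rect_count, const VkClearRect *rects)
{
   job->rects = malloc(sizeof(VkClearRect) * rect_count);
   if (!job->rects)
      return false; /* caller flags out-of-memory and aborts recording */

   job->attachment_count = attachment_count;
   memcpy(job->attachments, attachments,
          sizeof(VkClearAttachment) * attachment_count);
   job->rect_count = rect_count;
   memcpy(job->rects, rects, sizeof(VkClearRect) * rect_count);
   return true;
}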
- */ - struct v3dv_job *job = - v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, - V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS, - cmd_buffer, - cmd_buffer->state.subpass_idx); - v3dv_return_if_oom(cmd_buffer, NULL); - - job->cpu.clear_attachments.rects = - vk_alloc(&cmd_buffer->device->vk.alloc, - sizeof(VkClearRect) * rectCount, 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!job->cpu.clear_attachments.rects) { - v3dv_flag_oom(cmd_buffer, NULL); - return; - } - - job->cpu.clear_attachments.attachment_count = attachmentCount; - memcpy(job->cpu.clear_attachments.attachments, pAttachments, - sizeof(VkClearAttachment) * attachmentCount); - - job->cpu.clear_attachments.rect_count = rectCount; - memcpy(job->cpu.clear_attachments.rects, pRects, - sizeof(VkClearRect) * rectCount); - - list_addtail(&job->list_link, &cmd_buffer->jobs); - - /* Resume the subpass so we can continue recording commands */ - v3dv_cmd_buffer_subpass_resume(cmd_buffer, - cmd_buffer->state.subpass_idx); -} - -static bool -all_clear_rects_in_base_layer(uint32_t rect_count, const VkClearRect *rects) -{ - for (uint32_t i = 0; i < rect_count; i++) { - if (rects[i].baseArrayLayer != 0 || rects[i].layerCount != 1) - return false; - } - return true; -} - -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, @@ -1631,117 +1215,31 @@ v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer, /* We can only clear attachments in the current subpass */ assert(attachmentCount <= 5); /* 4 color + D/S */ - /* Clear attachments may clear multiple layers of the framebuffer, which - * currently requires that we emit multiple jobs (one per layer) and - * therefore requires that we have the framebuffer information available - * to select the destination layers. - * - * For secondary command buffers the framebuffer state may not be available - * until they are executed inside a primary command buffer, so in that case - * we need to defer recording of the command until that moment. - * - * FIXME: once we add support for geometry shaders in the driver we could - * avoid emitting a job per layer to implement this by always using the clear - * rect path below with a passthrough geometry shader to select the layer to - * clear. If we did that we would not need to special case secondary command - * buffers here and we could ensure that any secondary command buffer in a - * render pass only has on job with a partial CL, which would simplify things - * quite a bit. - */ - if (!cmd_buffer->state.framebuffer) { - assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); - handle_deferred_clear_attachments(cmd_buffer, - attachmentCount, pAttachments, - rectCount, pRects); - return; - } - - assert(cmd_buffer->state.framebuffer); - struct v3dv_render_pass *pass = cmd_buffer->state.pass; assert(cmd_buffer->state.subpass_idx < pass->subpass_count); struct v3dv_subpass *subpass = &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx]; - /* First we try to handle this by emitting a clear rect inside the - * current job for this subpass. This should be optimal but this method - * cannot handle clearing layers other than the base layer, since we don't - * support any form of layered rendering yet. 
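The removed FIXME above anticipated exactly the approach this patch implements: with a passthrough geometry shader selecting the destination layer, the per-rect loops no longer need to spawn one job per layer. A sketch of that draw loop using core Vulkan entry points; the push-constant offset is a parameter here because the color and depth/stencil layouts place the layer word at different offsets (16 and 4 respectively in this diff):

#include <vulkan/vulkan.h>

static void
draw_layered_clear_rect(VkCommandBuffer cmd, VkPipelineLayout layout,
                        uint32_t gs_pc_offset, const VkClearRect *rect)
{
   for (uint32_t i = 0; i < rect->layerCount; i++) {
      uint32_t layer = rect->baseArrayLayer + i;
      /* The passthrough geometry shader reads this word and uses it to
       * redirect the quad to the selected framebuffer layer.
       */
      vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_GEOMETRY_BIT,
                         gs_pc_offset, sizeof(layer), &layer);
      vkCmdDraw(cmd, 4, 1, 0, 0); /* scissored full-quad strip */
   }
}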
- */ - if (all_clear_rects_in_base_layer(rectCount, pRects)) { - for (uint32_t i = 0; i < attachmentCount; i++) { - if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - emit_subpass_color_clear_rects(cmd_buffer, pass, subpass, - pAttachments[i].colorAttachment, - &pAttachments[i].clearValue.color, - rectCount, pRects); - } else { - emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass, - pAttachments[i].aspectMask, - &pAttachments[i].clearValue.depthStencil, - rectCount, pRects); - } - } - return; - } - - perf_debug("Falling back to slow path for vkCmdClearAttachments due to " - "clearing layers other than the base array layer.\n"); - - /* If we can't handle this as a draw call inside the current job then we - * will have to spawn jobs for the clears, which will be slow. In that case, - * try to use the TLB to clear if possible. - */ - if (can_use_tlb_clear(cmd_buffer, rectCount, pRects)) { - emit_tlb_clear(cmd_buffer, attachmentCount, pAttachments, - pRects[0].baseArrayLayer, pRects[0].layerCount); - return; - } - - /* Otherwise, fall back to drawing rects with the clear value using a - * separate job. This is the slowest path. + /* Emit a clear rect inside the current job for this subpass. For layered + * framebuffers, we use a geometry shader to redirect clears to the + * appropriate layers. */ + bool is_layered, all_rects_same_layers; + gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers); for (uint32_t i = 0; i < attachmentCount; i++) { - uint32_t attachment_idx = VK_ATTACHMENT_UNUSED; - - if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - uint32_t rt_idx = pAttachments[i].colorAttachment; - attachment_idx = subpass->color_attachments[rt_idx].attachment; - } else if (pAttachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)) { - attachment_idx = subpass->ds_attachment.attachment; - } - - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - const uint32_t components = VK_COLOR_COMPONENT_R_BIT | - VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | - VK_COLOR_COMPONENT_A_BIT; - const uint32_t samples = - cmd_buffer->state.pass->attachments[attachment_idx].desc.samples; - const VkFormat format = - cmd_buffer->state.pass->attachments[attachment_idx].desc.format; - for (uint32_t j = 0; j < rectCount; j++) { - emit_color_clear_rect(cmd_buffer, - attachment_idx, - format, - samples, - components, - pAttachments[i].clearValue.color, - &pRects[j]); - } + emit_subpass_color_clear_rects(cmd_buffer, pass, subpass, + pAttachments[i].colorAttachment, + &pAttachments[i].clearValue.color, + is_layered, all_rects_same_layers, + rectCount, pRects); } else { - for (uint32_t j = 0; j < rectCount; j++) { - emit_ds_clear_rect(cmd_buffer, - pAttachments[i].aspectMask, - attachment_idx, - pAttachments[i].clearValue.depthStencil, - &pRects[j]); - } + emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass, + pAttachments[i].aspectMask, + &pAttachments[i].clearValue.depthStencil, + is_layered, all_rects_same_layers, + rectCount, pRects); } } } diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h b/lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h new file mode 100644 index 000000000..555b55f90 --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef V3DV_META_COMMON_H +#define V3DV_META_COMMON_H + +/* Disable level 0 write, just write following mipmaps */ +#define V3D_TFU_IOA_DIMTW (1 << 0) +#define V3D_TFU_IOA_FORMAT_SHIFT 3 +#define V3D_TFU_IOA_FORMAT_LINEARTILE 3 +#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 +#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 +#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6 +#define V3D_TFU_IOA_FORMAT_UIF_XOR 7 + +#define V3D_TFU_ICFG_NUMMM_SHIFT 5 +#define V3D_TFU_ICFG_TTYPE_SHIFT 9 + +#define V3D_TFU_ICFG_OPAD_SHIFT 22 + +#define V3D_TFU_ICFG_FORMAT_SHIFT 18 +#define V3D_TFU_ICFG_FORMAT_RASTER 0 +#define V3D_TFU_ICFG_FORMAT_SAND_128 1 +#define V3D_TFU_ICFG_FORMAT_SAND_256 2 +#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11 +#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 +#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 +#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14 +#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15 + +/** + * Copy/Clear operations implemented in v3dv_meta_*.c that use the TLB hardware + * need to figure out TLB programming from the target image data instead of an + * actual Vulkan framebuffer object. For the most part, the job's frame tiling + * information is enough for this, however we still need additional information + * such us the internal type of our single render target, so we use this + * auxiliary struct to pass that information around. + */ +struct v3dv_meta_framebuffer { + /* The internal type of the single render target */ + uint32_t internal_type; + + /* Supertile coverage */ + uint32_t min_x_supertile; + uint32_t min_y_supertile; + uint32_t max_x_supertile; + uint32_t max_y_supertile; + + /* Format info */ + VkFormat vk_format; + const struct v3dv_format *format; + uint8_t internal_depth_type; +}; + +#endif diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c b/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c index d998d7d8a..85cd8e066 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c @@ -22,11 +22,12 @@ */ #include "v3dv_private.h" +#include "v3dv_meta_common.h" #include "compiler/nir/nir_builder.h" -#include "broadcom/cle/v3dx_pack.h" #include "vk_format_info.h" #include "util/u_pack_color.h" +#include "vulkan/util/vk_common_entrypoints.h" static uint32_t meta_blit_key_hash(const void *key) @@ -169,13 +170,25 @@ create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device, } assert(*p_layout == 0); + /* FIXME: this is abusing a bit the API, since not all of our copy + * pipelines have a geometry shader. 
We could create 2 different pipeline + * layouts, but this works for us for now. + */ +#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0 +#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16 +#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20 +#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24 + VkPushConstantRange ranges[2] = { + { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 }, + { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 }, + }; + VkPipelineLayoutCreateInfo p_layout_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .setLayoutCount = 1, .pSetLayouts = ds_layout, - .pushConstantRangeCount = 1, - .pPushConstantRanges = - &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 20 }, + .pushConstantRangeCount = 2, + .pPushConstantRanges = ranges, }; result = @@ -229,640 +242,127 @@ v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device) } } -static inline bool -can_use_tlb(struct v3dv_image *image, - const VkOffset3D *offset, - VkFormat *compat_format); - -/** - * Copy operations implemented in this file don't operate on a framebuffer - * object provided by the user, however, since most use the TLB for this, - * we still need to have some representation of the framebuffer. For the most - * part, the job's frame tiling information is enough for this, however we - * still need additional information such us the internal type of our single - * render target, so we use this auxiliary struct to pass that information - * around. - */ -struct framebuffer_data { - /* The internal type of the single render target */ - uint32_t internal_type; - - /* Supertile coverage */ - uint32_t min_x_supertile; - uint32_t min_y_supertile; - uint32_t max_x_supertile; - uint32_t max_y_supertile; - - /* Format info */ - VkFormat vk_format; - const struct v3dv_format *format; - uint8_t internal_depth_type; -}; - -static void -setup_framebuffer_data(struct framebuffer_data *fb, - VkFormat vk_format, - uint32_t internal_type, - const struct v3dv_frame_tiling *tiling) -{ - fb->internal_type = internal_type; - - /* Supertile coverage always starts at 0,0 */ - uint32_t supertile_w_in_pixels = - tiling->tile_width * tiling->supertile_width; - uint32_t supertile_h_in_pixels = - tiling->tile_height * tiling->supertile_height; - - fb->min_x_supertile = 0; - fb->min_y_supertile = 0; - fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels; - fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels; - - fb->vk_format = vk_format; - fb->format = v3dv_get_format(vk_format); - - fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F; - if (vk_format_is_depth_or_stencil(vk_format)) - fb->internal_depth_type = v3dv_get_internal_depth_type(vk_format); -} - -/* This chooses a tile buffer format that is appropriate for the copy operation. - * Typically, this is the image render target type, however, if we are copying - * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so - * we need to load and store to/from a tile color buffer using a compatible - * color format. 
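The two push-constant ranges declared above give the fragment shader bytes 0-23 and the geometry shader bytes 24-27, matching the TEXEL_BUFFER_COPY_* offsets. A sketch of how a draw might fill that space; it assumes the copy box is four 32-bit values, since the exact box layout is not shown in this hunk:

#include <stdint.h>
#include <vulkan/vulkan.h>

static void
push_texel_buffer_copy_constants(VkCommandBuffer cmd, VkPipelineLayout layout,
                                 const uint32_t box[4], uint32_t stride,
                                 uint32_t offset, uint32_t layer)
{
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      0, 16, box);     /* FS_BOX_PC_OFFSET */
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      16, 4, &stride); /* FS_STRIDE_PC_OFFSET */
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      20, 4, &offset); /* FS_OFFSET_PC_OFFSET */
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_GEOMETRY_BIT,
                      24, 4, &layer);  /* GS_LAYER_PC_OFFSET */
}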
- */ -static uint32_t -choose_tlb_format(struct framebuffer_data *framebuffer, - VkImageAspectFlags aspect, - bool for_store, - bool is_copy_to_buffer, - bool is_copy_from_buffer) -{ - if (is_copy_to_buffer || is_copy_from_buffer) { - switch (framebuffer->vk_format) { - case VK_FORMAT_D16_UNORM: - return V3D_OUTPUT_IMAGE_FORMAT_R16UI; - case VK_FORMAT_D32_SFLOAT: - return V3D_OUTPUT_IMAGE_FORMAT_R32F; - case VK_FORMAT_X8_D24_UNORM_PACK32: - return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; - case VK_FORMAT_D24_UNORM_S8_UINT: - /* When storing the stencil aspect of a combined depth/stencil image - * to a buffer, the Vulkan spec states that the output buffer must - * have packed stencil values, so we choose an R8UI format for our - * store outputs. For the load input we still want RGBA8UI since the - * source image contains 4 channels (including the 3 channels - * containing the 24-bit depth value). - * - * When loading the stencil aspect of a combined depth/stencil image - * from a buffer, we read packed 8-bit stencil values from the buffer - * that we need to put into the LSB of the 32-bit format (the R - * channel), so we use R8UI. For the store, if we used R8UI then we - * would write 8-bit stencil values consecutively over depth channels, - * so we need to use RGBA8UI. This will write each stencil value in - * its correct position, but will overwrite depth values (channels G - * B,A) with undefined values. To fix this, we will have to restore - * the depth aspect from the Z tile buffer, which we should pre-load - * from the image before the store). - */ - if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { - return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; - } else { - assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); - if (is_copy_to_buffer) { - return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI : - V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; - } else { - assert(is_copy_from_buffer); - return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI : - V3D_OUTPUT_IMAGE_FORMAT_R8UI; - } - } - default: /* Color formats */ - return framebuffer->format->rt_type; - break; - } - } else { - return framebuffer->format->rt_type; - } -} - -static inline bool -format_needs_rb_swap(VkFormat format) -{ - const uint8_t *swizzle = v3dv_get_format_swizzle(format); - return swizzle[0] == PIPE_SWIZZLE_Z; -} - -static void -get_internal_type_bpp_for_image_aspects(VkFormat vk_format, - VkImageAspectFlags aspect_mask, - uint32_t *internal_type, - uint32_t *internal_bpp) -{ - const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT; - - /* We can't store depth/stencil pixel formats to a raster format, so - * so instead we load our depth/stencil aspects to a compatible color - * format. - */ - /* FIXME: pre-compute this at image creation time? */ - if (aspect_mask & ds_aspects) { - switch (vk_format) { - case VK_FORMAT_D16_UNORM: - *internal_type = V3D_INTERNAL_TYPE_16UI; - *internal_bpp = V3D_INTERNAL_BPP_64; - break; - case VK_FORMAT_D32_SFLOAT: - *internal_type = V3D_INTERNAL_TYPE_32F; - *internal_bpp = V3D_INTERNAL_BPP_128; - break; - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: - /* Use RGBA8 format so we can relocate the X/S bits in the appropriate - * place to match Vulkan expectations. See the comment on the tile - * load command for more details. 
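The removed choose_tlb_format below encodes the rule described in its comment: the hardware cannot do raster loads/stores of depth/stencil, so buffer copies of those aspects go through a color tile buffer of matching width. A VkFormat-level sketch of the same mapping, condensed from the removed helpers (the V3D output-image formats in the real code correspond one-to-one to these):

#include <vulkan/vulkan.h>

static VkFormat
ds_copy_color_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_D16_UNORM:
      return VK_FORMAT_R16_UINT;
   case VK_FORMAT_D32_SFLOAT:
      return VK_FORMAT_R32_SFLOAT;
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return VK_FORMAT_R8G8B8A8_UINT; /* 4 channels: X/S byte + 24-bit Z */
   default:
      return format; /* color formats use their own render-target type */
   }
}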
- */ - *internal_type = V3D_INTERNAL_TYPE_8UI; - *internal_bpp = V3D_INTERNAL_BPP_32; - break; - default: - assert(!"unsupported format"); - break; - } - } else { - const struct v3dv_format *format = v3dv_get_format(vk_format); - v3dv_get_internal_type_bpp_for_output_format(format->rt_type, - internal_type, - internal_bpp); - } -} - -struct rcl_clear_info { - const union v3dv_clear_value *clear_value; - struct v3dv_image *image; - VkImageAspectFlags aspects; - uint32_t layer; - uint32_t level; -}; - -static struct v3dv_cl * -emit_rcl_prologue(struct v3dv_job *job, - struct framebuffer_data *fb, - const struct rcl_clear_info *clear_info) -{ - const struct v3dv_frame_tiling *tiling = &job->frame_tiling; - - struct v3dv_cl *rcl = &job->rcl; - v3dv_cl_ensure_space_with_branch(rcl, 200 + - tiling->layers * 256 * - cl_packet_length(SUPERTILE_COORDINATES)); - if (job->cmd_buffer->state.oom) - return NULL; - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { - config.early_z_disable = true; - config.image_width_pixels = tiling->width; - config.image_height_pixels = tiling->height; - config.number_of_render_targets = 1; - config.multisample_mode_4x = tiling->msaa; - config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; - config.internal_depth_type = fb->internal_depth_type; - } - - if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) { - uint32_t clear_pad = 0; - if (clear_info->image) { - const struct v3dv_image *image = clear_info->image; - const struct v3d_resource_slice *slice = - &image->slices[clear_info->level]; - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - int uif_block_height = v3d_utile_height(image->cpp) * 2; - - uint32_t implicit_padded_height = - align(tiling->height, uif_block_height) / uif_block_height; - - if (slice->padded_height_of_output_image_in_uif_blocks - - implicit_padded_height >= 15) { - clear_pad = slice->padded_height_of_output_image_in_uif_blocks; - } - } - } - - const uint32_t *color = &clear_info->clear_value->color[0]; - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { - clear.clear_color_low_32_bits = color[0]; - clear.clear_color_next_24_bits = color[1] & 0x00ffffff; - clear.render_target_number = 0; - }; - - if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { - clear.clear_color_mid_low_32_bits = - ((color[1] >> 24) | (color[2] << 8)); - clear.clear_color_mid_high_24_bits = - ((color[2] >> 24) | ((color[3] & 0xffff) << 8)); - clear.render_target_number = 0; - }; - } - - if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { - cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { - clear.uif_padded_height_in_uif_blocks = clear_pad; - clear.clear_color_high_16_bits = color[3] >> 16; - clear.render_target_number = 0; - }; - } - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - rt.render_target_0_internal_bpp = tiling->internal_bpp; - rt.render_target_0_internal_type = fb->internal_type; - rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f; - clear.stencil_clear_value = clear_info ? 
clear_info->clear_value->s : 0; - }; - - cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { - init.use_auto_chained_tile_lists = true; - init.size_of_first_block_in_chained_tile_lists = - TILE_ALLOCATION_BLOCK_SIZE_64B; - } - - return rcl; -} - -static void -emit_frame_setup(struct v3dv_job *job, - uint32_t layer, - const union v3dv_clear_value *clear_value) +static VkFormat +get_compatible_tlb_format(VkFormat format) { - v3dv_return_if_oom(NULL, job); - - const struct v3dv_frame_tiling *tiling = &job->frame_tiling; - - struct v3dv_cl *rcl = &job->rcl; - - const uint32_t tile_alloc_offset = - 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y; - cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { - list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); - } - - cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { - config.number_of_bin_tile_lists = 1; - config.total_frame_width_in_tiles = tiling->draw_tiles_x; - config.total_frame_height_in_tiles = tiling->draw_tiles_y; - - config.supertile_width_in_tiles = tiling->supertile_width; - config.supertile_height_in_tiles = tiling->supertile_height; - - config.total_frame_width_in_supertiles = - tiling->frame_width_in_supertiles; - config.total_frame_height_in_supertiles = - tiling->frame_height_in_supertiles; - } - - /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do - * it here. - */ - for (int i = 0; i < 2; i++) { - cl_emit(rcl, TILE_COORDINATES, coords); - cl_emit(rcl, END_OF_LOADS, end); - cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } - if (clear_value && i == 0) { - cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { - clear.clear_z_stencil_buffer = true; - clear.clear_all_render_targets = true; - } - } - cl_emit(rcl, END_OF_TILE_MARKER, end); - } - - cl_emit(rcl, FLUSH_VCD_CACHE, flush); -} + switch (format) { + case VK_FORMAT_R8G8B8A8_SNORM: + return VK_FORMAT_R8G8B8A8_UINT; -static void -emit_supertile_coordinates(struct v3dv_job *job, - struct framebuffer_data *framebuffer) -{ - v3dv_return_if_oom(NULL, job); + case VK_FORMAT_R8G8_SNORM: + return VK_FORMAT_R8G8_UINT; - struct v3dv_cl *rcl = &job->rcl; + case VK_FORMAT_R8_SNORM: + return VK_FORMAT_R8_UINT; - const uint32_t min_y = framebuffer->min_y_supertile; - const uint32_t max_y = framebuffer->max_y_supertile; - const uint32_t min_x = framebuffer->min_x_supertile; - const uint32_t max_x = framebuffer->max_x_supertile; + case VK_FORMAT_A8B8G8R8_SNORM_PACK32: + return VK_FORMAT_A8B8G8R8_UINT_PACK32; - for (int y = min_y; y <= max_y; y++) { - for (int x = min_x; x <= max_x; x++) { - cl_emit(rcl, SUPERTILE_COORDINATES, coords) { - coords.column_number_in_supertiles = x; - coords.row_number_in_supertiles = y; - } - } - } -} + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_R16_SNORM: + return VK_FORMAT_R16_UINT; -static void -emit_linear_load(struct v3dv_cl *cl, - uint32_t buffer, - struct v3dv_bo *bo, - uint32_t offset, - uint32_t stride, - uint32_t format) -{ - cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { - load.buffer_to_load = buffer; - load.address = v3dv_cl_address(bo, offset); - load.input_image_format = format; - load.memory_format = VC5_TILING_RASTER; - load.height_in_ub_or_stride = stride; - load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - } -} + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SNORM: + return VK_FORMAT_R16G16_UINT; -static void -emit_linear_store(struct v3dv_cl *cl, - uint32_t buffer, - struct v3dv_bo *bo, - uint32_t offset, - uint32_t stride, - bool msaa, - uint32_t format) 
-{ - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = RENDER_TARGET_0; - store.address = v3dv_cl_address(bo, offset); - store.clear_buffer_being_stored = false; - store.output_image_format = format; - store.memory_format = VC5_TILING_RASTER; - store.height_in_ub_or_stride = stride; - store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES : - V3D_DECIMATE_MODE_SAMPLE_0; - } -} + case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_R16G16B16A16_SNORM: + return VK_FORMAT_R16G16B16A16_UINT; -static void -emit_image_load(struct v3dv_cl *cl, - struct framebuffer_data *framebuffer, - struct v3dv_image *image, - VkImageAspectFlags aspect, - uint32_t layer, - uint32_t mip_level, - bool is_copy_to_buffer, - bool is_copy_from_buffer) -{ - uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: + return VK_FORMAT_R32_SFLOAT; - /* For image to/from buffer copies we always load to and store from RT0, - * even for depth/stencil aspects, because the hardware can't do raster - * stores or loads from/to the depth/stencil tile buffers. + /* We can't render to compressed formats using the TLB so instead we use + * a compatible format with the same bpp as the compressed format. Because + * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the + * case of ETC), when we implement copies with the compatible format we + * will have to divide offsets and dimensions on the compressed image by + * the compressed block size. */ - bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer || - aspect == VK_IMAGE_ASPECT_COLOR_BIT; - - const struct v3d_resource_slice *slice = &image->slices[mip_level]; - cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { - load.buffer_to_load = load_to_color_tlb ? - RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect); - - load.address = v3dv_cl_address(image->mem->bo, layer_offset); - - load.input_image_format = choose_tlb_format(framebuffer, aspect, false, - is_copy_to_buffer, - is_copy_from_buffer); - load.memory_format = slice->tiling; - - /* When copying depth/stencil images to a buffer, for D24 formats Vulkan - * expects the depth value in the LSB bits of each 32-bit pixel. - * Unfortunately, the hardware seems to put the S8/X8 bits there and the - * depth bits on the MSB. To work around that we can reverse the channel - * order and then swap the R/B channels to get what we want. - * - * NOTE: reversing and swapping only gets us the behavior we want if the - * operations happen in that exact order, which seems to be the case when - * done on the tile buffer load operations. On the store, it seems the - * order is not the same. The order on the store is probably reversed so - * that reversing and swapping on both the load and the store preserves - * the original order of the channels in memory. - * - * Notice that we only need to do this when copying to a buffer, where - * depth and stencil aspects are copied as separate regions and - * the spec expects them to be tightly packed. - */ - bool needs_rb_swap = false; - bool needs_chan_reverse = false; - if (is_copy_to_buffer && - (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || - (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && - (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { - needs_rb_swap = true; - needs_chan_reverse = true; - } else if (!is_copy_from_buffer && !is_copy_to_buffer && - (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { - /* This is not a raw data copy (i.e. 
we are clearing the image), - * so we need to make sure we respect the format swizzle. - */ - needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format); - } - - load.r_b_swap = needs_rb_swap; - load.channel_reverse = needs_chan_reverse; + case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + case VK_FORMAT_EAC_R11G11_UNORM_BLOCK: + case VK_FORMAT_EAC_R11G11_SNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC2_SRGB_BLOCK: + case VK_FORMAT_BC3_SRGB_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + case VK_FORMAT_ASTC_4x4_UNORM_BLOCK: + case VK_FORMAT_ASTC_4x4_SRGB_BLOCK: + case VK_FORMAT_ASTC_5x4_UNORM_BLOCK: + case VK_FORMAT_ASTC_5x4_SRGB_BLOCK: + case VK_FORMAT_ASTC_5x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_5x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_6x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_6x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_6x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_6x6_SRGB_BLOCK: + case VK_FORMAT_ASTC_8x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_8x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x6_SRGB_BLOCK: + case VK_FORMAT_ASTC_8x8_UNORM_BLOCK: + case VK_FORMAT_ASTC_8x8_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x5_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x5_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x6_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x6_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x8_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x8_SRGB_BLOCK: + case VK_FORMAT_ASTC_10x10_UNORM_BLOCK: + case VK_FORMAT_ASTC_10x10_SRGB_BLOCK: + case VK_FORMAT_ASTC_12x10_UNORM_BLOCK: + case VK_FORMAT_ASTC_12x10_SRGB_BLOCK: + case VK_FORMAT_ASTC_12x12_UNORM_BLOCK: + case VK_FORMAT_ASTC_12x12_SRGB_BLOCK: + return VK_FORMAT_R32G32B32A32_UINT; - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - load.height_in_ub_or_stride = - slice->padded_height_of_output_image_in_uif_blocks; - } else if (slice->tiling == VC5_TILING_RASTER) { - load.height_in_ub_or_stride = slice->stride; - } + case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + case VK_FORMAT_EAC_R11_UNORM_BLOCK: + case VK_FORMAT_EAC_R11_SNORM_BLOCK: + case VK_FORMAT_BC1_RGB_UNORM_BLOCK: + case VK_FORMAT_BC1_RGB_SRGB_BLOCK: + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: + return VK_FORMAT_R16G16B16A16_UINT; - if (image->samples > VK_SAMPLE_COUNT_1_BIT) - load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else - load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + default: + return VK_FORMAT_UNDEFINED; } } -static void -emit_image_store(struct v3dv_cl *cl, - struct framebuffer_data *framebuffer, - struct v3dv_image *image, - VkImageAspectFlags aspect, - uint32_t layer, - uint32_t mip_level, - bool is_copy_to_buffer, - bool is_copy_from_buffer, - bool is_multisample_resolve) +/** + * Checks if we can implement an image copy or clear operation using the TLB + * hardware. + */ +bool +v3dv_meta_can_use_tlb(struct v3dv_image *image, + const VkOffset3D *offset, + VkFormat *compat_format) { - uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); - - bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer || - aspect == VK_IMAGE_ASPECT_COLOR_BIT; - - const struct v3d_resource_slice *slice = &image->slices[mip_level]; - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = store_from_color_tlb ? 
- RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect); - - store.address = v3dv_cl_address(image->mem->bo, layer_offset); - store.clear_buffer_being_stored = false; - - /* See rationale in emit_image_load() */ - bool needs_rb_swap = false; - bool needs_chan_reverse = false; - if (is_copy_from_buffer && - (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || - (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && - (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { - needs_rb_swap = true; - needs_chan_reverse = true; - } else if (!is_copy_from_buffer && !is_copy_to_buffer && - (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { - needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format); - } - - store.r_b_swap = needs_rb_swap; - store.channel_reverse = needs_chan_reverse; - - store.output_image_format = choose_tlb_format(framebuffer, aspect, true, - is_copy_to_buffer, - is_copy_from_buffer); - store.memory_format = slice->tiling; - if (slice->tiling == VC5_TILING_UIF_NO_XOR || - slice->tiling == VC5_TILING_UIF_XOR) { - store.height_in_ub_or_stride = - slice->padded_height_of_output_image_in_uif_blocks; - } else if (slice->tiling == VC5_TILING_RASTER) { - store.height_in_ub_or_stride = slice->stride; - } + if (offset->x != 0 || offset->y != 0) + return false; - if (image->samples > VK_SAMPLE_COUNT_1_BIT) - store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; - else if (is_multisample_resolve) - store.decimate_mode = V3D_DECIMATE_MODE_4X; - else - store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) { + if (compat_format) + *compat_format = image->vk.format; + return true; } -} - -static void -emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job, - struct framebuffer_data *framebuffer, - struct v3dv_buffer *buffer, - struct v3dv_image *image, - uint32_t layer_offset, - const VkBufferImageCopy *region) -{ - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(NULL, job); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - /* Load image to TLB */ - assert((image->type != VK_IMAGE_TYPE_3D && - layer_offset < region->imageSubresource.layerCount) || - layer_offset < image->extent.depth); - - const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ? - region->imageSubresource.baseArrayLayer + layer_offset : - region->imageOffset.z + layer_offset; - - emit_image_load(cl, framebuffer, image, - region->imageSubresource.aspectMask, - image_layer, - region->imageSubresource.mipLevel, - true, false); - - cl_emit(cl, END_OF_LOADS, end); - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - /* Store TLB to buffer */ - uint32_t width, height; - if (region->bufferRowLength == 0) - width = region->imageExtent.width; - else - width = region->bufferRowLength; - - if (region->bufferImageHeight == 0) - height = region->imageExtent.height; - else - height = region->bufferImageHeight; - /* Handle copy from compressed format */ - width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format)); - height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format)); - - /* If we are storing stencil from a combined depth/stencil format the - * Vulkan spec states that the output buffer must have packed stencil - * values, where each stencil value is 1 byte. + /* If the image format is not TLB-supported, then check if we can use + * a compatible format instead. 
*/ - uint32_t cpp = - region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? - 1 : image->cpp; - uint32_t buffer_stride = width * cpp; - uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset + - height * buffer_stride * layer_offset; - - uint32_t format = choose_tlb_format(framebuffer, - region->imageSubresource.aspectMask, - true, true, false); - bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT; - - emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo, - buffer_offset, buffer_stride, msaa, format); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); + if (compat_format) { + *compat_format = get_compatible_tlb_format(image->vk.format); + if (*compat_format != VK_FORMAT_UNDEFINED) + return true; } -} -static void -emit_copy_layer_to_buffer(struct v3dv_job *job, - struct v3dv_buffer *buffer, - struct v3dv_image *image, - struct framebuffer_data *framebuffer, - uint32_t layer, - const VkBufferImageCopy *region) -{ - emit_frame_setup(job, layer, NULL); - emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer, - image, layer, region); - emit_supertile_coordinates(job, framebuffer); -} - -static void -emit_copy_image_to_buffer_rcl(struct v3dv_job *job, - struct v3dv_buffer *buffer, - struct v3dv_image *image, - struct framebuffer_data *framebuffer, - const VkBufferImageCopy *region) -{ - struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); - v3dv_return_if_oom(NULL, job); - - for (int layer = 0; layer < job->frame_tiling.layers; layer++) - emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region); - cl_emit(rcl, END_OF_RENDERING, end); + return false; } /* Implements a copy using the TLB. 
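The compatible-format scheme above is easiest to see with concrete numbers. The following is a minimal sketch, not code from the patch: it assumes an ETC2 RGBA source image and an image/region pair like the ones the copy entry points in this file receive. A 4x4 ETC2 block is 128 bits, so v3dv_meta_can_use_tlb() reports VK_FORMAT_R32G32B32A32_UINT and each block maps to a single 128-bit texel of the render target:

   VkFormat compat_format;
   if (v3dv_meta_can_use_tlb(image, &region->imageOffset, &compat_format)) {
      /* imageExtent is in texels of the compressed format; the TLB job
       * renders one compatible-format texel per compressed block, so the
       * frame dimensions are the extent divided (rounding up) by the
       * block size.
       */
      const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);  /* 4 */
      const uint32_t block_h = vk_format_get_blockheight(image->vk.format); /* 4 */
      const uint32_t rt_width = DIV_ROUND_UP(region->imageExtent.width, block_w);
      const uint32_t rt_height = DIV_ROUND_UP(region->imageExtent.height, block_h);
      /* ... start a rt_width x rt_height frame and copy using compat_format ... */
   }

This is the same divide-by-block-size step that copy_image_to_buffer_tlb() and copy_image_tlb() in the hunks below perform with DIV_ROUND_UP.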
@@ -879,19 +379,19 @@ static bool
 copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
 struct v3dv_buffer *buffer,
 struct v3dv_image *image,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
 {
 VkFormat fb_format;
- if (!can_use_tlb(image, &region->imageOffset, &fb_format))
+ if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
 return false;
 uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format,
- region->imageSubresource.aspectMask,
- &internal_type, &internal_bpp);
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, region->imageSubresource.aspectMask,
+ &internal_type, &internal_bpp);
 uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
 num_layers = region->imageSubresource.layerCount;
 else
 num_layers = region->imageExtent.depth;
@@ -903,19 +403,21 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
 return true;
 /* Handle copy from compressed format using a compatible format */
- const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
 const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
 const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, num_layers, false,
+ 1, internal_bpp, false);
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
+ internal_type, &job->frame_tiling);
- v3dv_job_emit_binning_flush(job);
- emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);
+ v3dv_X(job->device, job_emit_binning_flush)(job);
+ v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
+ (job, buffer, image, &framebuffer, region);
 v3dv_cmd_buffer_finish_job(cmd_buffer);
@@ -930,7 +432,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
 VkFormat src_format,
 VkColorComponentFlags cmask,
 VkComponentMapping *cswizzle,
- const VkImageBlit *region,
+ const VkImageBlit2KHR *region,
 VkFilter filter,
 bool dst_is_padded_image);
@@ -942,7 +444,7 @@ static bool
 copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
 struct v3dv_buffer *buffer,
 struct v3dv_image *image,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
 {
 bool handled = false;
@@ -991,10 +493,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
 dst_format = VK_FORMAT_R8G8B8A8_UINT;
 break;
 case VK_IMAGE_ASPECT_DEPTH_BIT:
- assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
- image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
- if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
+ assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
+ image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
+ if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
 src_format = VK_FORMAT_R32_UINT;
 dst_format = VK_FORMAT_R32_UINT;
 } else {
@@ -1016,7 +518,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
 break;
 case VK_IMAGE_ASPECT_STENCIL_BIT: 
assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT); - assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT); + assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT); /* Copying from S8D24. We want to write 8-bit stencil values only, * so adjust the buffer bpp for that. Since the hardware stores stencil * in the LSB, we can just do a RGBA8UI to R8UI blit. @@ -1070,14 +572,14 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, buf_height = region->bufferImageHeight; /* If the image is compressed, the bpp refers to blocks, not pixels */ - uint32_t block_width = vk_format_get_blockwidth(image->vk_format); - uint32_t block_height = vk_format_get_blockheight(image->vk_format); + uint32_t block_width = vk_format_get_blockwidth(image->vk.format); + uint32_t block_height = vk_format_get_blockheight(image->vk.format); buf_width = buf_width / block_width; buf_height = buf_height / block_height; /* Compute layers to copy */ uint32_t num_layers; - if (image->type != VK_IMAGE_TYPE_3D) + if (image->vk.image_type != VK_IMAGE_TYPE_3D) num_layers = region->imageSubresource.layerCount; else num_layers = region->imageExtent.depth; @@ -1094,17 +596,17 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, VkResult result; struct v3dv_device *device = cmd_buffer->device; VkDevice _device = v3dv_device_to_handle(device); - if (vk_format_is_compressed(image->vk_format)) { + if (vk_format_is_compressed(image->vk.format)) { VkImage uiview; VkImageCreateInfo uiview_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_3D, .format = dst_format, - .extent = { buf_width, buf_height, image->extent.depth }, - .mipLevels = image->levels, - .arrayLayers = image->array_size, - .samples = image->samples, - .tiling = image->tiling, + .extent = { buf_width, buf_height, image->vk.extent.depth }, + .mipLevels = image->vk.mip_levels, + .arrayLayers = image->vk.array_layers, + .samples = image->vk.samples, + .tiling = image->vk.tiling, .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, @@ -1118,9 +620,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, cmd_buffer, (uintptr_t)uiview, (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage); - result = v3dv_BindImageMemory(_device, uiview, - v3dv_device_memory_to_handle(image->mem), - image->mem_offset); + result = + vk_common_BindImageMemory(_device, uiview, + v3dv_device_memory_to_handle(image->mem), + image->mem_offset); if (result != VK_SUCCESS) return handled; @@ -1158,9 +661,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, /* Bind the buffer memory to the image */ VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset + i * buf_width * buf_height * buffer_bpp; - result = v3dv_BindImageMemory(_device, buffer_image, - v3dv_device_memory_to_handle(buffer->mem), - buffer_offset); + result = + vk_common_BindImageMemory(_device, buffer_image, + v3dv_device_memory_to_handle(buffer->mem), + buffer_offset); if (result != VK_SUCCESS) return handled; @@ -1172,7 +676,8 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, * image, but that we need to blit to a S8D24 destination (the only * stencil format we support). 
*/ - const VkImageBlit blit_region = { + const VkImageBlit2KHR blit_region = { + .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR, .srcSubresource = { .aspectMask = copy_aspect, .mipLevel = region->imageSubresource.mipLevel, @@ -1225,309 +730,26 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer, return true; } -static VkFormat -get_compatible_tlb_format(VkFormat format) -{ - switch (format) { - case VK_FORMAT_R8G8B8A8_SNORM: - return VK_FORMAT_R8G8B8A8_UINT; - - case VK_FORMAT_R8G8_SNORM: - return VK_FORMAT_R8G8_UINT; - - case VK_FORMAT_R8_SNORM: - return VK_FORMAT_R8_UINT; - - case VK_FORMAT_A8B8G8R8_SNORM_PACK32: - return VK_FORMAT_A8B8G8R8_UINT_PACK32; - - case VK_FORMAT_R16_UNORM: - case VK_FORMAT_R16_SNORM: - return VK_FORMAT_R16_UINT; - - case VK_FORMAT_R16G16_UNORM: - case VK_FORMAT_R16G16_SNORM: - return VK_FORMAT_R16G16_UINT; - - case VK_FORMAT_R16G16B16A16_UNORM: - case VK_FORMAT_R16G16B16A16_SNORM: - return VK_FORMAT_R16G16B16A16_UINT; - - case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: - return VK_FORMAT_R32_SFLOAT; +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2KHR *info) - /* We can't render to compressed formats using the TLB so instead we use - * a compatible format with the same bpp as the compressed format. Because - * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the - * case of ETC), when we implement copies with the compatible format we - * will have to divide offsets and dimensions on the compressed image by - * the compressed block size. - */ - case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: - case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: - case VK_FORMAT_EAC_R11G11_UNORM_BLOCK: - case VK_FORMAT_EAC_R11G11_SNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC2_SRGB_BLOCK: - case VK_FORMAT_BC3_SRGB_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - return VK_FORMAT_R32G32B32A32_UINT; - - case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: - case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: - case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: - case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: - case VK_FORMAT_EAC_R11_UNORM_BLOCK: - case VK_FORMAT_EAC_R11_SNORM_BLOCK: - case VK_FORMAT_BC1_RGB_UNORM_BLOCK: - case VK_FORMAT_BC1_RGB_SRGB_BLOCK: - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC1_RGBA_SRGB_BLOCK: - return VK_FORMAT_R16G16B16A16_UINT; - - default: - return VK_FORMAT_UNDEFINED; - } -} - -static inline bool -can_use_tlb(struct v3dv_image *image, - const VkOffset3D *offset, - VkFormat *compat_format) -{ - if (offset->x != 0 || offset->y != 0) - return false; - - if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) { - if (compat_format) - *compat_format = image->vk_format; - return true; - } - - /* If the image format is not TLB-supported, then check if we can use - * a compatible format instead. 
- */ - if (compat_format) { - *compat_format = get_compatible_tlb_format(image->vk_format); - if (*compat_format != VK_FORMAT_UNDEFINED) - return true; - } - - return false; -} - -void -v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_image, image, srcImage); - V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer); + V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage); + V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer); - assert(image->samples == VK_SAMPLE_COUNT_1_BIT); + assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT); - for (uint32_t i = 0; i < regionCount; i++) { - if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i])) + for (uint32_t i = 0; i < info->regionCount; i++) { + if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i])) continue; - if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i])) + if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i])) continue; unreachable("Unsupported image to buffer copy."); } } -static void -emit_copy_image_layer_per_tile_list(struct v3dv_job *job, - struct framebuffer_data *framebuffer, - struct v3dv_image *dst, - struct v3dv_image *src, - uint32_t layer_offset, - const VkImageCopy *region) -{ - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(NULL, job); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - assert((src->type != VK_IMAGE_TYPE_3D && - layer_offset < region->srcSubresource.layerCount) || - layer_offset < src->extent.depth); - - const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ? - region->srcSubresource.baseArrayLayer + layer_offset : - region->srcOffset.z + layer_offset; - - emit_image_load(cl, framebuffer, src, - region->srcSubresource.aspectMask, - src_layer, - region->srcSubresource.mipLevel, - false, false); - - cl_emit(cl, END_OF_LOADS, end); - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - assert((dst->type != VK_IMAGE_TYPE_3D && - layer_offset < region->dstSubresource.layerCount) || - layer_offset < dst->extent.depth); - - const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ? 
- region->dstSubresource.baseArrayLayer + layer_offset : - region->dstOffset.z + layer_offset; - - emit_image_store(cl, framebuffer, dst, - region->dstSubresource.aspectMask, - dst_layer, - region->dstSubresource.mipLevel, - false, false, false); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } -} - -static void -emit_copy_image_layer(struct v3dv_job *job, - struct v3dv_image *dst, - struct v3dv_image *src, - struct framebuffer_data *framebuffer, - uint32_t layer, - const VkImageCopy *region) -{ - emit_frame_setup(job, layer, NULL); - emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region); - emit_supertile_coordinates(job, framebuffer); -} - -static void -emit_copy_image_rcl(struct v3dv_job *job, - struct v3dv_image *dst, - struct v3dv_image *src, - struct framebuffer_data *framebuffer, - const VkImageCopy *region) -{ - struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); - v3dv_return_if_oom(NULL, job); - - for (int layer = 0; layer < job->frame_tiling.layers; layer++) - emit_copy_image_layer(job, dst, src, framebuffer, layer, region); - cl_emit(rcl, END_OF_RENDERING, end); -} - -/* Disable level 0 write, just write following mipmaps */ -#define V3D_TFU_IOA_DIMTW (1 << 0) -#define V3D_TFU_IOA_FORMAT_SHIFT 3 -#define V3D_TFU_IOA_FORMAT_LINEARTILE 3 -#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4 -#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5 -#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6 -#define V3D_TFU_IOA_FORMAT_UIF_XOR 7 - -#define V3D_TFU_ICFG_NUMMM_SHIFT 5 -#define V3D_TFU_ICFG_TTYPE_SHIFT 9 - -#define V3D_TFU_ICFG_OPAD_SHIFT 22 - -#define V3D_TFU_ICFG_FORMAT_SHIFT 18 -#define V3D_TFU_ICFG_FORMAT_RASTER 0 -#define V3D_TFU_ICFG_FORMAT_SAND_128 1 -#define V3D_TFU_ICFG_FORMAT_SAND_256 2 -#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11 -#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12 -#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13 -#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14 -#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15 - -static void -emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_image *dst, - uint32_t dst_mip_level, - uint32_t dst_layer, - struct v3dv_image *src, - uint32_t src_mip_level, - uint32_t src_layer, - uint32_t width, - uint32_t height, - const struct v3dv_format *format) -{ - const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; - const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level]; - - assert(dst->mem && dst->mem->bo); - const struct v3dv_bo *dst_bo = dst->mem->bo; - - assert(src->mem && src->mem->bo); - const struct v3dv_bo *src_bo = src->mem->bo; - - struct drm_v3d_submit_tfu tfu = { - .ios = (height << 16) | width, - .bo_handles = { - dst_bo->handle, - src_bo->handle != dst_bo->handle ? 
src_bo->handle : 0 - }, - }; - - const uint32_t src_offset = - src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer); - tfu.iia |= src_offset; - - uint32_t icfg; - if (src_slice->tiling == VC5_TILING_RASTER) { - icfg = V3D_TFU_ICFG_FORMAT_RASTER; - } else { - icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE + - (src_slice->tiling - VC5_TILING_LINEARTILE); - } - tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT; - - const uint32_t dst_offset = - dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer); - tfu.ioa |= dst_offset; - - tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + - (dst_slice->tiling - VC5_TILING_LINEARTILE)) << - V3D_TFU_IOA_FORMAT_SHIFT; - tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; - - switch (src_slice->tiling) { - case VC5_TILING_UIF_NO_XOR: - case VC5_TILING_UIF_XOR: - tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp)); - break; - case VC5_TILING_RASTER: - tfu.iis |= src_slice->stride / src->cpp; - break; - default: - break; - } - - /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the - * OPAD field for the destination (how many extra UIF blocks beyond - * those necessary to cover the height). - */ - if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR || - dst_slice->tiling == VC5_TILING_UIF_XOR) { - uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp); - uint32_t implicit_padded_height = align(height, uif_block_h); - uint32_t icfg = - (dst_slice->padded_height - implicit_padded_height) / uif_block_h; - tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; - } - - v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); -} - /** * Returns true if the implementation supports the requested operation (even if * it failed to process it, for example, due to an out-of-memory error). @@ -1536,17 +758,17 @@ static bool copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *dst, struct v3dv_image *src, - const VkImageCopy *region) + const VkImageCopy2KHR *region) { /* Destination can't be raster format */ - if (dst->tiling == VK_IMAGE_TILING_LINEAR) + if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR) return false; /* We can only do full copies, so if the format is D24S8 both aspects need * to be copied. We only need to check the dst format because the spec * states that depth/stencil formats must match exactly. */ - if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) { const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; if (region->dstSubresource.aspectMask != ds_aspects) @@ -1562,8 +784,8 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, * checking against the region dimensions, which are in units of the source * image format. 
*/ - if (vk_format_is_compressed(dst->vk_format) != - vk_format_is_compressed(src->vk_format)) { + if (vk_format_is_compressed(dst->vk.format) != + vk_format_is_compressed(src->vk.format)) { return false; } @@ -1576,8 +798,8 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, return false; const uint32_t dst_mip_level = region->dstSubresource.mipLevel; - uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level); - uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level); + uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level); + uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level); if (region->extent.width != dst_width || region->extent.height != dst_height) return false; @@ -1587,15 +809,15 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, * members represent the texel dimensions of the source image and not * the destination." */ - const uint32_t block_w = vk_format_get_blockwidth(src->vk_format); - const uint32_t block_h = vk_format_get_blockheight(src->vk_format); + const uint32_t block_w = vk_format_get_blockwidth(src->vk.format); + const uint32_t block_h = vk_format_get_blockheight(src->vk.format); uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); /* Account for sample count */ - assert(dst->samples == src->samples); - if (dst->samples > VK_SAMPLE_COUNT_1_BIT) { - assert(dst->samples == VK_SAMPLE_COUNT_4_BIT); + assert(dst->vk.samples == src->vk.samples); + if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) { + assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT); width *= 2; height *= 2; } @@ -1614,24 +836,24 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer, */ assert(dst->cpp == src->cpp); const struct v3dv_format *format = - v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo, + v3dv_get_compatible_tfu_format(cmd_buffer->device, dst->cpp, NULL); /* Emit a TFU job for each layer to blit */ - const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ? + const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ? region->dstSubresource.layerCount : region->extent.depth; const uint32_t src_mip_level = region->srcSubresource.mipLevel; - const uint32_t base_src_layer = src->type != VK_IMAGE_TYPE_3D ? + const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ? region->srcSubresource.baseArrayLayer : region->srcOffset.z; - const uint32_t base_dst_layer = dst->type != VK_IMAGE_TYPE_3D ? + const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ? 
region->dstSubresource.baseArrayLayer : region->dstOffset.z;
 for (uint32_t i = 0; i < layer_count; i++) {
- emit_tfu_job(cmd_buffer,
- dst, dst_mip_level, base_dst_layer + i,
- src, src_mip_level, base_src_layer + i,
- width, height, format);
+ v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
+ (cmd_buffer, dst, dst_mip_level, base_dst_layer + i,
+ src, src_mip_level, base_src_layer + i,
+ width, height, format);
 }
 return true;
@@ -1645,11 +867,11 @@ static bool
 copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
 struct v3dv_image *dst,
 struct v3dv_image *src,
- const VkImageCopy *region)
+ const VkImageCopy2KHR *region)
 {
 VkFormat fb_format;
- if (!can_use_tlb(src, &region->srcOffset, &fb_format) ||
- !can_use_tlb(dst, &region->dstOffset, &fb_format)) {
+ if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
+ !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
 return false;
 }
@@ -1662,9 +884,9 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
 assert(region->dstSubresource.aspectMask ==
 region->srcSubresource.aspectMask);
 uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format,
- region->dstSubresource.aspectMask,
- &internal_type, &internal_bpp);
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, region->dstSubresource.aspectMask,
+ &internal_type, &internal_bpp);
 /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
 *
 * "The number of slices of the extent (for 3D) or layers of the
 * srcSubresource (for non-3D) must match the number of slices of the
 * extent (for 3D) or layers of the dstSubresource (for non-3D)."
 */
- assert((src->type != VK_IMAGE_TYPE_3D ?
+ assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
 region->srcSubresource.layerCount : region->extent.depth) ==
- (dst->type != VK_IMAGE_TYPE_3D ?
+ (dst->vk.image_type != VK_IMAGE_TYPE_3D ? 
region->dstSubresource.layerCount : region->extent.depth)); uint32_t num_layers; - if (dst->type != VK_IMAGE_TYPE_3D) + if (dst->vk.image_type != VK_IMAGE_TYPE_3D) num_layers = region->dstSubresource.layerCount; else num_layers = region->extent.depth; @@ -1689,20 +911,20 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, return true; /* Handle copy to compressed image using compatible format */ - const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format); - const uint32_t block_h = vk_format_get_blockheight(dst->vk_format); + const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format); + const uint32_t block_h = vk_format_get_blockheight(dst->vk.format); const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); - v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, - src->samples > VK_SAMPLE_COUNT_1_BIT); + v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp, + src->vk.samples > VK_SAMPLE_COUNT_1_BIT); - struct framebuffer_data framebuffer; - setup_framebuffer_data(&framebuffer, fb_format, internal_type, - &job->frame_tiling); + struct v3dv_meta_framebuffer framebuffer; + v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, + internal_type, &job->frame_tiling); - v3dv_job_emit_binning_flush(job); - emit_copy_image_rcl(job, dst, src, &framebuffer, region); + v3dv_X(job->device, job_emit_binning_flush)(job); + v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region); v3dv_cmd_buffer_finish_job(cmd_buffer); @@ -1734,18 +956,18 @@ create_image_alias(struct v3dv_cmd_buffer *cmd_buffer, VkImageCreateInfo info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .imageType = src->type, + .imageType = src->vk.image_type, .format = format, .extent = { - .width = src->extent.width * width_scale, - .height = src->extent.height * height_scale, - .depth = src->extent.depth, + .width = src->vk.extent.width * width_scale, + .height = src->vk.extent.height * height_scale, + .depth = src->vk.extent.depth, }, - .mipLevels = src->levels, - .arrayLayers = src->array_size, - .samples = src->samples, - .tiling = src->tiling, - .usage = src->usage, + .mipLevels = src->vk.mip_levels, + .arrayLayers = src->vk.array_layers, + .samples = src->vk.samples, + .tiling = src->vk.tiling, + .usage = src->vk.usage, }; VkImage _image; @@ -1770,12 +992,12 @@ static bool copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *dst, struct v3dv_image *src, - const VkImageCopy *region) + const VkImageCopy2KHR *region) { - const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format); - const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format); - const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format); - const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format); + const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format); + const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format); + const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format); + const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format); const float block_scale_w = (float)src_block_w / (float)dst_block_w; const float block_scale_h = (float)src_block_h / (float)dst_block_h; @@ -1789,7 +1011,7 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, float src_scale_h = 1.0f; float dst_scale_w = block_scale_w; float dst_scale_h = block_scale_h; - if 
(vk_format_is_compressed(src->vk_format)) { + if (vk_format_is_compressed(src->vk.format)) { /* If we are copying from a compressed format we should be aware that we * are going to texture from the source image, and the texture setup * knows the actual size of the image, so we need to choose a format @@ -1813,18 +1035,13 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, */ assert(src->cpp == dst->cpp); - uint32_t divisor_w, divisor_h; format = VK_FORMAT_R32G32_UINT; switch (src->cpp) { case 16: format = VK_FORMAT_R32G32B32A32_UINT; - divisor_w = 4; - divisor_h = 4; break; case 8: format = VK_FORMAT_R16G16B16A16_UINT; - divisor_w = 4; - divisor_h = 4; break; default: unreachable("Unsupported compressed format"); @@ -1833,10 +1050,10 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, /* Create image views of the src/dst images that we can interpret in * terms of the canonical format. */ - src_scale_w /= divisor_w; - src_scale_h /= divisor_h; - dst_scale_w /= divisor_w; - dst_scale_h /= divisor_h; + src_scale_w /= src_block_w; + src_scale_h /= src_block_h; + dst_scale_w /= src_block_w; + dst_scale_h /= src_block_h; src = create_image_alias(cmd_buffer, src, src_scale_w, src_scale_h, format); @@ -1845,11 +1062,11 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, dst_scale_w, dst_scale_h, format); } else { format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ? - src->vk_format : get_compatible_tlb_format(src->vk_format); + src->vk.format : get_compatible_tlb_format(src->vk.format); if (format == VK_FORMAT_UNDEFINED) return false; - const struct v3dv_format *f = v3dv_get_format(format); + const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format); if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO) return false; } @@ -1895,7 +1112,8 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, dst_start.z + region->extent.depth, }; - const VkImageBlit blit_region = { + const VkImageBlit2KHR blit_region = { + .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR, .srcSubresource = region->srcSubresource, .srcOffsets = { src_start, src_end }, .dstSubresource = region->dstSubresource, @@ -1912,466 +1130,42 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer, return handled; } -void -v3dv_CmdCopyImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageCopy *pRegions) +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2KHR *info) + { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_image, src, srcImage); - V3DV_FROM_HANDLE(v3dv_image, dst, dstImage); + V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage); + V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage); - assert(src->samples == dst->samples); + assert(src->vk.samples == dst->vk.samples); - for (uint32_t i = 0; i < regionCount; i++) { - if (copy_image_tfu(cmd_buffer, dst, src, &pRegions[i])) + for (uint32_t i = 0; i < info->regionCount; i++) { + if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i])) continue; - if (copy_image_tlb(cmd_buffer, dst, src, &pRegions[i])) + if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i])) continue; - if (copy_image_blit(cmd_buffer, dst, src, &pRegions[i])) + if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i])) continue; unreachable("Image copy not supported"); } } -static void -emit_clear_image_per_tile_list(struct v3dv_job *job, - struct 
framebuffer_data *framebuffer, - struct v3dv_image *image, - VkImageAspectFlags aspects, - uint32_t layer, - uint32_t level) -{ - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(NULL, job); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - cl_emit(cl, END_OF_LOADS, end); - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - emit_image_store(cl, framebuffer, image, aspects, layer, level, - false, false, false); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } -} - -static void -emit_clear_image(struct v3dv_job *job, - struct v3dv_image *image, - struct framebuffer_data *framebuffer, - VkImageAspectFlags aspects, - uint32_t layer, - uint32_t level) -{ - emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level); - emit_supertile_coordinates(job, framebuffer); -} - -static void -emit_clear_image_rcl(struct v3dv_job *job, - struct v3dv_image *image, - struct framebuffer_data *framebuffer, - const union v3dv_clear_value *clear_value, - VkImageAspectFlags aspects, - uint32_t layer, - uint32_t level) -{ - const struct rcl_clear_info clear_info = { - .clear_value = clear_value, - .image = image, - .aspects = aspects, - .layer = layer, - .level = level, - }; - - struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); - v3dv_return_if_oom(NULL, job); - - emit_frame_setup(job, 0, clear_value); - emit_clear_image(job, image, framebuffer, aspects, layer, level); - cl_emit(rcl, END_OF_RENDERING, end); -} - -static void -get_hw_clear_color(const VkClearColorValue *color, - VkFormat fb_format, - VkFormat image_format, - uint32_t internal_type, - uint32_t internal_bpp, - uint32_t *hw_color) -{ - const uint32_t internal_size = 4 << internal_bpp; - - /* If the image format doesn't match the framebuffer format, then we are - * trying to clear an unsupported tlb format using a compatible - * format for the framebuffer. In this case, we want to make sure that - * we pack the clear value according to the original format semantics, - * not the compatible format. - */ - if (fb_format == image_format) { - v3dv_get_hw_clear_color(color, internal_type, internal_size, hw_color); - } else { - union util_color uc; - enum pipe_format pipe_image_format = - vk_format_to_pipe_format(image_format); - util_pack_color(color->float32, pipe_image_format, &uc); - memcpy(hw_color, uc.ui, internal_size); - } -} - -/* Returns true if the implementation is able to handle the case, false - * otherwise. 
-*/
-static bool
-clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_image *image,
- const VkClearValue *clear_value,
- const VkImageSubresourceRange *range)
-{
- const VkOffset3D origin = { 0, 0, 0 };
- VkFormat fb_format;
- if (!can_use_tlb(image, &origin, &fb_format))
- return false;
-
- uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format, range->aspectMask,
- &internal_type, &internal_bpp);
-
- union v3dv_clear_value hw_clear_value = { 0 };
- if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- get_hw_clear_color(&clear_value->color, fb_format, image->vk_format,
- internal_type, internal_bpp, &hw_clear_value.color[0]);
- } else {
- assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
- (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
- hw_clear_value.z = clear_value->depthStencil.depth;
- hw_clear_value.s = clear_value->depthStencil.stencil;
- }
-
- uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
- image->levels - range->baseMipLevel :
- range->levelCount;
- uint32_t min_level = range->baseMipLevel;
- uint32_t max_level = range->baseMipLevel + level_count;
-
- /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
- * Instead, we need to consider the full depth dimension of the image, which
- * goes from 0 up to the level's depth extent.
- */
- uint32_t min_layer;
- uint32_t max_layer;
- if (image->type != VK_IMAGE_TYPE_3D) {
- uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
- image->array_size - range->baseArrayLayer :
- range->layerCount;
- min_layer = range->baseArrayLayer;
- max_layer = range->baseArrayLayer + layer_count;
- } else {
- min_layer = 0;
- max_layer = 0;
- }
-
- for (uint32_t level = min_level; level < max_level; level++) {
- if (image->type == VK_IMAGE_TYPE_3D)
- max_layer = u_minify(image->extent.depth, level);
- for (uint32_t layer = min_layer; layer < max_layer; layer++) {
- uint32_t width = u_minify(image->extent.width, level);
- uint32_t height = u_minify(image->extent.height, level);
-
- struct v3dv_job *job =
- v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
-
- if (!job)
- return true;
-
- /* We start a new job for each layer so the frame "depth" is 1 */
- v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,
- image->samples > VK_SAMPLE_COUNT_1_BIT);
-
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
-
- v3dv_job_emit_binning_flush(job);
-
- /* If this triggers it is an application bug: the spec requires
- * that any aspects to clear are present in the image. 
- */ - assert(range->aspectMask & image->aspects); - - emit_clear_image_rcl(job, image, &framebuffer, &hw_clear_value, - range->aspectMask, layer, level); - - v3dv_cmd_buffer_finish_job(cmd_buffer); - } - } - - return true; -} - -void -v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer, - VkImage _image, - VkImageLayout imageLayout, - const VkClearColorValue *pColor, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) -{ - V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_image, image, _image); - - const VkClearValue clear_value = { - .color = *pColor, - }; - - for (uint32_t i = 0; i < rangeCount; i++) { - if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i])) - continue; - unreachable("Unsupported color clear."); - } -} - -void -v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, - VkImage _image, - VkImageLayout imageLayout, - const VkClearDepthStencilValue *pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2KHR *pCopyBufferInfo) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_image, image, _image); - - const VkClearValue clear_value = { - .depthStencil = *pDepthStencil, - }; - - for (uint32_t i = 0; i < rangeCount; i++) { - if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i])) - continue; - unreachable("Unsupported depth/stencil clear."); - } -} - -static void -emit_copy_buffer_per_tile_list(struct v3dv_job *job, - struct v3dv_bo *dst, - struct v3dv_bo *src, - uint32_t dst_offset, - uint32_t src_offset, - uint32_t stride, - uint32_t format) -{ - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(NULL, job); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format); - - cl_emit(cl, END_OF_LOADS, end); - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - emit_linear_store(cl, RENDER_TARGET_0, - dst, dst_offset, stride, false, format); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } -} - -static void -emit_copy_buffer(struct v3dv_job *job, - struct v3dv_bo *dst, - struct v3dv_bo *src, - uint32_t dst_offset, - uint32_t src_offset, - struct framebuffer_data *framebuffer, - uint32_t format, - uint32_t item_size) -{ - const uint32_t stride = job->frame_tiling.width * item_size; - emit_copy_buffer_per_tile_list(job, dst, src, - dst_offset, src_offset, - stride, format); - emit_supertile_coordinates(job, framebuffer); -} - -static void -emit_copy_buffer_rcl(struct v3dv_job *job, - struct v3dv_bo *dst, - struct v3dv_bo *src, - uint32_t dst_offset, - uint32_t src_offset, - struct framebuffer_data *framebuffer, - uint32_t format, - uint32_t item_size) -{ - struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); - v3dv_return_if_oom(NULL, job); - - emit_frame_setup(job, 0, NULL); - - emit_copy_buffer(job, dst, src, dst_offset, src_offset, - framebuffer, format, item_size); - - cl_emit(rcl, END_OF_RENDERING, end); -} - -/* Figure out a TLB size configuration for a number of pixels to process. 
- * Beware that we can't "render" more than 4096x4096 pixels in a single job, - * if the pixel count is larger than this, the caller might need to split - * the job and call this function multiple times. - */ -static void -framebuffer_size_for_pixel_count(uint32_t num_pixels, - uint32_t *width, - uint32_t *height) -{ - assert(num_pixels > 0); - - const uint32_t max_dim_pixels = 4096; - const uint32_t max_pixels = max_dim_pixels * max_dim_pixels; - - uint32_t w, h; - if (num_pixels > max_pixels) { - w = max_dim_pixels; - h = max_dim_pixels; - } else { - w = num_pixels; - h = 1; - while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) { - w >>= 1; - h <<= 1; - } - } - assert(w <= max_dim_pixels && h <= max_dim_pixels); - assert(w * h <= num_pixels); - assert(w > 0 && h > 0); - - *width = w; - *height = h; -} - -static struct v3dv_job * -copy_buffer(struct v3dv_cmd_buffer *cmd_buffer, - struct v3dv_bo *dst, - uint32_t dst_offset, - struct v3dv_bo *src, - uint32_t src_offset, - const VkBufferCopy *region) -{ - const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; - const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; - - /* Select appropriate pixel format for the copy operation based on the - * size to copy and the alignment of the source and destination offsets. - */ - src_offset += region->srcOffset; - dst_offset += region->dstOffset; - uint32_t item_size = 4; - while (item_size > 1 && - (src_offset % item_size != 0 || dst_offset % item_size != 0)) { - item_size /= 2; - } - - while (item_size > 1 && region->size % item_size != 0) - item_size /= 2; - - assert(region->size % item_size == 0); - uint32_t num_items = region->size / item_size; - assert(num_items > 0); - - uint32_t format; - VkFormat vk_format; - switch (item_size) { - case 4: - format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; - vk_format = VK_FORMAT_R8G8B8A8_UINT; - break; - case 2: - format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI; - vk_format = VK_FORMAT_R8G8_UINT; - break; - default: - format = V3D_OUTPUT_IMAGE_FORMAT_R8UI; - vk_format = VK_FORMAT_R8_UINT; - break; - } - - struct v3dv_job *job = NULL; - while (num_items > 0) { - job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); - if (!job) - return NULL; - - uint32_t width, height; - framebuffer_size_for_pixel_count(num_items, &width, &height); - - v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false); - - struct framebuffer_data framebuffer; - setup_framebuffer_data(&framebuffer, vk_format, internal_type, - &job->frame_tiling); - - v3dv_job_emit_binning_flush(job); - - emit_copy_buffer_rcl(job, dst, src, dst_offset, src_offset, - &framebuffer, format, item_size); - - v3dv_cmd_buffer_finish_job(cmd_buffer); - - const uint32_t items_copied = width * height; - const uint32_t bytes_copied = items_copied * item_size; - num_items -= items_copied; - src_offset += bytes_copied; - dst_offset += bytes_copied; - } - - return job; -} - -void -v3dv_CmdCopyBuffer(VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer dstBuffer, - uint32_t regionCount, - const VkBufferCopy *pRegions) -{ - V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, srcBuffer); - V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer); - - for (uint32_t i = 0; i < regionCount; i++) { - copy_buffer(cmd_buffer, - dst_buffer->mem->bo, dst_buffer->mem_offset, - src_buffer->mem->bo, src_buffer->mem_offset, - &pRegions[i]); + V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer); + V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, 
pCopyBufferInfo->dstBuffer);
+
+ for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
+ v3dv_X(cmd_buffer->device, meta_copy_buffer)
+ (cmd_buffer,
+ dst_buffer->mem->bo, dst_buffer->mem_offset,
+ src_buffer->mem->bo, src_buffer->mem_offset,
+ &pCopyBufferInfo->pRegions[i]);
 }
 }
@@ -2385,7 +1179,7 @@ destroy_update_buffer_cb(VkDevice _device,
 v3dv_bo_free(device, bo);
 }
-void
+VKAPI_ATTR void VKAPI_CALL
 v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
 VkBuffer dstBuffer,
 VkDeviceSize dstOffset,
@@ -2412,16 +1206,17 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
 v3dv_bo_unmap(cmd_buffer->device, src_bo);
- VkBufferCopy region = {
+ VkBufferCopy2KHR region = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
 .srcOffset = 0,
 .dstOffset = dstOffset,
 .size = dataSize,
 };
 struct v3dv_job *copy_job =
- copy_buffer(cmd_buffer,
- dst_buffer->mem->bo, dst_buffer->mem_offset,
- src_bo, 0,
- &region);
+ v3dv_X(cmd_buffer->device, meta_copy_buffer)
+ (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
+ src_bo, 0, &region);
+
 if (!copy_job)
 return;
@@ -2429,118 +1224,7 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
 cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
 }
-static void
-emit_fill_buffer_per_tile_list(struct v3dv_job *job,
- struct v3dv_bo *bo,
- uint32_t offset,
- uint32_t stride)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
- V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-emit_fill_buffer(struct v3dv_job *job,
- struct v3dv_bo *bo,
- uint32_t offset,
- struct framebuffer_data *framebuffer)
-{
- const uint32_t stride = job->frame_tiling.width * 4;
- emit_fill_buffer_per_tile_list(job, bo, offset, stride);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_fill_buffer_rcl(struct v3dv_job *job,
- struct v3dv_bo *bo,
- uint32_t offset,
- struct framebuffer_data *framebuffer,
- uint32_t data)
-{
- const union v3dv_clear_value clear_value = {
- .color = { data, 0, 0, 0 },
- };
-
- const struct rcl_clear_info clear_info = {
- .clear_value = &clear_value,
- .image = NULL,
- .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
- .layer = 0,
- .level = 0,
- };
-
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
- v3dv_return_if_oom(NULL, job);
-
- emit_frame_setup(job, 0, &clear_value);
- emit_fill_buffer(job, bo, offset, framebuffer);
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-static void
-fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_bo *bo,
- uint32_t offset,
- uint32_t size,
- uint32_t data)
-{
- assert(size > 0 && size % 4 == 0);
- assert(offset + size <= bo->size);
-
- const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
- const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
- uint32_t num_items = size / 4;
-
- while (num_items > 0) {
- struct v3dv_job *job =
- v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
- if (!job)
- return;
-
- uint32_t width, height;
- framebuffer_size_for_pixel_count(num_items, &width, 
&height);
-
- v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
-
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
- internal_type, &job->frame_tiling);
-
- v3dv_job_emit_binning_flush(job);
-
- emit_fill_buffer_rcl(job, bo, offset, &framebuffer, data);
-
- v3dv_cmd_buffer_finish_job(cmd_buffer);
-
- const uint32_t items_copied = width * height;
- const uint32_t bytes_copied = items_copied * 4;
- num_items -= items_copied;
- offset += bytes_copied;
- }
-}
-
-void
+VKAPI_ATTR void VKAPI_CALL
 v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
 VkBuffer dstBuffer,
 VkDeviceSize dstOffset,
@@ -2562,7 +1246,8 @@ v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
 size -= size % 4;
 }
- fill_buffer(cmd_buffer, bo, dstOffset, size, data);
+ v3dv_X(cmd_buffer->device, meta_fill_buffer)
+ (cmd_buffer, bo, dstOffset, size, data);
 }
 /**
@@ -2573,12 +1258,12 @@ static bool
 copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 struct v3dv_image *image,
 struct v3dv_buffer *buffer,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
 {
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
 /* Destination can't be raster format */
- if (image->tiling == VK_IMAGE_TILING_LINEAR)
+ if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
 return false;
 /* We can't copy D24S8 because buffer to image copies only copy one aspect
@@ -2588,8 +1273,8 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 * is not a straight copy, we would have to swizzle the channels, which the
 * TFU can't do.
 */
- if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
+ if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
 return false;
 }
@@ -2610,12 +1295,12 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 else
 height = region->bufferImageHeight;
- if (width != image->extent.width || height != image->extent.height)
+ if (width != image->vk.extent.width || height != image->vk.extent.height)
 return false;
 /* Handle region semantics for compressed images */
- const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
 width = DIV_ROUND_UP(width, block_w);
 height = DIV_ROUND_UP(height, block_h);
@@ -2625,14 +1310,14 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 * texel size instead, which expands the list of formats we can handle here. 
*/
 const struct v3dv_format *format =
- v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
+ v3dv_get_compatible_tfu_format(cmd_buffer->device,
 image->cpp, NULL);
 const uint32_t mip_level = region->imageSubresource.mipLevel;
 const struct v3d_resource_slice *slice = &image->slices[mip_level];
 uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
 num_layers = region->imageSubresource.layerCount;
 else
 num_layers = region->imageExtent.depth;
@@ -2647,7 +1332,11 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 /* Emit a TFU job per layer to copy */
 const uint32_t buffer_stride = width * image->cpp;
 for (int i = 0; i < num_layers; i++) {
- uint32_t layer = region->imageSubresource.baseArrayLayer + i;
+ uint32_t layer;
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
+ layer = region->imageSubresource.baseArrayLayer + i;
+ else
+ layer = region->imageOffset.z + i;
 struct drm_v3d_submit_tfu tfu = {
 .ios = (height << 16) | width,
@@ -2671,7 +1360,7 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 tfu.ioa |= dst_offset;
 tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
- (slice->tiling - VC5_TILING_LINEARTILE)) <<
+ (slice->tiling - V3D_TILING_LINEARTILE)) <<
 V3D_TFU_IOA_FORMAT_SHIFT;
 tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
@@ -2679,8 +1368,8 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 * OPAD field for the destination (how many extra UIF blocks beyond
 * those necessary to cover the height).
 */
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
 uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
 uint32_t implicit_padded_height = align(height, uif_block_h);
 uint32_t icfg =
@@ -2694,140 +1383,6 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
 return true;
 }
-static void
-emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *image,
- struct v3dv_buffer *buffer,
- uint32_t layer,
- const VkBufferImageCopy *region)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
- assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
- layer < image->extent.depth);
-
- /* Load TLB from buffer */
- uint32_t width, height;
- if (region->bufferRowLength == 0)
- width = region->imageExtent.width;
- else
- width = region->bufferRowLength;
-
- if (region->bufferImageHeight == 0)
- height = region->imageExtent.height;
- else
- height = region->bufferImageHeight;
-
- /* Handle copy to compressed format using a compatible format */
- width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
- height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
-
- uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 
- 1 : image->cpp; - uint32_t buffer_stride = width * cpp; - uint32_t buffer_offset = - buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer; - - uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask, - false, false, true); - - emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo, - buffer_offset, buffer_stride, format); - - /* Because we can't do raster loads/stores of Z/S formats we need to - * use a color tile buffer with a compatible RGBA color format instead. - * However, when we are uploading a single aspect to a combined - * depth/stencil image we have the problem that our tile buffer stores don't - * allow us to mask out the other aspect, so we always write all four RGBA - * channels to the image and we end up overwriting that other aspect with - * undefined values. To work around that, we first load the aspect we are - * not copying from the image memory into a proper Z/S tile buffer. Then we - * do our store from the color buffer for the aspect we are copying, and - * after that, we do another store from the Z/S tile buffer to restore the - * other aspect to its original value. - */ - if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { - if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { - emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT, - imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, - false, false); - } else { - assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); - emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT, - imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, - false, false); - } - } - - cl_emit(cl, END_OF_LOADS, end); - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - /* Store TLB to image */ - emit_image_store(cl, framebuffer, image, imgrsc->aspectMask, - imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, - false, true, false); - - if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { - if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { - emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT, - imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, - false, false, false); - } else { - assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); - emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT, - imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, - false, false, false); - } - } - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } -} - -static void -emit_copy_buffer_to_layer(struct v3dv_job *job, - struct v3dv_image *image, - struct v3dv_buffer *buffer, - struct framebuffer_data *framebuffer, - uint32_t layer, - const VkBufferImageCopy *region) -{ - emit_frame_setup(job, layer, NULL); - emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer, - layer, region); - emit_supertile_coordinates(job, framebuffer); -} - -static void -emit_copy_buffer_to_image_rcl(struct v3dv_job *job, - struct v3dv_image *image, - struct v3dv_buffer *buffer, - struct framebuffer_data *framebuffer, - const VkBufferImageCopy *region) -{ - struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); - v3dv_return_if_oom(NULL, job); - - for (int layer = 0; layer < job->frame_tiling.layers; layer++) - emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region); - cl_emit(rcl, END_OF_RENDERING, end); -} - /** * Returns true if the implementation supports 
the requested operation (even if * it failed to process it, for example, due to an out-of-memory error). @@ -2836,19 +1391,19 @@ static bool copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *image, struct v3dv_buffer *buffer, - const VkBufferImageCopy *region) + const VkBufferImageCopy2KHR *region) { VkFormat fb_format; - if (!can_use_tlb(image, &region->imageOffset, &fb_format)) + if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format)) return false; uint32_t internal_type, internal_bpp; - get_internal_type_bpp_for_image_aspects(fb_format, - region->imageSubresource.aspectMask, - &internal_type, &internal_bpp); + v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects) + (fb_format, region->imageSubresource.aspectMask, + &internal_type, &internal_bpp); uint32_t num_layers; - if (image->type != VK_IMAGE_TYPE_3D) + if (image->vk.image_type != VK_IMAGE_TYPE_3D) num_layers = region->imageSubresource.layerCount; else num_layers = region->imageExtent.depth; @@ -2860,19 +1415,21 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, return true; /* Handle copy to compressed format using a compatible format */ - const uint32_t block_w = vk_format_get_blockwidth(image->vk_format); - const uint32_t block_h = vk_format_get_blockheight(image->vk_format); + const uint32_t block_w = vk_format_get_blockwidth(image->vk.format); + const uint32_t block_h = vk_format_get_blockheight(image->vk.format); const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h); - v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false); + v3dv_job_start_frame(job, width, height, num_layers, false, + 1, internal_bpp, false); - struct framebuffer_data framebuffer; - setup_framebuffer_data(&framebuffer, fb_format, internal_type, - &job->frame_tiling); + struct v3dv_meta_framebuffer framebuffer; + v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, + internal_type, &job->frame_tiling); - v3dv_job_emit_binning_flush(job); - emit_copy_buffer_to_image_rcl(job, image, buffer, &framebuffer, region); + v3dv_X(job->device, job_emit_binning_flush)(job); + v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl) + (job, image, buffer, &framebuffer, region); v3dv_cmd_buffer_finish_job(cmd_buffer); @@ -2883,7 +1440,7 @@ static bool create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *image, struct v3dv_buffer *buffer, - const VkBufferImageCopy *region) + const VkBufferImageCopy2KHR *region) { if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region)) return true; @@ -2980,6 +1537,7 @@ static void get_texel_buffer_copy_pipeline_cache_key(VkFormat format, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, + bool is_layered, uint8_t *key) { memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE); @@ -2992,6 +1550,12 @@ get_texel_buffer_copy_pipeline_cache_key(VkFormat format, *p = cmask; p++; + /* Note that we are using a single byte for this, so we could pack + * more data into this 32-bit slot in the future. + */ + *p = is_layered ?
1 : 0; + p++; + memcpy(p, cswizzle, sizeof(VkComponentMapping)); p += sizeof(VkComponentMapping) / sizeof(uint32_t); @@ -3011,6 +1575,7 @@ static bool create_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, struct nir_shader *vs_nir, + struct nir_shader *gs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, @@ -3036,6 +1601,71 @@ get_texel_buffer_copy_vs() return b.shader; } +static nir_shader * +get_texel_buffer_copy_gs() +{ + /* FIXME: this creates a geometry shader that takes the index of a single + * layer to clear from push constants, so we need to emit a draw call for + * each layer that we want to clear. We could actually do better and have it + * take a range of layers however, if we were to do this, we would need to + * be careful not to exceed the maximum number of output vertices allowed in + * a geometry shader. + */ + const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, + "meta texel buffer copy gs"); + nir_shader *nir = b.shader; + nir->info.inputs_read = 1ull << VARYING_SLOT_POS; + nir->info.outputs_written = (1ull << VARYING_SLOT_POS) | + (1ull << VARYING_SLOT_LAYER); + nir->info.gs.input_primitive = GL_TRIANGLES; + nir->info.gs.output_primitive = GL_TRIANGLE_STRIP; + nir->info.gs.vertices_in = 3; + nir->info.gs.vertices_out = 3; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 0x1; + + /* in vec4 gl_Position[3] */ + nir_variable *gs_in_pos = + nir_variable_create(b.shader, nir_var_shader_in, + glsl_array_type(glsl_vec4_type(), 3, 0), + "in_gl_Position"); + gs_in_pos->data.location = VARYING_SLOT_POS; + + /* out vec4 gl_Position */ + nir_variable *gs_out_pos = + nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(), + "out_gl_Position"); + gs_out_pos->data.location = VARYING_SLOT_POS; + + /* out float gl_Layer */ + nir_variable *gs_out_layer = + nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(), + "out_gl_Layer"); + gs_out_layer->data.location = VARYING_SLOT_LAYER; + + /* Emit output triangle */ + for (uint32_t i = 0; i < 3; i++) { + /* gl_Position from shader input */ + nir_deref_instr *in_pos_i = + nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i); + nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i); + + /* gl_Layer from push constants */ + nir_ssa_def *layer = + nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), + .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET, + .range = 4); + nir_store_var(&b, gs_out_layer, layer, 0x1); + + nir_emit_vertex(&b, 0); + } + + nir_end_primitive(&b, 0); + + return nir; +} + static nir_ssa_def * load_frag_coord(nir_builder *b) { @@ -3101,15 +1731,21 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format, * texel buffer. 
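The fragment shader above pulls the copy box, buffer stride and buffer offset from a single push-constant range, and the geometry shader reads its target layer from the same range. A minimal sketch of that layout, inferred from the .base offsets in these hunks and from the vkCmdPushConstants call later in the diff (offset 24 for the layer); the struct name is hypothetical and the real macros live in the v3dv headers:

#include <stdint.h>

/* Illustrative mirror of the push-constant layout implied by this diff. */
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET     0   /* 4 x uint32: x, y, w, h */
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16   /* buffer stride, in texels */
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20   /* buffer offset, in texels */
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET  24   /* layer emitted by the GS */

struct texel_buffer_copy_push_constants {
   uint32_t box[4];    /* destination rect read by the fragment shader */
   uint32_t stride;
   uint32_t offset;
   uint32_t gs_layer;  /* only used by layered (GS-assisted) copies */
};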
*/ nir_ssa_def *box = - nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16); + nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), + .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET, + .range = 16); /* Load the buffer stride (this comes in texel units) */ nir_ssa_def *stride = - nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4); + nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), + .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET, + .range = 4); /* Load the buffer offset (this comes in texel units) */ nir_ssa_def *offset = - nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 20, .range = 4); + nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), + .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET, + .range = 4); nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b)); @@ -3165,6 +1801,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device, VkFormat format, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, + bool is_layered, VkRenderPass _pass, VkPipelineLayout pipeline_layout, VkPipeline *pipeline) @@ -3175,6 +1812,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device, nir_shader *vs_nir = get_texel_buffer_copy_vs(); nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle); + nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL; const VkPipelineVertexInputStateCreateInfo vi_state = { .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -3210,7 +1848,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device, return create_pipeline(device, pass, - vs_nir, fs_nir, + vs_nir, gs_nir, fs_nir, &vi_state, &ds_state, &cb_state, @@ -3226,12 +1864,14 @@ get_copy_texel_buffer_pipeline( VkColorComponentFlags cmask, VkComponentMapping *cswizzle, VkImageType image_type, + bool is_layered, struct v3dv_meta_texel_buffer_copy_pipeline **pipeline) { bool ok = true; uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE]; - get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, key); + get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered, + key); mtx_lock(&device->meta.mtx); struct hash_entry *entry = @@ -3257,7 +1897,8 @@ get_copy_texel_buffer_pipeline( goto fail; ok = - create_texel_buffer_copy_pipeline(device, format, cmask, cswizzle, + create_texel_buffer_copy_pipeline(device, + format, cmask, cswizzle, is_layered, (*pipeline)->pass, device->meta.texel_buffer_copy.p_layout, &(*pipeline)->pipeline); @@ -3297,7 +1938,7 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, uint32_t region_count, - const VkBufferImageCopy *regions) + const VkBufferImageCopy2KHR *regions) { VkResult result; bool handled = false; @@ -3320,7 +1961,7 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, return handled; /* FIXME: we only handle uncompressed images for now. 
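get_copy_texel_buffer_pipeline() keys its pipeline hash table on everything that changes the generated shaders, now including the new is_layered flag. A compilable sketch of the same packing scheme, with the key size and the Vulkan types replaced by stand-ins:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define KEY_SIZE 32  /* stand-in for V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE */
struct component_mapping { uint32_t r, g, b, a; };  /* mimics VkComponentMapping */

static void
pack_texel_copy_key(uint32_t format, uint32_t cmask,
                    const struct component_mapping *cswizzle,
                    bool is_layered, uint8_t key[KEY_SIZE])
{
   memset(key, 0, KEY_SIZE);
   uint32_t *p = (uint32_t *)key;  /* the driver's key buffer is 4-byte aligned */
   *p++ = format;
   *p++ = cmask;
   *p++ = is_layered ? 1 : 0;     /* one byte used; room left to pack more state */
   memcpy(p, cswizzle, sizeof(*cswizzle));
}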
*/ - if (vk_format_is_compressed(image->vk_format)) + if (vk_format_is_compressed(image->vk.format)) return handled; const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT | @@ -3336,7 +1977,8 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, */ if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) { if (v3dv_buffer_format_supports_features( - src_format, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) { + cmd_buffer->device, src_format, + VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) { buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT; } else { return handled; @@ -3348,11 +1990,29 @@ */ handled = true; + + /* Compute the number of layers to copy. + * + * If we are batching (region_count > 1) all our regions have the same + * image subresource so we can take this from the first region. For 3D + * images we require the same depth extent. + */ + const VkImageSubresourceLayers *resource = &regions[0].imageSubresource; + uint32_t num_layers; + if (image->vk.image_type != VK_IMAGE_TYPE_3D) { + num_layers = resource->layerCount; + } else { + assert(region_count == 1); + num_layers = regions[0].imageExtent.depth; + } + assert(num_layers > 0); + /* Get the texel buffer copy pipeline */ struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL; bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device, dst_format, cmask, cswizzle, - image->type, &pipeline); + image->vk.image_type, num_layers > 1, + &pipeline); if (!ok) return handled; assert(pipeline && pipeline->pipeline && pipeline->pass); @@ -3422,78 +2082,58 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, 0, 1, &set, 0, NULL); - /* Compute the number of layers to copy. + /* Setup framebuffer. * - * If we are batching (region_count > 1) all our regions have the same - * image subresource so we can take this from the first region. + * For 3D images, this creates a layered framebuffer with a number of + * layers matching the depth extent of the 3D image. */ - const VkImageSubresourceLayers *resource = &regions[0].imageSubresource; - uint32_t num_layers; - if (image->type != VK_IMAGE_TYPE_3D) { - num_layers = resource->layerCount; - } else { - assert(region_count == 1); - num_layers = regions[0].imageExtent.depth; - } - assert(num_layers > 0); - - /* Sanity check: we can only batch multiple regions together if they have - * the same framebuffer (so the same layer). - */ - assert(num_layers == 1 || region_count == 1); - - /* For each layer */ - for (uint32_t l = 0; l < num_layers; l++) { - /* Setup framebuffer for this layer. - * - * FIXME: once we support geometry shaders, we should be able to have - * one layered framebuffer and emit just one draw call for - * all layers using layered rendering. At that point, we should - * also be able to batch multi-layered regions as well.
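The rewrite replaces the old framebuffer-per-layer loop with a single layered framebuffer whose image view spans every layer being copied, so the geometry shader can route each draw to its layer. A condensed sketch of that setup, using core Vulkan calls in place of the v3dv wrappers and omitting the private-object cleanup from the hunk:

#include <vulkan/vulkan.h>

/* Condensed from this hunk: one view over all layers, one layered FB. */
static VkFramebuffer
make_layered_copy_fb(VkDevice dev, VkRenderPass pass, VkImage image,
                     VkImageViewType view_type, VkFormat format,
                     VkImageAspectFlags aspect, uint32_t mip_level,
                     uint32_t base_layer, uint32_t num_layers,
                     uint32_t fb_width, uint32_t fb_height)
{
   VkImageViewCreateInfo view_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = image,
      .viewType = view_type,
      .format = format,
      .subresourceRange = {
         .aspectMask = aspect,
         .baseMipLevel = mip_level,
         .levelCount = 1,
         .baseArrayLayer = base_layer,
         .layerCount = num_layers,   /* one view spanning every layer */
      },
   };
   VkImageView view;
   if (vkCreateImageView(dev, &view_info, NULL, &view) != VK_SUCCESS)
      return VK_NULL_HANDLE;

   VkFramebufferCreateInfo fb_info = {
      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
      .renderPass = pass,
      .attachmentCount = 1,
      .pAttachments = &view,
      .width = fb_width,              /* u_minify(extent, mip_level) */
      .height = fb_height,
      .layers = num_layers,           /* layered framebuffer */
   };
   VkFramebuffer fb;
   if (vkCreateFramebuffer(dev, &fb_info, NULL, &fb) != VK_SUCCESS)
      return VK_NULL_HANDLE;          /* sketch: view cleanup elided */
   return fb;
}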
- */ - VkImageViewCreateInfo image_view_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .image = v3dv_image_to_handle(image), - .viewType = v3dv_image_type_to_view_type(image->type), - .format = dst_format, - .subresourceRange = { - .aspectMask = aspect, - .baseMipLevel = resource->mipLevel, - .levelCount = 1, - .baseArrayLayer = resource->baseArrayLayer + l, - .layerCount = 1 - }, - }; - VkImageView image_view; - result = v3dv_CreateImageView(_device, &image_view_info, - &cmd_buffer->device->vk.alloc, &image_view); - if (result != VK_SUCCESS) - goto fail; + uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel); + uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel); + VkImageViewCreateInfo image_view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = v3dv_image_to_handle(image), + .viewType = v3dv_image_type_to_view_type(image->vk.image_type), + .format = dst_format, + .subresourceRange = { + .aspectMask = aspect, + .baseMipLevel = resource->mipLevel, + .levelCount = 1, + .baseArrayLayer = resource->baseArrayLayer, + .layerCount = num_layers, + }, + }; + VkImageView image_view; + result = v3dv_CreateImageView(_device, &image_view_info, + &cmd_buffer->device->vk.alloc, &image_view); + if (result != VK_SUCCESS) + goto fail; - v3dv_cmd_buffer_add_private_obj( - cmd_buffer, (uintptr_t)image_view, - (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView); + v3dv_cmd_buffer_add_private_obj( + cmd_buffer, (uintptr_t)image_view, + (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView); - VkFramebufferCreateInfo fb_info = { - .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - .renderPass = pipeline->pass, - .attachmentCount = 1, - .pAttachments = &image_view, - .width = u_minify(image->extent.width, resource->mipLevel), - .height = u_minify(image->extent.height, resource->mipLevel), - .layers = 1, - }; + VkFramebufferCreateInfo fb_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = pipeline->pass, + .attachmentCount = 1, + .pAttachments = &image_view, + .width = fb_width, + .height = fb_height, + .layers = num_layers, + }; - VkFramebuffer fb; - result = v3dv_CreateFramebuffer(_device, &fb_info, - &cmd_buffer->device->vk.alloc, &fb); - if (result != VK_SUCCESS) - goto fail; + VkFramebuffer fb; + result = v3dv_CreateFramebuffer(_device, &fb_info, + &cmd_buffer->device->vk.alloc, &fb); + if (result != VK_SUCCESS) + goto fail; - v3dv_cmd_buffer_add_private_obj( - cmd_buffer, (uintptr_t)fb, - (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer); + v3dv_cmd_buffer_add_private_obj( + cmd_buffer, (uintptr_t)fb, + (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer); + /* For each layer */ + for (uint32_t l = 0; l < num_layers; l++) { /* Start render pass for this layer. 
* * If we only have one region to copy, then we might be able to @@ -3513,15 +2153,15 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, v3dv_render_pass_from_handle(pipeline->pass); can_skip_tlb_load = cmask == full_cmask && - v3dv_subpass_area_is_tile_aligned(&render_area, + v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area, v3dv_framebuffer_from_handle(fb), pipeline_pass, 0); } } else { render_area.offset.x = 0; render_area.offset.y = 0; - render_area.extent.width = fb_info.width; - render_area.extent.height = fb_info.height; + render_area.extent.width = fb_width; + render_area.extent.height = fb_height; } VkRenderPassBeginInfo rp_info = { @@ -3538,10 +2178,21 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer, if (!job) goto fail; + /* If we are using a layered copy we need to specify the layer for the + * Geometry Shader. + */ + if (num_layers > 1) { + uint32_t layer = resource->baseArrayLayer + l; + v3dv_CmdPushConstants(_cmd_buffer, + cmd_buffer->device->meta.texel_buffer_copy.p_layout, + VK_SHADER_STAGE_GEOMETRY_BIT, + 24, 4, &layer); + } + /* For each region */ dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR; for (uint32_t r = 0; r < region_count; r++) { - const VkBufferImageCopy *region = &regions[r]; + const VkBufferImageCopy2KHR *region = &regions[r]; /* Obtain the 2D buffer region spec */ uint32_t buf_width, buf_height; @@ -3612,7 +2263,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, uint32_t region_count, - const VkBufferImageCopy *regions) + const VkBufferImageCopy2KHR *regions) { /* Since we can't sample linear images we need to upload the linear * buffer to a tiled image that we can use as a blit source, which @@ -3636,7 +2287,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .format = src_format, - .extent = { image->extent.width, image->extent.height, 1 }, + .extent = { image->vk.extent.width, image->vk.extent.height, 1 }, .mipLevels = 1, .arrayLayers = 1, .samples = VK_SAMPLE_COUNT_1_BIT, @@ -3653,7 +2304,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, return handled; VkMemoryRequirements reqs; - v3dv_GetImageMemoryRequirements(_device, dummy_image, &reqs); + vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs); v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc); VkDeviceMemory mem; @@ -3676,7 +2327,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, * image subresource so we can take this from the first region. */ uint32_t num_layers; - if (image->type != VK_IMAGE_TYPE_3D) + if (image->vk.image_type != VK_IMAGE_TYPE_3D) num_layers = regions[0].imageSubresource.layerCount; else num_layers = regions[0].imageExtent.depth; @@ -3687,14 +2338,14 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, */ assert(num_layers == 1 || region_count == 1); - const uint32_t block_width = vk_format_get_blockwidth(image->vk_format); - const uint32_t block_height = vk_format_get_blockheight(image->vk_format); + const uint32_t block_width = vk_format_get_blockwidth(image->vk.format); + const uint32_t block_height = vk_format_get_blockheight(image->vk.format); /* Copy regions by uploading each region to a temporary tiled image using * the memory we have just allocated as storage.
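With the layered framebuffer in place, the per-layer loop only has to update one push constant (the layer the geometry shader should write) before drawing. A sketch of that loop shape; the handles are assumed to come from the surrounding function and the draw itself is illustrative, not the driver's actual draw:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* One push-constant update per layer instead of one framebuffer per layer. */
static void
draw_copy_layers(VkCommandBuffer cmd, VkPipelineLayout layout,
                 uint32_t base_layer, uint32_t num_layers)
{
   for (uint32_t l = 0; l < num_layers; l++) {
      if (num_layers > 1) {
         uint32_t layer = base_layer + l;
         /* Offset 24 matches the GS layer slot sketched earlier. */
         vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_GEOMETRY_BIT,
                            24, sizeof(layer), &layer);
      }
      vkCmdDraw(cmd, 4, 1, 0, 0);  /* illustrative full-rect draw per region */
   }
}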
*/ for (uint32_t r = 0; r < region_count; r++) { - const VkBufferImageCopy *region = &regions[r]; + const VkBufferImageCopy2KHR *region = &regions[r]; /* Obtain the 2D buffer region spec */ uint32_t buf_width, buf_height; @@ -3741,14 +2392,15 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, cmd_buffer, (uintptr_t)buffer_image, (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage); - result = v3dv_BindImageMemory(_device, buffer_image, mem, 0); + result = vk_common_BindImageMemory(_device, buffer_image, mem, 0); if (result != VK_SUCCESS) return handled; /* Upload buffer contents for the selected layer */ const VkDeviceSize buf_offset_bytes = region->bufferOffset + i * buf_height * buf_width * buffer_bpp; - const VkBufferImageCopy buffer_image_copy = { + const VkBufferImageCopy2KHR buffer_image_copy = { + .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR, .bufferOffset = buf_offset_bytes, .bufferRowLength = region->bufferRowLength / block_width, .bufferImageHeight = region->bufferImageHeight / block_height, @@ -3782,7 +2434,8 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer, * image, but that we need to blit to a S8D24 destination (the only * stencil format we support). */ - const VkImageBlit blit_region = { + const VkImageBlit2KHR blit_region = { + .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR, .srcSubresource = { .aspectMask = aspect, .mipLevel = 0, @@ -3840,7 +2493,7 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *image, struct v3dv_buffer *buffer, uint32_t region_count, - const VkBufferImageCopy *regions, + const VkBufferImageCopy2KHR *regions, bool use_texel_buffer) { /* We can only call this with region_count > 1 if we can batch the regions @@ -3890,9 +2543,9 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, dst_format = src_format; break; case VK_IMAGE_ASPECT_DEPTH_BIT: - assert(image->vk_format == VK_FORMAT_D32_SFLOAT || - image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32); + assert(image->vk.format == VK_FORMAT_D32_SFLOAT || + image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT || + image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32); src_format = VK_FORMAT_R8G8B8A8_UINT; dst_format = src_format; aspect = VK_IMAGE_ASPECT_COLOR_BIT; @@ -3901,8 +2554,8 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, * in the buffer is stored in the 24-LSB, but V3D wants it in the * 24-MSB. */ - if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT || - image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) { + if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT || + image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) { cmask = VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; @@ -3920,7 +2573,7 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer, * blit to an RGBA8UI destination masking out writes to components * GBA (which map to the D24 component of a S8D24 image).
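copy_buffer_to_image_shader() cannot render to depth/stencil directly, so single-aspect uploads are remapped to a compatible color format and the color write mask protects the other aspect's bits. A sketch of the mapping for the cases visible in this hunk; the stencil-case write mask is inferred from the "masking out GBA" comment above and should be read as an assumption:

#include <vulkan/vulkan.h>

/* Sketch of the aspect-to-color remapping shown in this hunk. */
static void
remap_ds_aspect_for_color_copy(VkFormat image_format, VkImageAspectFlags aspect,
                               VkFormat *src_format, VkFormat *dst_format,
                               VkColorComponentFlags *cmask)
{
   *cmask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
            VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
   if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT) {
      /* D32, D24S8 and X8D24 all travel through an RGBA8UI view. */
      *src_format = VK_FORMAT_R8G8B8A8_UINT;
      *dst_format = *src_format;
      if (image_format == VK_FORMAT_D24_UNORM_S8_UINT ||
          image_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
         /* Only write the channels holding the D24 bits. */
         *cmask = VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
                  VK_COLOR_COMPONENT_A_BIT;
      }
   } else if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* S8 is blitted into RGBA8UI with the D24 channels masked out. */
      *src_format = VK_FORMAT_R8_UINT;
      *dst_format = VK_FORMAT_R8G8B8A8_UINT;
      *cmask = VK_COLOR_COMPONENT_R_BIT;
   }
}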
*/ - assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT); + assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT); buf_bpp = 1; src_format = VK_FORMAT_R8_UINT; dst_format = VK_FORMAT_R8G8B8A8_UINT; @@ -3970,16 +2623,16 @@ static bool copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *image, struct v3dv_buffer *buffer, - const VkBufferImageCopy *region) + const VkBufferImageCopy2KHR *region) { /* FIXME */ - if (vk_format_is_depth_or_stencil(image->vk_format)) + if (vk_format_is_depth_or_stencil(image->vk.format)) return false; - if (vk_format_is_compressed(image->vk_format)) + if (vk_format_is_compressed(image->vk.format)) return false; - if (image->tiling == VK_IMAGE_TILING_LINEAR) + if (image->vk.tiling == VK_IMAGE_TILING_LINEAR) return false; uint32_t buffer_width, buffer_height; @@ -3997,7 +2650,7 @@ copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, uint32_t buffer_layer_stride = buffer_stride * buffer_height; uint32_t num_layers; - if (image->type != VK_IMAGE_TYPE_3D) + if (image->vk.image_type != VK_IMAGE_TYPE_3D) num_layers = region->imageSubresource.layerCount; else num_layers = region->imageExtent.depth; @@ -4028,50 +2681,55 @@ copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer, return true; } -void -v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2KHR *info) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_buffer, buffer, srcBuffer); - V3DV_FROM_HANDLE(v3dv_image, image, dstImage); + V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer); + V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage); - assert(image->samples == VK_SAMPLE_COUNT_1_BIT); + assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT); uint32_t r = 0; - while (r < regionCount) { + while (r < info->regionCount) { /* The TFU and TLB paths can only copy one region at a time and the region * needs to start at the origin. We try these first for the common case * where we are copying full images, since they should be the fastest. */ uint32_t batch_size = 1; - if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &pRegions[r])) + if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r])) goto handled; - if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &pRegions[r])) + if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r])) goto handled; /* Otherwise, we are copying subrects, so we fallback to copying * via shader and texel buffers and we try to batch the regions - * if possible. We can only batch copies if they target the same - * image subresource (so they have the same framebuffer spec). + * if possible. We can only batch copies if they have the same + * framebuffer spec, which is mostly determined by the image + * subresource of the region. 
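The batching walk implemented by the loop that follows now also compares depth extents for 3D images, not just the image subresource. A self-contained sketch of the same rule (VkBufferImageCopy2KHR comes with VK_KHR_copy_commands2):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

/* Count how many consecutive regions starting at `first` can be batched:
 * they must share the same image subresource and, for 3D images, the same
 * depth extent. Mirrors the loop in v3dv_CmdCopyBufferToImage2KHR below. */
static uint32_t
batchable_region_count(const VkBufferImageCopy2KHR *regions, uint32_t count,
                       uint32_t first, bool is_3d)
{
   uint32_t batch = 1;
   const VkImageSubresourceLayers *rsc = &regions[first].imageSubresource;
   for (uint32_t s = first + 1; s < count; s++) {
      if (memcmp(rsc, &regions[s].imageSubresource, sizeof(*rsc)) != 0)
         break;
      if (is_3d &&
          regions[s].imageExtent.depth != regions[first].imageExtent.depth)
         break;
      batch++;
   }
   return batch;
}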
*/ - const VkImageSubresourceLayers *rsc = &pRegions[r].imageSubresource; - if (image->type != VK_IMAGE_TYPE_3D) { - for (uint32_t s = r + 1; s < regionCount; s++) { - const VkImageSubresourceLayers *rsc_s = &pRegions[s].imageSubresource; - if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0) + const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource; + for (uint32_t s = r + 1; s < info->regionCount; s++) { + const VkImageSubresourceLayers *rsc_s = + &info->pRegions[s].imageSubresource; + + if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0) + break; + + /* For 3D images we also need to check the depth extent */ + if (image->vk.image_type == VK_IMAGE_TYPE_3D && + info->pRegions[s].imageExtent.depth != + info->pRegions[r].imageExtent.depth) { break; - batch_size++; } + + batch_size++; } if (copy_buffer_to_image_shader(cmd_buffer, image, buffer, - batch_size, &pRegions[r], true)) { + batch_size, &info->pRegions[r], true)) { goto handled; } @@ -4081,13 +2739,14 @@ v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, * slow it might not be worth it and we should instead put more effort * in handling more cases with the other paths. */ - if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[r])) { + if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, + &info->pRegions[r])) { batch_size = 1; goto handled; } if (copy_buffer_to_image_shader(cmd_buffer, image, buffer, - batch_size, &pRegions[r], false)) { + batch_size, &info->pRegions[r], false)) { goto handled; } @@ -4114,17 +2773,17 @@ static bool blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *dst, struct v3dv_image *src, - const VkImageBlit *region) + const VkImageBlit2KHR *region) { - assert(dst->samples == VK_SAMPLE_COUNT_1_BIT); - assert(src->samples == VK_SAMPLE_COUNT_1_BIT); + assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT); + assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT); /* Format must match */ - if (src->vk_format != dst->vk_format) + if (src->vk.format != dst->vk.format) return false; /* Destination can't be raster format */ - if (dst->tiling == VK_IMAGE_TILING_LINEAR) + if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR) return false; /* Source region must start at (0,0) */ @@ -4136,8 +2795,8 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, return false; const uint32_t dst_mip_level = region->dstSubresource.mipLevel; - const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level); - const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level); + const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level); + const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level); if (region->dstOffsets[1].x < dst_width - 1|| region->dstOffsets[1].y < dst_height - 1) { return false; @@ -4152,7 +2811,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, /* If the format is D24S8 both aspects need to be copied, since the TFU * can't be programmed to copy only one aspect of the image. */ - if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) { const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; if (region->dstSubresource.aspectMask != ds_aspects) @@ -4165,7 +2824,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, * compatible based on its texel size. 
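blit_tfu() only takes the TFU path when the blit is effectively a full-surface copy. A condensed checklist of the early-outs visible in this hunk; the struct fields are stand-ins for values pulled from the v3dv image wrappers and the blit region, and any checks outside the hunk are omitted:

#include <stdbool.h>
#include <vulkan/vulkan.h>

struct tfu_blit_check {
   VkFormat src_format, dst_format;
   VkImageTiling dst_tiling;
   bool src_starts_at_origin;    /* srcOffsets[0] == (0, 0) */
   bool dst_covers_full_level;   /* dstOffsets[1] reaches the mip level size */
   VkImageAspectFlags dst_aspects;
};

static bool
tfu_blit_possible(const struct tfu_blit_check *c)
{
   if (c->src_format != c->dst_format)
      return false;                            /* formats must match */
   if (c->dst_tiling == VK_IMAGE_TILING_LINEAR)
      return false;                            /* destination can't be raster */
   if (!c->src_starts_at_origin || !c->dst_covers_full_level)
      return false;
   /* D24S8 must copy both aspects: the TFU can't split them. */
   if (c->dst_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       c->dst_aspects != (VK_IMAGE_ASPECT_DEPTH_BIT |
                          VK_IMAGE_ASPECT_STENCIL_BIT))
      return false;
   return true;
}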
*/ const struct v3dv_format *format = - v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo, + v3dv_get_compatible_tfu_format(cmd_buffer->device, dst->cpp, NULL); /* Emit a TFU job for each layer to blit */ @@ -4175,7 +2834,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, uint32_t min_dst_layer; uint32_t max_dst_layer; bool dst_mirror_z = false; - if (dst->type == VK_IMAGE_TYPE_3D) { + if (dst->vk.image_type == VK_IMAGE_TYPE_3D) { compute_blit_3d_layers(region->dstOffsets, &min_dst_layer, &max_dst_layer, &dst_mirror_z); @@ -4187,7 +2846,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, uint32_t min_src_layer; uint32_t max_src_layer; bool src_mirror_z = false; - if (src->type == VK_IMAGE_TYPE_3D) { + if (src->vk.image_type == VK_IMAGE_TYPE_3D) { compute_blit_3d_layers(region->srcOffsets, &min_src_layer, &max_src_layer, &src_mirror_z); @@ -4212,10 +2871,10 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer, dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i; const uint32_t src_layer = src_mirror_z ? max_src_layer - i - 1: min_src_layer + i; - emit_tfu_job(cmd_buffer, - dst, dst_mip_level, dst_layer, - src, src_mip_level, src_layer, - dst_width, dst_height, format); + v3dv_X(cmd_buffer->device, meta_emit_tfu_job) + (cmd_buffer, dst, dst_mip_level, dst_layer, + src, src_mip_level, src_layer, + dst_width, dst_height, format); } return true; @@ -4657,6 +3316,7 @@ get_color_blit_fs(struct v3dv_device *device, if (dst_bit_size >= src_bit_size) continue; + assert(dst_bit_size > 0); if (util_format_is_pure_uint(dst_pformat)) { nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1); c[i] = nir_umin(&b, c[i], max); @@ -4679,6 +3339,7 @@ static bool create_pipeline(struct v3dv_device *device, struct v3dv_render_pass *pass, struct nir_shader *vs_nir, + struct nir_shader *gs_nir, struct nir_shader *fs_nir, const VkPipelineVertexInputStateCreateInfo *vi_state, const VkPipelineDepthStencilStateCreateInfo *ds_state, @@ -4688,12 +3349,15 @@ create_pipeline(struct v3dv_device *device, VkPipeline *pipeline) { struct vk_shader_module vs_m; + struct vk_shader_module gs_m; struct vk_shader_module fs_m; + uint32_t num_stages = gs_nir ? 
3 : 2; + v3dv_shader_module_internal_init(device, &vs_m, vs_nir); v3dv_shader_module_internal_init(device, &fs_m, fs_nir); - VkPipelineShaderStageCreateInfo stages[2] = { + VkPipelineShaderStageCreateInfo stages[3] = { { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_VERTEX_BIT, @@ -4706,12 +3370,23 @@ create_pipeline(struct v3dv_device *device, .module = vk_shader_module_to_handle(&fs_m), .pName = "main", }, + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_GEOMETRY_BIT, + .module = VK_NULL_HANDLE, + .pName = "main", + }, }; + if (gs_nir) { + v3dv_shader_module_internal_init(device, &gs_m, gs_nir); + stages[2].module = vk_shader_module_to_handle(&gs_m); + } + VkGraphicsPipelineCreateInfo info = { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .stageCount = 2, + .stageCount = num_stages, .pStages = stages, .pVertexInputState = vi_state, @@ -4863,7 +3538,7 @@ create_blit_pipeline(struct v3dv_device *device, return create_pipeline(device, pass, - vs_nir, fs_nir, + vs_nir, NULL, fs_nir, &vi_state, &ds_state, &cb_state, @@ -5096,7 +3771,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, VkFormat src_format, VkColorComponentFlags cmask, VkComponentMapping *cswizzle, - const VkImageBlit *_region, + const VkImageBlit2KHR *_region, VkFilter filter, bool dst_is_padded_image) { @@ -5107,14 +3782,14 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, /* We don't support rendering to linear depth/stencil, this should have * been rewritten to a compatible color blit by the caller. */ - assert(dst->tiling != VK_IMAGE_TILING_LINEAR || + assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR || !vk_format_is_depth_or_stencil(dst_format)); /* Can't sample from linear images */ - if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D) + if (src->vk.tiling == VK_IMAGE_TILING_LINEAR && src->vk.image_type != VK_IMAGE_TYPE_1D) return false; - VkImageBlit region = *_region; + VkImageBlit2KHR region = *_region; /* Rewrite combined D/S blits to compatible color blits */ if (vk_format_is_depth_or_stencil(dst_format)) { assert(src_format == dst_format); @@ -5169,23 +3844,23 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, * need to apply those same semantics here when we compute the size of the * destination image level. 
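create_pipeline() now sizes its shader-stage array for three stages but only counts the geometry slot when a GS module exists, as the hunk above shows. A sketch of the pattern with plain Vulkan handles in place of the driver's vk_shader_module wrappers:

#include <vulkan/vulkan.h>

/* GS goes in the third slot; stageCount covers it only when present. */
static void
fill_meta_stages(VkPipelineShaderStageCreateInfo stages[3],
                 VkShaderModule vs, VkShaderModule fs, VkShaderModule gs,
                 uint32_t *stage_count)
{
   stages[0] = (VkPipelineShaderStageCreateInfo){
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_VERTEX_BIT, .module = vs, .pName = "main",
   };
   stages[1] = (VkPipelineShaderStageCreateInfo){
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_FRAGMENT_BIT, .module = fs, .pName = "main",
   };
   stages[2] = (VkPipelineShaderStageCreateInfo){
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_GEOMETRY_BIT, .module = gs, .pName = "main",
   };
   *stage_count = (gs != VK_NULL_HANDLE) ? 3 : 2;
}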
*/ - const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format); - const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format); - const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format); - const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format); + const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format); + const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format); + const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format); + const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format); const uint32_t dst_level_w = - u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w), + u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w), region.dstSubresource.mipLevel); const uint32_t dst_level_h = - u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h), + u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h), region.dstSubresource.mipLevel); const uint32_t src_level_w = - u_minify(src->extent.width, region.srcSubresource.mipLevel); + u_minify(src->vk.extent.width, region.srcSubresource.mipLevel); const uint32_t src_level_h = - u_minify(src->extent.height, region.srcSubresource.mipLevel); + u_minify(src->vk.extent.height, region.srcSubresource.mipLevel); const uint32_t src_level_d = - u_minify(src->extent.depth, region.srcSubresource.mipLevel); + u_minify(src->vk.extent.depth, region.srcSubresource.mipLevel); uint32_t dst_x, dst_y, dst_w, dst_h; bool dst_mirror_x, dst_mirror_y; @@ -5204,7 +3879,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, uint32_t min_dst_layer; uint32_t max_dst_layer; bool dst_mirror_z = false; - if (dst->type != VK_IMAGE_TYPE_3D) { + if (dst->vk.image_type != VK_IMAGE_TYPE_3D) { min_dst_layer = region.dstSubresource.baseArrayLayer; max_dst_layer = min_dst_layer + region.dstSubresource.layerCount; } else { @@ -5216,7 +3891,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, uint32_t min_src_layer; uint32_t max_src_layer; bool src_mirror_z = false; - if (src->type != VK_IMAGE_TYPE_3D) { + if (src->vk.image_type != VK_IMAGE_TYPE_3D) { min_src_layer = region.srcSubresource.baseArrayLayer; max_src_layer = min_src_layer + region.srcSubresource.layerCount; } else { @@ -5238,7 +3913,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, (float)(src_y + src_h), }; - if (src->samples == VK_SAMPLE_COUNT_1_BIT) { + if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) { coords[0] /= (float)src_level_w; coords[1] /= (float)src_level_h; coords[2] /= (float)src_level_w; @@ -5270,8 +3945,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, /* Get the blit pipeline */ struct v3dv_meta_blit_pipeline *pipeline = NULL; bool ok = get_blit_pipeline(cmd_buffer->device, - dst_format, src_format, cmask, src->type, - dst->samples, src->samples, + dst_format, src_format, cmask, src->vk.image_type, + dst->vk.samples, src->vk.samples, &pipeline); if (!ok) return handled; @@ -5341,7 +4016,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, VkImageViewCreateInfo dst_image_view_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = v3dv_image_to_handle(dst), - .viewType = v3dv_image_type_to_view_type(dst->type), + .viewType = v3dv_image_type_to_view_type(dst->vk.image_type), .format = dst_format, .subresourceRange = { .aspectMask = aspects, @@ -5399,7 +4074,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, VkImageViewCreateInfo src_image_view_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = v3dv_image_to_handle(src), 
- .viewType = v3dv_image_type_to_view_type(src->type), + .viewType = v3dv_image_type_to_view_type(src->vk.image_type), .format = src_format, .components = *cswizzle, .subresourceRange = { @@ -5407,7 +4082,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, .baseMipLevel = region.srcSubresource.mipLevel, .levelCount = 1, .baseArrayLayer = - src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i, + src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i, .layerCount = 1 }, }; @@ -5457,8 +4132,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, v3dv_render_pass_from_handle(pipeline->pass); can_skip_tlb_load = cmask == full_cmask && - v3dv_subpass_area_is_tile_aligned(&render_area, framebuffer, - pipeline_pass, 0); + v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area, + framebuffer, pipeline_pass, 0); } /* Record blit */ @@ -5481,7 +4156,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer, * based on the ratio of the depth of the source and the destination * images, picking the coordinate in the middle of each step. */ - if (src->type == VK_IMAGE_TYPE_3D) { + if (src->vk.image_type == VK_IMAGE_TYPE_3D) { tex_coords[4] = !mirror_z ? (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d : @@ -5505,150 +4180,58 @@ fail: return handled; } -void -v3dv_CmdBlitImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageBlit* pRegions, - VkFilter filter) +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, + const VkBlitImageInfo2KHR *pBlitImageInfo) { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_image, src, srcImage); - V3DV_FROM_HANDLE(v3dv_image, dst, dstImage); + V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage); + V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage); /* This command can only happen outside a render pass */ assert(cmd_buffer->state.pass == NULL); assert(cmd_buffer->state.job == NULL); /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */ - assert(dst->samples == VK_SAMPLE_COUNT_1_BIT && - src->samples == VK_SAMPLE_COUNT_1_BIT); + assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT && + src->vk.samples == VK_SAMPLE_COUNT_1_BIT); /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */ - assert(!vk_format_is_compressed(dst->vk_format)); + assert(!vk_format_is_compressed(dst->vk.format)); - for (uint32_t i = 0; i < regionCount; i++) { - if (blit_tfu(cmd_buffer, dst, src, &pRegions[i])) + for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) { + if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i])) continue; if (blit_shader(cmd_buffer, - dst, dst->vk_format, - src, src->vk_format, + dst, dst->vk.format, + src, src->vk.format, 0, NULL, - &pRegions[i], filter, true)) { + &pBlitImageInfo->pRegions[i], + pBlitImageInfo->filter, true)) { continue; } unreachable("Unsupported blit operation"); } } -static void -emit_resolve_image_layer_per_tile_list(struct v3dv_job *job, - struct framebuffer_data *framebuffer, - struct v3dv_image *dst, - struct v3dv_image *src, - uint32_t layer_offset, - const VkImageResolve *region) -{ - struct v3dv_cl *cl = &job->indirect; - v3dv_cl_ensure_space(cl, 200, 1); - v3dv_return_if_oom(NULL, job); - - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - - assert((src->type != VK_IMAGE_TYPE_3D && - layer_offset < 
region->srcSubresource.layerCount) || - layer_offset < src->extent.depth); - - const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ? - region->srcSubresource.baseArrayLayer + layer_offset : - region->srcOffset.z + layer_offset; - - emit_image_load(cl, framebuffer, src, - region->srcSubresource.aspectMask, - src_layer, - region->srcSubresource.mipLevel, - false, false); - - cl_emit(cl, END_OF_LOADS, end); - - cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - assert((dst->type != VK_IMAGE_TYPE_3D && - layer_offset < region->dstSubresource.layerCount) || - layer_offset < dst->extent.depth); - - const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ? - region->dstSubresource.baseArrayLayer + layer_offset : - region->dstOffset.z + layer_offset; - - emit_image_store(cl, framebuffer, dst, - region->dstSubresource.aspectMask, - dst_layer, - region->dstSubresource.mipLevel, - false, false, true); - - cl_emit(cl, END_OF_TILE_MARKER, end); - - cl_emit(cl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(cl); - } -} - -static void -emit_resolve_image_layer(struct v3dv_job *job, - struct v3dv_image *dst, - struct v3dv_image *src, - struct framebuffer_data *framebuffer, - uint32_t layer, - const VkImageResolve *region) -{ - emit_frame_setup(job, layer, NULL); - emit_resolve_image_layer_per_tile_list(job, framebuffer, - dst, src, layer, region); - emit_supertile_coordinates(job, framebuffer); -} - -static void -emit_resolve_image_rcl(struct v3dv_job *job, - struct v3dv_image *dst, - struct v3dv_image *src, - struct framebuffer_data *framebuffer, - const VkImageResolve *region) -{ - struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); - v3dv_return_if_oom(NULL, job); - - for (int layer = 0; layer < job->frame_tiling.layers; layer++) - emit_resolve_image_layer(job, dst, src, framebuffer, layer, region); - cl_emit(rcl, END_OF_RENDERING, end); -} - static bool resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *dst, struct v3dv_image *src, - const VkImageResolve *region) + const VkImageResolve2KHR *region) { - if (!can_use_tlb(src, &region->srcOffset, NULL) || - !can_use_tlb(dst, &region->dstOffset, NULL)) { + if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) || + !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) { return false; } - if (!v3dv_format_supports_tlb_resolve(src->format)) + if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format)) return false; const VkFormat fb_format = src->vk.format; uint32_t num_layers; - if (dst->type != VK_IMAGE_TYPE_3D) + if (dst->vk.image_type != VK_IMAGE_TYPE_3D) num_layers = region->dstSubresource.layerCount; else num_layers = region->extent.depth; @@ -5659,24 +4242,26 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer, if (!job) return true; - const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format); - const uint32_t block_h = vk_format_get_blockheight(dst->vk_format); + const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format); + const uint32_t block_h = vk_format_get_blockheight(dst->vk.format); const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w); const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h); uint32_t internal_type, internal_bpp; - get_internal_type_bpp_for_image_aspects(fb_format, - region->srcSubresource.aspectMask, - &internal_type, &internal_bpp); + v3dv_X(cmd_buffer->device,
get_internal_type_bpp_for_image_aspects) (fb_format, region->srcSubresource.aspectMask, &internal_type, &internal_bpp); - v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true); + v3dv_job_start_frame(job, width, height, num_layers, false, + 1, internal_bpp, true); - struct framebuffer_data framebuffer; - setup_framebuffer_data(&framebuffer, fb_format, internal_type, - &job->frame_tiling); + struct v3dv_meta_framebuffer framebuffer; + v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format, + internal_type, &job->frame_tiling); - v3dv_job_emit_binning_flush(job); - emit_resolve_image_rcl(job, dst, src, &framebuffer, region); + v3dv_X(job->device, job_emit_binning_flush)(job); + v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src, + &framebuffer, region); v3dv_cmd_buffer_finish_job(cmd_buffer); return true; @@ -5686,9 +4271,10 @@ static bool resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_image *dst, struct v3dv_image *src, - const VkImageResolve *region) + const VkImageResolve2KHR *region) { - const VkImageBlit blit_region = { + const VkImageBlit2KHR blit_region = { + .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR, .srcSubresource = region->srcSubresource, .srcOffsets = { region->srcOffset, @@ -5707,36 +4293,32 @@ resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer, }, }; return blit_shader(cmd_buffer, - dst, dst->vk_format, - src, src->vk_format, + dst, dst->vk.format, + src, src->vk.format, 0, NULL, &blit_region, VK_FILTER_NEAREST, true); } -void -v3dv_CmdResolveImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage dstImage, - VkImageLayout dstImageLayout, - uint32_t regionCount, - const VkImageResolve *pRegions) +VKAPI_ATTR void VKAPI_CALL +v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer, + const VkResolveImageInfo2KHR *info) + { V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); - V3DV_FROM_HANDLE(v3dv_image, src, srcImage); - V3DV_FROM_HANDLE(v3dv_image, dst, dstImage); + V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage); + V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage); /* This command can only happen outside a render pass */ assert(cmd_buffer->state.pass == NULL); assert(cmd_buffer->state.job == NULL); - assert(src->samples == VK_SAMPLE_COUNT_4_BIT); - assert(dst->samples == VK_SAMPLE_COUNT_1_BIT); + assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT); + assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT); - for (uint32_t i = 0; i < regionCount; i++) { - if (resolve_image_tlb(cmd_buffer, dst, src, &pRegions[i])) + for (uint32_t i = 0; i < info->regionCount; i++) { + if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i])) continue; - if (resolve_image_blit(cmd_buffer, dst, src, &pRegions[i])) + if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i])) continue; unreachable("Unsupported multisample resolve operation"); } } diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pass.c b/lib/mesa/src/broadcom/vulkan/v3dv_pass.c index 0f03dfe67..1b03c0d79 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_pass.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_pass.c @@ -22,7 +22,6 @@ */ #include "v3dv_private.h" -#include "vk_format_info.h" static uint32_t num_subpass_attachments(const VkSubpassDescription *desc) @@ -34,18 +33,26 @@ num_subpass_attachments(const VkSubpassDescription *desc) } static void -set_use_tlb_resolve(struct v3dv_render_pass_attachment *att) +set_use_tlb_resolve(struct v3dv_device *device, + struct v3dv_render_pass_attachment *att) { - const
struct v3dv_format *format = v3dv_get_format(att->desc.format); - att->use_tlb_resolve = v3dv_format_supports_tlb_resolve(format); + const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format); + att->use_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format); } static void -pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass) +pass_find_subpass_range_for_attachments(struct v3dv_device *device, + struct v3dv_render_pass *pass) { for (uint32_t i = 0; i < pass->attachment_count; i++) { pass->attachments[i].first_subpass = pass->subpass_count - 1; pass->attachments[i].last_subpass = 0; + if (pass->multiview_enabled) { + for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) { + pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1; + pass->attachments[i].views[j].last_subpass = 0; + } + } } for (uint32_t i = 0; i < pass->subpass_count; i++) { @@ -56,14 +63,26 @@ pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass) if (attachment_idx == VK_ATTACHMENT_UNUSED) continue; - if (i < pass->attachments[attachment_idx].first_subpass) - pass->attachments[attachment_idx].first_subpass = i; - if (i > pass->attachments[attachment_idx].last_subpass) - pass->attachments[attachment_idx].last_subpass = i; + struct v3dv_render_pass_attachment *att = + &pass->attachments[attachment_idx]; + + if (i < att->first_subpass) + att->first_subpass = i; + if (i > att->last_subpass) + att->last_subpass = i; + + uint32_t view_mask = subpass->view_mask; + while (view_mask) { + uint32_t view_index = u_bit_scan(&view_mask); + if (i < att->views[view_index].first_subpass) + att->views[view_index].first_subpass = i; + if (i > att->views[view_index].last_subpass) + att->views[view_index].last_subpass = i; + } if (subpass->resolve_attachments && subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) { - set_use_tlb_resolve(&pass->attachments[attachment_idx]); + set_use_tlb_resolve(device, att); } } @@ -100,7 +119,7 @@ pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass) } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateRenderPass(VkDevice _device, const VkRenderPassCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -111,6 +130,10 @@ v3dv_CreateRenderPass(VkDevice _device, assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); + const VkRenderPassMultiviewCreateInfo *multiview_info = + vk_find_struct_const(pCreateInfo->pNext, RENDER_PASS_MULTIVIEW_CREATE_INFO); + bool multiview_enabled = multiview_info && multiview_info->subpassCount > 0; + size_t size = sizeof(*pass); size_t subpasses_offset = size; size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); @@ -120,8 +143,9 @@ v3dv_CreateRenderPass(VkDevice _device, pass = vk_object_zalloc(&device->vk, pAllocator, size, VK_OBJECT_TYPE_RENDER_PASS); if (pass == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + pass->multiview_enabled = multiview_enabled; pass->attachment_count = pCreateInfo->attachmentCount; pass->attachments = (void *) pass + attachments_offset; pass->subpass_count = pCreateInfo->subpassCount; @@ -144,7 +168,7 @@ v3dv_CreateRenderPass(VkDevice _device, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pass->subpass_attachments == NULL) { vk_object_free(&device->vk, pAllocator, pass); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } } else { 
pass->subpass_attachments = NULL; @@ -157,6 +181,8 @@ v3dv_CreateRenderPass(VkDevice _device, subpass->input_count = desc->inputAttachmentCount; subpass->color_count = desc->colorAttachmentCount; + if (multiview_enabled) + subpass->view_mask = multiview_info->pViewMasks[i]; if (desc->inputAttachmentCount > 0) { subpass->input_attachments = p; @@ -175,16 +201,10 @@ v3dv_CreateRenderPass(VkDevice _device, p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - const uint32_t attachment_idx = - desc->pColorAttachments[j].attachment; subpass->color_attachments[j] = (struct v3dv_subpass_attachment) { - .attachment = attachment_idx, + .attachment = desc->pColorAttachments[j].attachment, .layout = desc->pColorAttachments[j].layout, }; - if (attachment_idx != VK_ATTACHMENT_UNUSED) { - VkFormat format = pass->attachments[attachment_idx].desc.format; - subpass->has_srgb_rt |= vk_format_is_srgb(format); - } } } @@ -230,7 +250,7 @@ v3dv_CreateRenderPass(VkDevice _device, } } - pass_find_subpass_range_for_attachments(pass); + pass_find_subpass_range_for_attachments(device, pass); /* FIXME: handle subpass dependencies */ @@ -239,7 +259,7 @@ v3dv_CreateRenderPass(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyRenderPass(VkDevice _device, VkRenderPass _pass, const VkAllocationCallbacks *pAllocator) @@ -255,7 +275,8 @@ v3dv_DestroyRenderPass(VkDevice _device, } static void -subpass_get_granularity(struct v3dv_render_pass *pass, +subpass_get_granularity(struct v3dv_device *device, + struct v3dv_render_pass *pass, uint32_t subpass_idx, VkExtent2D *granularity) { @@ -283,11 +304,11 @@ subpass_get_granularity(struct v3dv_render_pass *pass, continue; const VkAttachmentDescription *desc = &pass->attachments[attachment_idx].desc; - const struct v3dv_format *format = v3dv_get_format(desc->format); + const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format); uint32_t internal_type, internal_bpp; - v3dv_get_internal_type_bpp_for_output_format(format->rt_type, - &internal_type, - &internal_bpp); + v3dv_X(device, get_internal_type_bpp_for_output_format) + (format->rt_type, &internal_type, &internal_bpp); + max_internal_bpp = MAX2(max_internal_bpp, internal_bpp); } @@ -306,12 +327,13 @@ subpass_get_granularity(struct v3dv_render_pass *pass, }; } -void -v3dv_GetRenderAreaGranularity(VkDevice device, +VKAPI_ATTR void VKAPI_CALL +v3dv_GetRenderAreaGranularity(VkDevice _device, VkRenderPass renderPass, VkExtent2D *pGranularity) { V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass); + V3DV_FROM_HANDLE(v3dv_device, device, _device); *pGranularity = (VkExtent2D) { .width = 64, @@ -320,7 +342,7 @@ v3dv_GetRenderAreaGranularity(VkDevice device, for (uint32_t i = 0; i < pass->subpass_count; i++) { VkExtent2D sg; - subpass_get_granularity(pass, i, &sg); + subpass_get_granularity(device, pass, i, &sg); pGranularity->width = MIN2(pGranularity->width, sg.width); pGranularity->height = MIN2(pGranularity->height, sg.height); } @@ -348,7 +370,8 @@ v3dv_GetRenderAreaGranularity(VkDevice device, * In that case, we can't flag the area as being aligned. 
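For multiview, every set bit in a subpass's view mask now gets its own first/last-subpass range, tracked with the same min/max update the pass already used per attachment. A self-contained sketch of that walk; the bit-scan helper mirrors Mesa's u_bit_scan():

#include <stdint.h>

static inline uint32_t
bit_scan(uint32_t *mask)
{
   uint32_t i = (uint32_t)__builtin_ctz(*mask);  /* index of lowest set bit */
   *mask &= *mask - 1;                           /* clear it */
   return i;
}

struct view_range { uint32_t first_subpass, last_subpass; };

/* Widen each referenced view's subpass range to include subpass_idx. */
static void
track_view_ranges(struct view_range *views, uint32_t view_mask,
                  uint32_t subpass_idx)
{
   while (view_mask) {
      uint32_t v = bit_scan(&view_mask);
      if (subpass_idx < views[v].first_subpass)
         views[v].first_subpass = subpass_idx;
      if (subpass_idx > views[v].last_subpass)
         views[v].last_subpass = subpass_idx;
   }
}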
*/ bool -v3dv_subpass_area_is_tile_aligned(const VkRect2D *area, +v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device, + const VkRect2D *area, struct v3dv_framebuffer *fb, struct v3dv_render_pass *pass, uint32_t subpass_idx) @@ -356,7 +379,7 @@ v3dv_subpass_area_is_tile_aligned(const VkRect2D *area, assert(subpass_idx < pass->subpass_count); VkExtent2D granularity; - subpass_get_granularity(pass, subpass_idx, &granularity); + subpass_get_granularity(device, pass, subpass_idx, &granularity); return area->offset.x % granularity.width == 0 && area->offset.y % granularity.height == 0 && diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c index 35cf35592..daa6c7550 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c @@ -34,10 +34,13 @@ #include "nir/nir_serialize.h" #include "util/u_atomic.h" +#include "util/u_prim.h" +#include "util/os_time.h" #include "vulkan/util/vk_format.h" -#include "broadcom/cle/v3dx_pack.h" +static VkResult +compute_vpm_config(struct v3dv_pipeline *pipeline); void v3dv_print_v3d_key(struct v3d_key *key, @@ -120,11 +123,15 @@ pipeline_free_stages(struct v3dv_device *device, */ destroy_pipeline_stage(device, pipeline->vs, pAllocator); destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator); + destroy_pipeline_stage(device, pipeline->gs, pAllocator); + destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator); destroy_pipeline_stage(device, pipeline->fs, pAllocator); destroy_pipeline_stage(device, pipeline->cs, pAllocator); pipeline->vs = NULL; pipeline->vs_bin = NULL; + pipeline->gs = NULL; + pipeline->gs_bin = NULL; pipeline->fs = NULL; pipeline->cs = NULL; } @@ -157,7 +164,7 @@ v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, vk_object_free(&device->vk, pAllocator, pipeline); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) @@ -172,20 +179,27 @@ v3dv_DestroyPipeline(VkDevice _device, } static const struct spirv_to_nir_options default_spirv_options = { - .caps = { false }, + .caps = { + .device_group = true, + .multiview = true, + .subgroup_basic = true, + .variable_pointers = true, + }, .ubo_addr_format = nir_address_format_32bit_index_offset, .ssbo_addr_format = nir_address_format_32bit_index_offset, .phys_ssbo_addr_format = nir_address_format_64bit_global, .push_const_addr_format = nir_address_format_logical, .shared_addr_format = nir_address_format_32bit_offset, - .frag_coord_is_sysval = false, }; const nir_shader_compiler_options v3dv_nir_options = { - .lower_add_sat = true, + .lower_uadd_sat = true, + .lower_iadd_sat = true, .lower_all_io_to_temps = true, .lower_extract_byte = true, .lower_extract_word = true, + .lower_insert_byte = true, + .lower_insert_word = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, .lower_bitfield_reverse = true, @@ -228,11 +242,16 @@ const nir_shader_compiler_options v3dv_nir_options = { .lower_wpos_pntc = true, .lower_rotate = true, .lower_to_scalar = true, + .lower_device_index_to_zero = true, .has_fsub = true, .has_isub = true, .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic * needs to be supported */ .lower_interpolate_at = true, + .max_unroll_iterations = 16, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp), + .divergence_analysis_options = + nir_divergence_multiple_workgroup_per_compute_subgroup }; const 
nir_shader_compiler_options * @@ -250,9 +269,7 @@ v3dv_pipeline_get_nir_options(void) }) static void -nir_optimize(nir_shader *nir, - struct v3dv_pipeline_stage *stage, - bool allow_copies) +nir_optimize(nir_shader *nir, bool allow_copies) { bool progress; @@ -276,7 +293,7 @@ nir_optimize(nir_shader *nir, OPT(nir_lower_alu_to_scalar, NULL, NULL); OPT(nir_copy_prop); - OPT(nir_lower_phis_to_scalar); + OPT(nir_lower_phis_to_scalar, false); OPT(nir_copy_prop); OPT(nir_opt_dce); @@ -313,9 +330,29 @@ nir_optimize(nir_shader *nir, } static void -preprocess_nir(nir_shader *nir, - struct v3dv_pipeline_stage *stage) +preprocess_nir(nir_shader *nir) { + /* We have to lower away local variable initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_opt_deref); + + /* Pick off the single entrypoint that we want */ + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (func->is_entrypoint) + func->name = ralloc_strdup(func, "main"); + else + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + /* Make sure we lower variable initializers on output variables so that * nir_remove_dead_variables below sees the corresponding stores */ @@ -353,7 +390,7 @@ preprocess_nir(nir_shader *nir, nir_var_shader_out | nir_var_system_value | nir_var_mem_shared, NULL); - NIR_PASS_V(nir, nir_propagate_invariant); + NIR_PASS_V(nir, nir_propagate_invariant, false); NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, false); @@ -369,15 +406,14 @@ preprocess_nir(nir_shader *nir, NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp); - nir_optimize(nir, stage, true); + nir_optimize(nir, true); NIR_PASS_V(nir, nir_lower_load_const_to_scalar); /* Lower a bunch of stuff */ NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | - nir_var_shader_out, UINT32_MAX); + NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX); NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, 2); @@ -389,49 +425,7 @@ preprocess_nir(nir_shader *nir, NIR_PASS_V(nir, nir_lower_frexp); /* Get rid of split copies */ - nir_optimize(nir, stage, false); -} - -/* FIXME: This is basically the same code at anv, tu and radv. Move to common - * place? 
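nir_optimize() keeps the usual Mesa shape: a fixed list of passes is re-run until none of them reports progress. A sketch of that loop with a few of the passes named in this hunk; the real list is much longer, and NIR_PASS comes from Mesa's internal nir.h:

#include "nir.h"  /* Mesa-internal header */

static void
optimize_until_fixpoint(nir_shader *nir)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_cse);
   } while (progress);  /* stop once a full sweep changes nothing */
}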
- */ -static struct nir_spirv_specialization* -vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info, - uint32_t *out_num_spec_entries) -{ - if (spec_info == NULL || spec_info->mapEntryCount == 0) - return NULL; - - uint32_t num_spec_entries = spec_info->mapEntryCount; - struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries)); - - for (uint32_t i = 0; i < num_spec_entries; i++) { - VkSpecializationMapEntry entry = spec_info->pMapEntries[i]; - const void *data = spec_info->pData + entry.offset; - assert(data + entry.size <= spec_info->pData + spec_info->dataSize); - - spec_entries[i].id = spec_info->pMapEntries[i].constantID; - switch (entry.size) { - case 8: - spec_entries[i].value.u64 = *(const uint64_t *)data; - break; - case 4: - spec_entries[i].value.u32 = *(const uint32_t *)data; - break; - case 2: - spec_entries[i].value.u16 = *(const uint16_t *)data; - break; - case 1: - spec_entries[i].value.u8 = *(const uint8_t *)data; - break; - default: - assert(!"Invalid spec constant size"); - break; - } - } - - *out_num_spec_entries = num_spec_entries; - return spec_entries; + nir_optimize(nir, false); } static nir_shader * @@ -445,7 +439,7 @@ shader_module_compile_to_nir(struct v3dv_device *device, uint32_t *spirv = (uint32_t *) stage->module->data; assert(stage->module->size % 4 == 0); - if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV) + if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)) v3dv_print_spirv(stage->module->data, stage->module->size, stderr); uint32_t num_spec_entries = 0; @@ -472,37 +466,23 @@ shader_module_compile_to_nir(struct v3dv_device *device, } assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage)); - if (V3D_DEBUG & (V3D_DEBUG_NIR | - v3d_debug_flag_for_shader_stage(stage->stage))) { + const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { + .frag_coord = true, + .point_coord = true, + }; + NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); + + if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | + v3d_debug_flag_for_shader_stage( + broadcom_shader_stage_to_gl(stage->stage))))) { fprintf(stderr, "Initial form: %s prog %d NIR:\n", - gl_shader_stage_name(stage->stage), + broadcom_shader_stage_name(stage->stage), stage->program_id); nir_print_shader(nir, stderr); fprintf(stderr, "\n"); } - /* We have to lower away local variable initializers right before we - * inline functions. That way they get properly initialized at the top - * of the function and not at the top of its caller. 
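/* The helper removed above unpacked VkSpecializationInfo entries into
 * typed constants keyed by entry size; the FIXME asked for it to move to
 * shared code, and dropping the local copy here suggests a common Vulkan
 * helper took over. The core of that size dispatch, as a portable
 * standalone sketch (memcpy instead of the original pointer casts):
 */
#include <stdint.h>
#include <string.h>

union spec_value {
   uint64_t u64;
   uint32_t u32;
   uint16_t u16;
   uint8_t  u8;
};

static void
read_spec_constant(union spec_value *out, const void *data, size_t size)
{
   switch (size) {   /* Vulkan specialization constants are 1, 2, 4 or 8 bytes */
   case 8: memcpy(&out->u64, data, 8); break;
   case 4: memcpy(&out->u32, data, 4); break;
   case 2: memcpy(&out->u16, data, 2); break;
   case 1: memcpy(&out->u8,  data, 1); break;
   default: break;   /* invalid per the spec; the original asserted here */
   }
}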
- */ - NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); - NIR_PASS_V(nir, nir_lower_returns); - NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_opt_deref); - - /* Pick off the single entrypoint that we want */ - foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (func->is_entrypoint) - func->name = ralloc_strdup(func, "main"); - else - exec_node_remove(&func->node); - } - assert(exec_list_length(&nir->functions) == 1); - - /* Vulkan uses the separate-shader linking model */ - nir->info.separate_shader = true; - - preprocess_nir(nir, stage); + preprocess_nir(nir); return nir; } @@ -567,11 +547,46 @@ lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, instr->intrinsic = nir_intrinsic_load_uniform; } +static struct v3dv_descriptor_map* +pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline, + VkDescriptorType desc_type, + gl_shader_stage gl_stage, + bool is_sampler) +{ + enum broadcom_shader_stage broadcom_stage = + gl_shader_stage_to_broadcom(gl_stage); + + assert(pipeline->shared_data && + pipeline->shared_data->maps[broadcom_stage]); + + switch(desc_type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + return &pipeline->shared_data->maps[broadcom_stage]->sampler_map; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + return &pipeline->shared_data->maps[broadcom_stage]->texture_map; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return is_sampler ? + &pipeline->shared_data->maps[broadcom_stage]->sampler_map : + &pipeline->shared_data->maps[broadcom_stage]->texture_map; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + return &pipeline->shared_data->maps[broadcom_stage]->ubo_map; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map; + default: + unreachable("Descriptor type unknown or not having a descriptor map"); + } +} + /* Gathers info from the intrinsic (set and binding) and then lowers it so it * could be used by the v3d_compiler */ static void lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr, + nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout) { @@ -585,13 +600,13 @@ lower_vulkan_resource_index(nir_builder *b, struct v3dv_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; unsigned index = 0; + const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr); - switch (nir_intrinsic_desc_type(instr)) { + switch (desc_type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { struct v3dv_descriptor_map *descriptor_map = - nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ? 
- &pipeline->shared_data->ubo_map : &pipeline->shared_data->ssbo_map; + pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false); if (!const_val) unreachable("non-constant vulkan_resource_index array index"); @@ -601,7 +616,7 @@ lower_vulkan_resource_index(nir_builder *b, binding_layout->array_size, 32 /* return_size: doesn't really apply for this case */); - if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { /* skip index 0 which is used for push constants */ index++; } @@ -614,13 +629,11 @@ lower_vulkan_resource_index(nir_builder *b, } /* Since we use the deref pass, both vulkan_resource_index and - * vulkan_load_descriptor returns a vec2. But for the index the backend - * expect just one scalar (like with get_ssbo_size), so lets return here - * just it. Then on load_descriptor we would recreate the vec2, keeping the - * second component (unused right now) to zero. + * vulkan_load_descriptor return a vec2 providing an index and + * offset. Our backend compiler only cares about the index part. */ nir_ssa_def_rewrite_uses(&instr->dest.ssa, - nir_imm_int(b, index)); + nir_imm_ivec2(b, index, 0)); nir_instr_remove(&instr->instr); } @@ -629,6 +642,7 @@ lower_vulkan_resource_index(nir_builder *b, */ static uint8_t lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, + nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout) { @@ -704,11 +718,17 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, deref->var->data.index + base_index : base_index; - uint8_t return_size = relaxed_precision || instr->is_shadow ? 16 : 32; + uint8_t return_size; + if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT)) + return_size = 16; + else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT)) + return_size = 32; + else + return_size = relaxed_precision || instr->is_shadow ? 16 : 32; - struct v3dv_descriptor_map *map = is_sampler ? 
- &pipeline->shared_data->sampler_map : - &pipeline->shared_data->texture_map; + struct v3dv_descriptor_map *map = + pipeline_get_descriptor_map(pipeline, binding_layout->type, + shader->info.stage, is_sampler); int desc_index = descriptor_map_add(map, deref->var->data.descriptor_set, @@ -727,6 +747,7 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, static bool lower_sampler(nir_builder *b, nir_tex_instr *instr, + nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout) { @@ -736,13 +757,14 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr, nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); if (texture_idx >= 0) - return_size = lower_tex_src_to_offset(b, instr, texture_idx, pipeline, layout); + return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader, + pipeline, layout); int sampler_idx = nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); if (sampler_idx >= 0) - lower_tex_src_to_offset(b, instr, sampler_idx, pipeline, layout); + lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout); if (texture_idx < 0 && sampler_idx < 0) return false; @@ -762,6 +784,7 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr, static void lower_image_deref(nir_builder *b, nir_intrinsic_instr *instr, + nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout) { @@ -811,8 +834,12 @@ lower_image_deref(nir_builder *b, assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); + struct v3dv_descriptor_map *map = + pipeline_get_descriptor_map(pipeline, binding_layout->type, + shader->info.stage, false); + int desc_index = - descriptor_map_add(&pipeline->shared_data->texture_map, + descriptor_map_add(map, deref->var->data.descriptor_set, deref->var->data.binding, array_index, @@ -832,6 +859,7 @@ lower_image_deref(nir_builder *b, static bool lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, + nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout) { @@ -850,16 +878,14 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, return true; case nir_intrinsic_vulkan_resource_index: - lower_vulkan_resource_index(b, instr, pipeline, layout); + lower_vulkan_resource_index(b, instr, shader, pipeline, layout); return true; case nir_intrinsic_load_vulkan_descriptor: { - /* We are not using it, as loading the descriptor happens as part of the - * load/store instruction, so the simpler is just doing a no-op. We just - * lower the desc back to a vec2, as it is what load_ssbo/ubo expects. + /* Loading the descriptor happens as part of load/store instructions, + * so for us this is a no-op. 
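/* The return_size selection earlier in this hunk gives the V3D_DEBUG
 * overrides priority over the per-instruction heuristic (16-bit TMU
 * returns for relaxed precision or shadow comparisons, 32-bit otherwise).
 * The same decision in isolation (the debug bit values are illustrative):
 */
#include <stdbool.h>
#include <stdint.h>

#define DBG_TMU_16BIT (1u << 0)
#define DBG_TMU_32BIT (1u << 1)

static uint8_t
tmu_return_size(uint32_t debug, bool relaxed_precision, bool is_shadow)
{
   if (debug & DBG_TMU_16BIT)
      return 16;   /* force half-float returns everywhere */
   if (debug & DBG_TMU_32BIT)
      return 32;   /* force full-precision returns everywhere */
   return (relaxed_precision || is_shadow) ? 16 : 32;
}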
*/ - nir_ssa_def *desc = nir_vec2(b, instr->src[0].ssa, nir_imm_int(b, 0)); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, desc); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa); nir_instr_remove(&instr->instr); return true; } @@ -878,7 +904,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, case nir_intrinsic_image_deref_atomic_comp_swap: case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: - lower_image_deref(b, instr, pipeline, layout); + lower_image_deref(b, instr, shader, pipeline, layout); return true; default: @@ -888,6 +914,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, static bool lower_impl(nir_function_impl *impl, + nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout) { @@ -901,11 +928,12 @@ lower_impl(nir_function_impl *impl, switch (instr->type) { case nir_instr_type_tex: progress |= - lower_sampler(&b, nir_instr_as_tex(instr), pipeline, layout); + lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout); break; case nir_instr_type_intrinsic: progress |= - lower_intrinsic(&b, nir_instr_as_intrinsic(instr), pipeline, layout); + lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, + pipeline, layout); break; default: break; @@ -925,7 +953,7 @@ lower_pipeline_layout_info(nir_shader *shader, nir_foreach_function(function, shader) { if (function->impl) - progress |= lower_impl(function->impl, pipeline, layout); + progress |= lower_impl(function->impl, shader, pipeline, layout); } return progress; @@ -950,6 +978,18 @@ lower_fs_io(nir_shader *nir) } static void +lower_gs_io(struct nir_shader *nir) +{ + NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); + + nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, + MESA_SHADER_GEOMETRY); + + nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, + MESA_SHADER_GEOMETRY); +} + +static void lower_vs_io(struct nir_shader *nir) { NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); @@ -982,13 +1022,16 @@ pipeline_populate_v3d_key(struct v3d_key *key, uint32_t ucp_enables, bool robust_buffer_access) { + assert(p_stage->pipeline->shared_data && + p_stage->pipeline->shared_data->maps[p_stage->stage]); + /* The following values are default values used at pipeline create. We use * there 32 bit as default return size. */ struct v3dv_descriptor_map *sampler_map = - &p_stage->pipeline->shared_data->sampler_map; + &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map; struct v3dv_descriptor_map *texture_map = - &p_stage->pipeline->shared_data->texture_map; + &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map; key->num_tex_used = texture_map->num_desc; assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); @@ -1010,12 +1053,23 @@ pipeline_populate_v3d_key(struct v3d_key *key, key->sampler[sampler_idx].return_size == 32 ? 4 : 2; } - - - /* default value. 
Would be override on the vs/gs populate methods when GS - * gets supported - */ - key->is_last_geometry_stage = true; + switch (p_stage->stage) { + case BROADCOM_SHADER_VERTEX: + case BROADCOM_SHADER_VERTEX_BIN: + key->is_last_geometry_stage = p_stage->pipeline->gs == NULL; + break; + case BROADCOM_SHADER_GEOMETRY: + case BROADCOM_SHADER_GEOMETRY_BIN: + /* FIXME: while we don't implement tessellation shaders */ + key->is_last_geometry_stage = true; + break; + case BROADCOM_SHADER_FRAGMENT: + case BROADCOM_SHADER_COMPUTE: + key->is_last_geometry_stage = false; + break; + default: + unreachable("unsupported shader stage"); + } /* Vulkan doesn't have fixed function state for user clip planes. Instead, * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler @@ -1073,8 +1127,11 @@ static void pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct v3dv_pipeline_stage *p_stage, + bool has_geometry_shader, uint32_t ucp_enables) { + assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT); + memset(key, 0, sizeof(*key)); const bool rba = p_stage->pipeline->device->features.robustBufferAccess; @@ -1087,9 +1144,11 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, key->is_points = (topology == PIPE_PRIM_POINTS); key->is_lines = (topology >= PIPE_PRIM_LINES && topology <= PIPE_PRIM_LINE_STRIP); + key->has_gs = has_geometry_shader; const VkPipelineColorBlendStateCreateInfo *cb_info = - pCreateInfo->pColorBlendState; + !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? + pCreateInfo->pColorBlendState : NULL; key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? vk_to_pipe_logicop[cb_info->logicOp] : @@ -1139,7 +1198,8 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, */ if (key->logicop_func != PIPE_LOGICOP_COPY) { key->color_fmt[i].format = fb_pipe_format; - key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format); + key->color_fmt[i].swizzle = + v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format); } const struct util_format_description *desc = @@ -1173,43 +1233,140 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, } static void -pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, +setup_stage_outputs_from_next_stage_inputs( + uint8_t next_stage_num_inputs, + struct v3d_varying_slot *next_stage_input_slots, + uint8_t *num_used_outputs, + struct v3d_varying_slot *used_output_slots, + uint32_t size_of_used_output_slots) +{ + *num_used_outputs = next_stage_num_inputs; + memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots); +} + +static void +pipeline_populate_v3d_gs_key(struct v3d_gs_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct v3dv_pipeline_stage *p_stage) { + assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY || + p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN); + memset(key, 0, sizeof(*key)); const bool rba = p_stage->pipeline->device->features.robustBufferAccess; pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); - /* Vulkan specifies a point size per vertex, so true for if the prim are - * points, like on ES2) - */ - const VkPipelineInputAssemblyStateCreateInfo *ia_info = - pCreateInfo->pInputAssemblyState; - uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; + struct v3dv_pipeline *pipeline = p_stage->pipeline; - /* FIXME: not enough to being PRIM_POINTS, on gallium the full check is - * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ - key->per_vertex_point_size = (topology == 
PIPE_PRIM_POINTS); + key->per_vertex_point_size = + p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ); + + key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); - key->is_coord = p_stage->stage == BROADCOM_SHADER_VERTEX_BIN; + assert(key->base.is_last_geometry_stage); if (key->is_coord) { - /* The only output varying on coord shaders are for transform + /* Output varyings in the last binning shader are only used for transform * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. */ key->num_used_outputs = 0; } else { - struct v3dv_pipeline *pipeline = p_stage->pipeline; struct v3dv_shader_variant *fs_variant = pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; - key->num_used_outputs = fs_variant->prog_data.fs->num_inputs; - STATIC_ASSERT(sizeof(key->used_outputs) == sizeof(fs_variant->prog_data.fs->input_slots)); - memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots, - sizeof(key->used_outputs)); + + setup_stage_outputs_from_next_stage_inputs( + fs_variant->prog_data.fs->num_inputs, + fs_variant->prog_data.fs->input_slots, + &key->num_used_outputs, + key->used_outputs, + sizeof(key->used_outputs)); + } +} + +static void +pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct v3dv_pipeline_stage *p_stage) +{ + assert(p_stage->stage == BROADCOM_SHADER_VERTEX || + p_stage->stage == BROADCOM_SHADER_VERTEX_BIN); + + memset(key, 0, sizeof(*key)); + + const bool rba = p_stage->pipeline->device->features.robustBufferAccess; + pipeline_populate_v3d_key(&key->base, p_stage, 0, rba); + + struct v3dv_pipeline *pipeline = p_stage->pipeline; + + /* Vulkan specifies a point size per vertex, so true for if the prim are + * points, like on ES2) + */ + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + uint8_t topology = vk_to_pipe_prim_type[ia_info->topology]; + + /* FIXME: PRIM_POINTS is not enough, in gallium the full check is + * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */ + key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS); + + key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); + + if (key->is_coord) { /* Binning VS*/ + if (key->base.is_last_geometry_stage) { + /* Output varyings in the last binning shader are only used for + * transform feedback. Set to 0 as VK_EXT_transform_feedback is not + * supported. 
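/* Note the difference between the two keys above: the GS key derives
 * per_vertex_point_size from whether the shader actually writes
 * gl_PointSize, while the VS key still infers it from a point-list
 * topology. The GS test is a single bit in the outputs_written mask;
 * standalone sketch (the slot value is illustrative, the real one comes
 * from Mesa's shader_enums.h):
 */
#include <stdbool.h>
#include <stdint.h>

#define SLOT_PSIZ 1   /* gl_PointSize varying slot */

static bool
writes_point_size(uint64_t outputs_written)
{
   return (outputs_written & (UINT64_C(1) << SLOT_PSIZ)) != 0;
}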
+ */ + key->num_used_outputs = 0; + } else { + /* Linking against GS binning program */ + assert(pipeline->gs); + struct v3dv_shader_variant *gs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; + + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(gs_bin_variant->prog_data.gs->input_slots)); + + setup_stage_outputs_from_next_stage_inputs( + gs_bin_variant->prog_data.gs->num_inputs, + gs_bin_variant->prog_data.gs->input_slots, + &key->num_used_outputs, + key->used_outputs, + sizeof(key->used_outputs)); + } + } else { /* Render VS */ + if (pipeline->gs) { + /* Linking against GS render program */ + struct v3dv_shader_variant *gs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; + + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(gs_variant->prog_data.gs->input_slots)); + + setup_stage_outputs_from_next_stage_inputs( + gs_variant->prog_data.gs->num_inputs, + gs_variant->prog_data.gs->input_slots, + &key->num_used_outputs, + key->used_outputs, + sizeof(key->used_outputs)); + } else { + /* Linking against FS program */ + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; + + STATIC_ASSERT(sizeof(key->used_outputs) == + sizeof(fs_variant->prog_data.fs->input_slots)); + + setup_stage_outputs_from_next_stage_inputs( + fs_variant->prog_data.fs->num_inputs, + fs_variant->prog_data.fs->input_slots, + &key->num_used_outputs, + key->used_outputs, + sizeof(key->used_outputs)); + } } const VkPipelineVertexInputStateCreateInfo *vi_info = @@ -1223,16 +1380,16 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, } } -/* - * Creates the pipeline_stage for the coordinate shader. Initially a clone of - * the vs pipeline_stage, with is_coord to true +/** + * Creates the initial form of the pipeline stage for a binning shader by + * cloning the render shader and flagging it as a coordinate shader. * * Returns NULL if it was not able to allocate the object, so it should be * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error. */ -static struct v3dv_pipeline_stage* -pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, - const VkAllocationCallbacks *pAllocator) +static struct v3dv_pipeline_stage * +pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src, + const VkAllocationCallbacks *pAllocator) { struct v3dv_device *device = src->pipeline->device; @@ -1243,13 +1400,25 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, if (p_stage == NULL) return NULL; + assert(src->stage == BROADCOM_SHADER_VERTEX || + src->stage == BROADCOM_SHADER_GEOMETRY); + + enum broadcom_shader_stage bin_stage = + src->stage == BROADCOM_SHADER_VERTEX ? + BROADCOM_SHADER_VERTEX_BIN : + BROADCOM_SHADER_GEOMETRY_BIN; + p_stage->pipeline = src->pipeline; - assert(src->stage == BROADCOM_SHADER_VERTEX); - p_stage->stage = BROADCOM_SHADER_VERTEX_BIN; + p_stage->stage = bin_stage; p_stage->entrypoint = src->entrypoint; p_stage->module = src->module; - p_stage->nir = src->nir ? nir_shader_clone(NULL, src->nir) : NULL; + /* For binning shaders we will clone the NIR code from the corresponding + * render shader later, when we call pipeline_compile_xxx_shader. 
This way + * we only have to run the relevant NIR lowerings once for render shaders + */ + p_stage->nir = NULL; p_stage->spec_info = src->spec_info; + p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 }; memcpy(p_stage->shader_sha1, src->shader_sha1, 20); return p_stage; @@ -1314,14 +1483,18 @@ pipeline_hash_graphics(const struct v3dv_pipeline *pipeline, struct mesa_sha1 ctx; _mesa_sha1_init(&ctx); - /* We need to include both on the sha1 key as one could affect the other - * during linking (like if vertex output are constants, then the - * fragment shader would load_const intead of load_input). An - * alternative would be to use the serialized nir, but that seems like - * an overkill + /* We need to include all shader stages in the sha1 key as linking may modify + * the shader code in any stage. An alternative would be to use the + * serialized NIR, but that seems like an overkill. */ _mesa_sha1_update(&ctx, pipeline->vs->shader_sha1, sizeof(pipeline->vs->shader_sha1)); + + if (pipeline->gs) { + _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1, + sizeof(pipeline->gs->shader_sha1)); + } + _mesa_sha1_update(&ctx, pipeline->fs->shader_sha1, sizeof(pipeline->fs->shader_sha1)); @@ -1397,7 +1570,7 @@ pipeline_check_spill_size(struct v3dv_pipeline *pipeline) */ struct v3dv_shader_variant * v3dv_shader_variant_create(struct v3dv_device *device, - broadcom_shader_stage stage, + enum broadcom_shader_stage stage, struct v3d_prog_data *prog_data, uint32_t prog_data_size, uint32_t assembly_offset, @@ -1441,22 +1614,25 @@ v3dv_shader_variant_create(struct v3dv_device *device, * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler * error. */ -static struct v3dv_shader_variant* +static struct v3dv_shader_variant * pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, struct v3d_key *key, size_t key_size, const VkAllocationCallbacks *pAllocator, VkResult *out_vk_result) { + int64_t stage_start = os_time_get_nano(); + struct v3dv_pipeline *pipeline = p_stage->pipeline; struct v3dv_physical_device *physical_device = &pipeline->device->instance->physicalDevice; const struct v3d_compiler *compiler = physical_device->compiler; - if (V3D_DEBUG & (V3D_DEBUG_NIR | - v3d_debug_flag_for_shader_stage(p_stage->stage))) { + if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR | + v3d_debug_flag_for_shader_stage + (broadcom_shader_stage_to_gl(p_stage->stage))))) { fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", - gl_shader_stage_name(p_stage->stage), + broadcom_shader_stage_name(p_stage->stage), p_stage->program_id); nir_print_shader(p_stage->nir, stderr); fprintf(stderr, "\n"); @@ -1495,6 +1671,8 @@ pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, * we finish it, so let's not worry about freeing the nir here. 
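/* The os_time_get_nano() bracketing added throughout this hunk implements
 * VK_EXT_pipeline_creation_feedback: each stage accumulates the
 * wall-clock time spent on it, and binning-variant time is later folded
 * into its render stage. The pattern reduced to essentials (struct and
 * callback types are illustrative):
 */
#include <stdint.h>

struct stage_feedback {
   uint64_t duration_ns;
};

static void
run_timed(struct stage_feedback *fb, int64_t (*now_ns)(void),
          void (*work)(void *), void *ctx)
{
   int64_t start = now_ns();
   work(ctx);                            /* compile, lower, etc. */
   fb->duration_ns += now_ns() - start;  /* += so multiple phases sum up */
}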
*/ + p_stage->feedback.duration += os_time_get_nano() - stage_start; + return variant; } @@ -1525,7 +1703,7 @@ st_nir_opts(nir_shader *nir) if (nir->options->lower_to_scalar) { NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); } NIR_PASS_V(nir, nir_lower_alu); @@ -1594,6 +1772,11 @@ pipeline_lower_nir(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_stage *p_stage, struct v3dv_pipeline_layout *layout) { + int64_t stage_start = os_time_get_nano(); + + assert(pipeline->shared_data && + pipeline->shared_data->maps[p_stage->stage]); + nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); /* We add this because we need a valid sampler for nir_lower_tex to do @@ -1604,17 +1787,19 @@ pipeline_lower_nir(struct v3dv_pipeline *pipeline, * another for the case we need a 32bit return size. */ UNUSED unsigned index = - descriptor_map_add(&pipeline->shared_data->sampler_map, + descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map, -1, -1, -1, 0, 16); assert(index == V3DV_NO_SAMPLER_16BIT_IDX); index = - descriptor_map_add(&pipeline->shared_data->sampler_map, + descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map, -2, -2, -2, 0, 32); assert(index == V3DV_NO_SAMPLER_32BIT_IDX); /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout); + + p_stage->feedback.duration += os_time_get_nano() - stage_start; } /** @@ -1638,11 +1823,13 @@ get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage) return 0; } -static nir_shader* +static nir_shader * pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, struct v3dv_pipeline *pipeline, struct v3dv_pipeline_cache *cache) { + int64_t stage_start = os_time_get_nano(); + nir_shader *nir = NULL; nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache, @@ -1651,6 +1838,14 @@ pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, if (nir) { assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage)); + + /* A NIR cach hit doesn't avoid the large majority of pipeline stage + * creation so the cache hit is not recorded in the pipeline feedback + * flags + */ + + p_stage->feedback.duration += os_time_get_nano() - stage_start; + return nir; } @@ -1670,6 +1865,9 @@ pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir, p_stage->shader_sha1); } + + p_stage->feedback.duration += os_time_get_nano() - stage_start; + return nir; } @@ -1706,13 +1904,6 @@ pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, const VkAllocationCallbacks *pAllocator, const VkGraphicsPipelineCreateInfo *pCreateInfo) { - struct v3dv_pipeline_stage *p_stage = pipeline->vs; - - /* Right now we only support pipelines with both vertex and fragment - * shader. 
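/* pipeline_stage_get_nir() above is a lookup-or-build cache keyed by a
 * SHA-1 of the shader and its compile state: search first, and on a miss
 * run the SPIR-V front-end and upload the result so later pipelines can
 * reuse it. The shape of that flow (callback types are illustrative):
 */
static void *
cache_get_or_build(void *cache, const unsigned char sha1[20],
                   void *(*lookup)(void *cache, const unsigned char *key),
                   void *(*build)(void *ctx), void *ctx,
                   void (*upload)(void *cache, const unsigned char *key, void *obj))
{
   void *obj = lookup(cache, sha1);
   if (obj)
      return obj;                /* hit: skip SPIR-V -> NIR entirely */
   obj = build(ctx);             /* miss: run the full front-end */
   if (obj)
      upload(cache, sha1, obj);  /* publish for later pipelines */
   return obj;
}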
- */ - assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); - assert(pipeline->vs_bin != NULL); if (pipeline->vs_bin->nir == NULL) { assert(pipeline->vs->nir); @@ -1728,8 +1919,7 @@ pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, if (vk_result != VK_SUCCESS) return vk_result; - p_stage = pipeline->vs_bin; - pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage); + pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin); pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] = pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key), pAllocator, &vk_result); @@ -1738,6 +1928,36 @@ pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, } static VkResult +pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline, + const VkAllocationCallbacks *pAllocator, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + assert(pipeline->gs); + + assert(pipeline->gs_bin != NULL); + if (pipeline->gs_bin->nir == NULL) { + assert(pipeline->gs->nir); + pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir); + } + + VkResult vk_result; + struct v3d_gs_key key; + pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs); + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] = + pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key), + pAllocator, &vk_result); + if (vk_result != VK_SUCCESS) + return vk_result; + + pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin); + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] = + pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key), + pAllocator, &vk_result); + + return vk_result; +} + +static VkResult pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, const VkAllocationCallbacks *pAllocator, const VkGraphicsPipelineCreateInfo *pCreateInfo) @@ -1749,6 +1969,7 @@ pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, struct v3d_fs_key key; pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage, + pipeline->gs != NULL, get_ucp_enable_mask(pipeline->vs)); VkResult vk_result; @@ -1768,19 +1989,20 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, key->robust_buffer_access = pipeline->device->features.robustBufferAccess; + const bool raster_enabled = + !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; + const VkPipelineInputAssemblyStateCreateInfo *ia_info = pCreateInfo->pInputAssemblyState; key->topology = vk_to_pipe_prim_type[ia_info->topology]; const VkPipelineColorBlendStateCreateInfo *cb_info = - pCreateInfo->pColorBlendState; + raster_enabled ? pCreateInfo->pColorBlendState : NULL; + key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? vk_to_pipe_logicop[cb_info->logicOp] : PIPE_LOGICOP_COPY; - const bool raster_enabled = - !pCreateInfo->pRasterizationState->rasterizerDiscardEnable; - /* Multisample rasterization state must be ignored if rasterization * is disabled. 
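/* The rasterizerDiscardEnable guard added above follows the Vulkan rule
 * that pColorBlendState (and several other create-info pointers) must be
 * ignored when rasterization is discarded, so they may be garbage. The
 * driver therefore NULLs them once up front rather than checking at each
 * use. In isolation:
 */
#include <stdbool.h>
#include <stddef.h>

static const void *
state_if_rasterizing(bool rasterizer_discard_enable, const void *state)
{
   /* never dereference state when rasterization is discarded: the
    * application is allowed to pass an invalid pointer there */
   return rasterizer_discard_enable ? NULL : state;
}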
*/ @@ -1817,7 +2039,8 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, */ if (key->logicop_func != PIPE_LOGICOP_COPY) { key->color_fmt[i].format = fb_pipe_format; - key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format); + key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device, + fb_format); } const struct util_format_description *desc = @@ -1839,6 +2062,8 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); } + assert(pipeline->subpass); + key->has_multiview = pipeline->subpass->view_mask != 0; } static void @@ -1858,25 +2083,285 @@ pipeline_populate_compute_key(struct v3dv_pipeline *pipeline, static struct v3dv_pipeline_shared_data * v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20], - struct v3dv_device *device) + struct v3dv_pipeline *pipeline, + bool is_graphics_pipeline) { - size_t size = sizeof(struct v3dv_pipeline_shared_data); /* We create new_entry using the device alloc. Right now shared_data is ref * and unref by both the pipeline and the pipeline cache, so we can't * ensure that the cache or pipeline alloc will be available on the last * unref. */ struct v3dv_pipeline_shared_data *new_entry = - vk_zalloc2(&device->vk.alloc, NULL, size, 8, + vk_zalloc2(&pipeline->device->vk.alloc, NULL, + sizeof(struct v3dv_pipeline_shared_data), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_entry == NULL) return NULL; + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { + /* We don't need specific descriptor maps for binning stages we use the + * map for the render stage. + */ + if (broadcom_shader_stage_is_binning(stage)) + continue; + + if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) || + (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) { + continue; + } + + if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) { + /* We always inject a custom GS if we have multiview */ + if (!pipeline->subpass->view_mask) + continue; + } + + struct v3dv_descriptor_maps *new_maps = + vk_zalloc2(&pipeline->device->vk.alloc, NULL, + sizeof(struct v3dv_descriptor_maps), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (new_maps == NULL) + goto fail; + + new_entry->maps[stage] = new_maps; + } + + new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] = + new_entry->maps[BROADCOM_SHADER_VERTEX]; + + new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] = + new_entry->maps[BROADCOM_SHADER_GEOMETRY]; + new_entry->ref_cnt = 1; memcpy(new_entry->sha1_key, sha1_key, 20); return new_entry; + +fail: + if (new_entry != NULL) { + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { + if (new_entry->maps[stage] != NULL) + vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]); + } + } + + vk_free(&pipeline->device->vk.alloc, new_entry); + + return NULL; +} + +static void +write_creation_feedback(struct v3dv_pipeline *pipeline, + const void *next, + const VkPipelineCreationFeedbackEXT *pipeline_feedback, + uint32_t stage_count, + const VkPipelineShaderStageCreateInfo *stages) +{ + const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback = + vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT); + + if (create_feedback) { + typed_memcpy(create_feedback->pPipelineCreationFeedback, + pipeline_feedback, + 1); + + assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount); + + for (uint32_t i = 0; i < stage_count; i++) { + gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage); + switch (s) { + case 
MESA_SHADER_VERTEX: + create_feedback->pPipelineStageCreationFeedbacks[i] = + pipeline->vs->feedback; + + create_feedback->pPipelineStageCreationFeedbacks[i].duration += + pipeline->vs_bin->feedback.duration; + break; + + case MESA_SHADER_GEOMETRY: + create_feedback->pPipelineStageCreationFeedbacks[i] = + pipeline->gs->feedback; + + create_feedback->pPipelineStageCreationFeedbacks[i].duration += + pipeline->gs_bin->feedback.duration; + break; + + case MESA_SHADER_FRAGMENT: + create_feedback->pPipelineStageCreationFeedbacks[i] = + pipeline->fs->feedback; + break; + + case MESA_SHADER_COMPUTE: + create_feedback->pPipelineStageCreationFeedbacks[i] = + pipeline->cs->feedback; + break; + + default: + unreachable("not supported shader stage"); + } + } + } +} + +static uint32_t +multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline) +{ + switch (pipeline->topology) { + case PIPE_PRIM_POINTS: + return GL_POINTS; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + return GL_LINES; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + return GL_TRIANGLES; + default: + /* Since we don't allow GS with multiview, we can only see non-adjacency + * primitives. + */ + unreachable("Unexpected pipeline primitive type"); + } +} + +static uint32_t +multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline) +{ + switch (pipeline->topology) { + case PIPE_PRIM_POINTS: + return GL_POINTS; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + return GL_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + return GL_TRIANGLE_STRIP; + default: + /* Since we don't allow GS with multiview, we can only see non-adjacency + * primitives. + */ + unreachable("Unexpected pipeline primitive type"); + } +} + +static bool +pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, + const VkAllocationCallbacks *pAllocator) +{ + /* Create the passthrough GS from the VS output interface */ + pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); + nir_shader *vs_nir = pipeline->vs->nir; + + const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(); + nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, + "multiview broadcast gs"); + nir_shader *nir = b.shader; + nir->info.inputs_read = vs_nir->info.outputs_written; + nir->info.outputs_written = vs_nir->info.outputs_written | + (1ull << VARYING_SLOT_LAYER); + + uint32_t vertex_count = u_vertices_per_prim(pipeline->topology); + nir->info.gs.input_primitive = + multiview_gs_input_primitive_from_pipeline(pipeline); + nir->info.gs.output_primitive = + multiview_gs_output_primitive_from_pipeline(pipeline); + nir->info.gs.vertices_in = vertex_count; + nir->info.gs.vertices_out = nir->info.gs.vertices_in; + nir->info.gs.invocations = 1; + nir->info.gs.active_stream_mask = 0x1; + + /* Make a list of GS input/output variables from the VS outputs */ + nir_variable *in_vars[100]; + nir_variable *out_vars[100]; + uint32_t var_count = 0; + nir_foreach_shader_out_variable(out_vs_var, vs_nir) { + char name[8]; + snprintf(name, ARRAY_SIZE(name), "in_%d", var_count); + + in_vars[var_count] = + nir_variable_create(nir, nir_var_shader_in, + glsl_array_type(out_vs_var->type, vertex_count, 0), + name); + in_vars[var_count]->data.location = out_vs_var->data.location; + in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac; + 
in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; + + snprintf(name, ARRAY_SIZE(name), "out_%d", var_count); + out_vars[var_count] = + nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name); + out_vars[var_count]->data.location = out_vs_var->data.location; + out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; + + var_count++; + } + + /* Add the gl_Layer output variable */ + nir_variable *out_layer = + nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), + "out_Layer"); + out_layer->data.location = VARYING_SLOT_LAYER; + + /* Get the view index value that we will write to gl_Layer */ + nir_ssa_def *layer = + nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32); + + /* Emit all output vertices */ + for (uint32_t vi = 0; vi < vertex_count; vi++) { + /* Emit all output varyings */ + for (uint32_t i = 0; i < var_count; i++) { + nir_deref_instr *in_value = + nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi); + nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value); + } + + /* Emit gl_Layer write */ + nir_store_var(&b, out_layer, layer, 0x1); + + nir_emit_vertex(&b, 0); + } + nir_end_primitive(&b, 0); + + /* Make sure we run our pre-process NIR passes so we produce NIR compatible + * with what we expect from SPIR-V modules. + */ + preprocess_nir(nir); + + /* Attach the geometry shader to the pipeline */ + struct v3dv_device *device = pipeline->device; + struct v3dv_physical_device *physical_device = + &device->instance->physicalDevice; + + struct v3dv_pipeline_stage *p_stage = + vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (p_stage == NULL) { + ralloc_free(nir); + return false; + } + + p_stage->pipeline = pipeline; + p_stage->stage = BROADCOM_SHADER_GEOMETRY; + p_stage->entrypoint = "main"; + p_stage->module = 0; + p_stage->nir = nir; + pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1); + p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); + + pipeline->has_gs = true; + pipeline->gs = p_stage; + pipeline->active_stages |= MESA_SHADER_GEOMETRY; + + pipeline->gs_bin = + pipeline_stage_create_binning(pipeline->gs, pAllocator); + if (pipeline->gs_bin == NULL) + return false; + + return true; } /* @@ -1895,6 +2380,11 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator) { + VkPipelineCreationFeedbackEXT pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, + }; + int64_t pipeline_start = os_time_get_nano(); + struct v3dv_device *device = pipeline->device; struct v3dv_physical_device *physical_device = &device->instance->physicalDevice; @@ -1945,14 +2435,24 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, case MESA_SHADER_VERTEX: pipeline->vs = p_stage; pipeline->vs_bin = - pipeline_stage_create_vs_bin(pipeline->vs, pAllocator); + pipeline_stage_create_binning(pipeline->vs, pAllocator); if (pipeline->vs_bin == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; + break; + case MESA_SHADER_GEOMETRY: + pipeline->has_gs = true; + pipeline->gs = p_stage; + pipeline->gs_bin = + pipeline_stage_create_binning(pipeline->gs, pAllocator); + if (pipeline->gs_bin == NULL) + return VK_ERROR_OUT_OF_HOST_MEMORY; break; + case MESA_SHADER_FRAGMENT: pipeline->fs = p_stage; break; + default: unreachable("not supported shader stage"); } @@ -1984,39 +2484,85 @@ 
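/* Functionally, the NIR built by pipeline_add_multiview_gs() above is a
 * passthrough geometry shader that replays each input primitive and tags
 * it with the view's layer. Sketched as GLSL for a triangle-list
 * pipeline (the real shader is assembled directly with nir_builder, and
 * the layouts follow the pipeline topology):
 *
 *    layout(triangles) in;
 *    layout(triangle_strip, max_vertices = 3) out;
 *
 *    void main() {
 *       for (int i = 0; i < 3; i++) {
 *          out_attr = in_attr[i];     // copy each captured VS output
 *          gl_Layer = gl_ViewIndex;   // broadcast the draw to this view's layer
 *          EmitVertex();
 *       }
 *       EndPrimitive();
 *    }
 */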
pipeline_compile_graphics(struct v3dv_pipeline *pipeline, pipeline->active_stages |= MESA_SHADER_FRAGMENT; } - /* Now we will try to get the variants from the pipeline cache */ + /* If multiview is enabled, we inject a custom passthrough geometry shader + * to broadcast draw calls to the appropriate views. + */ + assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs)); + if (pipeline->subpass->view_mask) { + if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator)) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + /* First we try to get the variants from the pipeline cache */ struct v3dv_pipeline_key pipeline_key; pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo); unsigned char pipeline_sha1[20]; pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1); + bool cache_hit = false; + pipeline->shared_data = - v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1); + v3dv_pipeline_cache_search_for_pipeline(cache, + pipeline_sha1, + &cache_hit); if (pipeline->shared_data != NULL) { + /* A correct pipeline must have at least a VS and FS */ assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]); assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); + assert(!pipeline->gs || + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]); + assert(!pipeline->gs || + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); + + if (cache_hit && cache != &pipeline->device->default_pipeline_cache) + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; goto success; } - pipeline->shared_data = - v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline->device); - /* If not, we try to get the nir shaders (from the SPIR-V shader, or from - * the pipeline cache again) and compile. + if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) + return VK_PIPELINE_COMPILE_REQUIRED_EXT; + + /* Otherwise we try to get the NIR shaders (either from the original SPIR-V + * shader or the pipeline cache) and compile. 
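/* The lookup above also honors VK_EXT_pipeline_creation_cache_control:
 * when the application sets FAIL_ON_PIPELINE_COMPILE_REQUIRED, a cache
 * miss must return early instead of compiling. Condensed decision flow
 * (names illustrative):
 */
#include <stdbool.h>

enum create_result { CREATE_OK, CREATE_COMPILE_REQUIRED };

static enum create_result
get_pipeline_variants(void **out, void *cached, bool fail_on_compile,
                      void *(*compile)(void *ctx), void *ctx)
{
   if (cached) {
      *out = cached;                    /* hit: reuse shared variants */
      return CREATE_OK;
   }
   if (fail_on_compile)
      return CREATE_COMPILE_REQUIRED;   /* app forbade compilation */
   *out = compile(ctx);                 /* slow path: NIR + backend */
   return CREATE_OK;
}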
*/ + pipeline->shared_data = + v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true); + + pipeline->vs->feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; + if (pipeline->gs) + pipeline->gs->feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; + pipeline->fs->feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; + if (!pipeline->vs->nir) pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache); + if (pipeline->gs && !pipeline->gs->nir) + pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache); if (!pipeline->fs->nir) pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache); /* Linking + pipeline lowerings */ - link_shaders(pipeline->vs->nir, pipeline->fs->nir); + if (pipeline->gs) { + link_shaders(pipeline->gs->nir, pipeline->fs->nir); + link_shaders(pipeline->vs->nir, pipeline->gs->nir); + } else { + link_shaders(pipeline->vs->nir, pipeline->fs->nir); + } pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout); lower_fs_io(pipeline->fs->nir); + if (pipeline->gs) { + pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout); + lower_gs_io(pipeline->gs->nir); + } + pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout); lower_vs_io(pipeline->vs->nir); @@ -2029,6 +2575,16 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, if (vk_result != VK_SUCCESS) return vk_result; + assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] && + !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); + + if (pipeline->gs) { + vk_result = + pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo); + if (vk_result != VK_SUCCESS) + return vk_result; + } + assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] && !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); @@ -2041,29 +2597,52 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, v3dv_pipeline_cache_upload_pipeline(pipeline, cache); - /* As we got the variants in pipeline->shared_data, after compiling we - * don't need the pipeline_stages + success: + + pipeline_feedback.duration = os_time_get_nano() - pipeline_start; + write_creation_feedback(pipeline, + pCreateInfo->pNext, + &pipeline_feedback, + pCreateInfo->stageCount, + pCreateInfo->pStages); + + /* Since we have the variants in the pipeline shared data we can now free + * the pipeline stages. */ pipeline_free_stages(device, pipeline, pAllocator); - success: pipeline_check_spill_size(pipeline); - /* FIXME: values below are default when non-GS is available. 
Would need to - * provide real values if GS gets supported - */ + return compute_vpm_config(pipeline); +} + +static VkResult +compute_vpm_config(struct v3dv_pipeline *pipeline) +{ struct v3dv_shader_variant *vs_variant = pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; struct v3dv_shader_variant *vs_bin_variant = - pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; - - pipeline->vpm_cfg_bin.As = 1; - pipeline->vpm_cfg_bin.Ve = 0; - pipeline->vpm_cfg_bin.Vc = vs_bin_variant->prog_data.vs->vcm_cache_size; - - pipeline->vpm_cfg.As = 1; - pipeline->vpm_cfg.Ve = 0; - pipeline->vpm_cfg.Vc = vs_variant->prog_data.vs->vcm_cache_size; + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; + struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs; + struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs; + + struct v3d_gs_prog_data *gs = NULL; + struct v3d_gs_prog_data *gs_bin = NULL; + if (pipeline->has_gs) { + struct v3dv_shader_variant *gs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; + struct v3dv_shader_variant *gs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; + gs = gs_variant->prog_data.gs; + gs_bin = gs_bin_variant->prog_data.gs; + } + + if (!v3d_compute_vpm_config(&pipeline->device->devinfo, + vs_bin, vs, gs_bin, gs, + &pipeline->vpm_cfg_bin, + &pipeline->vpm_cfg)) { + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } return VK_SUCCESS; } @@ -2088,6 +2667,8 @@ v3dv_dynamic_state_mask(VkDynamicState state) return V3DV_DYNAMIC_DEPTH_BIAS; case VK_DYNAMIC_STATE_LINE_WIDTH: return V3DV_DYNAMIC_LINE_WIDTH; + case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: + return V3DV_DYNAMIC_COLOR_WRITE_ENABLE; /* Depth bounds testing is not available in in V3D 4.2 so here we are just * ignoring this dynamic state. We are already asserting at pipeline creation @@ -2108,7 +2689,8 @@ pipeline_init_dynamic_state( const VkPipelineViewportStateCreateInfo *pViewportState, const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState, const VkPipelineColorBlendStateCreateInfo *pColorBlendState, - const VkPipelineRasterizationStateCreateInfo *pRasterizationState) + const VkPipelineRasterizationStateCreateInfo *pRasterizationState, + const VkPipelineColorWriteCreateInfoEXT *pColorWriteState) { pipeline->dynamic_state = default_dynamic_state; struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state; @@ -2184,310 +2766,13 @@ pipeline_init_dynamic_state( dynamic->line_width = pRasterizationState->lineWidth; } - pipeline->dynamic_state.mask = dynamic_states; -} - -static uint8_t -blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants) -{ - switch (factor) { - case VK_BLEND_FACTOR_ZERO: - case VK_BLEND_FACTOR_ONE: - case VK_BLEND_FACTOR_SRC_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - case VK_BLEND_FACTOR_DST_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - case VK_BLEND_FACTOR_SRC_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - return factor; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - *needs_constants = true; - return factor; - case VK_BLEND_FACTOR_DST_ALPHA: - return dst_alpha_one ? V3D_BLEND_FACTOR_ONE : - V3D_BLEND_FACTOR_DST_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - return dst_alpha_one ? 
V3D_BLEND_FACTOR_ZERO : - V3D_BLEND_FACTOR_INV_DST_ALPHA; - case VK_BLEND_FACTOR_SRC1_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: - case VK_BLEND_FACTOR_SRC1_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: - assert(!"Invalid blend factor: dual source blending not supported."); - default: - assert(!"Unknown blend factor."); + if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { + dynamic->color_write_enable = 0; + for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++) + dynamic->color_write_enable |= pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; } - /* Should be handled by the switch, added to avoid a "end of non-void - * function" error - */ - unreachable("Unknown blend factor."); -} - -static void -pack_blend(struct v3dv_pipeline *pipeline, - const VkPipelineColorBlendStateCreateInfo *cb_info) -{ - /* By default, we are not enabling blending and all color channel writes are - * enabled. Color write enables are independent of whether blending is - * enabled or not. - * - * Vulkan specifies color write masks so that bits set correspond to - * enabled channels. Our hardware does it the other way around. - */ - pipeline->blend.enables = 0; - pipeline->blend.color_write_masks = 0; /* All channels enabled */ - - if (!cb_info) - return; - - assert(pipeline->subpass); - if (pipeline->subpass->color_count == 0) - return; - - assert(pipeline->subpass->color_count == cb_info->attachmentCount); - - pipeline->blend.needs_color_constants = false; - uint32_t color_write_masks = 0; - for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) { - const VkPipelineColorBlendAttachmentState *b_state = - &cb_info->pAttachments[i]; - - uint32_t attachment_idx = - pipeline->subpass->color_attachments[i].attachment; - if (attachment_idx == VK_ATTACHMENT_UNUSED) - continue; - - color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i); - - if (!b_state->blendEnable) - continue; - - VkAttachmentDescription *desc = - &pipeline->pass->attachments[attachment_idx].desc; - const struct v3dv_format *format = v3dv_get_format(desc->format); - bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1); - - uint8_t rt_mask = 1 << i; - pipeline->blend.enables |= rt_mask; - - v3dv_pack(pipeline->blend.cfg[i], BLEND_CFG, config) { - config.render_target_mask = rt_mask; - - config.color_blend_mode = b_state->colorBlendOp; - config.color_blend_dst_factor = - blend_factor(b_state->dstColorBlendFactor, dst_alpha_one, - &pipeline->blend.needs_color_constants); - config.color_blend_src_factor = - blend_factor(b_state->srcColorBlendFactor, dst_alpha_one, - &pipeline->blend.needs_color_constants); - - config.alpha_blend_mode = b_state->alphaBlendOp; - config.alpha_blend_dst_factor = - blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one, - &pipeline->blend.needs_color_constants); - config.alpha_blend_src_factor = - blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one, - &pipeline->blend.needs_color_constants); - } - } - - pipeline->blend.color_write_masks = color_write_masks; -} - -/* This requires that pack_blend() had been called before so we can set - * the overall blend enable bit in the CFG_BITS packet. 
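/* The dst_alpha_one remap in blend_factor() above handles render-target
 * formats with no stored alpha: the hardware reads destination alpha back
 * as 1.0 there, so DST_ALPHA and ONE_MINUS_DST_ALPHA fold to constants
 * per attachment. Standalone sketch (enum values illustrative):
 */
#include <stdbool.h>

enum blend_factor { BF_ZERO, BF_ONE, BF_DST_ALPHA, BF_INV_DST_ALPHA };

static enum blend_factor
fold_dst_alpha(enum blend_factor f, bool dst_alpha_one)
{
   if (!dst_alpha_one)
      return f;
   if (f == BF_DST_ALPHA)
      return BF_ONE;        /* a_dst == 1.0 */
   if (f == BF_INV_DST_ALPHA)
      return BF_ZERO;       /* 1.0 - a_dst == 0.0 */
   return f;
}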
- */ -static void -pack_cfg_bits(struct v3dv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *ds_info, - const VkPipelineRasterizationStateCreateInfo *rs_info, - const VkPipelineMultisampleStateCreateInfo *ms_info) -{ - assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS)); - - pipeline->msaa = - ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; - - v3dv_pack(pipeline->cfg_bits, CFG_BITS, config) { - config.enable_forward_facing_primitive = - rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false; - - config.enable_reverse_facing_primitive = - rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false; - - /* Seems like the hardware is backwards regarding this setting... */ - config.clockwise_primitives = - rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false; - - config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable: false; - - /* This is required to pass line rasterization tests in CTS while - * exposing, at least, a minimum of 4-bits of subpixel precision - * (the minimum requirement). - */ - config.line_rasterization = 1; /* perp end caps */ - - if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) { - config.direct3d_wireframe_triangles_mode = true; - config.direct3d_point_fill_mode = - rs_info->polygonMode == VK_POLYGON_MODE_POINT; - } - - config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0; - - /* From the Vulkan spec: - * - * "Provoking Vertex: - * - * The vertex in a primitive from which flat shaded attribute - * values are taken. This is generally the “first” vertex in the - * primitive, and depends on the primitive topology." - * - * First vertex is the Direct3D style for provoking vertex. OpenGL uses - * the last vertex by default. - */ - config.direct3d_provoking_vertex = true; - - config.blend_enable = pipeline->blend.enables != 0; - - /* Disable depth/stencil if we don't have a D/S attachment */ - bool has_ds_attachment = - pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED; - - if (ds_info && ds_info->depthTestEnable && has_ds_attachment) { - config.z_updates_enable = ds_info->depthWriteEnable; - config.depth_test_function = ds_info->depthCompareOp; - } else { - config.depth_test_function = VK_COMPARE_OP_ALWAYS; - } - - /* EZ state will be updated at draw time based on bound pipeline state */ - config.early_z_updates_enable = false; - config.early_z_enable = false; - - config.stencil_enable = - ds_info ? 
ds_info->stencilTestEnable && has_ds_attachment: false; - - pipeline->z_updates_enable = config.z_updates_enable; - }; -} - -static uint32_t -translate_stencil_op(enum pipe_stencil_op op) -{ - switch (op) { - case VK_STENCIL_OP_KEEP: - return V3D_STENCIL_OP_KEEP; - case VK_STENCIL_OP_ZERO: - return V3D_STENCIL_OP_ZERO; - case VK_STENCIL_OP_REPLACE: - return V3D_STENCIL_OP_REPLACE; - case VK_STENCIL_OP_INCREMENT_AND_CLAMP: - return V3D_STENCIL_OP_INCR; - case VK_STENCIL_OP_DECREMENT_AND_CLAMP: - return V3D_STENCIL_OP_DECR; - case VK_STENCIL_OP_INVERT: - return V3D_STENCIL_OP_INVERT; - case VK_STENCIL_OP_INCREMENT_AND_WRAP: - return V3D_STENCIL_OP_INCWRAP; - case VK_STENCIL_OP_DECREMENT_AND_WRAP: - return V3D_STENCIL_OP_DECWRAP; - default: - unreachable("bad stencil op"); - } -} - -static void -pack_single_stencil_cfg(struct v3dv_pipeline *pipeline, - uint8_t *stencil_cfg, - bool is_front, - bool is_back, - const VkStencilOpState *stencil_state) -{ - /* From the Vulkan spec: - * - * "Reference is an integer reference value that is used in the unsigned - * stencil comparison. The reference value used by stencil comparison - * must be within the range [0,2^s-1] , where s is the number of bits in - * the stencil framebuffer attachment, otherwise the reference value is - * considered undefined." - * - * In our case, 's' is always 8, so we clamp to that to prevent our packing - * functions to assert in debug mode if they see larger values. - * - * If we have dynamic state we need to make sure we set the corresponding - * state bits to 0, since cl_emit_with_prepacked ORs the new value with - * the old. - */ - const uint8_t write_mask = - pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ? - 0 : stencil_state->writeMask & 0xff; - - const uint8_t compare_mask = - pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ? - 0 : stencil_state->compareMask & 0xff; - - const uint8_t reference = - pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ? - 0 : stencil_state->reference & 0xff; - - v3dv_pack(stencil_cfg, STENCIL_CFG, config) { - config.front_config = is_front; - config.back_config = is_back; - config.stencil_write_mask = write_mask; - config.stencil_test_mask = compare_mask; - config.stencil_test_function = stencil_state->compareOp; - config.stencil_pass_op = translate_stencil_op(stencil_state->passOp); - config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp); - config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp); - config.stencil_ref_value = reference; - } -} - -static void -pack_stencil_cfg(struct v3dv_pipeline *pipeline, - const VkPipelineDepthStencilStateCreateInfo *ds_info) -{ - assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG)); - - if (!ds_info || !ds_info->stencilTestEnable) - return; - - if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) - return; - - const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK | - V3DV_DYNAMIC_STENCIL_WRITE_MASK | - V3DV_DYNAMIC_STENCIL_REFERENCE; - - - /* If front != back or we have dynamic stencil state we can't emit a single - * packet for both faces. - */ - bool needs_front_and_back = false; - if ((pipeline->dynamic_state.mask & dynamic_stencil_states) || - memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front))) - needs_front_and_back = true; - - /* If the front and back configurations are the same we can emit both with - * a single packet. 
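/* The decision above is made once at pipeline build time: identical
 * static front/back state and no dynamic stencil state means a single
 * STENCIL_CFG packet can be tagged for both faces; otherwise two packets
 * are prepacked. Reduced form:
 */
#include <stdbool.h>
#include <string.h>

static bool
needs_two_stencil_configs(const void *front, const void *back, size_t size,
                          bool has_dynamic_stencil_state)
{
   /* dynamic masks/references are patched per face at draw time, so they
    * always force separate front and back configs */
   return has_dynamic_stencil_state || memcmp(front, back, size) != 0;
}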
- */
-   pipeline->emit_stencil_cfg[0] = true;
-   if (!needs_front_and_back) {
-      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
-                              true, true, &ds_info->front);
-   } else {
-      pipeline->emit_stencil_cfg[1] = true;
-      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
-                              true, false, &ds_info->front);
-      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
-                              false, true, &ds_info->back);
-   }
+   pipeline->dynamic_state.mask = dynamic_states;
 }
 
 static bool
@@ -2532,25 +2817,25 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
                       const VkPipelineDepthStencilStateCreateInfo *ds_info)
 {
    if (!ds_info || !ds_info->depthTestEnable) {
-      pipeline->ez_state = VC5_EZ_DISABLED;
+      pipeline->ez_state = V3D_EZ_DISABLED;
       return;
    }
 
    switch (ds_info->depthCompareOp) {
    case VK_COMPARE_OP_LESS:
    case VK_COMPARE_OP_LESS_OR_EQUAL:
-      pipeline->ez_state = VC5_EZ_LT_LE;
+      pipeline->ez_state = V3D_EZ_LT_LE;
       break;
    case VK_COMPARE_OP_GREATER:
    case VK_COMPARE_OP_GREATER_OR_EQUAL:
-      pipeline->ez_state = VC5_EZ_GT_GE;
+      pipeline->ez_state = V3D_EZ_GT_GE;
       break;
    case VK_COMPARE_OP_NEVER:
    case VK_COMPARE_OP_EQUAL:
-      pipeline->ez_state = VC5_EZ_UNDECIDED;
+      pipeline->ez_state = V3D_EZ_UNDECIDED;
       break;
    default:
-      pipeline->ez_state = VC5_EZ_DISABLED;
+      pipeline->ez_state = V3D_EZ_DISABLED;
       break;
    }
 
@@ -2558,220 +2843,10 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
    if (ds_info->stencilTestEnable &&
       (!stencil_op_is_no_op(&ds_info->front) ||
        !stencil_op_is_no_op(&ds_info->back))) {
-      pipeline->ez_state = VC5_EZ_DISABLED;
-   }
-}
-
-static void
-pack_shader_state_record(struct v3dv_pipeline *pipeline)
-{
-   assert(sizeof(pipeline->shader_state_record) ==
-          cl_packet_length(GL_SHADER_STATE_RECORD));
-
-   struct v3d_fs_prog_data *prog_data_fs =
-      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
-
-   struct v3d_vs_prog_data *prog_data_vs =
-      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
-
-   struct v3d_vs_prog_data *prog_data_vs_bin =
-      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
-
-
-   /* Note: we are not packing addresses, as we need the job (see
-    * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
-    * point, as they depend on dynamic info that can be set after creating
-    * the pipeline (like the viewport), so they would need to be filled in
-    * later. We are therefore doing a partial prepacking.
-    */
-   v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
-      shader.enable_clipping = true;
-
-      shader.point_size_in_shaded_vertex_data =
-         pipeline->topology == PIPE_PRIM_POINTS;
-
-      /* Must be set if the shader modifies Z, discards, or modifies
-       * the sample mask. For any of these cases, the fragment
-       * shader needs to write the Z value (even just discards).
-       */
-      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
-      /* Set if the EZ test must be disabled (due to shader side
-       * effects and the early_z flag not being present in the
-       * shader).
-       */
-      shader.turn_off_early_z_test = prog_data_fs->disable_ez;
-
-      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
-         prog_data_fs->uses_center_w;
-
-      /* The description for gl_SampleID states that if a fragment shader reads
-       * it, then we should automatically activate per-sample shading. However,
-       * the Vulkan spec also states that if a framebuffer has no attachments:
-       *
-       *    "The subpass continues to use the width, height, and layers of the
-       *     framebuffer to define the dimensions of the rendering area, and the
-       *     rasterizationSamples from each pipeline’s
-       *     VkPipelineMultisampleStateCreateInfo to define the number of
-       *     samples used in rasterization."
-       *
-       * So in this scenario, if the pipeline doesn't enable multiple samples
-       * but the fragment shader accesses gl_SampleID we would be requested
-       * to do per-sample shading in single sample rasterization mode, which
-       * is pointless, so just disable it in that case.
-       */
-      shader.enable_sample_rate_shading =
-         pipeline->sample_rate_shading ||
-         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
-
-      shader.any_shader_reads_hardware_written_primitive_id = false;
-
-      shader.do_scoreboard_wait_on_first_thread_switch =
-         prog_data_fs->lock_scoreboard_on_first_thrsw;
-      shader.disable_implicit_point_line_varyings =
-         !prog_data_fs->uses_implicit_point_line_varyings;
-
-      shader.number_of_varyings_in_fragment_shader =
-         prog_data_fs->num_inputs;
-
-      shader.coordinate_shader_propagate_nans = true;
-      shader.vertex_shader_propagate_nans = true;
-      shader.fragment_shader_propagate_nans = true;
-
-      /* Note: see previous note about addresses */
-      /* shader.coordinate_shader_code_address */
-      /* shader.vertex_shader_code_address */
-      /* shader.fragment_shader_code_address */
-
-      /* FIXME: Use combined input/output size flag in the common case (also
-       * on v3d, see v3dx_draw).
-       */
-      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
-         prog_data_vs_bin->separate_segments;
-      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-         prog_data_vs->separate_segments;
-
-      shader.coordinate_shader_input_vpm_segment_size =
-         prog_data_vs_bin->separate_segments ?
-         prog_data_vs_bin->vpm_input_size : 1;
-      shader.vertex_shader_input_vpm_segment_size =
-         prog_data_vs->separate_segments ?
-         prog_data_vs->vpm_input_size : 1;
-
-      shader.coordinate_shader_output_vpm_segment_size =
-         prog_data_vs_bin->vpm_output_size;
-      shader.vertex_shader_output_vpm_segment_size =
-         prog_data_vs->vpm_output_size;
-
-      /* Note: see previous note about addresses */
-      /* shader.coordinate_shader_uniforms_address */
-      /* shader.vertex_shader_uniforms_address */
-      /* shader.fragment_shader_uniforms_address */
-
-      shader.min_coord_shader_input_segments_required_in_play =
-         pipeline->vpm_cfg_bin.As;
-      shader.min_vertex_shader_input_segments_required_in_play =
-         pipeline->vpm_cfg.As;
-
-      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
-         pipeline->vpm_cfg_bin.Ve;
-      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
-         pipeline->vpm_cfg.Ve;
-
-      shader.coordinate_shader_4_way_threadable =
-         prog_data_vs_bin->base.threads == 4;
-      shader.vertex_shader_4_way_threadable =
-         prog_data_vs->base.threads == 4;
-      shader.fragment_shader_4_way_threadable =
-         prog_data_fs->base.threads == 4;
-
-      shader.coordinate_shader_start_in_final_thread_section =
-         prog_data_vs_bin->base.single_seg;
-      shader.vertex_shader_start_in_final_thread_section =
-         prog_data_vs->base.single_seg;
-      shader.fragment_shader_start_in_final_thread_section =
-         prog_data_fs->base.single_seg;
-
-      shader.vertex_id_read_by_coordinate_shader =
-         prog_data_vs_bin->uses_vid;
-      shader.base_instance_id_read_by_coordinate_shader =
-         prog_data_vs_bin->uses_biid;
-      shader.instance_id_read_by_coordinate_shader =
-         prog_data_vs_bin->uses_iid;
-      shader.vertex_id_read_by_vertex_shader =
-         prog_data_vs->uses_vid;
-      shader.base_instance_id_read_by_vertex_shader =
-         prog_data_vs->uses_biid;
-      shader.instance_id_read_by_vertex_shader =
-         prog_data_vs->uses_iid;
-
-      /* Note: see previous note about addresses */
-      /* shader.address_of_default_attribute_values */
-   }
-}
-
-static void
-pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
-{
-   assert(sizeof(pipeline->vcm_cache_size) ==
-          cl_packet_length(VCM_CACHE_SIZE));
-
-   v3dv_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
-      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
-      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
+      pipeline->ez_state = V3D_EZ_DISABLED;
    }
 }
 
-/* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
-static uint8_t
-get_attr_type(const struct util_format_description *desc)
-{
-   uint32_t r_size = desc->channel[0].size;
-   uint8_t attr_type = ATTRIBUTE_FLOAT;
-
-   switch (desc->channel[0].type) {
-   case UTIL_FORMAT_TYPE_FLOAT:
-      if (r_size == 32) {
-         attr_type = ATTRIBUTE_FLOAT;
-      } else {
-         assert(r_size == 16);
-         attr_type = ATTRIBUTE_HALF_FLOAT;
-      }
-      break;
-
-   case UTIL_FORMAT_TYPE_SIGNED:
-   case UTIL_FORMAT_TYPE_UNSIGNED:
-      switch (r_size) {
-      case 32:
-         attr_type = ATTRIBUTE_INT;
-         break;
-      case 16:
-         attr_type = ATTRIBUTE_SHORT;
-         break;
-      case 10:
-         attr_type = ATTRIBUTE_INT2_10_10_10;
-         break;
-      case 8:
-         attr_type = ATTRIBUTE_BYTE;
-         break;
-      default:
-         fprintf(stderr,
-                 "format %s unsupported\n",
-                 desc->name);
-         attr_type = ATTRIBUTE_BYTE;
-         abort();
-      }
-      break;
-
-   default:
-      fprintf(stderr,
-              "format %s unsupported\n",
-              desc->name);
-      abort();
-   }
-
-   return attr_type;
-}
-
 static bool
 pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
 {
@@ -2829,36 +2904,6 @@ v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
 }
 
 static void
-pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
-                                   uint32_t index,
-
const VkVertexInputAttributeDescription *vi_desc) -{ - const uint32_t packet_length = - cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); - - const struct util_format_description *desc = - vk_format_description(vi_desc->format); - - uint32_t binding = vi_desc->binding; - - v3dv_pack(&pipeline->vertex_attrs[index * packet_length], - GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { - - /* vec_size == 0 means 4 */ - attr.vec_size = desc->nr_channels & 3; - attr.signed_int_type = (desc->channel[0].type == - UTIL_FORMAT_TYPE_SIGNED); - attr.normalized_int_type = desc->channel[0].normalized; - attr.read_as_int_uint = desc->channel[0].pure_integer; - - attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor, - 0xffff); - attr.stride = pipeline->vb[binding].stride; - attr.type = get_attr_type(desc); - } -} - -static void pipeline_set_sample_mask(struct v3dv_pipeline *pipeline, const VkPipelineMultisampleStateCreateInfo *ms_info) { @@ -2920,24 +2965,35 @@ pipeline_init(struct v3dv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *rs_info = raster_enabled ? pCreateInfo->pRasterizationState : NULL; + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info = + rs_info ? vk_find_struct_const( + rs_info->pNext, + PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) : + NULL; + const VkPipelineColorBlendStateCreateInfo *cb_info = raster_enabled ? pCreateInfo->pColorBlendState : NULL; const VkPipelineMultisampleStateCreateInfo *ms_info = raster_enabled ? pCreateInfo->pMultisampleState : NULL; + const VkPipelineColorWriteCreateInfoEXT *cw_info = + cb_info ? vk_find_struct_const(cb_info->pNext, + PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) : + NULL; + pipeline_init_dynamic_state(pipeline, pCreateInfo->pDynamicState, - vp_info, ds_info, cb_info, rs_info); + vp_info, ds_info, cb_info, rs_info, cw_info); /* V3D 4.2 doesn't support depth bounds testing so we don't advertise that * feature and it shouldn't be used by any pipeline. 
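    *
    * A short sketch of how an application can confirm this, assuming a
    * valid VkPhysicalDevice handle phys_dev for this driver:
    *
    *    VkPhysicalDeviceFeatures features;
    *    vkGetPhysicalDeviceFeatures(phys_dev, &features);
    *    // features.depthBounds is expected to be VK_FALSE here
    *
    * so a conformant application never sets depthBoundsTestEnable.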
*/ assert(!ds_info || !ds_info->depthBoundsTestEnable); - pack_blend(pipeline, cb_info); - pack_cfg_bits(pipeline, ds_info, rs_info, ms_info); - pack_stencil_cfg(pipeline, ds_info); + v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info, + rs_info, pv_info, ms_info); + pipeline_set_ez_state(pipeline, ds_info); enable_depth_bias(pipeline, rs_info); pipeline_set_sample_mask(pipeline, ms_info); @@ -2955,49 +3011,14 @@ pipeline_init(struct v3dv_pipeline *pipeline, return result; } - pack_shader_state_record(pipeline); - pack_vcm_cache_size(pipeline); - const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; - pipeline->vb_count = vi_info->vertexBindingDescriptionCount; - for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - const VkVertexInputBindingDescription *desc = - &vi_info->pVertexBindingDescriptions[i]; - - pipeline->vb[desc->binding].stride = desc->stride; - pipeline->vb[desc->binding].instance_divisor = desc->inputRate; - } - - pipeline->va_count = 0; - struct v3d_vs_prog_data *prog_data_vs = - pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; - - for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &vi_info->pVertexAttributeDescriptions[i]; - uint32_t location = desc->location + VERT_ATTRIB_GENERIC0; - - /* We use a custom driver_location_map instead of - * nir_find_variable_with_location because if we were able to get the - * shader variant from the cache, we would not have the nir shader - * available. - */ - uint32_t driver_location = - prog_data_vs->driver_location_map[location]; - - if (driver_location != -1) { - assert(driver_location < MAX_VERTEX_ATTRIBS); - pipeline->va[driver_location].offset = desc->offset; - pipeline->va[driver_location].binding = desc->binding; - pipeline->va[driver_location].vk_format = desc->format; - - pack_shader_state_attribute_record(pipeline, driver_location, desc); + const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info = + vk_find_struct_const(vi_info->pNext, + PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); - pipeline->va_count++; - } - } + v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info); if (pipeline_has_integer_vertex_attrib(pipeline)) { pipeline->default_attribute_values = @@ -3032,7 +3053,7 @@ graphics_pipeline_create(VkDevice _device, VK_OBJECT_TYPE_PIPELINE); if (pipeline == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); result = pipeline_init(pipeline, device, cache, pCreateInfo, @@ -3040,6 +3061,8 @@ graphics_pipeline_create(VkDevice _device, if (result != VK_SUCCESS) { v3dv_destroy_pipeline(pipeline, device, pAllocator); + if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) + *pPipeline = VK_NULL_HANDLE; return result; } @@ -3048,7 +3071,7 @@ graphics_pipeline_create(VkDevice _device, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, @@ -3062,7 +3085,8 @@ v3dv_CreateGraphicsPipelines(VkDevice _device, if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) mtx_lock(&device->pdevice->mutex); - for (uint32_t i = 0; i < count; i++) { + uint32_t i = 0; + for (; i < count; i++) { VkResult local_result; local_result = graphics_pipeline_create(_device, @@ -3074,9 +3098,16 @@ v3dv_CreateGraphicsPipelines(VkDevice _device, if (local_result != VK_SUCCESS) { result = local_result; 
pPipelines[i] = VK_NULL_HANDLE; + + if (pCreateInfos[i].flags & + VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) + break; } } + for (; i < count; i++) + pPipelines[i] = VK_NULL_HANDLE; + if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) mtx_unlock(&device->pdevice->mutex); @@ -3110,6 +3141,11 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, const VkComputePipelineCreateInfo *info, const VkAllocationCallbacks *alloc) { + VkPipelineCreationFeedbackEXT pipeline_feedback = { + .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, + }; + int64_t pipeline_start = os_time_get_nano(); + struct v3dv_device *device = pipeline->device; struct v3dv_physical_device *physical_device = &device->instance->physicalDevice; @@ -3129,6 +3165,7 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, p_stage->entrypoint = sinfo->pName; p_stage->module = vk_shader_module_from_handle(sinfo->module); p_stage->spec_info = sinfo->pSpecializationInfo; + p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 }; pipeline_hash_shader(p_stage->module, p_stage->entrypoint, @@ -3147,16 +3184,27 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, unsigned char pipeline_sha1[20]; pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1); + bool cache_hit = false; pipeline->shared_data = - v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1); + v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit); if (pipeline->shared_data != NULL) { assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); + if (cache_hit && cache != &pipeline->device->default_pipeline_cache) + pipeline_feedback.flags |= + VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT; + goto success; } + if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) + return VK_PIPELINE_COMPILE_REQUIRED_EXT; + pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1, - pipeline->device); + pipeline, + false); + + p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; /* If not found on cache, compile it */ p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); @@ -3183,12 +3231,21 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, return VK_ERROR_OUT_OF_DEVICE_MEMORY; v3dv_pipeline_cache_upload_pipeline(pipeline, cache); + +success: + + pipeline_feedback.duration = os_time_get_nano() - pipeline_start; + write_creation_feedback(pipeline, + info->pNext, + &pipeline_feedback, + 1, + &info->stage); + /* As we got the variants in pipeline->shared_data, after compiling we * don't need the pipeline_stages */ pipeline_free_stages(device, pipeline, alloc); - success: pipeline_check_spill_size(pipeline); return VK_SUCCESS; @@ -3231,12 +3288,14 @@ compute_pipeline_create(VkDevice _device, pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); if (pipeline == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); result = compute_pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { v3dv_destroy_pipeline(pipeline, device, pAllocator); + if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT) + *pPipeline = VK_NULL_HANDLE; return result; } @@ -3245,7 +3304,7 @@ compute_pipeline_create(VkDevice _device, return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t createInfoCount, @@ -3259,7 +3318,8 @@ 
v3dv_CreateComputePipelines(VkDevice _device, if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) mtx_lock(&device->pdevice->mutex); - for (uint32_t i = 0; i < createInfoCount; i++) { + uint32_t i = 0; + for (; i < createInfoCount; i++) { VkResult local_result; local_result = compute_pipeline_create(_device, pipelineCache, @@ -3270,9 +3330,16 @@ v3dv_CreateComputePipelines(VkDevice _device, if (local_result != VK_SUCCESS) { result = local_result; pPipelines[i] = VK_NULL_HANDLE; + + if (pCreateInfos[i].flags & + VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT) + break; } } + for (; i < createInfoCount; i++) + pPipelines[i] = VK_NULL_HANDLE; + if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS)) mtx_unlock(&device->pdevice->mutex); diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c index 7d1d11485..c19eecc42 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c @@ -63,6 +63,20 @@ cache_dump_stats(struct v3dv_pipeline_cache *cache) fprintf(stderr, " cache hit count: %d\n", cache->stats.hit); } +static void +pipeline_cache_lock(struct v3dv_pipeline_cache *cache) +{ + if (!cache->externally_synchronized) + pthread_mutex_lock(&cache->mutex); +} + +static void +pipeline_cache_unlock(struct v3dv_pipeline_cache *cache) +{ + if (!cache->externally_synchronized) + pthread_mutex_unlock(&cache->mutex); +} + void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_cache *cache, @@ -75,10 +89,10 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES) return; - pthread_mutex_lock(&cache->mutex); + pipeline_cache_lock(cache); struct hash_entry *entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); if (entry) return; @@ -91,7 +105,7 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, return; } - pthread_mutex_lock(&cache->mutex); + pipeline_cache_lock(cache); /* Because ralloc isn't thread-safe, we have to do all this inside the * lock. We could unlock for the big memcpy but it's probably not worth * the hassle. 
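The conditional locking above follows the usual double-checked insertion pattern: search the table under the lock, do the slow serialization unlocked, then search again after relocking so a concurrent insert wins. A condensed sketch of the flow in v3dv_pipeline_cache_upload_nir, where serialize_nir_to_blob and the snir setup stand in for the blob code elided by the hunk context:

   pipeline_cache_lock(cache);
   bool found = _mesa_hash_table_search(cache->nir_cache, sha1_key) != NULL;
   pipeline_cache_unlock(cache);
   if (found)
      return;
   serialize_nir_to_blob(&blob);     /* slow serialization, done unlocked */
   pipeline_cache_lock(cache);
   if (!_mesa_hash_table_search(cache->nir_cache, sha1_key))
      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
   pipeline_cache_unlock(cache);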
@@ -99,7 +113,7 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); if (entry) { blob_finish(&blob); - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); return; } @@ -122,7 +136,7 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); } nir_shader* @@ -143,12 +157,12 @@ v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline, const struct serialized_nir *snir = NULL; - pthread_mutex_lock(&cache->mutex); + pipeline_cache_lock(cache); struct hash_entry *entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); if (entry) snir = entry->data; - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); if (snir) { struct blob_reader blob; @@ -185,6 +199,7 @@ v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline, void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache, struct v3dv_device *device, + VkPipelineCacheCreateFlags flags, bool cache_enabled) { cache->device = device; @@ -202,6 +217,9 @@ v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache, cache->stats.miss = 0; cache->stats.hit = 0; cache->stats.count = 0; + + cache->externally_synchronized = flags & + VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT; } else { cache->nir_cache = NULL; cache->cache = NULL; @@ -229,7 +247,8 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data * */ struct v3dv_pipeline_shared_data * v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, - unsigned char sha1_key[20]) + unsigned char sha1_key[20], + bool *cache_hit) { if (!cache || !cache->cache) return NULL; @@ -241,7 +260,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf); } - pthread_mutex_lock(&cache->mutex); + pipeline_cache_lock(cache); struct hash_entry *entry = _mesa_hash_table_search(cache->cache, sha1_key); @@ -252,6 +271,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, assert(cache_entry); cache->stats.hit++; + *cache_hit = true; if (debug_cache) { fprintf(stderr, "\tcache hit: %p\n", cache_entry); if (dump_stats) @@ -261,7 +281,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, v3dv_pipeline_shared_data_ref(cache_entry); - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); return cache_entry; } @@ -273,7 +293,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, cache_dump_stats(cache); } - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); #ifdef ENABLE_SHADER_CACHE struct v3dv_device *device = cache->device; @@ -324,6 +344,14 @@ v3dv_pipeline_shared_data_destroy(struct v3dv_device *device, for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { if (shared_data->variants[stage] != NULL) v3dv_shader_variant_destroy(device, shared_data->variants[stage]); + + /* We don't free binning descriptor maps as we are sharing them + * with the render shaders. 
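+    * The aliasing is established where the maps are created or loaded from
+    * a blob (see v3dv_pipeline_shared_data_create_from_blob below), roughly:
+    *
+    *    maps[BROADCOM_SHADER_VERTEX_BIN] = maps[BROADCOM_SHADER_VERTEX];
+    *
+    * so freeing both the binning and the render entries would double-free.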
+ */ + if (shared_data->maps[stage] != NULL && + !broadcom_shader_stage_is_binning(stage)) { + vk_free(&device->vk.alloc, shared_data->maps[stage]); + } } if (shared_data->assembly_bo) @@ -335,11 +363,8 @@ v3dv_pipeline_shared_data_destroy(struct v3dv_device *device, static struct v3dv_pipeline_shared_data * v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache, const unsigned char sha1_key[20], + struct v3dv_descriptor_maps **maps, struct v3dv_shader_variant **variants, - const struct v3dv_descriptor_map *ubo_map, - const struct v3dv_descriptor_map *ssbo_map, - const struct v3dv_descriptor_map *sampler_map, - const struct v3dv_descriptor_map *texture_map, const uint64_t *total_assembly, const uint32_t total_assembly_size) { @@ -359,13 +384,10 @@ v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache, new_entry->ref_cnt = 1; memcpy(new_entry->sha1_key, sha1_key, 20); - memcpy(&new_entry->ubo_map, ubo_map, sizeof(struct v3dv_descriptor_map)); - memcpy(&new_entry->ssbo_map, ssbo_map, sizeof(struct v3dv_descriptor_map)); - memcpy(&new_entry->sampler_map, sampler_map, sizeof(struct v3dv_descriptor_map)); - memcpy(&new_entry->texture_map, texture_map, sizeof(struct v3dv_descriptor_map)); - - for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { + new_entry->maps[stage] = maps[stage]; new_entry->variants[stage] = variants[stage]; + } struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size, "pipeline shader assembly", true); @@ -402,12 +424,12 @@ pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache, if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES) return; - pthread_mutex_lock(&cache->mutex); + pipeline_cache_lock(cache); struct hash_entry *entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key); if (entry) { - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); return; } @@ -424,7 +446,7 @@ pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache, cache_dump_stats(cache); } - pthread_mutex_unlock(&cache->mutex); + pipeline_cache_unlock(cache); #ifdef ENABLE_SHADER_CACHE /* If we are being called from a on-disk-cache hit, we can skip writing to @@ -490,7 +512,7 @@ shader_variant_create_from_blob(struct v3dv_device *device, { VkResult result; - broadcom_shader_stage stage = blob_read_uint32(blob); + enum broadcom_shader_stage stage = blob_read_uint32(blob); uint32_t prog_data_size = blob_read_uint32(blob); /* FIXME: as we include the stage perhaps we can avoid prog_data_size? 
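    * The stage is serialized right before this field, so the size could in
    * principle be derived from it with a mapping along these lines (a
    * sketch, not existing code):
    *
    *    switch (stage) {
    *    case BROADCOM_SHADER_VERTEX:
    *    case BROADCOM_SHADER_VERTEX_BIN: return sizeof(struct v3d_vs_prog_data);
    *    case BROADCOM_SHADER_FRAGMENT:   return sizeof(struct v3d_fs_prog_data);
    *    case BROADCOM_SHADER_COMPUTE:    return sizeof(struct v3d_compute_prog_data);
    *    default: unreachable("bad stage");
    *    }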
*/ @@ -541,17 +563,32 @@ v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache, { const unsigned char *sha1_key = blob_read_bytes(blob, 20); - const struct v3dv_descriptor_map *ubo_map = - blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); - const struct v3dv_descriptor_map *ssbo_map = - blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); - const struct v3dv_descriptor_map *sampler_map = - blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); - const struct v3dv_descriptor_map *texture_map = - blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map)); + struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 }; - if (blob->overrun) - return NULL; + uint8_t descriptor_maps_count = blob_read_uint8(blob); + for (uint8_t count = 0; count < descriptor_maps_count; count++) { + uint8_t stage = blob_read_uint8(blob); + + const struct v3dv_descriptor_maps *current_maps = + blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps)); + + if (blob->overrun) + return NULL; + + maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL, + sizeof(struct v3dv_descriptor_maps), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (maps[stage] == NULL) + return NULL; + + memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps)); + if (broadcom_shader_stage_is_render_with_binning(stage)) { + enum broadcom_shader_stage bin_stage = + broadcom_binning_shader_stage_for_render_stage(stage); + maps[bin_stage] = maps[stage]; + } + } uint8_t variant_count = blob_read_uint8(blob); @@ -571,8 +608,7 @@ v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache, if (blob->overrun) return NULL; - return v3dv_pipeline_shared_data_new(cache, sha1_key, variants, - ubo_map, ssbo_map, sampler_map, texture_map, + return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants, total_assembly, total_assembly_size); } @@ -643,7 +679,7 @@ pipeline_cache_load(struct v3dv_pipeline_cache *cache, } } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreatePipelineCache(VkDevice _device, const VkPipelineCacheCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -653,16 +689,15 @@ v3dv_CreatePipelineCache(VkDevice _device, struct v3dv_pipeline_cache *cache; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); - assert(pCreateInfo->flags == 0); cache = vk_object_zalloc(&device->vk, pAllocator, sizeof(*cache), VK_OBJECT_TYPE_PIPELINE_CACHE); if (cache == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - v3dv_pipeline_cache_init(cache, device, + v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags, device->instance->pipeline_cache_enabled); if (pCreateInfo->initialDataSize > 0) { @@ -702,7 +737,7 @@ v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache) } } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyPipelineCache(VkDevice _device, VkPipelineCache _cache, const VkAllocationCallbacks *pAllocator) @@ -718,7 +753,7 @@ v3dv_DestroyPipelineCache(VkDevice _device, vk_object_free(&device->vk, pAllocator, cache); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_MergePipelineCaches(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, @@ -820,14 +855,33 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data * { blob_write_bytes(blob, cache_entry->sha1_key, 20); - blob_write_bytes(blob, &cache_entry->ubo_map, - sizeof(struct v3dv_descriptor_map)); - blob_write_bytes(blob, &cache_entry->ssbo_map, - 
sizeof(struct v3dv_descriptor_map)); - blob_write_bytes(blob, &cache_entry->sampler_map, - sizeof(struct v3dv_descriptor_map)); - blob_write_bytes(blob, &cache_entry->texture_map, - sizeof(struct v3dv_descriptor_map)); + uint8_t descriptor_maps_count = 0; + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { + if (broadcom_shader_stage_is_binning(stage)) + continue; + if (cache_entry->maps[stage] == NULL) + continue; + descriptor_maps_count++; + } + + /* Compute pipelines only have one descriptor map, + * graphics pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning + * stages take the descriptor map from the render stage. + */ + assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) || + (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE])); + blob_write_uint8(blob, descriptor_maps_count); + + for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { + if (cache_entry->maps[stage] == NULL) + continue; + if (broadcom_shader_stage_is_binning(stage)) + continue; + + blob_write_uint8(blob, stage); + blob_write_bytes(blob, cache_entry->maps[stage], + sizeof(struct v3dv_descriptor_maps)); + } uint8_t variant_count = 0; for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { @@ -836,10 +890,10 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data * variant_count++; } - /* Right now we only support compute pipeline, or graphics pipeline with - * vertex, vertex bin, and fragment shader. + /* Graphics pipelines with VS+FS have 3 variants, VS+GS+FS will have 5 and + * compute pipelines only have 1. */ - assert(variant_count == 3 || + assert((variant_count == 5 || variant_count == 3) || (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE])); blob_write_uint8(blob, variant_count); @@ -864,7 +918,7 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data * } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetPipelineCacheData(VkDevice _device, VkPipelineCache _cache, size_t *pDataSize, @@ -881,9 +935,9 @@ v3dv_GetPipelineCacheData(VkDevice _device, } struct v3dv_physical_device *pdevice = &device->instance->physicalDevice; - VkResult result = VK_SUCCESS; + VkResult result = VK_INCOMPLETE; - pthread_mutex_lock(&cache->mutex); + pipeline_cache_lock(cache); struct vk_pipeline_cache_header header = { .header_size = sizeof(struct vk_pipeline_cache_header), @@ -898,9 +952,7 @@ v3dv_GetPipelineCacheData(VkDevice _device, intptr_t nir_count_offset = blob_reserve_uint32(&blob); if (nir_count_offset < 0) { *pDataSize = 0; - blob_finish(&blob); - pthread_mutex_unlock(&cache->mutex); - return VK_INCOMPLETE; + goto done; } if (cache->nir_cache) { @@ -915,9 +967,7 @@ v3dv_GetPipelineCacheData(VkDevice _device, if (blob.out_of_memory) { blob.size = save_size; - pthread_mutex_unlock(&cache->mutex); - result = VK_INCOMPLETE; - break; + goto done; } nir_count++; @@ -929,9 +979,7 @@ v3dv_GetPipelineCacheData(VkDevice _device, intptr_t count_offset = blob_reserve_uint32(&blob); if (count_offset < 0) { *pDataSize = 0; - blob_finish(&blob); - pthread_mutex_unlock(&cache->mutex); - return VK_INCOMPLETE; + goto done; } if (cache->cache) { @@ -942,9 +990,7 @@ v3dv_GetPipelineCacheData(VkDevice _device, if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) { /* If it fails reset to the previous size and bail */ blob.size = save_size; - pthread_mutex_unlock(&cache->mutex); - result = VK_INCOMPLETE; - break; + goto done; } count++; @@ -955,7 +1001,7 @@ 
v3dv_GetPipelineCacheData(VkDevice _device,
 
    *pDataSize = blob.size;
 
-   blob_finish(&blob);
+   result = VK_SUCCESS;
 
    if (debug_cache) {
       assert(count <= cache->stats.count);
@@ -965,7 +1011,10 @@ v3dv_GetPipelineCacheData(VkDevice _device,
               cache, nir_count, count, (uint32_t) *pDataSize);
    }
 
-   pthread_mutex_unlock(&cache->mutex);
+ done:
+   blob_finish(&blob);
+
+   pipeline_cache_unlock(cache);
 
    return result;
 }
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_private.h b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
index 36ecba130..d3c07c649 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_private.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
@@ -38,10 +38,15 @@
 #include "vk_device.h"
 #include "vk_instance.h"
+#include "vk_image.h"
+#include "vk_log.h"
 #include "vk_physical_device.h"
 #include "vk_shader_module.h"
 #include "vk_util.h"
 
+#include "vk_command_buffer.h"
+#include "vk_queue.h"
+
 #include <xf86drm.h>
 
 #ifdef HAVE_VALGRIND
@@ -56,6 +61,8 @@
 #include "common/v3d_device_info.h"
 #include "common/v3d_limits.h"
+#include "common/v3d_tiling.h"
+#include "common/v3d_util.h"
 
 #include "compiler/shader_enums.h"
 #include "compiler/spirv/nir_spirv.h"
@@ -69,36 +76,17 @@
 #include "u_atomic.h"
 
 #include "v3dv_entrypoints.h"
-#include "v3dv_extensions.h"
 #include "v3dv_bo.h"
 
 #include "drm-uapi/v3d_drm.h"
 
-/* FIXME: hooks for the packet definition functions. */
-static inline void
-pack_emit_reloc(void *cl, const void *reloc) {}
-
-#define __gen_user_data struct v3dv_cl
-#define __gen_address_type struct v3dv_cl_reloc
-#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
-                                     (reloc)->offset)
-#define __gen_emit_reloc cl_pack_emit_reloc
-#define __gen_unpack_address(cl, s, e) __unpack_address(cl, s, e)
-#include "v3dv_cl.h"
-
 #include "vk_alloc.h"
 #include "simulator/v3d_simulator.h"
+#include "v3dv_cl.h"
 
-/* FIXME: pipe_box from Gallium. Needed for some v3d_tiling.c functions.
- * In the future we might want to drop that dependency, but for now it is
- * good enough.
- */
-#include "util/u_box.h"
 
 #include "wsi_common.h"
 
-#include "broadcom/cle/v3dx_pack.h"
-
 /* A non-fatal assert. Useful for debugging.
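 *
 * Usage sketch: v3dv_assert(job->bo_count > 0); unlike assert(), a failure
 * here is reported but execution continues, which is handy when debugging
 * command recording.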
*/ #ifdef DEBUG #define v3dv_assert(x) ({ \ @@ -124,6 +112,9 @@ struct v3dv_instance; struct v3d_simulator_file; +/* Minimum required by the Vulkan 1.1 spec */ +#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30) + struct v3dv_physical_device { struct vk_physical_device vk; @@ -132,6 +123,15 @@ struct v3dv_physical_device { int32_t display_fd; int32_t master_fd; + /* We need these because it is not clear how to detect + * valid devids in a portable way + */ + bool has_primary; + bool has_render; + + dev_t primary_devid; + dev_t render_devid; + uint8_t driver_build_sha1[20]; uint8_t pipeline_cache_uuid[VK_UUID_SIZE]; uint8_t device_uuid[VK_UUID_SIZE]; @@ -163,6 +163,8 @@ VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance, VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device); void v3dv_wsi_finish(struct v3dv_physical_device *physical_device); +struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain, + uint32_t index); void v3dv_meta_clear_init(struct v3dv_device *device); void v3dv_meta_clear_finish(struct v3dv_device *device); @@ -173,6 +175,10 @@ void v3dv_meta_blit_finish(struct v3dv_device *device); void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device); void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device); +bool v3dv_meta_can_use_tlb(struct v3dv_image *image, + const VkOffset3D *offset, + VkFormat *compat_format); + struct v3dv_instance { struct vk_instance vk; @@ -214,10 +220,9 @@ struct v3dv_queue_submit_wait_info { }; struct v3dv_queue { - struct vk_object_base base; + struct vk_queue vk; struct v3dv_device *device; - VkDeviceQueueCreateFlags flags; /* A list of active v3dv_queue_submit_wait_info */ struct list_head submit_wait_list; @@ -229,7 +234,7 @@ struct v3dv_queue { }; #define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t)) -#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (2 * sizeof(uint32_t) + \ +#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \ sizeof(VkComponentMapping)) struct v3dv_meta_color_clear_pipeline { @@ -273,6 +278,7 @@ struct v3dv_pipeline_key { } color_fmt[V3D_MAX_DRAW_BUFFERS]; uint8_t f32_color_rb; uint32_t va_swap_rb_mask; + bool has_multiview; }; struct v3dv_pipeline_cache_stats { @@ -285,22 +291,26 @@ struct v3dv_pipeline_cache_stats { * * FIXME: perhaps move to common */ -typedef enum { +enum broadcom_shader_stage { BROADCOM_SHADER_VERTEX, BROADCOM_SHADER_VERTEX_BIN, + BROADCOM_SHADER_GEOMETRY, + BROADCOM_SHADER_GEOMETRY_BIN, BROADCOM_SHADER_FRAGMENT, BROADCOM_SHADER_COMPUTE, -} broadcom_shader_stage; +}; #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1) /* Assumes that coordinate shaders will be custom-handled by the caller */ -static inline broadcom_shader_stage +static inline enum broadcom_shader_stage gl_shader_stage_to_broadcom(gl_shader_stage stage) { switch (stage) { case MESA_SHADER_VERTEX: return BROADCOM_SHADER_VERTEX; + case MESA_SHADER_GEOMETRY: + return BROADCOM_SHADER_GEOMETRY; case MESA_SHADER_FRAGMENT: return BROADCOM_SHADER_FRAGMENT; case MESA_SHADER_COMPUTE: @@ -311,12 +321,15 @@ gl_shader_stage_to_broadcom(gl_shader_stage stage) } static inline gl_shader_stage -broadcom_shader_stage_to_gl(broadcom_shader_stage stage) +broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage) { switch (stage) { case BROADCOM_SHADER_VERTEX: case BROADCOM_SHADER_VERTEX_BIN: return MESA_SHADER_VERTEX; + case BROADCOM_SHADER_GEOMETRY: + case BROADCOM_SHADER_GEOMETRY_BIN: + return MESA_SHADER_GEOMETRY; case 
BROADCOM_SHADER_FRAGMENT: return MESA_SHADER_FRAGMENT; case BROADCOM_SHADER_COMPUTE: @@ -326,6 +339,56 @@ broadcom_shader_stage_to_gl(broadcom_shader_stage stage) } } +static inline bool +broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage) +{ + switch (stage) { + case BROADCOM_SHADER_VERTEX_BIN: + case BROADCOM_SHADER_GEOMETRY_BIN: + return true; + default: + return false; + } +} + +static inline bool +broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage) +{ + switch (stage) { + case BROADCOM_SHADER_VERTEX: + case BROADCOM_SHADER_GEOMETRY: + return true; + default: + return false; + } +} + +static inline enum broadcom_shader_stage +broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage) +{ + switch (stage) { + case BROADCOM_SHADER_VERTEX: + return BROADCOM_SHADER_VERTEX_BIN; + case BROADCOM_SHADER_GEOMETRY: + return BROADCOM_SHADER_GEOMETRY_BIN; + default: + unreachable("Invalid shader stage"); + } +} + +static inline const char * +broadcom_shader_stage_name(enum broadcom_shader_stage stage) +{ + switch(stage) { + case BROADCOM_SHADER_VERTEX_BIN: + return "MESA_SHADER_VERTEX_BIN"; + case BROADCOM_SHADER_GEOMETRY_BIN: + return "MESA_SHADER_GEOMETRY_BIN"; + default: + return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage)); + } +} + struct v3dv_pipeline_cache { struct vk_object_base base; @@ -337,6 +400,9 @@ struct v3dv_pipeline_cache { struct hash_table *cache; struct v3dv_pipeline_cache_stats stats; + + /* For VK_EXT_pipeline_creation_cache_control. */ + bool externally_synchronized; }; struct v3dv_device { @@ -441,34 +507,6 @@ struct v3dv_format { bool supports_filtering; }; -/** - * Tiling mode enum used for v3d_resource.c, which maps directly to the Memory - * Format field of render target and Z/Stencil config. - */ -enum v3d_tiling_mode { - /* Untiled resources. Not valid as texture inputs. */ - VC5_TILING_RASTER, - - /* Single line of u-tiles. */ - VC5_TILING_LINEARTILE, - - /* Departure from standard 4-UIF block column format. */ - VC5_TILING_UBLINEAR_1_COLUMN, - - /* Departure from standard 4-UIF block column format. */ - VC5_TILING_UBLINEAR_2_COLUMN, - - /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is - * split 2x2 into utiles. - */ - VC5_TILING_UIF_NO_XOR, - - /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is - * split 2x2 into utiles. - */ - VC5_TILING_UIF_XOR, -}; - struct v3d_resource_slice { uint32_t offset; uint32_t stride; @@ -484,56 +522,43 @@ struct v3d_resource_slice { }; struct v3dv_image { - struct vk_object_base base; - - VkImageType type; - VkImageAspectFlags aspects; - - VkExtent3D extent; - uint32_t levels; - uint32_t array_size; - uint32_t samples; - VkImageUsageFlags usage; - VkImageCreateFlags flags; - VkImageTiling tiling; + struct vk_image vk; - VkFormat vk_format; const struct v3dv_format *format; - uint32_t cpp; - - uint64_t drm_format_mod; bool tiled; struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS]; uint64_t size; /* Total size in bytes */ uint32_t cube_map_stride; - uint32_t alignment; struct v3dv_device_memory *mem; VkDeviceSize mem_offset; + uint32_t alignment; }; VkImageViewType v3dv_image_type_to_view_type(VkImageType type); -struct v3dv_image_view { - struct vk_object_base base; +/* Pre-generating packets needs to consider changes in packet sizes across hw + * versions. Keep things simple and allocate enough space for any supported + * version. We ensure the size is large enough through static asserts. 
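+ *
+ * A sketch of the per-version check this relies on, assuming the
+ * version-specific code still sees cl_packet_length():
+ *
+ *    STATIC_ASSERT(V3DV_BLEND_CFG_LENGTH >= cl_packet_length(BLEND_CFG));
+ *
+ * with one such assert per prepacked packet type defined below.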
+ */
+#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
+#define V3DV_SAMPLER_STATE_LENGTH 24
+#define V3DV_BLEND_CFG_LENGTH 5
+#define V3DV_CFG_BITS_LENGTH 4
+#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
+#define V3DV_VCM_CACHE_SIZE_LENGTH 2
+#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
+#define V3DV_STENCIL_CFG_LENGTH 6
 
-   const struct v3dv_image *image;
-   VkImageAspectFlags aspects;
-   VkExtent3D extent;
-   VkImageViewType type;
+struct v3dv_image_view {
+   struct vk_image_view vk;
 
-   VkFormat vk_format;
    const struct v3dv_format *format;
    bool swap_rb;
    uint32_t internal_bpp;
    uint32_t internal_type;
-
-   uint32_t base_level;
-   uint32_t max_level;
-   uint32_t first_layer;
-   uint32_t last_layer;
    uint32_t offset;
 
    /* Precomputed (composed from createinfo->components and format swizzle)
@@ -552,7 +577,7 @@ struct v3dv_image_view {
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
-   uint8_t texture_shader_state[2][cl_packet_length(TEXTURE_SHADER_STATE)];
+   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
 };
 
 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
@@ -571,7 +596,7 @@ struct v3dv_buffer {
 
 struct v3dv_buffer_view {
    struct vk_object_base base;
 
-   const struct v3dv_buffer *buffer;
+   struct v3dv_buffer *buffer;
 
    VkFormat vk_format;
    const struct v3dv_format *format;
@@ -583,7 +608,7 @@ struct v3dv_buffer_view {
    uint32_t num_elements;
 
    /* Prepacked TEXTURE_SHADER_STATE. */
-   uint8_t texture_shader_state[cl_packet_length(TEXTURE_SHADER_STATE)];
+   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
 };
 
 struct v3dv_subpass_attachment {
@@ -601,20 +626,33 @@ struct v3dv_subpass {
 
    struct v3dv_subpass_attachment ds_attachment;
 
-   bool has_srgb_rt;
-
    /* If we need to emit the clear of the depth/stencil attachment using
    * a draw call instead of using the TLB (GFXH-1461).
    */
    bool do_depth_clear_with_draw;
    bool do_stencil_clear_with_draw;
+
+   /* Multiview */
+   uint32_t view_mask;
 };
 
 struct v3dv_render_pass_attachment {
    VkAttachmentDescription desc;
+
    uint32_t first_subpass;
    uint32_t last_subpass;
 
+   /* When multiview is enabled, we no longer care about when a particular
+    * attachment is first or last used in a render pass, since not all views
+    * in the attachment will meet that criterion. Instead, we need to track
+    * each individual view (layer) in each attachment and emit our stores,
+    * loads and clears accordingly.
+    */
+   struct {
+      uint32_t first_subpass;
+      uint32_t last_subpass;
+   } views[MAX_MULTIVIEW_VIEW_COUNT];
+
    /* If this is a multisampled attachment that is going to be resolved,
    * whether we can use the TLB resolve on store.
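    * (TLB resolve means the samples are resolved directly as the tile
    * buffer is written back on store, avoiding a separate resolve job on
    * this tile-based hardware.)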
*/ @@ -624,6 +662,8 @@ struct v3dv_render_pass_attachment { struct v3dv_render_pass { struct vk_object_base base; + bool multiview_enabled; + uint32_t attachment_count; struct v3dv_render_pass_attachment *attachments; @@ -677,10 +717,12 @@ void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *f const struct v3dv_subpass *subpass, uint8_t *max_bpp, bool *msaa); -bool v3dv_subpass_area_is_tile_aligned(const VkRect2D *area, +bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device, + const VkRect2D *area, struct v3dv_framebuffer *fb, struct v3dv_render_pass *pass, uint32_t subpass_idx); + struct v3dv_cmd_pool { struct vk_object_base base; @@ -711,11 +753,6 @@ struct v3dv_cmd_buffer_attachment_state { union v3dv_clear_value clear_value; }; -void v3dv_get_hw_clear_color(const VkClearColorValue *color, - uint32_t internal_type, - uint32_t internal_size, - uint32_t *hw_color); - struct v3dv_viewport_state { uint32_t count; VkViewport viewports[MAX_VIEWPORTS]; @@ -740,7 +777,8 @@ enum v3dv_dynamic_state_bits { V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5, V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6, V3DV_DYNAMIC_LINE_WIDTH = 1 << 7, - V3DV_DYNAMIC_ALL = (1 << 8) - 1, + V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8, + V3DV_DYNAMIC_ALL = (1 << 9) - 1, }; /* Flags for dirty pipeline state. @@ -762,6 +800,8 @@ enum v3dv_cmd_dirty_bits { V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 13, V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 14, V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 15, + V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 16, + V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 17, }; struct v3dv_dynamic_state { @@ -799,6 +839,8 @@ struct v3dv_dynamic_state { } depth_bias; float line_width; + + uint32_t color_write_enable; }; extern const struct v3dv_dynamic_state default_dynamic_state; @@ -808,10 +850,10 @@ void v3dv_viewport_compute_xform(const VkViewport *viewport, float translate[3]); enum v3dv_ez_state { - VC5_EZ_UNDECIDED = 0, - VC5_EZ_GT_GE, - VC5_EZ_LT_LE, - VC5_EZ_DISABLED, + V3D_EZ_UNDECIDED = 0, + V3D_EZ_GT_GE, + V3D_EZ_LT_LE, + V3D_EZ_DISABLED, }; enum v3dv_job_type { @@ -824,7 +866,6 @@ enum v3dv_job_type { V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS, V3DV_JOB_TYPE_CPU_SET_EVENT, V3DV_JOB_TYPE_CPU_WAIT_EVENTS, - V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS, V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE, V3DV_JOB_TYPE_CPU_CSD_INDIRECT, V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY, @@ -839,6 +880,9 @@ struct v3dv_reset_query_cpu_job_info { struct v3dv_end_query_cpu_job_info { struct v3dv_query_pool *pool; uint32_t query; + + /* This is one unless multiview is used */ + uint32_t count; }; struct v3dv_copy_query_results_cpu_job_info { @@ -865,13 +909,6 @@ struct v3dv_event_wait_cpu_job_info { bool sem_wait; }; -struct v3dv_clear_attachments_cpu_job_info { - uint32_t attachment_count; - VkClearAttachment attachments[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */ - uint32_t rect_count; - VkClearRect *rects; -}; - struct v3dv_copy_buffer_to_image_cpu_job_info { struct v3dv_image *image; struct v3dv_buffer *buffer; @@ -897,6 +934,9 @@ struct v3dv_csd_indirect_cpu_job_info { struct v3dv_timestamp_query_cpu_job_info { struct v3dv_query_pool *pool; uint32_t query; + + /* This is one unless multiview is used */ + uint32_t count; }; struct v3dv_job { @@ -924,6 +964,7 @@ struct v3dv_job { */ struct set *bos; uint32_t bo_count; + uint64_t bo_handle_mask; struct v3dv_bo *tile_alloc; struct v3dv_bo *tile_state; @@ -975,7 +1016,6 @@ struct v3dv_job { struct v3dv_copy_query_results_cpu_job_info query_copy_results; struct v3dv_event_set_cpu_job_info event_set; struct 
v3dv_event_wait_cpu_job_info event_wait; - struct v3dv_clear_attachments_cpu_job_info clear_attachments; struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image; struct v3dv_csd_indirect_cpu_job_info csd_indirect; struct v3dv_timestamp_query_cpu_job_info query_timestamp; @@ -988,6 +1028,7 @@ struct v3dv_job { struct { struct v3dv_bo *shared_memory; uint32_t wg_count[3]; + uint32_t wg_base[3]; struct drm_v3d_submit_csd submit; } csd; }; @@ -998,20 +1039,47 @@ void v3dv_job_init(struct v3dv_job *job, struct v3dv_cmd_buffer *cmd_buffer, int32_t subpass_idx); void v3dv_job_destroy(struct v3dv_job *job); + void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo); -void v3dv_job_emit_binning_flush(struct v3dv_job *job); +void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo); + void v3dv_job_start_frame(struct v3dv_job *job, uint32_t width, uint32_t height, uint32_t layers, + bool allocate_tile_state_for_all_layers, uint32_t render_target_count, uint8_t max_internal_bpp, bool msaa); + +struct v3dv_job * +v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job, + struct v3dv_cmd_buffer *cmd_buffer); + struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device, enum v3dv_job_type type, struct v3dv_cmd_buffer *cmd_buffer, uint32_t subpass_idx); +void +v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t slot_size, + uint32_t used_count, + uint32_t *alloc_count, + void **ptr); + +void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer); + +/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a + * cmd_buffer specific header? + */ +struct v3dv_draw_info { + uint32_t vertex_count; + uint32_t instance_count; + uint32_t first_vertex; + uint32_t first_instance; +}; + struct v3dv_vertex_binding { struct v3dv_buffer *buffer; VkDeviceSize offset; @@ -1043,7 +1111,10 @@ struct v3dv_cmd_buffer_state { struct v3dv_cmd_pipeline_state compute; struct v3dv_dynamic_state dynamic; + uint32_t dirty; + VkShaderStageFlagBits dirty_descriptor_stages; + VkShaderStageFlagBits dirty_push_constants_stages; /* Current clip window. We use this to check whether we have an active * scissor, since in that case we can't use TLB clears and need to fallback @@ -1075,9 +1146,14 @@ struct v3dv_cmd_buffer_state { struct { struct v3dv_cl_reloc vs_bin; struct v3dv_cl_reloc vs; + struct v3dv_cl_reloc gs_bin; + struct v3dv_cl_reloc gs; struct v3dv_cl_reloc fs; } uniforms; + /* Current view index for multiview rendering */ + uint32_t view_index; + /* Used to flag OOM conditions during command buffer recording */ bool oom; @@ -1126,10 +1202,13 @@ struct v3dv_cmd_buffer_state { struct v3dv_end_query_cpu_job_info *states; } end; - /* This is not NULL if we have an active query, that is, we have called - * vkCmdBeginQuery but not vkCmdEndQuery. + /* This BO is not NULL if we have an active query, that is, we have + * called vkCmdBeginQuery but not vkCmdEndQuery. */ - struct v3dv_bo *active_query; + struct { + struct v3dv_bo *bo; + uint32_t offset; + } active_query; } query; }; @@ -1160,44 +1239,24 @@ struct v3dv_descriptor { }; }; -/* The following v3dv_xxx_descriptor structs represent descriptor info that we - * upload to a bo, specifically a subregion of the descriptor pool bo. - * - * The general rule that we apply right now to decide which info goes to such - * bo is that we upload those that are referenced by an address when emitting - * a packet, so needed to be uploaded to an bo in any case. 
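- * (cl_aligned_packet_length(P, 32), used in these structs, is in effect
- * align(cl_packet_length(P), 32), i.e. (len + 31) & ~31, padding each
- * packed state to a 32-byte boundary; see the alignment note below.)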
- * - * Note that these structs are mostly helpers that improve the semantics when - * doing all that, but we could do as other mesa vulkan drivers and just - * upload the info we know it is expected based on the context. - * - * Also note that the sizes are aligned, as there is an alignment requirement - * for addresses. - */ -struct v3dv_sampled_image_descriptor { - uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)]; -}; - -struct v3dv_sampler_descriptor { - uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)]; -}; - -struct v3dv_combined_image_sampler_descriptor { - uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)]; - uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)]; -}; - struct v3dv_query { bool maybe_available; union { - struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */ - uint64_t value; /* Used by CPU queries (timestamp) */ + /* Used by GPU queries (occlusion) */ + struct { + struct v3dv_bo *bo; + uint32_t offset; + }; + /* Used by CPU queries (timestamp) */ + uint64_t value; }; }; struct v3dv_query_pool { struct vk_object_base base; + struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */ + VkQueryType query_type; uint32_t query_count; struct v3dv_query *queries; @@ -1221,7 +1280,7 @@ struct v3dv_cmd_buffer_private_obj { }; struct v3dv_cmd_buffer { - struct vk_object_base base; + struct vk_command_buffer vk; struct v3dv_device *device; @@ -1293,12 +1352,6 @@ void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer, uint32_t dirty_dynamic_state, bool needs_subpass_resume); -void v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer, - int rt, - uint32_t *rt_bpp, - uint32_t *rt_type, - uint32_t *rt_clamp); - void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_query_pool *pool, uint32_t first, @@ -1338,8 +1391,8 @@ struct v3dv_semaphore { /* A syncobject handle associated with this semaphore */ uint32_t sync; - /* The file handle of a fence that we imported into our syncobject */ - int32_t fd; + /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */ + uint32_t temp_sync; }; struct v3dv_fence { @@ -1348,8 +1401,8 @@ struct v3dv_fence { /* A syncobject handle associated with this fence */ uint32_t sync; - /* The file handle of a fence that we imported into our syncobject */ - int32_t fd; + /* A temporary syncobject handle produced from a vkImportFenceFd. */ + uint32_t temp_sync; }; struct v3dv_event { @@ -1358,11 +1411,12 @@ struct v3dv_event { }; struct v3dv_shader_variant { - broadcom_shader_stage stage; + enum broadcom_shader_stage stage; union { struct v3d_prog_data *base; struct v3d_vs_prog_data *vs; + struct v3d_gs_prog_data *gs; struct v3d_fs_prog_data *fs; struct v3d_compute_prog_data *cs; } prog_data; @@ -1397,7 +1451,7 @@ struct v3dv_shader_variant { struct v3dv_pipeline_stage { struct v3dv_pipeline *pipeline; - broadcom_shader_stage stage; + enum broadcom_shader_stage stage; const struct vk_shader_module *module; const char *entrypoint; @@ -1410,20 +1464,8 @@ struct v3dv_pipeline_stage { /** A name for this program, so you can track it in shader-db output. */ uint32_t program_id; -}; -/* FIXME: although the full vpm_config is not required at this point, as we - * don't plan to initially support GS, it is more readable and serves as a - * placeholder, to have the struct and fill it with default values. 
- */ -struct vpm_config { - uint32_t As; - uint32_t Vc; - uint32_t Gs; - uint32_t Gd; - uint32_t Gv; - uint32_t Ve; - uint32_t gs_width; + VkPipelineCreationFeedbackEXT feedback; }; /* We are using the descriptor pool entry for two things: @@ -1590,9 +1632,48 @@ struct v3dv_sampler { * configuration. If needed it will be copied to the descriptor info during * UpdateDescriptorSets */ - uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)]; + uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH]; +}; + +struct v3dv_descriptor_template_entry { + /* The type of descriptor in this entry */ + VkDescriptorType type; + + /* Binding in the descriptor set */ + uint32_t binding; + + /* Offset at which to write into the descriptor set binding */ + uint32_t array_element; + + /* Number of elements to write into the descriptor set binding */ + uint32_t array_count; + + /* Offset into the user provided data */ + size_t offset; + + /* Stride between elements into the user provided data */ + size_t stride; }; +struct v3dv_descriptor_update_template { + struct vk_object_base base; + + VkPipelineBindPoint bind_point; + + /* The descriptor set this template corresponds to. This value is only + * valid if the template was created with the templateType + * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET. + */ + uint8_t set; + + /* Number of entries in this template */ + uint32_t entry_count; + + /* Entries of the template */ + struct v3dv_descriptor_template_entry entries[0]; +}; + + /* We keep two special values for the sampler idx that represents exactly when a * sampler is not needed/provided. The main use is that even if we don't have * sampler, we still need to do the output unpacking (through @@ -1633,6 +1714,13 @@ v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key, *sampler_index = sampler; } +struct v3dv_descriptor_maps { + struct v3dv_descriptor_map ubo_map; + struct v3dv_descriptor_map ssbo_map; + struct v3dv_descriptor_map sampler_map; + struct v3dv_descriptor_map texture_map; +}; + /* The structure represents data shared between different objects, like the * pipeline and the pipeline cache, so we ref count it to know when it should * be freed. @@ -1642,11 +1730,7 @@ struct v3dv_pipeline_shared_data { unsigned char sha1_key[20]; - struct v3dv_descriptor_map ubo_map; - struct v3dv_descriptor_map ssbo_map; - struct v3dv_descriptor_map sampler_map; - struct v3dv_descriptor_map texture_map; - + struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES]; struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES]; struct v3dv_bo *assembly_bo; @@ -1662,14 +1746,20 @@ struct v3dv_pipeline { struct v3dv_render_pass *pass; struct v3dv_subpass *subpass; - /* Note: We can't use just a MESA_SHADER_STAGES array as we need to track - * too the coordinate shader + /* Note: We can't use just a MESA_SHADER_STAGES array because we also need + * to track binning shaders. Note these will be freed once the pipeline + * has been compiled. 
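+    * (The *_bin entries hold the binning-pass counterparts of vs/gs: the
+    * binner runs its own coordinate-shader variant of the vertex pipeline,
+    * which is why those stages are tracked separately here.)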
*/ struct v3dv_pipeline_stage *vs; struct v3dv_pipeline_stage *vs_bin; + struct v3dv_pipeline_stage *gs; + struct v3dv_pipeline_stage *gs_bin; struct v3dv_pipeline_stage *fs; struct v3dv_pipeline_stage *cs; + /* Flags for whether optional pipeline stages are present, for convenience */ + bool has_gs; + /* Spilling memory requirements */ struct { struct v3dv_bo *bo; @@ -1736,7 +1826,7 @@ struct v3dv_pipeline { /* Per-RT bit mask with blend enables */ uint8_t enables; /* Per-RT prepacked blend config packets */ - uint8_t cfg[V3D_MAX_DRAW_BUFFERS][cl_packet_length(BLEND_CFG)]; + uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH]; /* Flag indicating whether the blend factors in use require * color constants. */ @@ -1753,12 +1843,12 @@ struct v3dv_pipeline { /* Packets prepacked during pipeline creation */ - uint8_t cfg_bits[cl_packet_length(CFG_BITS)]; - uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)]; - uint8_t vcm_cache_size[cl_packet_length(VCM_CACHE_SIZE)]; - uint8_t vertex_attrs[cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD) * + uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH]; + uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH]; + uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH]; + uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH * MAX_VERTEX_ATTRIBS]; - uint8_t stencil_cfg[2][cl_packet_length(STENCIL_CFG)]; + uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH]; }; static inline VkPipelineBindPoint @@ -1782,82 +1872,9 @@ v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer, const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void); -static inline uint32_t -v3dv_zs_buffer_from_aspect_bits(VkImageAspectFlags aspects) -{ - const VkImageAspectFlags zs_aspects = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - const VkImageAspectFlags filtered_aspects = aspects & zs_aspects; - - if (filtered_aspects == zs_aspects) - return ZSTENCIL; - else if (filtered_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) - return Z; - else if (filtered_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) - return STENCIL; - else - return NONE; -} - -static inline uint32_t -v3dv_zs_buffer_from_vk_format(VkFormat format) -{ - switch (format) { - case VK_FORMAT_D16_UNORM_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - return ZSTENCIL; - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT: - case VK_FORMAT_X8_D24_UNORM_PACK32: - return Z; - case VK_FORMAT_S8_UINT: - return STENCIL; - default: - return NONE; - } -} - -static inline uint32_t -v3dv_zs_buffer(bool depth, bool stencil) -{ - if (depth && stencil) - return ZSTENCIL; - else if (depth) - return Z; - else if (stencil) - return STENCIL; - return NONE; -} - -static inline uint8_t -v3dv_get_internal_depth_type(VkFormat format) -{ - switch (format) { - case VK_FORMAT_D16_UNORM: - return V3D_INTERNAL_TYPE_DEPTH_16; - case VK_FORMAT_D32_SFLOAT: - return V3D_INTERNAL_TYPE_DEPTH_32F; - case VK_FORMAT_X8_D24_UNORM_PACK32: - case VK_FORMAT_D24_UNORM_S8_UINT: - return V3D_INTERNAL_TYPE_DEPTH_24; - default: - unreachable("Invalid depth format"); - break; - } -} - -uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev); uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev); uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev); -VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error, - const char *file, int line, - const char *format, ...); - -#define vk_error(instance, error) 
__vk_errorf(instance, error, __FILE__, __LINE__, NULL); -#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__); - #ifdef DEBUG #define v3dv_debug_ignored_stype(sType) \ fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType)) @@ -1865,33 +1882,14 @@ VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error, #define v3dv_debug_ignored_stype(sType) #endif -const struct v3dv_format *v3dv_get_format(VkFormat); -const uint8_t *v3dv_get_format_swizzle(VkFormat f); -void v3dv_get_internal_type_bpp_for_output_format(uint32_t format, uint32_t *type, uint32_t *bpp); +const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f); uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable); -bool v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo, - uint32_t tex_format); const struct v3dv_format * -v3dv_get_compatible_tfu_format(const struct v3d_device_info *devinfo, +v3dv_get_compatible_tfu_format(struct v3dv_device *device, uint32_t bpp, VkFormat *out_vk_format); -bool v3dv_buffer_format_supports_features(VkFormat vk_format, +bool v3dv_buffer_format_supports_features(struct v3dv_device *device, + VkFormat vk_format, VkFormatFeatureFlags features); -bool v3dv_format_supports_tlb_resolve(const struct v3dv_format *format); - -uint32_t v3d_utile_width(int cpp); -uint32_t v3d_utile_height(int cpp); - -void v3d_load_tiled_image(void *dst, uint32_t dst_stride, - void *src, uint32_t src_stride, - enum v3d_tiling_mode tiling_format, - int cpp, uint32_t image_h, - const struct pipe_box *box); - -void v3d_store_tiled_image(void *dst, uint32_t dst_stride, - void *src, uint32_t src_stride, - enum v3d_tiling_mode tiling_format, - int cpp, uint32_t image_h, - const struct pipe_box *box); struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline *pipeline, @@ -1912,7 +1910,7 @@ v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage, struct v3dv_shader_variant * v3dv_shader_variant_create(struct v3dv_device *device, - broadcom_shader_stage stage, + enum broadcom_shader_stage stage, struct v3d_prog_data *prog_data, uint32_t prog_data_size, uint32_t assembly_offset, @@ -1958,13 +1956,15 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state, uint32_t index); struct v3dv_cl_reloc -v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state, +v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device, + struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index); struct v3dv_cl_reloc -v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state, +v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device, + struct v3dv_descriptor_state *descriptor_state, struct v3dv_descriptor_map *map, struct v3dv_pipeline_layout *pipeline_layout, uint32_t index); @@ -1992,6 +1992,7 @@ v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set, void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache, struct v3dv_device *device, + VkPipelineCacheCreateFlags, bool cache_enabled); void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache); @@ -2008,7 +2009,8 @@ nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_shared_data * 
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache, - unsigned char sha1_key[20]); + unsigned char sha1_key[20], + bool *cache_hit); void v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline, @@ -2022,73 +2024,58 @@ void v3dv_shader_module_internal_init(struct v3dv_device *device, struct vk_shader_module *module, nir_shader *nir); -#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \ - \ - static inline struct __v3dv_type * \ - __v3dv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __v3dv_type *) _handle; \ - } \ - \ - static inline __VkType \ - __v3dv_type ## _to_handle(struct __v3dv_type *_obj) \ - { \ - return (__VkType) _obj; \ - } - -#define V3DV_DEFINE_NONDISP_HANDLE_CASTS(__v3dv_type, __VkType) \ - \ - static inline struct __v3dv_type * \ - __v3dv_type ## _from_handle(__VkType _handle) \ - { \ - return (struct __v3dv_type *)(uintptr_t) _handle; \ - } \ - \ - static inline __VkType \ - __v3dv_type ## _to_handle(struct __v3dv_type *_obj) \ - { \ - return (__VkType)(uintptr_t) _obj; \ - } - #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \ - struct __v3dv_type *__name = __v3dv_type ## _from_handle(__handle) - -V3DV_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, VkCommandBuffer) -V3DV_DEFINE_HANDLE_CASTS(v3dv_device, VkDevice) -V3DV_DEFINE_HANDLE_CASTS(v3dv_instance, VkInstance) -V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice) -V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue) - -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, VkBufferView) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, VkEvent) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, VkPipelineCache) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, VkQueryPool) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler) -V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore) - -/* This is defined as a macro so that it works for both - * VkImageSubresourceRange and VkImageSubresourceLayers - */ -#define v3dv_layer_count(_image, _range) \ - ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \ - (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount) - -#define v3dv_level_count(_image, _range) \ - ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? 
\
- (_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)
+ VK_FROM_HANDLE(__v3dv_type, __name, __handle)
+
+VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
+ VK_OBJECT_TYPE_COMMAND_BUFFER)
+VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
+VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
+ VK_OBJECT_TYPE_INSTANCE)
+VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
+ VK_OBJECT_TYPE_PHYSICAL_DEVICE)
+VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, base, VkCommandPool,
+ VK_OBJECT_TYPE_COMMAND_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
+ VK_OBJECT_TYPE_BUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
+ VK_OBJECT_TYPE_BUFFER_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
+ VK_OBJECT_TYPE_DEVICE_MEMORY)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
+ VK_OBJECT_TYPE_DESCRIPTOR_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
+ VkDescriptorSetLayout,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
+ VkDescriptorUpdateTemplate,
+ VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
+ VK_OBJECT_TYPE_FRAMEBUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
+ VK_OBJECT_TYPE_IMAGE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
+ VK_OBJECT_TYPE_IMAGE_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
+ VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
+ VK_OBJECT_TYPE_PIPELINE_CACHE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
+ VK_OBJECT_TYPE_PIPELINE_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
+ VK_OBJECT_TYPE_QUERY_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
+ VK_OBJECT_TYPE_RENDER_PASS)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
+ VK_OBJECT_TYPE_SAMPLER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
+ VK_OBJECT_TYPE_SEMAPHORE)

 static inline int
 v3dv_ioctl(int fd, unsigned long request, void *arg)
@@ -2136,4 +2123,31 @@ u64_compare(const void *key1, const void *key2)
 return memcmp(key1, key2, sizeof(uint64_t)) == 0;
 }

+/* Helper to call hw ver specific functions */
+#define v3dv_X(device, thing) ({ \
+ __typeof(&v3d42_##thing) v3d_X_thing; \
+ switch (device->devinfo.ver) { \
+ case 42: \
+ v3d_X_thing = &v3d42_##thing; \
+ break; \
+ default: \
+ unreachable("Unsupported hardware generation"); \
+ } \
+ v3d_X_thing; \
+})
+
+
+/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
+ * define v3dX for each version supported, because when we compile code that
+ * is not version-specific, all version-specific macros need to be already
+ * defined.
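+ *
+ * As an illustration: on a ver 42 device (the only generation handled by
+ * v3dv_X above), v3dv_X(device, job_emit_noop) resolves to
+ * &v3d42_job_emit_noop at run time, while the compile-time fallback below
+ * makes v3dX(job_emit_noop) spell v3d42_job_emit_noop directly.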
+ */
+#ifdef v3dX
+# include "v3dvx_private.h"
+#else
+# define v3dX(x) v3d42_##x
+# include "v3dvx_private.h"
+# undef v3dX
+#endif
+
 #endif /* V3DV_PRIVATE_H */
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_query.c b/lib/mesa/src/broadcom/vulkan/v3dv_query.c
index d3100498c..5e4b92fb1 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_query.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_query.c
@@ -23,7 +23,7 @@
 #include "v3dv_private.h"
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
 v3dv_CreateQueryPool(VkDevice _device,
 const VkQueryPoolCreateInfo *pCreateInfo,
 const VkAllocationCallbacks *pAllocator,
@@ -35,14 +35,11 @@ v3dv_CreateQueryPool(VkDevice _device,
 pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
 assert(pCreateInfo->queryCount > 0);
- /* FIXME: the hw allows us to allocate up to 16 queries in a single block
- * for occlussion queries so we should try to use that.
- */
 struct v3dv_query_pool *pool =
 vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
 VK_OBJECT_TYPE_QUERY_POOL);
 if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 pool->query_type = pCreateInfo->queryType;
 pool->query_count = pCreateInfo->queryCount;
@@ -53,26 +50,39 @@ v3dv_CreateQueryPool(VkDevice _device,
 pool->queries = vk_alloc2(&device->vk.alloc, pAllocator, pool_bytes, 8,
 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 if (pool->queries == NULL) {
- result = vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- goto fail_alloc_bo_list;
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto fail;
+ }
+
+ if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+ /* The hardware allows us to set up groups of 16 queries in consecutive
+ * 4-byte addresses, requiring only that each group of 16 queries is
+ * aligned to a 1024-byte boundary.
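+ * (For example, with this layout query index 37 would land in group
+ * 37 / 16 = 2, at byte offset 2 * 1024 + (37 % 16) * 4 = 2068 into the
+ * pool BO, matching the query_group / query_offset computation below.)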
+ */ + const uint32_t query_groups = DIV_ROUND_UP(pool->query_count, 16); + const uint32_t bo_size = query_groups * 1024; + pool->bo = v3dv_bo_alloc(device, bo_size, "query", true); + if (!pool->bo) { + result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } + if (!v3dv_bo_map(device, pool->bo, bo_size)) { + result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); + goto fail; + } } uint32_t i; for (i = 0; i < pool->query_count; i++) { pool->queries[i].maybe_available = false; switch (pool->query_type) { - case VK_QUERY_TYPE_OCCLUSION: - pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true); - if (!pool->queries[i].bo) { - result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_alloc_bo; - } - /* For occlusion queries we only need a 4-byte counter */ - if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) { - result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - goto fail_alloc_bo; - } + case VK_QUERY_TYPE_OCCLUSION: { + const uint32_t query_group = i / 16; + const uint32_t query_offset = query_group * 1024 + (i % 16) * 4; + pool->queries[i].bo = pool->bo; + pool->queries[i].offset = query_offset; break; + } case VK_QUERY_TYPE_TIMESTAMP: pool->queries[i].value = 0; break; @@ -85,18 +95,17 @@ v3dv_CreateQueryPool(VkDevice _device, return VK_SUCCESS; -fail_alloc_bo: - for (uint32_t j = 0; j < i; j++) - v3dv_bo_free(device, pool->queries[j].bo); - vk_free2(&device->vk.alloc, pAllocator, pool->queries); - -fail_alloc_bo_list: +fail: + if (pool->bo) + v3dv_bo_free(device, pool->bo); + if (pool->queries) + vk_free2(&device->vk.alloc, pAllocator, pool->queries); vk_object_free(&device->vk, pAllocator, pool); return result; } -void +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyQueryPool(VkDevice _device, VkQueryPool queryPool, const VkAllocationCallbacks *pAllocator) @@ -107,12 +116,12 @@ v3dv_DestroyQueryPool(VkDevice _device, if (!pool) return; - if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) { - for (uint32_t i = 0; i < pool->query_count; i++) - v3dv_bo_free(device, pool->queries[i].bo); - } + if (pool->bo) + v3dv_bo_free(device, pool->bo); + + if (pool->queries) + vk_free2(&device->vk.alloc, pAllocator, pool->queries); - vk_free2(&device->vk.alloc, pAllocator, pool->queries); vk_object_free(&device->vk, pAllocator, pool); } @@ -128,12 +137,13 @@ write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value) } } -static uint64_t +static VkResult get_occlusion_query_result(struct v3dv_device *device, struct v3dv_query_pool *pool, uint32_t query, bool do_wait, - bool *available) + bool *available, + uint64_t *value) { assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION); @@ -149,25 +159,28 @@ get_occlusion_query_result(struct v3dv_device *device, * error may occur." 
*/ if (!q->maybe_available) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); if (!v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull)) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); *available = true; } else { *available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0); } - return (uint64_t) *((uint32_t *) q->bo->map); + const uint8_t *query_addr = ((uint8_t *) q->bo->map) + q->offset; + *value = (uint64_t) *((uint32_t *)query_addr); + return VK_SUCCESS; } -static uint64_t +static VkResult get_timestamp_query_result(struct v3dv_device *device, struct v3dv_query_pool *pool, uint32_t query, bool do_wait, - bool *available) + bool *available, + uint64_t *value) { assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP); @@ -182,28 +195,32 @@ get_timestamp_query_result(struct v3dv_device *device, * error may occur." */ if (!q->maybe_available) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); *available = true; } else { *available = q->maybe_available; } - return q->value; + *value = q->value; + return VK_SUCCESS; } -static uint64_t +static VkResult get_query_result(struct v3dv_device *device, struct v3dv_query_pool *pool, uint32_t query, bool do_wait, - bool *available) + bool *available, + uint64_t *value) { switch (pool->query_type) { case VK_QUERY_TYPE_OCCLUSION: - return get_occlusion_query_result(device, pool, query, do_wait, available); + return get_occlusion_query_result(device, pool, query, do_wait, + available, value); case VK_QUERY_TYPE_TIMESTAMP: - return get_timestamp_query_result(device, pool, query, do_wait, available); + return get_timestamp_query_result(device, pool, query, do_wait, + available, value); default: unreachable("Unsupported query type"); } @@ -229,7 +246,11 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device, VkResult result = VK_SUCCESS; for (uint32_t i = first; i < first + count; i++) { bool available = false; - uint64_t value = get_query_result(device, pool, i, do_wait, &available); + uint64_t value = 0; + VkResult query_result = + get_query_result(device, pool, i, do_wait, &available, &value); + if (query_result == VK_ERROR_DEVICE_LOST) + result = VK_ERROR_DEVICE_LOST; /** * From the Vulkan 1.0 spec: @@ -251,7 +272,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device, if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) write_query_result(data, slot++, do_64bit, available ? 
1u : 0u); - if (!write_result) + if (!write_result && result != VK_ERROR_DEVICE_LOST) result = VK_NOT_READY; data += stride; @@ -260,7 +281,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device, return result; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery, @@ -277,7 +298,7 @@ v3dv_GetQueryPoolResults(VkDevice _device, pData, stride, flags); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, @@ -289,7 +310,7 @@ v3dv_CmdResetQueryPool(VkCommandBuffer commandBuffer, v3dv_cmd_buffer_reset_queries(cmd_buffer, pool, firstQuery, queryCount); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, @@ -308,7 +329,7 @@ v3dv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, dst, dstOffset, stride, flags); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, @@ -320,7 +341,7 @@ v3dv_CmdBeginQuery(VkCommandBuffer commandBuffer, v3dv_cmd_buffer_begin_query(cmd_buffer, pool, query, flags); } -void +VKAPI_ATTR void VKAPI_CALL v3dv_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query) diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_queue.c b/lib/mesa/src/broadcom/vulkan/v3dv_queue.c index 6ea6d1acf..1209031d5 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_queue.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_queue.c @@ -34,19 +34,28 @@ v3dv_clif_dump(struct v3dv_device *device, struct v3dv_job *job, struct drm_v3d_submit_cl *submit) { - if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF))) + if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL | + V3D_DEBUG_CL_NO_BIN | + V3D_DEBUG_CLIF)))) return; struct clif_dump *clif = clif_dump_init(&device->devinfo, stderr, - V3D_DEBUG & V3D_DEBUG_CL); + V3D_DEBUG & (V3D_DEBUG_CL | + V3D_DEBUG_CL_NO_BIN), + V3D_DEBUG & V3D_DEBUG_CL_NO_BIN); set_foreach(job->bos, entry) { struct v3dv_bo *bo = (void *)entry->key; char *name = ralloc_asprintf(NULL, "%s_0x%x", bo->name, bo->offset); - v3dv_bo_map(device, bo, bo->size); + bool ok = v3dv_bo_map(device, bo, bo->size); + if (!ok) { + fprintf(stderr, "failed to map BO for clif_dump.\n"); + ralloc_free(name); + goto free_clif; + } clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map); ralloc_free(name); @@ -54,6 +63,7 @@ v3dv_clif_dump(struct v3dv_device *device, clif_dump(clif, submit); + free_clif: clif_dump_destroy(clif); } @@ -136,7 +146,7 @@ gpu_queue_wait_idle(struct v3dv_queue *queue) return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_QueueWaitIdle(VkQueue _queue) { V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); @@ -163,19 +173,22 @@ handle_reset_query_cpu_job(struct v3dv_job *job) * FIXME: we could avoid blocking the main thread for this if we use * submission thread. 
*/ + if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) + v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE); + for (uint32_t i = info->first; i < info->first + info->count; i++) { assert(i < info->pool->query_count); - struct v3dv_query *query = &info->pool->queries[i]; - query->maybe_available = false; + struct v3dv_query *q = &info->pool->queries[i]; + q->maybe_available = false; switch (info->pool->query_type) { case VK_QUERY_TYPE_OCCLUSION: { - v3dv_bo_wait(job->device, query->bo, PIPE_TIMEOUT_INFINITE); - uint32_t *counter = (uint32_t *) query->bo->map; + const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset; + uint32_t *counter = (uint32_t *) q_addr; *counter = 0; break; } case VK_QUERY_TYPE_TIMESTAMP: - query->value = 0; + q->value = 0; break; default: unreachable("Unsupported query type"); @@ -189,9 +202,11 @@ static VkResult handle_end_query_cpu_job(struct v3dv_job *job) { struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end; - assert(info->query < info->pool->query_count); - struct v3dv_query *query = &info->pool->queries[info->query]; - query->maybe_available = true; + for (uint32_t i = 0; i < info->count; i++) { + assert(info->query + i < info->pool->query_count); + struct v3dv_query *query = &info->pool->queries[info->query + i]; + query->maybe_available = true; + } return VK_SUCCESS; } @@ -208,17 +223,19 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job) /* Map the entire dst buffer for the CPU copy if needed */ assert(!bo->map || bo->map_size == bo->size); if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) - return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); /* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a * sync wait on the CPU for the corresponding GPU jobs to finish. We might * want to use a submission thread to avoid blocking on the main thread. 
*/ + uint8_t *offset = ((uint8_t *) bo->map) + + info->offset + info->dst->mem_offset; v3dv_get_query_pool_results_cpu(job->device, info->pool, info->first, info->count, - bo->map + info->dst->mem_offset, + offset, info->stride, info->flags); @@ -343,7 +360,7 @@ spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread) assert(wait_thread != NULL); if (pthread_create(wait_thread, NULL, event_wait_thread_func, job)) - return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(job->device, VK_ERROR_DEVICE_LOST); return VK_NOT_READY; } @@ -396,13 +413,13 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job) struct v3dv_bo *dst_bo = info->image->mem->bo; assert(!dst_bo->map || dst_bo->map_size == dst_bo->size); if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size)) - return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); void *dst_ptr = dst_bo->map; struct v3dv_bo *src_bo = info->buffer->mem->bo; assert(!src_bo->map || src_bo->map_size == src_bo->size); if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size)) - return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); void *src_ptr = src_bo->map; const struct v3d_resource_slice *slice = @@ -441,10 +458,14 @@ handle_timestamp_query_cpu_job(struct v3dv_job *job) /* Compute timestamp */ struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); - assert(info->query < info->pool->query_count); - struct v3dv_query *query = &info->pool->queries[info->query]; - query->maybe_available = true; - query->value = t.tv_sec * 1000000000ull + t.tv_nsec; + + for (uint32_t i = 0; i < info->count; i++) { + assert(info->query + i < info->pool->query_count); + struct v3dv_query *query = &info->pool->queries[info->query + i]; + query->maybe_available = true; + if (i == 0) + query->value = t.tv_sec * 1000000000ull + t.tv_nsec; + } return VK_SUCCESS; } @@ -471,7 +492,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue, assert(info->buffer && info->buffer->mem && info->buffer->mem->bo); struct v3dv_bo *bo = info->buffer->mem->bo; if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size)) - return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY); assert(bo->map); const uint32_t offset = info->buffer->mem_offset + info->offset; @@ -503,23 +524,28 @@ process_semaphores_to_signal(struct v3dv_device *device, drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd); mtx_unlock(&device->mutex); if (fd == -1) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + VkResult result = VK_SUCCESS; for (uint32_t i = 0; i < count; i++) { struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]); - if (sem->fd >= 0) - close(sem->fd); - sem->fd = -1; - - int ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd); - if (ret) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + int ret; + if (!sem->temp_sync) + ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd); + else + ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd); - sem->fd = fd; + if (ret) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + break; + } } - return VK_SUCCESS; + assert(fd >= 0); + close(fd); + + return result; } static VkResult @@ -530,10 +556,6 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence) struct 
v3dv_fence *fence = v3dv_fence_from_handle(_fence); - if (fence->fd >= 0) - close(fence->fd); - fence->fd = -1; - int render_fd = device->pdevice->render_fd; int fd; @@ -541,15 +563,18 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence) drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd); mtx_unlock(&device->mutex); if (fd == -1) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - int ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd); - if (ret) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + int ret; + if (!fence->temp_sync) + ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd); + else + ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd); - fence->fd = fd; + assert(fd >= 0); + close(fd); - return VK_SUCCESS; + return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS; } static VkResult @@ -559,7 +584,7 @@ handle_cl_job(struct v3dv_queue *queue, { struct v3dv_device *device = queue->device; - struct drm_v3d_submit_cl submit; + struct drm_v3d_submit_cl submit = { 0 }; /* Sanity check: we should only flag a bcl sync on a job that needs to be * serialized. @@ -636,7 +661,7 @@ handle_cl_job(struct v3dv_queue *queue, free(bo_handles); if (ret) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); return VK_SUCCESS; } @@ -659,7 +684,7 @@ handle_tfu_job(struct v3dv_queue *queue, if (ret != 0) { fprintf(stderr, "Failed to submit TFU job: %d\n", ret); - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); } return VK_SUCCESS; @@ -704,7 +729,7 @@ handle_csd_job(struct v3dv_queue *queue, free(bo_handles); if (ret) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); return VK_SUCCESS; } @@ -745,91 +770,6 @@ queue_submit_job(struct v3dv_queue *queue, } } -static void -emit_noop_bin(struct v3dv_job *job) -{ - v3dv_job_start_frame(job, 1, 1, 1, 1, V3D_INTERNAL_BPP_32, false); - v3dv_job_emit_binning_flush(job); -} - -static void -emit_noop_render(struct v3dv_job *job) -{ - struct v3dv_cl *rcl = &job->rcl; - v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 * - cl_packet_length(SUPERTILE_COORDINATES)); - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { - config.early_z_disable = true; - config.image_width_pixels = 1; - config.image_height_pixels = 1; - config.number_of_render_targets = 1; - config.multisample_mode_4x = false; - config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32; - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { - rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32; - rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8; - rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; - } - - cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { - clear.z_clear_value = 1.0f; - clear.stencil_clear_value = 0; - }; - - cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { - init.use_auto_chained_tile_lists = true; - init.size_of_first_block_in_chained_tile_lists = - TILE_ALLOCATION_BLOCK_SIZE_64B; - } - - cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { - list.address = v3dv_cl_address(job->tile_alloc, 0); - } - - cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { - config.number_of_bin_tile_lists = 1; - config.total_frame_width_in_tiles = 1; - config.total_frame_height_in_tiles = 1; - config.supertile_width_in_tiles = 1; - 
config.supertile_height_in_tiles = 1; - config.total_frame_width_in_supertiles = 1; - config.total_frame_height_in_supertiles = 1; - } - - struct v3dv_cl *icl = &job->indirect; - v3dv_cl_ensure_space(icl, 200, 1); - struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl); - - cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords); - - cl_emit(icl, END_OF_LOADS, end); - - cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); - - cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } - - cl_emit(icl, END_OF_TILE_MARKER, end); - - cl_emit(icl, RETURN_FROM_SUB_LIST, ret); - - cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { - branch.start = tile_list_start; - branch.end = v3dv_cl_get_address(icl); - } - - cl_emit(rcl, SUPERTILE_COORDINATES, coords) { - coords.column_number_in_supertiles = 0; - coords.row_number_in_supertiles = 0; - } - - cl_emit(rcl, END_OF_RENDERING, end); -} - static VkResult queue_create_noop_job(struct v3dv_queue *queue) { @@ -837,11 +777,10 @@ queue_create_noop_job(struct v3dv_queue *queue) queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!queue->noop_job) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1); - emit_noop_bin(queue->noop_job); - emit_noop_render(queue->noop_job); + v3dv_X(device, job_emit_noop)(queue->noop_job); return VK_SUCCESS; } @@ -1060,7 +999,7 @@ spawn_master_wait_thread(struct v3dv_queue *queue, mtx_lock(&queue->mutex); if (pthread_create(&wait_info->master_wait_thread, NULL, master_wait_thread_func, wait_info)) { - result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST); + result = vk_error(queue, VK_ERROR_DEVICE_LOST); goto done; } @@ -1071,7 +1010,7 @@ done: return result; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo* pSubmits, @@ -1106,7 +1045,15 @@ done: return result; } -VkResult +static void +destroy_syncobj(uint32_t device_fd, uint32_t *sync) +{ + assert(sync); + drmSyncobjDestroy(device_fd, *sync); + *sync = 0; +} + +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -1120,14 +1067,12 @@ v3dv_CreateSemaphore(VkDevice _device, vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore), VK_OBJECT_TYPE_SEMAPHORE); if (sem == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - sem->fd = -1; + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync); if (ret) { vk_object_free(&device->vk, pAllocator, sem); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } *pSemaphore = v3dv_semaphore_to_handle(sem); @@ -1135,7 +1080,158 @@ v3dv_CreateSemaphore(VkDevice _device, return VK_SUCCESS; } -void +VKAPI_ATTR void VKAPI_CALL +v3dv_GetPhysicalDeviceExternalSemaphoreProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, + VkExternalSemaphoreProperties *pExternalSemaphoreProperties) +{ + switch (pExternalSemaphoreInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: + 
pExternalSemaphoreProperties->exportFromImportedHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + + /* FIXME: we can't import external semaphores until we improve the kernel + * submit interface to handle multiple in syncobjs, because once we have + * an imported semaphore in our list of semaphores to wait on, we can no + * longer use the workaround of waiting on the last syncobj fence produced + * from the device, since the imported semaphore may not (and in fact, it + * would typically not) have been produced from same device. + * + * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*. + * Particularly, this test: + * dEQP-VK.synchronization.cross_instance.dedicated. + * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd + * fails consistently because of this, so it'll be a good reference to + * verify the implementation when the kernel bits are in place. + */ + pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; + + /* FIXME: See comment in GetPhysicalDeviceExternalFenceProperties + * for details on why we can't export to SYNC_FD. + */ + if (pExternalSemaphoreInfo->handleType != + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) { + pExternalSemaphoreProperties->externalSemaphoreFeatures |= + VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT; + } + break; + default: + pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; + pExternalSemaphoreProperties->compatibleHandleTypes = 0; + pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; + break; + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_ImportSemaphoreFdKHR( + VkDevice _device, + const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore); + + assert(pImportSemaphoreFdInfo->sType == + VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR); + + int fd = pImportSemaphoreFdInfo->fd; + int render_fd = device->pdevice->render_fd; + + bool is_temporary = + pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT || + (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT); + + uint32_t new_sync; + switch (pImportSemaphoreFdInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: { + /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the + * special value -1 for fd is treated like a valid sync file descriptor + * referring to an object that has already signaled. The import + * operation will succeed and the VkSemaphore will have a temporarily + * imported payload as if a valid file descriptor had been provided." + */ + unsigned flags = fd == -1 ? 
DRM_SYNCOBJ_CREATE_SIGNALED : 0; + if (drmSyncobjCreate(render_fd, flags, &new_sync)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (fd != -1) { + if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) { + drmSyncobjDestroy(render_fd, new_sync); + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + } + break; + } + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: { + if (drmSyncobjFDToHandle(render_fd, fd, &new_sync)) + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); + break; + } + default: + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + + destroy_syncobj(render_fd, &sem->temp_sync); + if (is_temporary) { + sem->temp_sync = new_sync; + } else { + destroy_syncobj(render_fd, &sem->sync); + sem->sync = new_sync; + } + + /* From the Vulkan 1.0.53 spec: + * + * "Importing a semaphore payload from a file descriptor transfers + * ownership of the file descriptor from the application to the + * Vulkan implementation. The application must not perform any + * operations on the file descriptor after a successful import." + * + * If the import fails, we leave the file descriptor open. + */ + if (fd != -1) + close(fd); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_GetSemaphoreFdKHR(VkDevice _device, + const VkSemaphoreGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR); + + *pFd = -1; + int render_fd = device->pdevice->render_fd; + switch (pGetFdInfo->handleType) { + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: { + drmSyncobjExportSyncFile(render_fd, sem->sync, pFd); + if (*pFd == -1) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + break; + case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: + drmSyncobjHandleToFD(render_fd, sem->sync, pFd); + if (*pFd == -1) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + break; + } + default: + unreachable("Unsupported external semaphore handle type"); + } + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL v3dv_DestroySemaphore(VkDevice _device, VkSemaphore semaphore, const VkAllocationCallbacks *pAllocator) @@ -1146,15 +1242,13 @@ v3dv_DestroySemaphore(VkDevice _device, if (sem == NULL) return; - drmSyncobjDestroy(device->pdevice->render_fd, sem->sync); - - if (sem->fd != -1) - close(sem->fd); + destroy_syncobj(device->pdevice->render_fd, &sem->sync); + destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync); vk_object_free(&device->vk, pAllocator, sem); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, @@ -1168,7 +1262,7 @@ v3dv_CreateFence(VkDevice _device, vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence), VK_OBJECT_TYPE_FENCE); if (fence == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); unsigned flags = 0; if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) @@ -1176,17 +1270,136 @@ v3dv_CreateFence(VkDevice _device, int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync); if (ret) { vk_object_free(&device->vk, pAllocator, fence); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } - fence->fd = -1; - *pFence = v3dv_fence_to_handle(fence); return VK_SUCCESS; } -void 
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetPhysicalDeviceExternalFenceProperties(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
+ VkExternalFenceProperties *pExternalFenceProperties)
+
+{
+ switch (pExternalFenceInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ pExternalFenceProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->compatibleHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->externalFenceFeatures =
+ VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
+
+ /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj, not
+ * the syncobj itself, and that fence is only created after we have
+ * submitted to the kernel and updated the syncobj for the fence to import
+ * the actual DRM fence created with the submission. Unfortunately, if the
+ * queue submission has a 'wait for events' we may hold any jobs after the
+ * wait in a user-space thread until the events are signaled, and in that
+ * case we don't update the out fence of the submit until the events are
+ * signaled and we can submit all the jobs involved with the vkQueueSubmit
+ * call. This means that if the application submits with an out fence and
+ * a wait for events, trying to export the out fence to a SYNC_FD right
+ * after the submission and before the events are signaled will fail,
+ * because the actual DRM fence won't exist yet. This is not a problem
+ * with OPAQUE_FD because in this case we export the entire syncobj, not
+ * the underlying DRM fence. To fix this we need to rework our kernel
+ * interface to be more flexible and accept multiple in/out syncobjs so
+ * we can implement event waits as regular fence waits on the kernel side;
+ * until then, we can only reliably export OPAQUE_FD.
+ */
+ if (pExternalFenceInfo->handleType !=
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalFenceProperties->externalFenceFeatures |=
+ VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT;
+ }
+ break;
+ default:
+ pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+ pExternalFenceProperties->compatibleHandleTypes = 0;
+ pExternalFenceProperties->externalFenceFeatures = 0;
+ break;
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_ImportFenceFdKHR(VkDevice _device,
+ const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence);
+
+ assert(pImportFenceFdInfo->sType ==
+ VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
+
+ int fd = pImportFenceFdInfo->fd;
+ int render_fd = device->pdevice->render_fd;
+
+ bool is_temporary =
+ pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT ||
+ (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT);
+
+ uint32_t new_sync;
+ switch (pImportFenceFdInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+ /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
+ * special value -1 for fd is treated like a valid sync file descriptor
+ * referring to an object that has already signaled. The import
+ * operation will succeed and the VkFence will have a temporarily
+ * imported payload as if a valid file descriptor had been provided."
+ */
+ unsigned flags = fd == -1 ?
DRM_SYNCOBJ_CREATE_SIGNALED : 0; + if (drmSyncobjCreate(render_fd, flags, &new_sync)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (fd != -1) { + if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) { + drmSyncobjDestroy(render_fd, new_sync); + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + } + break; + } + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: { + if (drmSyncobjFDToHandle(render_fd, fd, &new_sync)) + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); + break; + } + default: + return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + + destroy_syncobj(render_fd, &fence->temp_sync); + if (is_temporary) { + fence->temp_sync = new_sync; + } else { + destroy_syncobj(render_fd, &fence->sync); + fence->sync = new_sync; + } + + /* From the Vulkan 1.0.53 spec: + * + * "Importing a fence payload from a file descriptor transfers + * ownership of the file descriptor from the application to the + * Vulkan implementation. The application must not perform any + * operations on the file descriptor after a successful import." + * + * If the import fails, we leave the file descriptor open. + */ + if (fd != -1) + close(fd); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL v3dv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator) @@ -1197,15 +1410,13 @@ v3dv_DestroyFence(VkDevice _device, if (fence == NULL) return; - drmSyncobjDestroy(device->pdevice->render_fd, fence->sync); - - if (fence->fd != -1) - close(fence->fd); + destroy_syncobj(device->pdevice->render_fd, &fence->sync); + destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync); vk_object_free(&device->vk, pAllocator, fence); } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetFenceStatus(VkDevice _device, VkFence _fence) { V3DV_FROM_HANDLE(v3dv_device, device, _device); @@ -1216,11 +1427,42 @@ v3dv_GetFenceStatus(VkDevice _device, VkFence _fence) if (ret == -ETIME) return VK_NOT_READY; else if (ret) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_GetFenceFdKHR(VkDevice _device, + const VkFenceGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR); + + *pFd = -1; + int render_fd = device->pdevice->render_fd; + switch (pGetFdInfo->handleType) { + case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: { + drmSyncobjExportSyncFile(render_fd, fence->sync, pFd); + if (*pFd == -1) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + break; + case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: + drmSyncobjHandleToFD(render_fd, fence->sync, pFd); + if (*pFd == -1) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + break; + } + default: + unreachable("Unsupported external fence handle type"); + } + return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) { V3DV_FROM_HANDLE(v3dv_device, device, _device); @@ -1229,23 +1471,41 @@ v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences) sizeof(*syncobjs) * fenceCount, 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!syncobjs) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + int render_fd = device->pdevice->render_fd; + uint32_t 
reset_count = 0; for (uint32_t i = 0; i < fenceCount; i++) { struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]); - syncobjs[i] = fence->sync; + /* From the Vulkan spec, section 'Importing Fence Payloads': + * + * "If the import is temporary, the fence will be restored to its + * permanent state the next time that fence is passed to + * vkResetFences. + * + * Note: Restoring a fence to its prior permanent payload is a + * distinct operation from resetting a fence payload." + * + * To restore the previous state, we just need to destroy the temporary. + */ + if (fence->temp_sync) + destroy_syncobj(render_fd, &fence->temp_sync); + else + syncobjs[reset_count++] = fence->sync; } - int ret = drmSyncobjReset(device->pdevice->render_fd, syncobjs, fenceCount); + int ret = 0; + if (reset_count > 0) + ret = drmSyncobjReset(render_fd, syncobjs, reset_count); vk_free(&device->vk.alloc, syncobjs); if (ret) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, @@ -1260,11 +1520,11 @@ v3dv_WaitForFences(VkDevice _device, sizeof(*syncobjs) * fenceCount, 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!syncobjs) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); for (uint32_t i = 0; i < fenceCount; i++) { struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]); - syncobjs[i] = fence->sync; + syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync; } unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT; @@ -1282,16 +1542,16 @@ v3dv_WaitForFences(VkDevice _device, if (ret == -ETIME) return VK_TIMEOUT; else if (ret) - return vk_error(device->instance, VK_ERROR_DEVICE_LOST); + return vk_error(device, VK_ERROR_DEVICE_LOST); return VK_SUCCESS; } -VkResult +VKAPI_ATTR VkResult VKAPI_CALL v3dv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo, VkFence fence) { V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); - return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT); + return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT); } diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c b/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c index 8dd085862..47bc3a0b1 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c @@ -28,6 +28,52 @@ #include "v3dv_private.h" #include "vk_format_info.h" +/* The only version specific structure that we need is + * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from + * previous V3D versions and we don't expect that to change, so for now let's + * just hardcode the V3D version here. + */ +#define V3D_VERSION 41 +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +/* Our Vulkan resource indices represent indices in descriptor maps which + * include all shader stages, so we need to size the arrays below + * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS. + */ +#define MAX_STAGES 3 + +#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES) +struct texture_bo_list { + struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS]; +}; + +/* This tracks state BOs for both textures and samplers, so we + * multiply by 2. 
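+ * With MAX_STAGES = 3, MAX_TOTAL_STATES below works out to
+ * 6 * V3D_MAX_TEXTURE_SAMPLERS entries: one texture state and one
+ * sampler state per texture/sampler unit per stage.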
+ */ +#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES) +struct state_bo_list { + uint32_t count; + struct v3dv_bo *states[MAX_TOTAL_STATES]; +}; + +#define MAX_TOTAL_UNIFORM_BUFFERS (1 + MAX_UNIFORM_BUFFERS * MAX_STAGES) +#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES) +struct buffer_bo_list { + struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS]; + struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS]; +}; + +static bool +state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo) +{ + for (int i = 0; i < list->count; i++) { + if (list->states[i] == bo) + return true; + } + return false; +} + /* * This method checks if the ubo used for push constants is needed to be * updated or not. @@ -87,43 +133,56 @@ check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer, static void write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline *pipeline, + enum broadcom_shader_stage stage, struct v3dv_cl_out **uniforms, - uint32_t data) + uint32_t data, + struct texture_bo_list *tex_bos, + struct state_bo_list *state_bos) { uint32_t texture_idx = v3d_unit_data_get_unit(data); - struct v3dv_job *job = cmd_buffer->state.job; + struct v3dv_descriptor_state *descriptor_state = v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline); /* We need to ensure that the texture bo is added to the job */ struct v3dv_bo *texture_bo = v3dv_descriptor_map_get_texture_bo(descriptor_state, - &pipeline->shared_data->texture_map, + &pipeline->shared_data->maps[stage]->texture_map, pipeline->layout, texture_idx); assert(texture_bo); - v3dv_job_add_bo(job, texture_bo); + assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS); + tex_bos->tex[texture_idx] = texture_bo; struct v3dv_cl_reloc state_reloc = - v3dv_descriptor_map_get_texture_shader_state(descriptor_state, - &pipeline->shared_data->texture_map, + v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state, + &pipeline->shared_data->maps[stage]->texture_map, pipeline->layout, texture_idx); - cl_aligned_reloc(&job->indirect, uniforms, - state_reloc.bo, - state_reloc.offset + - v3d_unit_data_get_offset(data)); + cl_aligned_u32(uniforms, state_reloc.bo->offset + + state_reloc.offset + + v3d_unit_data_get_offset(data)); + + /* Texture and Sampler states are typically suballocated, so they are + * usually the same BO: only flag them once to avoid trying to add them + * multiple times to the job later. 
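+ * (The same check, via state_bo_in_list above, is applied to the sampler
+ * state BO in write_tmu_p1 below.)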
+ */ + if (!state_bo_in_list(state_bos, state_reloc.bo)) { + assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS); + state_bos->states[state_bos->count++] = state_reloc.bo; + } } /** V3D 4.x TMU configuration parameter 1 (sampler) */ static void write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline *pipeline, + enum broadcom_shader_stage stage, struct v3dv_cl_out **uniforms, - uint32_t data) + uint32_t data, + struct state_bo_list *state_bos) { uint32_t sampler_idx = v3d_unit_data_get_unit(data); - struct v3dv_job *job = cmd_buffer->state.job; struct v3dv_descriptor_state *descriptor_state = v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline); @@ -131,13 +190,13 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer, sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX); struct v3dv_cl_reloc sampler_state_reloc = - v3dv_descriptor_map_get_sampler_state(descriptor_state, - &pipeline->shared_data->sampler_map, + v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state, + &pipeline->shared_data->maps[stage]->sampler_map, pipeline->layout, sampler_idx); const struct v3dv_sampler *sampler = v3dv_descriptor_map_get_sampler(descriptor_state, - &pipeline->shared_data->sampler_map, + &pipeline->shared_data->maps[stage]->sampler_map, pipeline->layout, sampler_idx); assert(sampler); @@ -151,26 +210,36 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer, &p1_unpacked); } - cl_aligned_reloc(&job->indirect, uniforms, - sampler_state_reloc.bo, - sampler_state_reloc.offset + - p1_packed); + cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset + + sampler_state_reloc.offset + + p1_packed); + + /* Texture and Sampler states are typically suballocated, so they are + * usually the same BO: only flag them once to avoid trying to add them + * multiple times to the job later. + */ + if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) { + assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS); + state_bos->states[state_bos->count++] = sampler_state_reloc.bo; + } } static void write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline *pipeline, + enum broadcom_shader_stage stage, struct v3dv_cl_out **uniforms, enum quniform_contents content, - uint32_t data) + uint32_t data, + struct buffer_bo_list *buffer_bos) { - struct v3dv_job *job = cmd_buffer->state.job; struct v3dv_descriptor_state *descriptor_state = v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline); struct v3dv_descriptor_map *map = content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ? - &pipeline->shared_data->ubo_map : &pipeline->shared_data->ssbo_map; + &pipeline->shared_data->maps[stage]->ubo_map : + &pipeline->shared_data->maps[stage]->ssbo_map; uint32_t offset = content == QUNIFORM_UBO_ADDR ? @@ -193,10 +262,10 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer, &cmd_buffer->push_constants_resource; assert(resource->bo); - cl_aligned_reloc(&job->indirect, uniforms, - resource->bo, - resource->offset + offset + dynamic_offset); - + cl_aligned_u32(uniforms, resource->bo->offset + + resource->offset + + offset + dynamic_offset); + buffer_bos->ubo[0] = resource->bo; } else { uint32_t index = content == QUNIFORM_UBO_ADDR ? 
@@ -216,10 +285,18 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer, content == QUNIFORM_GET_UBO_SIZE) { cl_aligned_u32(uniforms, descriptor->range); } else { - cl_aligned_reloc(&job->indirect, uniforms, - descriptor->buffer->mem->bo, - descriptor->buffer->mem_offset + - descriptor->offset + offset + dynamic_offset); + cl_aligned_u32(uniforms, descriptor->buffer->mem->bo->offset + + descriptor->buffer->mem_offset + + descriptor->offset + + offset + dynamic_offset); + + if (content == QUNIFORM_UBO_ADDR) { + assert(index + 1 < MAX_TOTAL_UNIFORM_BUFFERS); + buffer_bos->ubo[index + 1] = descriptor->buffer->mem->bo; + } else { + assert(index < MAX_TOTAL_STORAGE_BUFFERS); + buffer_bos->ssbo[index] = descriptor->buffer->mem->bo; + } } } } @@ -235,26 +312,26 @@ get_texture_size_from_image_view(struct v3dv_image_view *image_view, /* We don't u_minify the values, as we are using the image_view * extents */ - return image_view->extent.width; + return image_view->vk.extent.width; case QUNIFORM_IMAGE_HEIGHT: case QUNIFORM_TEXTURE_HEIGHT: - return image_view->extent.height; + return image_view->vk.extent.height; case QUNIFORM_IMAGE_DEPTH: case QUNIFORM_TEXTURE_DEPTH: - return image_view->extent.depth; + return image_view->vk.extent.depth; case QUNIFORM_IMAGE_ARRAY_SIZE: case QUNIFORM_TEXTURE_ARRAY_SIZE: - if (image_view->type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) { - return image_view->last_layer - image_view->first_layer + 1; + if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) { + return image_view->vk.layer_count; } else { - assert((image_view->last_layer - image_view->first_layer + 1) % 6 == 0); - return (image_view->last_layer - image_view->first_layer + 1) / 6; + assert(image_view->vk.layer_count % 6 == 0); + return image_view->vk.layer_count / 6; } case QUNIFORM_TEXTURE_LEVELS: - return image_view->max_level - image_view->base_level + 1; + return image_view->vk.level_count; case QUNIFORM_TEXTURE_SAMPLES: - assert(image_view->image); - return image_view->image->samples; + assert(image_view->vk.image); + return image_view->vk.image->samples; default: unreachable("Bad texture size field"); } @@ -279,16 +356,18 @@ get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view, static uint32_t get_texture_size(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline *pipeline, + enum broadcom_shader_stage stage, enum quniform_contents contents, uint32_t data) { - uint32_t texture_idx = v3d_unit_data_get_unit(data); + uint32_t texture_idx = data; + struct v3dv_descriptor_state *descriptor_state = v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline); struct v3dv_descriptor *descriptor = v3dv_descriptor_map_get_descriptor(descriptor_state, - &pipeline->shared_data->texture_map, + &pipeline->shared_data->maps[stage]->texture_map, pipeline->layout, texture_idx, NULL); @@ -322,6 +401,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job = cmd_buffer->state.job; assert(job); + assert(job->cmd_buffer == cmd_buffer); + + struct texture_bo_list tex_bos = { 0 }; + struct state_bo_list state_bos = { 0 }; + struct buffer_bo_list buffer_bos = { 0 }; /* The hardware always pre-fetches the next uniform (also when there * aren't any), so we always allocate space for an extra slot. 
This @@ -369,17 +453,20 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, case QUNIFORM_UBO_ADDR: case QUNIFORM_GET_SSBO_SIZE: case QUNIFORM_GET_UBO_SIZE: - write_ubo_ssbo_uniforms(cmd_buffer, pipeline, &uniforms, - uinfo->contents[i], data); + write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms, + uinfo->contents[i], data, &buffer_bos); + break; case QUNIFORM_IMAGE_TMU_CONFIG_P0: case QUNIFORM_TMU_CONFIG_P0: - write_tmu_p0(cmd_buffer, pipeline, &uniforms, data); + write_tmu_p0(cmd_buffer, pipeline, variant->stage, + &uniforms, data, &tex_bos, &state_bos); break; case QUNIFORM_TMU_CONFIG_P1: - write_tmu_p1(cmd_buffer, pipeline, &uniforms, data); + write_tmu_p1(cmd_buffer, pipeline, variant->stage, + &uniforms, data, &state_bos); break; case QUNIFORM_IMAGE_WIDTH: @@ -395,10 +482,66 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, cl_aligned_u32(&uniforms, get_texture_size(cmd_buffer, pipeline, + variant->stage, uinfo->contents[i], data)); break; + /* We generate this from geometry shaders to cap the generated gl_Layer + * to be within the number of layers of the framebuffer so we prevent the + * binner from trying to access tile state memory out of bounds (for + * layers that don't exist). + * + * Unfortunately, for secondary command buffers we may not know the + * number of layers in the framebuffer at this stage. Since we are + * only using this to sanitize the shader and it should not have any + * impact on correct shaders that emit valid values for gl_Layer, + * we just work around it by using the largest number of layers we + * support. + * + * FIXME: we could do better than this by recording in the job that + * the value at this uniform offset is not correct, and patching it when + * we execute the secondary command buffer into a primary, since we do + * have the correct number of layers at that point, but again, since this + * is only for sanitizing the shader and it only affects the specific case + * of secondary command buffers without framebuffer info available, it + * might not be worth the trouble. + * + * With multiview the number of layers is dictated by the view mask + * and not by the framebuffer layers. We do set the job's frame tiling + * information correctly from the view mask in that case, however, + * secondary command buffers may not have valid frame tiling data, + * so when multiview is enabled, we always set the number of layers + * from the subpass view mask.
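+ * + * Note: util_last_bit(view_mask) below returns one plus the index of the + * highest bit set in the view mask, which is exactly the layer count we + * want here.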
+ */ + case QUNIFORM_FB_LAYERS: { + const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state; + const uint32_t view_mask = + state->pass->subpasses[state->subpass_idx].view_mask; + + uint32_t num_layers; + if (view_mask != 0) { + num_layers = util_last_bit(view_mask); + } else if (job->frame_tiling.layers != 0) { + num_layers = job->frame_tiling.layers; + } else if (cmd_buffer->state.framebuffer) { + num_layers = cmd_buffer->state.framebuffer->layers; + } else { + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + num_layers = 2048; +#if DEBUG + fprintf(stderr, "Skipping gl_LayerID shader sanity check for " + "secondary command buffer\n"); +#endif + } + cl_aligned_u32(&uniforms, num_layers); + break; + } + + case QUNIFORM_VIEW_INDEX: + cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index); + break; + case QUNIFORM_NUM_WORK_GROUPS: assert(job->type == V3DV_JOB_TYPE_GPU_CSD); assert(job->csd.wg_count[data] > 0); @@ -407,15 +550,20 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, cl_aligned_u32(&uniforms, job->csd.wg_count[data]); break; + case QUNIFORM_WORK_GROUP_BASE: + assert(job->type == V3DV_JOB_TYPE_GPU_CSD); + cl_aligned_u32(&uniforms, job->csd.wg_base[data]); + break; + case QUNIFORM_SHARED_OFFSET: assert(job->type == V3DV_JOB_TYPE_GPU_CSD); assert(job->csd.shared_memory); - cl_aligned_reloc(&job->indirect, &uniforms, job->csd.shared_memory, 0); + cl_aligned_u32(&uniforms, job->csd.shared_memory->offset); break; case QUNIFORM_SPILL_OFFSET: assert(pipeline->spill.bo); - cl_aligned_reloc(&job->indirect, &uniforms, pipeline->spill.bo, 0); + cl_aligned_u32(&uniforms, pipeline->spill.bo->offset); break; case QUNIFORM_SPILL_SIZE_PER_THREAD: @@ -430,6 +578,30 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer, cl_end(&job->indirect, uniforms); + for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) { + if (tex_bos.tex[i]) + v3dv_job_add_bo(job, tex_bos.tex[i]); + } + + for (int i = 0; i < state_bos.count; i++) + v3dv_job_add_bo(job, state_bos.states[i]); + + for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) { + if (buffer_bos.ubo[i]) + v3dv_job_add_bo(job, buffer_bos.ubo[i]); + } + + for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) { + if (buffer_bos.ssbo[i]) + v3dv_job_add_bo(job, buffer_bos.ssbo[i]); + } + + if (job->csd.shared_memory) + v3dv_job_add_bo(job, job->csd.shared_memory); + + if (pipeline->spill.bo) + v3dv_job_add_bo(job, pipeline->spill.bo); + return uniform_stream; } diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c b/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c index 25bb4636a..154adf3a7 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c @@ -25,11 +25,12 @@ #include "v3dv_private.h" #include "drm-uapi/drm_fourcc.h" +#include "wsi_common_entrypoints.h" #include "vk_format_info.h" #include "vk_util.h" #include "wsi_common.h" -static PFN_vkVoidFunction +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL v3dv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) { V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, physicalDevice); @@ -46,6 +47,31 @@ v3dv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) return vk_device_dispatch_table_get(&vk_device_trampolines, pName); } +static bool +v3dv_wsi_can_present_on_device(VkPhysicalDevice _pdevice, int fd) +{ + V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, _pdevice); + + drmDevicePtr fd_devinfo, display_devinfo; + int ret; + + ret = drmGetDevice2(fd, 0, &fd_devinfo); + if (ret) + return 
false; + + ret = drmGetDevice2(pdevice->display_fd, 0, &display_devinfo); + if (ret) { + drmFreeDevice(&fd_devinfo); + return false; + } + + bool result = drmDevicesEqual(fd_devinfo, display_devinfo); + + drmFreeDevice(&fd_devinfo); + drmFreeDevice(&display_devinfo); + return result; +} + VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device) { @@ -61,6 +87,10 @@ v3dv_wsi_init(struct v3dv_physical_device *physical_device) return result; physical_device->wsi_device.supports_modifiers = true; + physical_device->wsi_device.can_present_on_device = + v3dv_wsi_can_present_on_device; + + physical_device->vk.wsi_device = &physical_device->wsi_device; return VK_SUCCESS; } @@ -68,38 +98,11 @@ v3dv_wsi_init(struct v3dv_physical_device *physical_device) void v3dv_wsi_finish(struct v3dv_physical_device *physical_device) { + physical_device->vk.wsi_device = NULL; wsi_device_finish(&physical_device->wsi_device, &physical_device->vk.instance->alloc); } -void v3dv_DestroySurfaceKHR( - VkInstance _instance, - VkSurfaceKHR _surface, - const VkAllocationCallbacks* pAllocator) -{ - V3DV_FROM_HANDLE(v3dv_instance, instance, _instance); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); - - if (!surface) - return; - - vk_free2(&instance->vk.alloc, pAllocator, surface); -} - -VkResult v3dv_GetPhysicalDeviceSurfaceSupportKHR( - VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - VkSurfaceKHR surface, - VkBool32* pSupported) -{ - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - - return wsi_common_get_surface_support(&device->wsi_device, - queueFamilyIndex, - surface, - pSupported); -} - static void constraint_surface_capabilities(VkSurfaceCapabilitiesKHR *caps) { @@ -114,74 +117,36 @@ constraint_surface_capabilities(VkSurfaceCapabilitiesKHR *caps) caps->supportedUsageFlags &= ~VK_IMAGE_USAGE_SAMPLED_BIT; } -VkResult v3dv_GetPhysicalDeviceSurfaceCapabilitiesKHR( +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_GetPhysicalDeviceSurfaceCapabilitiesKHR( VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) { - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - VkResult result; - result = wsi_common_get_surface_capabilities(&device->wsi_device, - surface, - pSurfaceCapabilities); + result = wsi_GetPhysicalDeviceSurfaceCapabilitiesKHR(physicalDevice, + surface, + pSurfaceCapabilities); constraint_surface_capabilities(pSurfaceCapabilities); return result; } -VkResult v3dv_GetPhysicalDeviceSurfaceCapabilities2KHR( +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_GetPhysicalDeviceSurfaceCapabilities2KHR( VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, VkSurfaceCapabilities2KHR* pSurfaceCapabilities) { - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - VkResult result; - result = wsi_common_get_surface_capabilities2(&device->wsi_device, - pSurfaceInfo, - pSurfaceCapabilities); + result = wsi_GetPhysicalDeviceSurfaceCapabilities2KHR(physicalDevice, + pSurfaceInfo, + pSurfaceCapabilities); constraint_surface_capabilities(&pSurfaceCapabilities->surfaceCapabilities); return result; } -VkResult v3dv_GetPhysicalDeviceSurfaceFormatsKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormatKHR* pSurfaceFormats) -{ - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - - return wsi_common_get_surface_formats(&device->wsi_device, surface, - pSurfaceFormatCount, pSurfaceFormats); -} - -VkResult 
v3dv_GetPhysicalDeviceSurfaceFormats2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, - uint32_t* pSurfaceFormatCount, - VkSurfaceFormat2KHR* pSurfaceFormats) -{ - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - - return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, - pSurfaceFormatCount, pSurfaceFormats); -} - -VkResult v3dv_GetPhysicalDeviceSurfacePresentModesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - uint32_t* pPresentModeCount, - VkPresentModeKHR* pPresentModes) -{ - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - - return wsi_common_get_surface_present_modes(&device->wsi_device, surface, - pPresentModeCount, - pPresentModes); -} - -VkResult v3dv_CreateSwapchainKHR( +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_CreateSwapchainKHR( VkDevice _device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, @@ -190,7 +155,6 @@ VkResult v3dv_CreateSwapchainKHR( V3DV_FROM_HANDLE(v3dv_device, device, _device); struct v3dv_instance *instance = device->instance; struct v3dv_physical_device *pdevice = &instance->physicalDevice; - struct wsi_device *wsi_device = &pdevice->wsi_device; ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface); VkResult result = @@ -198,64 +162,29 @@ VkResult v3dv_CreateSwapchainKHR( if (result != VK_SUCCESS) return result; - const VkAllocationCallbacks *alloc; - if (pAllocator) - alloc = pAllocator; - else - alloc = &device->vk.alloc; - - return wsi_common_create_swapchain(wsi_device, _device, - pCreateInfo, alloc, pSwapchain); + return wsi_CreateSwapchainKHR(_device, pCreateInfo, pAllocator, pSwapchain); } -void v3dv_DestroySwapchainKHR( - VkDevice _device, - VkSwapchainKHR swapchain, - const VkAllocationCallbacks* pAllocator) +struct v3dv_image * +v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain, uint32_t index) { - V3DV_FROM_HANDLE(v3dv_device, device, _device); - const VkAllocationCallbacks *alloc; + uint32_t n_images = index + 1; + VkImage *images = malloc(sizeof(*images) * n_images); + VkResult result = wsi_common_get_images(swapchain, &n_images, images); - if (pAllocator) - alloc = pAllocator; - else - alloc = &device->vk.alloc; + if (result != VK_SUCCESS && result != VK_INCOMPLETE) { + free(images); + return NULL; + } - wsi_common_destroy_swapchain(_device, swapchain, alloc); -} + V3DV_FROM_HANDLE(v3dv_image, image, images[index]); + free(images); -VkResult v3dv_GetSwapchainImagesKHR( - VkDevice device, - VkSwapchainKHR swapchain, - uint32_t* pSwapchainImageCount, - VkImage* pSwapchainImages) -{ - return wsi_common_get_images(swapchain, - pSwapchainImageCount, - pSwapchainImages); + return image; } -VkResult v3dv_AcquireNextImageKHR( - VkDevice device, - VkSwapchainKHR swapchain, - uint64_t timeout, - VkSemaphore semaphore, - VkFence fence, - uint32_t* pImageIndex) -{ - VkAcquireNextImageInfoKHR acquire_info = { - .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, - .swapchain = swapchain, - .timeout = timeout, - .semaphore = semaphore, - .fence = fence, - .deviceMask = 0, - }; - - return v3dv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex); -} - -VkResult v3dv_AcquireNextImage2KHR( +VKAPI_ATTR VkResult VKAPI_CALL +v3dv_AcquireNextImage2KHR( VkDevice _device, const VkAcquireNextImageInfoKHR* pAcquireInfo, uint32_t* pImageIndex) @@ -279,52 +208,3 @@ VkResult v3dv_AcquireNextImage2KHR( return result; } - -VkResult v3dv_QueuePresentKHR( - VkQueue _queue, - const VkPresentInfoKHR* 
pPresentInfo) -{ - V3DV_FROM_HANDLE(v3dv_queue, queue, _queue); - struct v3dv_physical_device *pdevice = - &queue->device->instance->physicalDevice; - - return wsi_common_queue_present(&pdevice->wsi_device, - v3dv_device_to_handle(queue->device), - _queue, 0, - pPresentInfo); -} - -VkResult v3dv_GetDeviceGroupPresentCapabilitiesKHR( - VkDevice device, - VkDeviceGroupPresentCapabilitiesKHR* pCapabilities) -{ - memset(pCapabilities->presentMask, 0, - sizeof(pCapabilities->presentMask)); - pCapabilities->presentMask[0] = 0x1; - pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; - - return VK_SUCCESS; -} - -VkResult v3dv_GetDeviceGroupSurfacePresentModesKHR( - VkDevice device, - VkSurfaceKHR surface, - VkDeviceGroupPresentModeFlagsKHR* pModes) -{ - *pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; - - return VK_SUCCESS; -} - -VkResult v3dv_GetPhysicalDevicePresentRectanglesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - uint32_t* pRectCount, - VkRect2D* pRects) -{ - V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice); - - return wsi_common_get_present_rectangles(&device->wsi_device, - surface, - pRectCount, pRects); -} diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c new file mode 100644 index 000000000..c2f2c7786 --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c @@ -0,0 +1,2281 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3dv_private.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +#include "util/half_float.h" +#include "vulkan/util/vk_format.h" +#include "util/u_pack_color.h" + +#include "vk_format_info.h" + +void +v3dX(job_emit_binning_flush)(struct v3dv_job *job) +{ + assert(job); + + v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH)); + v3dv_return_if_oom(NULL, job); + + cl_emit(&job->bcl, FLUSH, flush); +} + +void +v3dX(job_emit_binning_prolog)(struct v3dv_job *job, + const struct v3dv_frame_tiling *tiling, + uint32_t layers) +{ + /* This must go before the binning mode configuration. It is + * required for layered framebuffers to work. 
+ */ + cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) { + config.number_of_layers = layers; + } + + cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { + config.width_in_pixels = tiling->width; + config.height_in_pixels = tiling->height; + config.number_of_render_targets = MAX2(tiling->render_target_count, 1); + config.multisample_mode_4x = tiling->msaa; + config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; + } + + /* There's definitely nothing in the VCD cache we want. */ + cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); + + /* "Binning mode lists must have a Start Tile Binning item (6) after + * any prefix state data before the binning list proper starts." + */ + cl_emit(&job->bcl, START_TILE_BINNING, bin); +} + +void +v3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer) +{ + assert(cmd_buffer->state.job); + v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl, + cl_packet_length(RETURN_FROM_SUB_LIST)); + v3dv_return_if_oom(cmd_buffer, NULL); + cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret); +} + +void +v3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect) +{ + assert(job); + + v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW)); + v3dv_return_if_oom(NULL, job); + + cl_emit(&job->bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = rect->offset.x; + clip.clip_window_bottom_pixel_coordinate = rect->offset.y; + clip.clip_window_width_in_pixels = rect->extent.width; + clip.clip_window_height_in_pixels = rect->extent.height; + } +} + +static void +cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_cl *cl, + struct v3dv_image_view *iview, + uint32_t layer, + uint32_t buffer) +{ + const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; + const struct v3d_resource_slice *slice = + &image->slices[iview->vk.base_mip_level]; + uint32_t layer_offset = + v3dv_layer_offset(image, iview->vk.base_mip_level, + iview->vk.base_array_layer + layer); + + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = buffer; + load.address = v3dv_cl_address(image->mem->bo, layer_offset); + + load.input_image_format = iview->format->rt_type; + load.r_b_swap = iview->swap_rb; + load.memory_format = slice->tiling; + + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + load.height_in_ub_or_stride = + slice->padded_height_of_output_image_in_uif_blocks; + } else if (slice->tiling == V3D_TILING_RASTER) { + load.height_in_ub_or_stride = slice->stride; + } + + if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) + load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static bool +check_needs_load(const struct v3dv_cmd_buffer_state *state, + VkImageAspectFlags aspect, + uint32_t first_subpass_idx, + VkAttachmentLoadOp load_op) +{ + /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are + * testing does not exist in the image. + */ + if (!aspect) + return false; + + /* Attachment (or view) load operations apply on the first subpass that + * uses the attachment (or view), otherwise we always need to load. + */ + if (state->job->first_subpass > first_subpass_idx) + return true; + + /* If the job is continuing a subpass started in another job, we always + * need to load. 
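+ * The job that started the subpass will have stored its tile buffer + * contents when it finished, so we need to load them back here.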
+ */ + if (state->job->is_subpass_continue) + return true; + + /* If the area is not aligned to tile boundaries, we always need to load */ + if (!state->tile_aligned_render_area) + return true; + + /* The attachment load operations must be LOAD */ + return load_op == VK_ATTACHMENT_LOAD_OP_LOAD; +} + +static inline uint32_t +v3dv_zs_buffer(bool depth, bool stencil) +{ + if (depth && stencil) + return ZSTENCIL; + else if (depth) + return Z; + else if (stencil) + return STENCIL; + return NONE; +} + +static void +cmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_cl *cl, + uint32_t layer) +{ + const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + const struct v3dv_framebuffer *framebuffer = state->framebuffer; + const struct v3dv_render_pass *pass = state->pass; + const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; + + assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT); + + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t attachment_idx = subpass->color_attachments[i].attachment; + + if (attachment_idx == VK_ATTACHMENT_UNUSED) + continue; + + const struct v3dv_render_pass_attachment *attachment = + &state->pass->attachments[attachment_idx]; + + /* According to the Vulkan spec: + * + * "The load operation for each sample in an attachment happens before + * any recorded command which accesses the sample in the first subpass + * where the attachment is used." + * + * If the load operation is CLEAR, we must only clear once on the first + * subpass that uses the attachment (and in that case we don't LOAD). + * After that, we always want to load so we don't lose any rendering done + * by a previous subpass to the same attachment. We also want to load + * if the current job is continuing subpass work started by a previous + * job, for the same reason. + * + * If the render area is not aligned to tile boundaries then we have + * tiles which are partially covered by it. In this case, we need to + * load the tiles so we can preserve the pixels that are outside the + * render area for any such tiles. + */ + uint32_t first_subpass = !pass->multiview_enabled ? + attachment->first_subpass : + attachment->views[layer].first_subpass; + + bool needs_load = check_needs_load(state, + VK_IMAGE_ASPECT_COLOR_BIT, + first_subpass, + attachment->desc.loadOp); + if (needs_load) { + struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; + cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview, + layer, RENDER_TARGET_0 + i); + } + } + + uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; + if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { + const struct v3dv_render_pass_attachment *ds_attachment = + &state->pass->attachments[ds_attachment_idx]; + + const VkImageAspectFlags ds_aspects = + vk_format_aspects(ds_attachment->desc.format); + + uint32_t ds_first_subpass = !pass->multiview_enabled ? 
+ ds_attachment->first_subpass : + ds_attachment->views[layer].first_subpass; + + const bool needs_depth_load = + check_needs_load(state, + ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ds_first_subpass, + ds_attachment->desc.loadOp); + + const bool needs_stencil_load = + check_needs_load(state, + ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, + ds_first_subpass, + ds_attachment->desc.stencilLoadOp); + + if (needs_depth_load || needs_stencil_load) { + struct v3dv_image_view *iview = + framebuffer->attachments[ds_attachment_idx]; + /* From the Vulkan spec: + * + * "When an image view of a depth/stencil image is used as a + * depth/stencil framebuffer attachment, the aspectMask is ignored + * and both depth and stencil image subresources are used." + * + * So we ignore the aspects from the subresource range of the image + * view for the depth/stencil attachment, but we still need to restrict + * to the aspects compatible with the render pass and the image. + */ + const uint32_t zs_buffer = + v3dv_zs_buffer(needs_depth_load, needs_stencil_load); + cmd_buffer_render_pass_emit_load(cmd_buffer, cl, + iview, layer, zs_buffer); + } + } + + cl_emit(cl, END_OF_LOADS, end); +} + +static void +cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_cl *cl, + uint32_t attachment_idx, + uint32_t layer, + uint32_t buffer, + bool clear, + bool is_multisample_resolve) +{ + const struct v3dv_image_view *iview = + cmd_buffer->state.framebuffer->attachments[attachment_idx]; + const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; + const struct v3d_resource_slice *slice = + &image->slices[iview->vk.base_mip_level]; + uint32_t layer_offset = v3dv_layer_offset(image, + iview->vk.base_mip_level, + iview->vk.base_array_layer + layer); + + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = buffer; + store.address = v3dv_cl_address(image->mem->bo, layer_offset); + store.clear_buffer_being_stored = clear; + + store.output_image_format = iview->format->rt_type; + store.r_b_swap = iview->swap_rb; + store.memory_format = slice->tiling; + + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + store.height_in_ub_or_stride = + slice->padded_height_of_output_image_in_uif_blocks; + } else if (slice->tiling == V3D_TILING_RASTER) { + store.height_in_ub_or_stride = slice->stride; + } + + if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) + store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else if (is_multisample_resolve) + store.decimate_mode = V3D_DECIMATE_MODE_4X; + else + store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static bool +check_needs_clear(const struct v3dv_cmd_buffer_state *state, + VkImageAspectFlags aspect, + uint32_t first_subpass_idx, + VkAttachmentLoadOp load_op, + bool do_clear_with_draw) +{ + /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are + * testing does not exist in the image. + */ + if (!aspect) + return false; + + /* If the aspect needs to be cleared with a draw call then we won't emit + * the clear here. + */ + if (do_clear_with_draw) + return false; + + /* If this is resuming a subpass started with another job, then attachment + * load operations don't apply. + */ + if (state->job->is_subpass_continue) + return false; + + /* If the render area is not aligned to tile boundaries we can't use the + * TLB for a clear.
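+ * A TLB clear always affects whole tiles, so it would also wipe the pixels + * of partially covered tiles that fall outside the render area.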
+ */ + if (!state->tile_aligned_render_area) + return false; + + /* If this job is running in a subpass other than the first subpass in + * which this attachment (or view) is used then attachment load operations + * don't apply. + */ + if (state->job->first_subpass != first_subpass_idx) + return false; + + /* The attachment load operation must be CLEAR */ + return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR; +} + +static bool +check_needs_store(const struct v3dv_cmd_buffer_state *state, + VkImageAspectFlags aspect, + uint32_t last_subpass_idx, + VkAttachmentStoreOp store_op) +{ + /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are + * testing does not exist in the image. + */ + if (!aspect) + return false; + + /* Attachment (or view) store operations only apply on the last subpass + * where the attachment (or view) is used; in other subpasses we always + * need to store. + */ + if (state->subpass_idx < last_subpass_idx) + return true; + + /* Attachment store operations only apply on the last job we emit on the + * last subpass where the attachment is used; otherwise we always need to + * store. + */ + if (!state->job->is_subpass_finish) + return true; + + /* The attachment store operation must be STORE */ + return store_op == VK_ATTACHMENT_STORE_OP_STORE; +} + +static void +cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_cl *cl, + uint32_t layer) +{ + struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + struct v3dv_render_pass *pass = state->pass; + const struct v3dv_subpass *subpass = + &pass->subpasses[state->subpass_idx]; + + bool has_stores = false; + bool use_global_zs_clear = false; + bool use_global_rt_clear = false; + + assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT); + + /* FIXME: separate stencil */ + uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; + if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { + const struct v3dv_render_pass_attachment *ds_attachment = + &state->pass->attachments[ds_attachment_idx]; + + assert(state->job->first_subpass >= ds_attachment->first_subpass); + assert(state->subpass_idx >= ds_attachment->first_subpass); + assert(state->subpass_idx <= ds_attachment->last_subpass); + + /* From the Vulkan spec, VkImageSubresourceRange: + * + * "When an image view of a depth/stencil image is used as a + * depth/stencil framebuffer attachment, the aspectMask is ignored + * and both depth and stencil image subresources are used." + * + * So we ignore the aspects from the subresource range of the image + * view for the depth/stencil attachment, but we still need to restrict + * to the aspects compatible with the render pass and the image. + */ + const VkImageAspectFlags aspects = + vk_format_aspects(ds_attachment->desc.format); + + /* Only clear once on the first subpass that uses the attachment */ + uint32_t ds_first_subpass = !state->pass->multiview_enabled ? + ds_attachment->first_subpass : + ds_attachment->views[layer].first_subpass; + + bool needs_depth_clear = + check_needs_clear(state, + aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ds_first_subpass, + ds_attachment->desc.loadOp, + subpass->do_depth_clear_with_draw); + + bool needs_stencil_clear = + check_needs_clear(state, + aspects & VK_IMAGE_ASPECT_STENCIL_BIT, + ds_first_subpass, + ds_attachment->desc.stencilLoadOp, + subpass->do_stencil_clear_with_draw); + + /* Skip the last store if it is not required */ + uint32_t ds_last_subpass = !pass->multiview_enabled ?
+ ds_attachment->last_subpass : + ds_attachment->views[layer].last_subpass; + + bool needs_depth_store = + check_needs_store(state, + aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ds_last_subpass, + ds_attachment->desc.storeOp); + + bool needs_stencil_store = + check_needs_store(state, + aspects & VK_IMAGE_ASPECT_STENCIL_BIT, + ds_last_subpass, + ds_attachment->desc.stencilStoreOp); + + /* GFXH-1689: The per-buffer store command's clear buffer bit is broken + * for depth/stencil. + * + * There used to be some confusion regarding the Clear Tile Buffers + * Z/S bit also being broken, but we confirmed with Broadcom that this + * is not the case, it was just that some other hardware bugs (that we + * need to work around, such as GFXH-1461) could cause this bit to behave + * incorrectly. + * + * There used to be another issue where the RTs bit in the Clear Tile + * Buffers packet also cleared Z/S, but Broadcom confirmed this is + * fixed since V3D 4.1. + * + * So if we have to emit a clear of depth or stencil we don't use + * the per-buffer store clear bit, even if we need to store the buffers, + * instead we always have to use the Clear Tile Buffers Z/S bit. + * If we have configured the job to do early Z/S clearing, then we + * don't want to emit any Clear Tile Buffers command at all here. + * + * Note that GFXH-1689 is not reproduced in the simulator, where + * using the clear buffer bit in depth/stencil stores works fine. + */ + use_global_zs_clear = !state->job->early_zs_clear && + (needs_depth_clear || needs_stencil_clear); + if (needs_depth_store || needs_stencil_store) { + const uint32_t zs_buffer = + v3dv_zs_buffer(needs_depth_store, needs_stencil_store); + cmd_buffer_render_pass_emit_store(cmd_buffer, cl, + ds_attachment_idx, layer, + zs_buffer, false, false); + has_stores = true; + } + } + + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t attachment_idx = subpass->color_attachments[i].attachment; + + if (attachment_idx == VK_ATTACHMENT_UNUSED) + continue; + + const struct v3dv_render_pass_attachment *attachment = + &state->pass->attachments[attachment_idx]; + + assert(state->job->first_subpass >= attachment->first_subpass); + assert(state->subpass_idx >= attachment->first_subpass); + assert(state->subpass_idx <= attachment->last_subpass); + + /* Only clear once on the first subpass that uses the attachment */ + uint32_t first_subpass = !pass->multiview_enabled ? + attachment->first_subpass : + attachment->views[layer].first_subpass; + + bool needs_clear = + check_needs_clear(state, + VK_IMAGE_ASPECT_COLOR_BIT, + first_subpass, + attachment->desc.loadOp, + false); + + /* Skip the last store if it is not required */ + uint32_t last_subpass = !pass->multiview_enabled ? + attachment->last_subpass : + attachment->views[layer].last_subpass; + + bool needs_store = + check_needs_store(state, + VK_IMAGE_ASPECT_COLOR_BIT, + last_subpass, + attachment->desc.storeOp); + + /* If we need to resolve this attachment, emit that store first. Notice + * that we must not request a tile buffer clear here in that case, since + * that would clear the tile buffer before we get to emit the actual + * color attachment store below, since the clear happens after the + * store is completed. + * + * If the attachment doesn't support TLB resolves then we will have to + * fall back to doing the resolve in a shader separately after this + * job, so we will need to store the multisampled attachment even if that + * wasn't requested by the client.
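+ * (The shader-based resolve reads the multisampled image from memory, so + * its contents must have been stored.)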
+ */ + const bool needs_resolve = + subpass->resolve_attachments && + subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED; + if (needs_resolve && attachment->use_tlb_resolve) { + const uint32_t resolve_attachment_idx = + subpass->resolve_attachments[i].attachment; + cmd_buffer_render_pass_emit_store(cmd_buffer, cl, + resolve_attachment_idx, layer, + RENDER_TARGET_0 + i, + false, true); + has_stores = true; + } else if (needs_resolve) { + needs_store = true; + } + + /* Emit the color attachment store if needed */ + if (needs_store) { + cmd_buffer_render_pass_emit_store(cmd_buffer, cl, + attachment_idx, layer, + RENDER_TARGET_0 + i, + needs_clear && !use_global_rt_clear, + false); + has_stores = true; + } else if (needs_clear) { + use_global_rt_clear = true; + } + } + + /* We always need to emit at least one dummy store */ + if (!has_stores) { + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + } + + /* If we have any depth/stencil clears we can't use the per-buffer clear + * bit and instead we have to emit a single clear of all tile buffers. + */ + if (use_global_zs_clear || use_global_rt_clear) { + cl_emit(cl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_z_stencil_buffer = use_global_zs_clear; + clear.clear_all_render_targets = use_global_rt_clear; + } + } +} + +static void +cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t layer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + /* Emit the generic list in our indirect state -- the rcl will just + * have pointers into it. + */ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(cmd_buffer, NULL); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer); + + /* The binner starts out writing tiles assuming that the initial mode + * is triangles, so make sure that's the case. + */ + cl_emit(cl, PRIM_LIST_FORMAT, fmt) { + fmt.primitive_type = LIST_TRIANGLES; + } + + /* The PTB assumes that value is 0, but the HW will not set it. */ + cl_emit(cl, SET_INSTANCEID, set) { + set.instance_id = 0; + } + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t layer) +{ + const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + + struct v3dv_job *job = cmd_buffer->state.job; + struct v3dv_cl *rcl = &job->rcl; + + /* If doing multicore binning, we would need to initialize each + * core's tile list here.
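+ * So far the devices we support expose a single core, so one tile list + * is enough.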
+ */ + const struct v3dv_frame_tiling *tiling = &job->frame_tiling; + const uint32_t tile_alloc_offset = + 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y; + cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); + } + + cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer); + + uint32_t supertile_w_in_pixels = + tiling->tile_width * tiling->supertile_width; + uint32_t supertile_h_in_pixels = + tiling->tile_height * tiling->supertile_height; + const uint32_t min_x_supertile = + state->render_area.offset.x / supertile_w_in_pixels; + const uint32_t min_y_supertile = + state->render_area.offset.y / supertile_h_in_pixels; + + uint32_t max_render_x = state->render_area.offset.x; + if (state->render_area.extent.width > 0) + max_render_x += state->render_area.extent.width - 1; + uint32_t max_render_y = state->render_area.offset.y; + if (state->render_area.extent.height > 0) + max_render_y += state->render_area.extent.height - 1; + const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels; + const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels; + + for (int y = min_y_supertile; y <= max_y_supertile; y++) { + for (int x = min_x_supertile; x <= max_x_supertile; x++) { + cl_emit(rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = x; + coords.row_number_in_supertiles = y; + } + } + } +} + +static void +set_rcl_early_z_config(struct v3dv_job *job, + bool *early_z_disable, + uint32_t *early_z_test_and_update_direction) +{ + /* If this is true then we have not emitted any draw calls in this job + * and we don't get any benefits from early Z. + */ + if (!job->decided_global_ez_enable) { + assert(job->draw_count == 0); + *early_z_disable = true; + return; + } + + switch (job->first_ez_state) { + case V3D_EZ_UNDECIDED: + case V3D_EZ_LT_LE: + *early_z_disable = false; + *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE; + break; + case V3D_EZ_GT_GE: + *early_z_disable = false; + *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE; + break; + case V3D_EZ_DISABLED: + *early_z_disable = true; + break; + } +} + +void +v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + const struct v3dv_framebuffer *framebuffer = state->framebuffer; + + /* We can't emit the RCL until we have a framebuffer, which we may not have + * if we are recording a secondary command buffer. In that case, we will + * have to wait until vkCmdExecuteCommands is called from a primary command + * buffer. + */ + if (!framebuffer) { + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + return; + } + + const struct v3dv_frame_tiling *tiling = &job->frame_tiling; + + const uint32_t fb_layers = job->frame_tiling.layers; + + v3dv_cl_ensure_space_with_branch(&job->rcl, 200 + + MAX2(fb_layers, 1) * 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + v3dv_return_if_oom(cmd_buffer, NULL); + + assert(state->subpass_idx < state->pass->subpass_count); + const struct v3dv_render_pass *pass = state->pass; + const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; + struct v3dv_cl *rcl = &job->rcl; + + /* Common config must be the first TILE_RENDERING_MODE_CFG and + * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional + * updates to the previous HW state.
+ */ + bool do_early_zs_clear = false; + const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { + config.image_width_pixels = framebuffer->width; + config.image_height_pixels = framebuffer->height; + config.number_of_render_targets = MAX2(subpass->color_count, 1); + config.multisample_mode_4x = tiling->msaa; + config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; + + if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { + const struct v3dv_image_view *iview = + framebuffer->attachments[ds_attachment_idx]; + config.internal_depth_type = iview->internal_type; + + set_rcl_early_z_config(job, + &config.early_z_disable, + &config.early_z_test_and_update_direction); + + /* Early-Z/S clear can be enabled if the job is clearing and not + * storing (or loading) depth. If a stencil aspect is also present + * we have the same requirements for it, however, in this case we + * can accept stencil loadOp DONT_CARE as well, so instead of + * checking that stencil is cleared we check that it is not loaded. + * + * Early-Z/S clearing is independent of Early Z/S testing, so it is + * possible to enable one but not the other so long as their + * respective requirements are met. + */ + struct v3dv_render_pass_attachment *ds_attachment = + &pass->attachments[ds_attachment_idx]; + + const VkImageAspectFlags ds_aspects = + vk_format_aspects(ds_attachment->desc.format); + + bool needs_depth_clear = + check_needs_clear(state, + ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ds_attachment->first_subpass, + ds_attachment->desc.loadOp, + subpass->do_depth_clear_with_draw); + + bool needs_depth_store = + check_needs_store(state, + ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ds_attachment->last_subpass, + ds_attachment->desc.storeOp); + + do_early_zs_clear = needs_depth_clear && !needs_depth_store; + if (do_early_zs_clear && + vk_format_has_stencil(ds_attachment->desc.format)) { + bool needs_stencil_load = + check_needs_load(state, + ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, + ds_attachment->first_subpass, + ds_attachment->desc.stencilLoadOp); + + bool needs_stencil_store = + check_needs_store(state, + ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, + ds_attachment->last_subpass, + ds_attachment->desc.stencilStoreOp); + + do_early_zs_clear = !needs_stencil_load && !needs_stencil_store; + } + + config.early_depth_stencil_clear = do_early_zs_clear; + } else { + config.early_z_disable = true; + } + } + + /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers" + * commands with the Z/S bit set, so keep track of whether we enabled this + * in the job so we can skip these later.
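+ * (cmd_buffer_render_pass_emit_stores() checks job->early_zs_clear before + * setting use_global_zs_clear, and the initial tile buffer clear below + * does the same.)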
+ */ + job->early_zs_clear = do_early_zs_clear; + + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t attachment_idx = subpass->color_attachments[i].attachment; + if (attachment_idx == VK_ATTACHMENT_UNUSED) + continue; + + struct v3dv_image_view *iview = + state->framebuffer->attachments[attachment_idx]; + + const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; + const struct v3d_resource_slice *slice = + &image->slices[iview->vk.base_mip_level]; + + const uint32_t *clear_color = + &state->attachments[attachment_idx].clear_value.color[0]; + + uint32_t clear_pad = 0; + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + int uif_block_height = v3d_utile_height(image->cpp) * 2; + + uint32_t implicit_padded_height = + align(framebuffer->height, uif_block_height) / uif_block_height; + + if (slice->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height >= 15) { + clear_pad = slice->padded_height_of_output_image_in_uif_blocks; + } + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { + clear.clear_color_low_32_bits = clear_color[0]; + clear.clear_color_next_24_bits = clear_color[1] & 0xffffff; + clear.render_target_number = i; + }; + + if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { + clear.clear_color_mid_low_32_bits = + ((clear_color[1] >> 24) | (clear_color[2] << 8)); + clear.clear_color_mid_high_24_bits = + ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8)); + clear.render_target_number = i; + }; + } + + if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { + clear.uif_padded_height_in_uif_blocks = clear_pad; + clear.clear_color_high_16_bits = clear_color[3] >> 16; + clear.render_target_number = i; + }; + } + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { + v3dX(cmd_buffer_render_pass_setup_render_target) + (cmd_buffer, 0, &rt.render_target_0_internal_bpp, + &rt.render_target_0_internal_type, &rt.render_target_0_clamp); + v3dX(cmd_buffer_render_pass_setup_render_target) + (cmd_buffer, 1, &rt.render_target_1_internal_bpp, + &rt.render_target_1_internal_type, &rt.render_target_1_clamp); + v3dX(cmd_buffer_render_pass_setup_render_target) + (cmd_buffer, 2, &rt.render_target_2_internal_bpp, + &rt.render_target_2_internal_type, &rt.render_target_2_clamp); + v3dX(cmd_buffer_render_pass_setup_render_target) + (cmd_buffer, 3, &rt.render_target_3_internal_bpp, + &rt.render_target_3_internal_type, &rt.render_target_3_clamp); + } + + /* Ends rendering mode config. */ + if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { + clear.z_clear_value = + state->attachments[ds_attachment_idx].clear_value.z; + clear.stencil_clear_value = + state->attachments[ds_attachment_idx].clear_value.s; + }; + } else { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { + clear.z_clear_value = 1.0f; + clear.stencil_clear_value = 0; + }; + } + + /* Always set initial block size before the first branch, which needs + * to match the value from binning mode config. 
+ */ + cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { + config.number_of_bin_tile_lists = 1; + config.total_frame_width_in_tiles = tiling->draw_tiles_x; + config.total_frame_height_in_tiles = tiling->draw_tiles_y; + + config.supertile_width_in_tiles = tiling->supertile_width; + config.supertile_height_in_tiles = tiling->supertile_height; + + config.total_frame_width_in_supertiles = + tiling->frame_width_in_supertiles; + config.total_frame_height_in_supertiles = + tiling->frame_height_in_supertiles; + } + + /* Start by clearing the tile buffer. */ + cl_emit(rcl, TILE_COORDINATES, coords) { + coords.tile_column_number = 0; + coords.tile_row_number = 0; + } + + /* Emit an initial clear of the tile buffers. This is necessary + * for any buffers that should be cleared (since clearing + * normally happens at the *end* of the generic tile list), but + * it's also nice to clear everything so the first tile doesn't + * inherit any contents from some previous frame. + * + * Also, implement the GFXH-1742 workaround. There's a race in + * the HW between the RCL updating the TLB's internal type/size + * and the spawning of the QPU instances using the TLB's current + * internal type/size. To make sure the QPUs get the right + * state, we need 1 dummy store in between internal type/size + * changes on V3D 3.x, and 2 dummy stores on 4.x. + */ + for (int i = 0; i < 2; i++) { + if (i > 0) + cl_emit(rcl, TILE_COORDINATES, coords); + cl_emit(rcl, END_OF_LOADS, end); + cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + if (i == 0 && cmd_buffer->state.tile_aligned_render_area) { + cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_z_stencil_buffer = !job->early_zs_clear; + clear.clear_all_render_targets = true; + } + } + cl_emit(rcl, END_OF_TILE_MARKER, end); + } + + cl_emit(rcl, FLUSH_VCD_CACHE, flush); + + for (int layer = 0; layer < MAX2(1, fb_layers); layer++) { + if (subpass->view_mask == 0 || (subpass->view_mask & (1u << layer))) + cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer); + } + + cl_emit(rcl, END_OF_RENDERING, end); +} + +void +v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; + /* FIXME: right now we only support one viewport. 
viewports[0] would work now; this would need to change if we allow multiple viewports. + */ + float *vptranslate = dynamic->viewport.translate[0]; + float *vpscale = dynamic->viewport.scale[0]; + + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + const uint32_t required_cl_size = + cl_packet_length(CLIPPER_XY_SCALING) + + cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) + + cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) + + cl_packet_length(VIEWPORT_OFFSET); + v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f; + clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f; + } + + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = vptranslate[2]; + clip.viewport_z_scale_zc_to_zs = vpscale[2]; + } + cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { + /* Vulkan's Z NDC is [0..1], unlike OpenGL, which is [-1, 1] */ + float z1 = vptranslate[2]; + float z2 = vptranslate[2] + vpscale[2]; + clip.minimum_zw = MIN2(z1, z2); + clip.maximum_zw = MAX2(z1, z2); + } + + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { + vp.viewport_centre_x_coordinate = vptranslate[0]; + vp.viewport_centre_y_coordinate = vptranslate[1]; + } + + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT; +} + +void +v3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic; + + const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK | + V3DV_DYNAMIC_STENCIL_WRITE_MASK | + V3DV_DYNAMIC_STENCIL_REFERENCE; + + v3dv_cl_ensure_space_with_branch(&job->bcl, + 2 * cl_packet_length(STENCIL_CFG)); + v3dv_return_if_oom(cmd_buffer, NULL); + + bool emitted_stencil = false; + for (uint32_t i = 0; i < 2; i++) { + if (pipeline->emit_stencil_cfg[i]) { + if (dynamic_state->mask & dynamic_stencil_states) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CFG, + pipeline->stencil_cfg[i], config) { + if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) { + config.stencil_test_mask = + i == 0 ? dynamic_state->stencil_compare_mask.front : + dynamic_state->stencil_compare_mask.back; + } + if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) { + config.stencil_write_mask = + i == 0 ? dynamic_state->stencil_write_mask.front : + dynamic_state->stencil_write_mask.back; + } + if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) { + config.stencil_ref_value = + i == 0 ?
dynamic_state->stencil_reference.front : + dynamic_state->stencil_reference.back; + } + } + } else { + cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]); + } + + emitted_stencil = true; + } + } + + if (emitted_stencil) { + const uint32_t dynamic_stencil_dirty_flags = + V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK | + V3DV_CMD_DIRTY_STENCIL_WRITE_MASK | + V3DV_CMD_DIRTY_STENCIL_REFERENCE; + cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags; + } +} + +void +v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + assert(pipeline); + + if (!pipeline->depth_bias.enabled) + return; + + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET)); + v3dv_return_if_oom(cmd_buffer, NULL); + + struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; + cl_emit(&job->bcl, DEPTH_OFFSET, bias) { + bias.depth_offset_factor = dynamic->depth_bias.slope_factor; + bias.depth_offset_units = dynamic->depth_bias.constant_factor; + if (pipeline->depth_bias.is_z16) + bias.depth_offset_units *= 256.0f; + bias.limit = dynamic->depth_bias.depth_bias_clamp; + } + + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS; +} + +void +v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, LINE_WIDTH, line) { + line.line_width = cmd_buffer->state.dynamic.line_width; + } + + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH; +} + +void +v3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + assert(pipeline); + + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, SAMPLE_STATE, state) { + state.coverage = 1.0f; + state.mask = pipeline->sample_mask; + } +} + +void +v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + assert(pipeline); + + const uint32_t blend_packets_size = + cl_packet_length(BLEND_ENABLES) + + cl_packet_length(BLEND_CONSTANT_COLOR) + + cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS; + + v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size); + v3dv_return_if_oom(cmd_buffer, NULL); + + if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { + if (pipeline->blend.enables) { + cl_emit(&job->bcl, BLEND_ENABLES, enables) { + enables.mask = pipeline->blend.enables; + } + } + + for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { + if (pipeline->blend.enables & (1 << i)) + cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]); + } + } + + if (pipeline->blend.needs_color_constants && + cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) { + struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; + cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { + color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]); + color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]); + color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]); + color.alpha_f16 = 
_mesa_float_to_half(dynamic->blend_constants[3]); + } + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS; + } +} + +void +v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(COLOR_WRITE_MASKS)); + + struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; + cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { + mask.mask = (~dynamic->color_write_enable | + pipeline->blend.color_write_masks) & 0xffff; + } + + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; +} + +static void +emit_flat_shade_flags(struct v3dv_job *job, + int varying_offset, + uint32_t varyings, + enum V3DX(Varying_Flags_Action) lower, + enum V3DX(Varying_Flags_Action) higher) +{ + v3dv_cl_ensure_space_with_branch(&job->bcl, + cl_packet_length(FLAT_SHADE_FLAGS)); + v3dv_return_if_oom(NULL, job); + + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { + flags.varying_offset_v0 = varying_offset; + flags.flat_shade_flags_for_varyings_v024 = varyings; + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower; + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher; + } +} + +static void +emit_noperspective_flags(struct v3dv_job *job, + int varying_offset, + uint32_t varyings, + enum V3DX(Varying_Flags_Action) lower, + enum V3DX(Varying_Flags_Action) higher) +{ + v3dv_cl_ensure_space_with_branch(&job->bcl, + cl_packet_length(NON_PERSPECTIVE_FLAGS)); + v3dv_return_if_oom(NULL, job); + + cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) { + flags.varying_offset_v0 = varying_offset; + flags.non_perspective_flags_for_varyings_v024 = varyings; + flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower; + flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher; + } +} + +static void +emit_centroid_flags(struct v3dv_job *job, + int varying_offset, + uint32_t varyings, + enum V3DX(Varying_Flags_Action) lower, + enum V3DX(Varying_Flags_Action) higher) +{ + v3dv_cl_ensure_space_with_branch(&job->bcl, + cl_packet_length(CENTROID_FLAGS)); + v3dv_return_if_oom(NULL, job); + + cl_emit(&job->bcl, CENTROID_FLAGS, flags) { + flags.varying_offset_v0 = varying_offset; + flags.centroid_flags_for_varyings_v024 = varyings; + flags.action_for_centroid_flags_of_lower_numbered_varyings = lower; + flags.action_for_centroid_flags_of_higher_numbered_varyings = higher; + } +} + +static bool +emit_varying_flags(struct v3dv_job *job, + uint32_t num_flags, + const uint32_t *flags, + void (*flag_emit_callback)(struct v3dv_job *job, + int varying_offset, + uint32_t flags, + enum V3DX(Varying_Flags_Action) lower, + enum V3DX(Varying_Flags_Action) higher)) +{ + bool emitted_any = false; + for (int i = 0; i < num_flags; i++) { + if (!flags[i]) + continue; + + if (emitted_any) { + flag_emit_callback(job, i, flags[i], + V3D_VARYING_FLAGS_ACTION_UNCHANGED, + V3D_VARYING_FLAGS_ACTION_UNCHANGED); + } else if (i == 0) { + flag_emit_callback(job, i, flags[i], + V3D_VARYING_FLAGS_ACTION_UNCHANGED, + V3D_VARYING_FLAGS_ACTION_ZEROED); + } else { + flag_emit_callback(job, i, flags[i], + V3D_VARYING_FLAGS_ACTION_ZEROED, + V3D_VARYING_FLAGS_ACTION_ZEROED); + } + + emitted_any = true; + } + + return emitted_any; +} + +void +v3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + struct v3dv_pipeline *pipeline = 
cmd_buffer->state.gfx.pipeline;
+
+ struct v3d_fs_prog_data *prog_data_fs =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
+
+ const uint32_t num_flags =
+ ARRAY_SIZE(prog_data_fs->flat_shade_flags);
+ const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags;
+ const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags;
+ const uint32_t *centroid_flags = prog_data_fs->centroid_flags;
+
+ if (!emit_varying_flags(job, num_flags, flat_shade_flags,
+ emit_flat_shade_flags)) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
+ }
+
+ if (!emit_varying_flags(job, num_flags, noperspective_flags,
+ emit_noperspective_flags)) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
+ }
+
+ if (!emit_varying_flags(job, num_flags, centroid_flags,
+ emit_centroid_flags)) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
+ }
+}
+
+static void
+job_update_ez_state(struct v3dv_job *job,
+ struct v3dv_pipeline *pipeline,
+ struct v3dv_cmd_buffer *cmd_buffer)
+{
+ /* If first_ez_state is V3D_EZ_DISABLED it means that we have already
+ * determined that we should disable EZ completely for all draw calls in
+ * this job. This will cause us to disable EZ for the entire job in the
+ * Tile Rendering Mode RCL packet and when we do that we need to make sure
+ * we never emit a draw call in the job with EZ enabled in the CFG_BITS
+ * packet, so ez_state must also be V3D_EZ_DISABLED.
+ */
+ if (job->first_ez_state == V3D_EZ_DISABLED) {
+ assert(job->ez_state == V3D_EZ_DISABLED);
+ return;
+ }
+
+ /* This is part of the pre draw call handling, so we should be inside a
+ * render pass.
+ */
+ assert(cmd_buffer->state.pass);
+
+ /* If this is the first time we update EZ state for this job we first check
+ * if there is anything that requires disabling it completely for the entire
+ * job (based on state that is not related to the current draw call and
+ * pipeline state).
+ */
+ if (!job->decided_global_ez_enable) {
+ job->decided_global_ez_enable = true;
+
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ assert(state->subpass_idx < state->pass->subpass_count);
+ struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx];
+ if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) {
+ job->first_ez_state = V3D_EZ_DISABLED;
+ job->ez_state = V3D_EZ_DISABLED;
+ return;
+ }
+
+ /* GFXH-1918: the early-z buffer may load incorrect depth values
+ * if the frame has odd width or height.
+ *
+ * So we need to disable EZ in this case.
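+ *
+ * For example, loading depth for a 1921x1080 framebuffer would hit the
+ * bug while a 1920x1080 one would not, which is why the width/height
+ * parity checks below disable EZ for the whole job.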
+ */ + const struct v3dv_render_pass_attachment *ds_attachment = + &state->pass->attachments[subpass->ds_attachment.attachment]; + + const VkImageAspectFlags ds_aspects = + vk_format_aspects(ds_attachment->desc.format); + + bool needs_depth_load = + check_needs_load(state, + ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, + ds_attachment->first_subpass, + ds_attachment->desc.loadOp); + + if (needs_depth_load) { + struct v3dv_framebuffer *fb = state->framebuffer; + + if (!fb) { + assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + perf_debug("Loading depth aspect in a secondary command buffer " + "without framebuffer info disables early-z tests.\n"); + job->first_ez_state = V3D_EZ_DISABLED; + job->ez_state = V3D_EZ_DISABLED; + return; + } + + if (((fb->width % 2) != 0 || (fb->height % 2) != 0)) { + perf_debug("Loading depth aspect for framebuffer with odd width " + "or height disables early-Z tests.\n"); + job->first_ez_state = V3D_EZ_DISABLED; + job->ez_state = V3D_EZ_DISABLED; + return; + } + } + } + + /* Otherwise, we can decide to selectively enable or disable EZ for draw + * calls using the CFG_BITS packet based on the bound pipeline state. + */ + + /* If the FS writes Z, then it may update against the chosen EZ direction */ + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; + if (fs_variant->prog_data.fs->writes_z) { + job->ez_state = V3D_EZ_DISABLED; + return; + } + + switch (pipeline->ez_state) { + case V3D_EZ_UNDECIDED: + /* If the pipeline didn't pick a direction but didn't disable, then go + * along with the current EZ state. This allows EZ optimization for Z + * func == EQUAL or NEVER. + */ + break; + + case V3D_EZ_LT_LE: + case V3D_EZ_GT_GE: + /* If the pipeline picked a direction, then it needs to match the current + * direction if we've decided on one. + */ + if (job->ez_state == V3D_EZ_UNDECIDED) + job->ez_state = pipeline->ez_state; + else if (job->ez_state != pipeline->ez_state) + job->ez_state = V3D_EZ_DISABLED; + break; + + case V3D_EZ_DISABLED: + /* If the pipeline disables EZ because of a bad Z func or stencil + * operation, then we can't do any more EZ in this frame. 
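+ * (For instance, a pipeline whose depth compare op is
+ * VK_COMPARE_OP_NOT_EQUAL would presumably have been created with
+ * ez_state == V3D_EZ_DISABLED.)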
+ */
+ job->ez_state = V3D_EZ_DISABLED;
+ break;
+ }
+
+ if (job->first_ez_state == V3D_EZ_UNDECIDED &&
+ job->ez_state != V3D_EZ_DISABLED) {
+ job->first_ez_state = job->ez_state;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ assert(pipeline);
+
+ job_update_ez_state(job, pipeline, cmd_buffer);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
+ config.early_z_enable = job->ez_state != V3D_EZ_DISABLED;
+ config.early_z_updates_enable = config.early_z_enable &&
+ pipeline->z_updates_enable;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ cl_packet_length(OCCLUSION_QUERY_COUNTER));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
+ if (cmd_buffer->state.query.active_query.bo) {
+ counter.address =
+ v3dv_cl_address(cmd_buffer->state.query.active_query.bo,
+ cmd_buffer->state.query.active_query.offset);
+ }
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+}
+
+static struct v3dv_job *
+cmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer,
+ bool is_bcl_barrier)
+{
+ assert(cmd_buffer->state.subpass_idx != -1);
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_subpass_resume(cmd_buffer,
+ cmd_buffer->state.subpass_idx);
+ if (!job)
+ return NULL;
+
+ job->serialize = true;
+ job->needs_bcl_sync = is_bcl_barrier;
+ return job;
+}
+
+static void
+cmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary,
+ struct v3dv_cmd_buffer *secondary)
+{
+ struct v3dv_cmd_buffer_state *p_state = &primary->state;
+ struct v3dv_cmd_buffer_state *s_state = &secondary->state;
+
+ const uint32_t total_state_count =
+ p_state->query.end.used_count + s_state->query.end.used_count;
+ v3dv_cmd_buffer_ensure_array_state(primary,
+ sizeof(struct v3dv_end_query_cpu_job_info),
+ total_state_count,
+ &p_state->query.end.alloc_count,
+ (void **) &p_state->query.end.states);
+ v3dv_return_if_oom(primary, NULL);
+
+ for (uint32_t i = 0; i < s_state->query.end.used_count; i++) {
+ const struct v3dv_end_query_cpu_job_info *s_qstate =
+ &secondary->state.query.end.states[i];
+
+ struct v3dv_end_query_cpu_job_info *p_qstate =
+ &p_state->query.end.states[p_state->query.end.used_count++];
+
+ p_qstate->pool = s_qstate->pool;
+ p_qstate->query = s_qstate->query;
+ }
+}
+
+void
+v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
+ uint32_t cmd_buffer_count,
+ const VkCommandBuffer *cmd_buffers)
+{
+ assert(primary->state.job);
+
+ /* Emit occlusion query state if needed so the draw calls inside our
+ * secondaries update the counters.
+ */
+ bool has_occlusion_query =
+ primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+ if (has_occlusion_query)
+ v3dX(cmd_buffer_emit_occlusion_query)(primary);
+
+ /* FIXME: if our primary job tiling doesn't enable MSAA but any of the
+ * pipelines used by the secondaries do, we need to re-start the primary
+ * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed.
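+ * (That helper covers the analogous case for draws recorded directly
+ * into the primary command buffer.)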
+ */ + bool pending_barrier = false; + bool pending_bcl_barrier = false; + for (uint32_t i = 0; i < cmd_buffer_count; i++) { + V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]); + + assert(secondary->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT); + + list_for_each_entry(struct v3dv_job, secondary_job, + &secondary->jobs, list_link) { + if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) { + /* If the job is a CL, then we branch to it from the primary BCL. + * In this case the secondary's BCL is finished with a + * RETURN_FROM_SUB_LIST command to return back to the primary BCL + * once we are done executing it. + */ + assert(v3dv_cl_offset(&secondary_job->rcl) == 0); + assert(secondary_job->bcl.bo); + + /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */ + STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1); + assert(v3dv_cl_offset(&secondary_job->bcl) >= 1); + assert(*(((uint8_t *)secondary_job->bcl.next) - 1) == + V3DX(RETURN_FROM_SUB_LIST_opcode)); + + /* If this secondary has any barriers (or we had any pending barrier + * to apply), then we can't just branch to it from the primary, we + * need to split the primary to create a new job that can consume + * the barriers first. + * + * FIXME: in this case, maybe just copy the secondary BCL without + * the RETURN_FROM_SUB_LIST into the primary job to skip the + * branch? + */ + struct v3dv_job *primary_job = primary->state.job; + if (!primary_job || secondary_job->serialize || pending_barrier) { + const bool needs_bcl_barrier = + secondary_job->needs_bcl_sync || pending_bcl_barrier; + primary_job = + cmd_buffer_subpass_split_for_barrier(primary, + needs_bcl_barrier); + v3dv_return_if_oom(primary, NULL); + + /* Since we have created a new primary we need to re-emit + * occlusion query state. + */ + if (has_occlusion_query) + v3dX(cmd_buffer_emit_occlusion_query)(primary); + } + + /* Make sure our primary job has all required BO references */ + set_foreach(secondary_job->bos, entry) { + struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; + v3dv_job_add_bo(primary_job, bo); + } + + /* Emit required branch instructions. We expect each of these + * to end with a corresponding 'return from sub list' item. + */ + list_for_each_entry(struct v3dv_bo, bcl_bo, + &secondary_job->bcl.bo_list, list_link) { + v3dv_cl_ensure_space_with_branch(&primary_job->bcl, + cl_packet_length(BRANCH_TO_SUB_LIST)); + v3dv_return_if_oom(primary, NULL); + cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) { + branch.address = v3dv_cl_address(bcl_bo, 0); + } + } + + primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl; + } else { + /* This is a regular job (CPU or GPU), so just finish the current + * primary job (if any) and then add the secondary job to the + * primary's job list right after it. + */ + v3dv_cmd_buffer_finish_job(primary); + v3dv_job_clone_in_cmd_buffer(secondary_job, primary); + if (pending_barrier) { + secondary_job->serialize = true; + if (pending_bcl_barrier) + secondary_job->needs_bcl_sync = true; + } + } + + pending_barrier = false; + pending_bcl_barrier = false; + } + + /* If the secondary has recorded any vkCmdEndQuery commands, we need to + * copy this state to the primary so it is processed properly when the + * current primary job is finished. + */ + cmd_buffer_copy_secondary_end_query_state(primary, secondary); + + /* If this secondary had any pending barrier state we will need that + * barrier state consumed with whatever comes next in the primary. 
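+ * (These flags either force a split of the primary before the next
+ * secondary CL job, or are folded into the primary's own barrier state
+ * once the last command buffer has been processed.)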
+ */ + assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier); + pending_barrier = secondary->state.has_barrier; + pending_bcl_barrier = secondary->state.has_bcl_barrier; + } + + if (pending_barrier) { + primary->state.has_barrier = true; + primary->state.has_bcl_barrier |= pending_bcl_barrier; + } +} + +static void +emit_gs_shader_state_record(struct v3dv_job *job, + struct v3dv_bo *assembly_bo, + struct v3dv_shader_variant *gs_bin, + struct v3dv_cl_reloc gs_bin_uniforms, + struct v3dv_shader_variant *gs, + struct v3dv_cl_reloc gs_render_uniforms) +{ + cl_emit(&job->indirect, GEOMETRY_SHADER_STATE_RECORD, shader) { + shader.geometry_bin_mode_shader_code_address = + v3dv_cl_address(assembly_bo, gs_bin->assembly_offset); + shader.geometry_bin_mode_shader_4_way_threadable = + gs_bin->prog_data.gs->base.threads == 4; + shader.geometry_bin_mode_shader_start_in_final_thread_section = + gs_bin->prog_data.gs->base.single_seg; + shader.geometry_bin_mode_shader_propagate_nans = true; + shader.geometry_bin_mode_shader_uniforms_address = + gs_bin_uniforms; + + shader.geometry_render_mode_shader_code_address = + v3dv_cl_address(assembly_bo, gs->assembly_offset); + shader.geometry_render_mode_shader_4_way_threadable = + gs->prog_data.gs->base.threads == 4; + shader.geometry_render_mode_shader_start_in_final_thread_section = + gs->prog_data.gs->base.single_seg; + shader.geometry_render_mode_shader_propagate_nans = true; + shader.geometry_render_mode_shader_uniforms_address = + gs_render_uniforms; + } +} + +static uint8_t +v3d_gs_output_primitive(uint32_t prim_type) +{ + switch (prim_type) { + case GL_POINTS: + return GEOMETRY_SHADER_POINTS; + case GL_LINE_STRIP: + return GEOMETRY_SHADER_LINE_STRIP; + case GL_TRIANGLE_STRIP: + return GEOMETRY_SHADER_TRI_STRIP; + default: + unreachable("Unsupported primitive type"); + } +} + +static void +emit_tes_gs_common_params(struct v3dv_job *job, + uint8_t gs_out_prim_type, + uint8_t gs_num_invocations) +{ + cl_emit(&job->indirect, TESSELLATION_GEOMETRY_COMMON_PARAMS, shader) { + shader.tessellation_type = TESSELLATION_TYPE_TRIANGLE; + shader.tessellation_point_mode = false; + shader.tessellation_edge_spacing = TESSELLATION_EDGE_SPACING_EVEN; + shader.tessellation_clockwise = true; + shader.tessellation_invocations = 1; + + shader.geometry_shader_output_format = + v3d_gs_output_primitive(gs_out_prim_type); + shader.geometry_shader_instances = gs_num_invocations & 0x1F; + } +} + +static uint8_t +simd_width_to_gs_pack_mode(uint32_t width) +{ + switch (width) { + case 16: + return V3D_PACK_MODE_16_WAY; + case 8: + return V3D_PACK_MODE_8_WAY; + case 4: + return V3D_PACK_MODE_4_WAY; + case 1: + return V3D_PACK_MODE_1_WAY; + default: + unreachable("Invalid SIMD width"); + }; +} + +static void +emit_tes_gs_shader_params(struct v3dv_job *job, + uint32_t gs_simd, + uint32_t gs_vpm_output_size, + uint32_t gs_max_vpm_input_size_per_batch) +{ + cl_emit(&job->indirect, TESSELLATION_GEOMETRY_SHADER_PARAMS, shader) { + shader.tcs_batch_flush_mode = V3D_TCS_FLUSH_MODE_FULLY_PACKED; + shader.per_patch_data_column_depth = 1; + shader.tcs_output_segment_size_in_sectors = 1; + shader.tcs_output_segment_pack_mode = V3D_PACK_MODE_16_WAY; + shader.tes_output_segment_size_in_sectors = 1; + shader.tes_output_segment_pack_mode = V3D_PACK_MODE_16_WAY; + shader.gs_output_segment_size_in_sectors = gs_vpm_output_size; + shader.gs_output_segment_pack_mode = + simd_width_to_gs_pack_mode(gs_simd); + shader.tbg_max_patches_per_tcs_batch = 1; + 
shader.tbg_max_extra_vertex_segs_for_patches_after_first = 0; + shader.tbg_min_tcs_output_segments_required_in_play = 1; + shader.tbg_min_per_patch_data_segments_required_in_play = 1; + shader.tpg_max_patches_per_tes_batch = 1; + shader.tpg_max_vertex_segments_per_tes_batch = 0; + shader.tpg_max_tcs_output_segments_per_tes_batch = 1; + shader.tpg_min_tes_output_segments_required_in_play = 1; + shader.gbg_max_tes_output_vertex_segments_per_gs_batch = + gs_max_vpm_input_size_per_batch; + shader.gbg_min_gs_output_segments_required_in_play = 1; + } +} + +void +v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + struct v3dv_pipeline *pipeline = state->gfx.pipeline; + assert(pipeline); + + struct v3dv_shader_variant *vs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; + struct v3d_vs_prog_data *prog_data_vs = vs_variant->prog_data.vs; + + struct v3dv_shader_variant *vs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; + struct v3d_vs_prog_data *prog_data_vs_bin = vs_bin_variant->prog_data.vs; + + struct v3dv_shader_variant *fs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; + struct v3d_fs_prog_data *prog_data_fs = fs_variant->prog_data.fs; + + struct v3dv_shader_variant *gs_variant = NULL; + struct v3dv_shader_variant *gs_bin_variant = NULL; + struct v3d_gs_prog_data *prog_data_gs = NULL; + struct v3d_gs_prog_data *prog_data_gs_bin = NULL; + if (pipeline->has_gs) { + gs_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; + prog_data_gs = gs_variant->prog_data.gs; + + gs_bin_variant = + pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; + prog_data_gs_bin = gs_bin_variant->prog_data.gs; + } + + /* Update the cache dirty flag based on the shader progs data */ + job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl; + if (pipeline->has_gs) { + job->tmu_dirty_rcl |= prog_data_gs_bin->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= prog_data_gs->base.tmu_dirty_rcl; + } + + /* See GFXH-930 workaround below */ + uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1); + + uint32_t shader_state_record_length = + cl_packet_length(GL_SHADER_STATE_RECORD); + if (pipeline->has_gs) { + shader_state_record_length += + cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) + + cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) + + 2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS); + } + + uint32_t shader_rec_offset = + v3dv_cl_ensure_space(&job->indirect, + shader_state_record_length + + num_elements_to_emit * + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), + 32); + v3dv_return_if_oom(cmd_buffer, NULL); + + struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo; + + if (pipeline->has_gs) { + emit_gs_shader_state_record(job, + assembly_bo, + gs_bin_variant, + cmd_buffer->state.uniforms.gs_bin, + gs_variant, + cmd_buffer->state.uniforms.gs); + + emit_tes_gs_common_params(job, + prog_data_gs->out_prim_type, + prog_data_gs->num_invocations); + + emit_tes_gs_shader_params(job, + pipeline->vpm_cfg_bin.gs_width, + pipeline->vpm_cfg_bin.Gd, + pipeline->vpm_cfg_bin.Gv); + + emit_tes_gs_shader_params(job, + pipeline->vpm_cfg.gs_width, + pipeline->vpm_cfg.Gd, + pipeline->vpm_cfg.Gv); + } + + struct v3dv_bo *default_attribute_values = 
+ pipeline->default_attribute_values != NULL ?
+ pipeline->default_attribute_values :
+ pipeline->device->default_attribute_float;
+
+ cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
+ pipeline->shader_state_record, shader) {
+
+ /* FIXME: we are setting these values here and during the
+ * prepacking. This is because both cl_emit_with_prepacked and v3dvx_pack
+ * assert for minimum values of these. It would be good to get
+ * v3dvx_pack to assert on the final value if possible.
+ */
+ shader.min_coord_shader_input_segments_required_in_play =
+ pipeline->vpm_cfg_bin.As;
+ shader.min_vertex_shader_input_segments_required_in_play =
+ pipeline->vpm_cfg.As;
+
+ shader.coordinate_shader_code_address =
+ v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset);
+ shader.vertex_shader_code_address =
+ v3dv_cl_address(assembly_bo, vs_variant->assembly_offset);
+ shader.fragment_shader_code_address =
+ v3dv_cl_address(assembly_bo, fs_variant->assembly_offset);
+
+ shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin;
+ shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
+ shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;
+
+ shader.address_of_default_attribute_values =
+ v3dv_cl_address(default_attribute_values, 0);
+
+ shader.any_shader_reads_hardware_written_primitive_id =
+ (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid;
+ shader.insert_primitive_id_as_first_varying_to_fragment_shader =
+ !pipeline->has_gs && prog_data_fs->uses_pid;
+ }
+
+ /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
+ bool cs_loaded_any = false;
+ const bool cs_uses_builtins = prog_data_vs_bin->uses_iid ||
+ prog_data_vs_bin->uses_biid ||
+ prog_data_vs_bin->uses_vid;
+ const uint32_t packet_length =
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+
+ uint32_t emitted_va_count = 0;
+ for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) {
+ assert(i < MAX_VERTEX_ATTRIBS);
+
+ if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED)
+ continue;
+
+ const uint32_t binding = pipeline->va[i].binding;
+
+ /* We store each vertex attribute in the array using its driver location
+ * as index.
+ */
+ const uint32_t location = i;
+
+ struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];
+
+ cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
+ &pipeline->vertex_attrs[i * packet_length], attr) {
+
+ assert(c_vb->buffer->mem->bo);
+ attr.address = v3dv_cl_address(c_vb->buffer->mem->bo,
+ c_vb->buffer->mem_offset +
+ pipeline->va[i].offset +
+ c_vb->offset);
+
+ attr.number_of_values_read_by_coordinate_shader =
+ prog_data_vs_bin->vattr_sizes[location];
+ attr.number_of_values_read_by_vertex_shader =
+ prog_data_vs->vattr_sizes[location];
+
+ /* GFXH-930: At least one attribute must be enabled and read by CS
+ * and VS. If we have attributes being consumed by the VS but not
+ * the CS, then set up a dummy load of the last attribute into the
+ * CS's VPM inputs. (Since CS is just dead-code-elimination compared
+ * to VS, we can't have CS loading but not VS).
+ *
+ * GFXH-1602: first attribute must be active if using builtins.
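+ *
+ * For example, if the VS reads attributes 0 and 1 but the CS reads
+ * neither, the record for attribute 1 (the last one) is given a dummy
+ * number_of_values_read_by_coordinate_shader of 1 below.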
+ */ + if (prog_data_vs_bin->vattr_sizes[location]) + cs_loaded_any = true; + + if (i == 0 && cs_uses_builtins && !cs_loaded_any) { + attr.number_of_values_read_by_coordinate_shader = 1; + cs_loaded_any = true; + } else if (i == pipeline->va_count - 1 && !cs_loaded_any) { + attr.number_of_values_read_by_coordinate_shader = 1; + cs_loaded_any = true; + } + + attr.maximum_index = 0xffffff; + } + + emitted_va_count++; + } + + if (pipeline->va_count == 0) { + /* GFXH-930: At least one attribute must be enabled and read + * by CS and VS. If we have no attributes being consumed by + * the shader, set up a dummy to be loaded into the VPM. + */ + cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + /* Valid address of data whose value will be unused. */ + attr.address = v3dv_cl_address(job->indirect.bo, 0); + + attr.type = ATTRIBUTE_FLOAT; + attr.stride = 0; + attr.vec_size = 1; + + attr.number_of_values_read_by_coordinate_shader = 1; + attr.number_of_values_read_by_vertex_shader = 1; + } + } + + if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { + v3dv_cl_ensure_space_with_branch(&job->bcl, + sizeof(pipeline->vcm_cache_size)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size); + } + + v3dv_cl_ensure_space_with_branch(&job->bcl, + cl_packet_length(GL_SHADER_STATE)); + v3dv_return_if_oom(cmd_buffer, NULL); + + if (pipeline->has_gs) { + cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) { + state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset); + state.number_of_attribute_arrays = num_elements_to_emit; + } + } else { + cl_emit(&job->bcl, GL_SHADER_STATE, state) { + state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset); + state.number_of_attribute_arrays = num_elements_to_emit; + } + } + + cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER | + V3DV_CMD_DIRTY_DESCRIPTOR_SETS | + V3DV_CMD_DIRTY_PUSH_CONSTANTS); + cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; + cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; +} + +/* FIXME: C&P from v3dx_draw. Refactor to common place? 
*/ +static uint32_t +v3d_hw_prim_type(enum pipe_prim_type prim_type) +{ + switch (prim_type) { + case PIPE_PRIM_POINTS: + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + return prim_type; + + case PIPE_PRIM_LINES_ADJACENCY: + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + case PIPE_PRIM_TRIANGLES_ADJACENCY: + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY); + + default: + unreachable("Unsupported primitive type"); + } +} + +void +v3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_draw_info *info) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + struct v3dv_pipeline *pipeline = state->gfx.pipeline; + + assert(pipeline); + + uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); + + if (info->first_instance > 0) { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { + base.base_instance = info->first_instance; + base.base_vertex = 0; + } + } + + if (info->instance_count > 1) { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) { + prim.mode = hw_prim_type; + prim.index_of_first_vertex = info->first_vertex; + prim.number_of_instances = info->instance_count; + prim.instance_length = info->vertex_count; + } + } else { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS)); + v3dv_return_if_oom(cmd_buffer, NULL); + cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) { + prim.mode = hw_prim_type; + prim.length = info->vertex_count; + prim.index_of_first_vertex = info->first_vertex; + } + } +} + +void +v3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + /* We flag all state as dirty when we create a new job so make sure we + * have a valid index buffer before attempting to emit state for it. 
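+ * (state.index_buffer.buffer is VK_NULL_HANDLE until
+ * vkCmdBindIndexBuffer has been recorded, so ibuffer below may
+ * legitimately be NULL.)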
+ */ + struct v3dv_buffer *ibuffer = + v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer); + if (ibuffer) { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP)); + v3dv_return_if_oom(cmd_buffer, NULL); + + const uint32_t offset = cmd_buffer->state.index_buffer.offset; + cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { + ib.address = v3dv_cl_address(ibuffer->mem->bo, + ibuffer->mem_offset + offset); + ib.size = ibuffer->mem->bo->size; + } + } + + cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER; +} + +void +v3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); + uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; + uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size; + + if (vertexOffset != 0 || firstInstance != 0) { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { + base.base_instance = firstInstance; + base.base_vertex = vertexOffset; + } + } + + if (instanceCount == 1) { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(INDEXED_PRIM_LIST)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) { + prim.index_type = index_type; + prim.length = indexCount; + prim.index_offset = index_offset; + prim.mode = hw_prim_type; + prim.enable_primitive_restarts = pipeline->primitive_restart; + } + } else if (instanceCount > 1) { + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { + prim.index_type = index_type; + prim.index_offset = index_offset; + prim.mode = hw_prim_type; + prim.enable_primitive_restarts = pipeline->primitive_restart; + prim.number_of_instances = instanceCount; + prim.instance_length = indexCount; + } + } +} + +void +v3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_buffer *buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); + + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) { + prim.mode = hw_prim_type; + prim.number_of_draw_indirect_array_records = drawCount; + prim.stride_in_multiples_of_4_bytes = stride >> 2; + prim.address = v3dv_cl_address(buffer->mem->bo, + buffer->mem_offset + offset); + } +} + +void +v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_buffer *buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + struct v3dv_job *job = cmd_buffer->state.job; + assert(job); + + const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; + uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); + uint8_t index_type 
= ffs(cmd_buffer->state.index_buffer.index_size) - 1; + + v3dv_cl_ensure_space_with_branch( + &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST)); + v3dv_return_if_oom(cmd_buffer, NULL); + + cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { + prim.index_type = index_type; + prim.mode = hw_prim_type; + prim.enable_primitive_restarts = pipeline->primitive_restart; + prim.number_of_draw_indirect_indexed_records = drawCount; + prim.stride_in_multiples_of_4_bytes = stride >> 2; + prim.address = v3dv_cl_address(buffer->mem->bo, + buffer->mem_offset + offset); + } +} + +void +v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer, + int rt, + uint32_t *rt_bpp, + uint32_t *rt_type, + uint32_t *rt_clamp) +{ + const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; + + assert(state->subpass_idx < state->pass->subpass_count); + const struct v3dv_subpass *subpass = + &state->pass->subpasses[state->subpass_idx]; + + if (rt >= subpass->color_count) + return; + + struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt]; + const uint32_t attachment_idx = attachment->attachment; + if (attachment_idx == VK_ATTACHMENT_UNUSED) + return; + + const struct v3dv_framebuffer *framebuffer = state->framebuffer; + assert(attachment_idx < framebuffer->attachment_count); + struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx]; + assert(iview->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT); + + *rt_bpp = iview->internal_bpp; + *rt_type = iview->internal_type; + if (vk_format_is_int(iview->vk.format)) + *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; + else if (vk_format_is_srgb(iview->vk.format)) + *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM; + else + *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c b/lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c new file mode 100644 index 000000000..2c28ce46a --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c @@ -0,0 +1,98 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3dv_private.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +/* + * Returns how much space a given descriptor type needs on a bo (GPU + * memory). 
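+ *
+ * For example, a COMBINED_IMAGE_SAMPLER occupies two 32-byte aligned
+ * records (texture shader state followed by sampler state), while plain
+ * buffer descriptors need no BO space and return 0.
+ *
+ * A minimal sketch of how a caller could size a pool BO with this
+ * helper (hypothetical loop, not the actual pool-creation code):
+ *
+ *    uint32_t bo_size = 0;
+ *    for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++)
+ *       bo_size += pCreateInfo->pPoolSizes[i].descriptorCount *
+ *                  v3dX(descriptor_bo_size)(pCreateInfo->pPoolSizes[i].type);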
+ */
+uint32_t
+v3dX(descriptor_bo_size)(VkDescriptorType type)
+{
+ switch(type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return cl_aligned_packet_length(SAMPLER_STATE, 32);
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ return cl_aligned_packet_length(SAMPLER_STATE, 32) +
+ cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32);
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ return cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32);
+ default:
+ return 0;
+ }
+}
+
+/* To compute the max_bo_size we want to iterate through the descriptor
+ * types. Unfortunately we can't just use the descriptor type enum values, as
+ * the values are not defined consecutively (so extensions could add new
+ * descriptor types), and VK_DESCRIPTOR_TYPE_MAX_ENUM is also a really big
+ * number.
+ */
+static const uint32_t supported_descriptor_types[] = {
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
+ VK_DESCRIPTOR_TYPE_SAMPLER,
+ VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
+ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+};
+
+uint32_t
+v3dX(max_descriptor_bo_size)(void)
+{
+ static uint32_t max = 0;
+
+ if (max == 0) {
+ for (uint32_t i = 0; i < ARRAY_SIZE(supported_descriptor_types); i++)
+ max = MAX2(max, v3dX(descriptor_bo_size)(supported_descriptor_types[i]));
+ }
+ assert(max != 0);
+
+ return max;
+}
+
+
+uint32_t
+v3dX(combined_image_sampler_texture_state_offset)(void)
+{
+ return 0;
+}
+
+uint32_t
+v3dX(combined_image_sampler_sampler_state_offset)(void)
+{
+ return cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32);
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_device.c b/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
new file mode 100644
index 000000000..a48738aec
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include "v3dv_private.h" + +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" +#include "vk_format_info.h" +#include "util/u_pack_color.h" +#include "util/half_float.h" + +static const enum V3DX(Wrap_Mode) vk_to_v3d_wrap_mode[] = { + [VK_SAMPLER_ADDRESS_MODE_REPEAT] = V3D_WRAP_MODE_REPEAT, + [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = V3D_WRAP_MODE_MIRROR, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = V3D_WRAP_MODE_CLAMP, + [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = V3D_WRAP_MODE_MIRROR_ONCE, + [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = V3D_WRAP_MODE_BORDER, +}; + +static const enum V3DX(Compare_Function) +vk_to_v3d_compare_func[] = { + [VK_COMPARE_OP_NEVER] = V3D_COMPARE_FUNC_NEVER, + [VK_COMPARE_OP_LESS] = V3D_COMPARE_FUNC_LESS, + [VK_COMPARE_OP_EQUAL] = V3D_COMPARE_FUNC_EQUAL, + [VK_COMPARE_OP_LESS_OR_EQUAL] = V3D_COMPARE_FUNC_LEQUAL, + [VK_COMPARE_OP_GREATER] = V3D_COMPARE_FUNC_GREATER, + [VK_COMPARE_OP_NOT_EQUAL] = V3D_COMPARE_FUNC_NOTEQUAL, + [VK_COMPARE_OP_GREATER_OR_EQUAL] = V3D_COMPARE_FUNC_GEQUAL, + [VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS, +}; + + +static union pipe_color_union encode_border_color( + const VkSamplerCustomBorderColorCreateInfoEXT *bc_info) +{ + const struct util_format_description *desc = + vk_format_description(bc_info->format); + + const struct v3dv_format *format = v3dX(get_format)(bc_info->format); + + union pipe_color_union border; + for (int i = 0; i < 4; i++) { + if (format->swizzle[i] <= 3) + border.ui[i] = bc_info->customBorderColor.uint32[format->swizzle[i]]; + else + border.ui[i] = 0; + } + + /* handle clamping */ + if (vk_format_has_depth(bc_info->format) && + vk_format_has_stencil(bc_info->format)) { + border.f[0] = CLAMP(border.f[0], 0, 1); + border.ui[1] = CLAMP(border.ui[1], 0, 0xff); + } else if (vk_format_is_unorm(bc_info->format)) { + for (int i = 0; i < 4; i++) + border.f[i] = CLAMP(border.f[i], 0, 1); + } else if (vk_format_is_snorm(bc_info->format)) { + for (int i = 0; i < 4; i++) + border.f[i] = CLAMP(border.f[i], -1, 1); + } else if (vk_format_is_uint(bc_info->format) && + desc->channel[0].size < 32) { + for (int i = 0; i < 4; i++) + border.ui[i] = CLAMP(border.ui[i], 0, (1 << desc->channel[i].size)); + } else if (vk_format_is_sint(bc_info->format) && + desc->channel[0].size < 32) { + for (int i = 0; i < 4; i++) + border.i[i] = CLAMP(border.i[i], + -(1 << (desc->channel[i].size - 1)), + (1 << (desc->channel[i].size - 1)) - 1); + } + + /* convert from float to expected format */ + if (vk_format_is_srgb(bc_info->format) || + vk_format_is_compressed(bc_info->format)) { + for (int i = 0; i < 4; i++) + border.ui[i] = _mesa_float_to_half(border.f[i]); + } else if (vk_format_is_unorm(bc_info->format)) { + for (int i = 0; i < 4; i++) { + switch (desc->channel[i].size) { + case 8: + case 16: + /* expect u16 for non depth values */ + if (!vk_format_has_depth(bc_info->format)) + border.ui[i] = (uint32_t) (border.f[i] * (float) 0xffff); + break; + case 24: + case 32: + /* uses full f32; no conversion needed */ + break; + default: + border.ui[i] = _mesa_float_to_half(border.f[i]); + break; + } + } + } else if (vk_format_is_snorm(bc_info->format)) { + for (int i = 0; i < 4; i++) { + switch (desc->channel[i].size) { + case 8: + border.ui[i] = (int32_t) (border.f[i] * (float) 0x3fff); + break; + case 16: + border.i[i] = (int32_t) (border.f[i] * (float) 0x7fff); + break; + case 24: + case 32: + /* uses full f32; no conversion needed */ + break; + default: + 
border.ui[i] = _mesa_float_to_half(border.f[i]); + break; + } + } + } else if (vk_format_is_float(bc_info->format)) { + for (int i = 0; i < 4; i++) { + switch(desc->channel[i].size) { + case 16: + border.ui[i] = _mesa_float_to_half(border.f[i]); + break; + default: + break; + } + } + } + + return border; +} + +void +v3dX(pack_sampler_state)(struct v3dv_sampler *sampler, + const VkSamplerCreateInfo *pCreateInfo, + const VkSamplerCustomBorderColorCreateInfoEXT *bc_info) +{ + enum V3DX(Border_Color_Mode) border_color_mode; + + switch (pCreateInfo->borderColor) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + border_color_mode = V3D_BORDER_COLOR_0000; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + border_color_mode = V3D_BORDER_COLOR_0001; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + border_color_mode = V3D_BORDER_COLOR_1111; + break; + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + border_color_mode = V3D_BORDER_COLOR_FOLLOWS; + break; + default: + unreachable("Unknown border color"); + break; + } + + /* For some texture formats, when clamping to transparent black border the + * CTS expects alpha to be set to 1 instead of 0, but the border color mode + * will take priority over the texture state swizzle, so the only way to + * fix that is to apply a swizzle in the shader. Here we keep track of + * whether we are activating that mode and we will decide if we need to + * activate the texture swizzle lowering in the shader key at compile time + * depending on the actual texture format. + */ + if ((pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) && + border_color_mode == V3D_BORDER_COLOR_0000) { + sampler->clamp_to_transparent_black_border = true; + } + + v3dvx_pack(sampler->sampler_state, SAMPLER_STATE, s) { + if (pCreateInfo->anisotropyEnable) { + s.anisotropy_enable = true; + if (pCreateInfo->maxAnisotropy > 8) + s.maximum_anisotropy = 3; + else if (pCreateInfo->maxAnisotropy > 4) + s.maximum_anisotropy = 2; + else if (pCreateInfo->maxAnisotropy > 2) + s.maximum_anisotropy = 1; + } + + s.border_color_mode = border_color_mode; + + if (s.border_color_mode == V3D_BORDER_COLOR_FOLLOWS) { + union pipe_color_union border = encode_border_color(bc_info); + + s.border_color_word_0 = border.ui[0]; + s.border_color_word_1 = border.ui[1]; + s.border_color_word_2 = border.ui[2]; + s.border_color_word_3 = border.ui[3]; + } + + s.wrap_i_border = false; /* Also hardcoded on v3d */ + s.wrap_s = vk_to_v3d_wrap_mode[pCreateInfo->addressModeU]; + s.wrap_t = vk_to_v3d_wrap_mode[pCreateInfo->addressModeV]; + s.wrap_r = vk_to_v3d_wrap_mode[pCreateInfo->addressModeW]; + s.fixed_bias = pCreateInfo->mipLodBias; + s.max_level_of_detail = MIN2(MAX2(0, pCreateInfo->maxLod), 15); + s.min_level_of_detail = MIN2(MAX2(0, pCreateInfo->minLod), 15); + s.srgb_disable = 0; /* Not even set by v3d */ + s.depth_compare_function = + vk_to_v3d_compare_func[pCreateInfo->compareEnable ? 
+ pCreateInfo->compareOp : VK_COMPARE_OP_NEVER]; + s.mip_filter_nearest = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST; + s.min_filter_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; + s.mag_filter_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; + } +} + +/** + * This computes the maximum bpp used by any of the render targets used by + * a particular subpass and checks if any of those render targets are + * multisampled. If we don't have a subpass (when we are not inside a + * render pass), then we assume that all framebuffer attachments are used. + */ +void +v3dX(framebuffer_compute_internal_bpp_msaa)( + const struct v3dv_framebuffer *framebuffer, + const struct v3dv_subpass *subpass, + uint8_t *max_bpp, + bool *msaa) +{ + STATIC_ASSERT(V3D_INTERNAL_BPP_32 == 0); + *max_bpp = V3D_INTERNAL_BPP_32; + *msaa = false; + + if (subpass) { + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t att_idx = subpass->color_attachments[i].attachment; + if (att_idx == VK_ATTACHMENT_UNUSED) + continue; + + const struct v3dv_image_view *att = framebuffer->attachments[att_idx]; + assert(att); + + if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) + *max_bpp = MAX2(*max_bpp, att->internal_bpp); + + if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) + *msaa = true; + } + + if (!*msaa && subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) { + const struct v3dv_image_view *att = + framebuffer->attachments[subpass->ds_attachment.attachment]; + assert(att); + + if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) + *msaa = true; + } + + return; + } + + assert(framebuffer->attachment_count <= 4); + for (uint32_t i = 0; i < framebuffer->attachment_count; i++) { + const struct v3dv_image_view *att = framebuffer->attachments[i]; + assert(att); + + if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) + *max_bpp = MAX2(*max_bpp, att->internal_bpp); + + if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT) + *msaa = true; + } + + return; +} + +uint32_t +v3dX(zs_buffer_from_aspect_bits)(VkImageAspectFlags aspects) +{ + const VkImageAspectFlags zs_aspects = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + const VkImageAspectFlags filtered_aspects = aspects & zs_aspects; + + if (filtered_aspects == zs_aspects) + return ZSTENCIL; + else if (filtered_aspects == VK_IMAGE_ASPECT_DEPTH_BIT) + return Z; + else if (filtered_aspects == VK_IMAGE_ASPECT_STENCIL_BIT) + return STENCIL; + else + return NONE; +} + +void +v3dX(get_hw_clear_color)(const VkClearColorValue *color, + uint32_t internal_type, + uint32_t internal_size, + uint32_t *hw_color) +{ + union util_color uc; + switch (internal_type) { + case V3D_INTERNAL_TYPE_8: + util_pack_color(color->float32, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); + memcpy(hw_color, uc.ui, internal_size); + break; + case V3D_INTERNAL_TYPE_8I: + case V3D_INTERNAL_TYPE_8UI: + hw_color[0] = ((color->uint32[0] & 0xff) | + (color->uint32[1] & 0xff) << 8 | + (color->uint32[2] & 0xff) << 16 | + (color->uint32[3] & 0xff) << 24); + break; + case V3D_INTERNAL_TYPE_16F: + util_pack_color(color->float32, PIPE_FORMAT_R16G16B16A16_FLOAT, &uc); + memcpy(hw_color, uc.ui, internal_size); + break; + case V3D_INTERNAL_TYPE_16I: + case V3D_INTERNAL_TYPE_16UI: + hw_color[0] = ((color->uint32[0] & 0xffff) | color->uint32[1] << 16); + hw_color[1] = ((color->uint32[2] & 0xffff) | color->uint32[3] << 16); + break; + case V3D_INTERNAL_TYPE_32F: + case V3D_INTERNAL_TYPE_32I: + case V3D_INTERNAL_TYPE_32UI: + memcpy(hw_color, color->uint32, internal_size); + break; + } +} + +#ifdef 
DEBUG +void +v3dX(device_check_prepacked_sizes)(void) +{ + STATIC_ASSERT(V3DV_SAMPLER_STATE_LENGTH >= + cl_packet_length(SAMPLER_STATE)); + STATIC_ASSERT(V3DV_TEXTURE_SHADER_STATE_LENGTH >= + cl_packet_length(TEXTURE_SHADER_STATE)); + STATIC_ASSERT(V3DV_SAMPLER_STATE_LENGTH >= + cl_packet_length(SAMPLER_STATE)); + STATIC_ASSERT(V3DV_BLEND_CFG_LENGTH>= + cl_packet_length(BLEND_CFG)); + STATIC_ASSERT(V3DV_CFG_BITS_LENGTH>= + cl_packet_length(CFG_BITS)); + STATIC_ASSERT(V3DV_GL_SHADER_STATE_RECORD_LENGTH >= + cl_packet_length(GL_SHADER_STATE_RECORD)); + STATIC_ASSERT(V3DV_VCM_CACHE_SIZE_LENGTH>= + cl_packet_length(VCM_CACHE_SIZE)); + STATIC_ASSERT(V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH >= + cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD)); + STATIC_ASSERT(V3DV_STENCIL_CFG_LENGTH >= + cl_packet_length(STENCIL_CFG)); +} +#endif diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c b/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c new file mode 100644 index 000000000..4f77dd008 --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c @@ -0,0 +1,465 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3dv_private.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" + +#include "util/format/u_format.h" + +#define SWIZ(x,y,z,w) { \ + PIPE_SWIZZLE_##x, \ + PIPE_SWIZZLE_##y, \ + PIPE_SWIZZLE_##z, \ + PIPE_SWIZZLE_##w \ +} + +#define FORMAT(vk, rt, tex, swiz, return_size, supports_filtering) \ + [VK_FORMAT_##vk] = { \ + true, \ + V3D_OUTPUT_IMAGE_FORMAT_##rt, \ + TEXTURE_DATA_FORMAT_##tex, \ + swiz, \ + return_size, \ + supports_filtering, \ + } + +#define SWIZ_X001 SWIZ(X, 0, 0, 1) +#define SWIZ_XY01 SWIZ(X, Y, 0, 1) +#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1) +#define SWIZ_XYZW SWIZ(X, Y, Z, W) +#define SWIZ_YZWX SWIZ(Y, Z, W, X) +#define SWIZ_YZW1 SWIZ(Y, Z, W, 1) +#define SWIZ_ZYXW SWIZ(Z, Y, X, W) +#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1) +#define SWIZ_XXXY SWIZ(X, X, X, Y) +#define SWIZ_XXX1 SWIZ(X, X, X, 1) +#define SWIZ_XXXX SWIZ(X, X, X, X) +#define SWIZ_000X SWIZ(0, 0, 0, X) +#define SWIZ_WXYZ SWIZ(W, X, Y, Z) + +/* FIXME: expand format table to describe whether the format is supported + * for buffer surfaces (texel buffers, vertex buffers, etc). 
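+ *
+ * Each FORMAT() entry expands to a v3dv_format initializer. For example,
+ *
+ *    FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, true)
+ *
+ * marks VK_FORMAT_R8G8B8A8_UNORM as supported, selects the RGBA8
+ * render-target output and RGBA8 texture data formats, applies an
+ * identity swizzle, and records a 16-bit TMU return size with filtering
+ * supported.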
+ */ +static const struct v3dv_format format_table[] = { + /* Color, 4 channels */ + FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, true), + FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, true), + + FORMAT(R8G8B8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), + FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, true), + FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true), + FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, false), + FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, false), + + FORMAT(R16G16B16A16_SFLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, true), + FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, true), + FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, true), + FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, false), + FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, false), + + FORMAT(R32G32B32A32_SFLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, false), + FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, false), + FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, false), + + /* Color, 3 channels */ + FORMAT(R32G32B32_SFLOAT, NO, NO, SWIZ_XYZ1, 0, false), + FORMAT(R32G32B32_UINT, NO, NO, SWIZ_XYZ1, 0, false), + FORMAT(R32G32B32_SINT, NO, NO, SWIZ_XYZ1, 0, false), + + /* Color, 2 channels */ + FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, true), + FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, true), + FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, false), + FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, false), + + FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, true), + FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, true), + FORMAT(R16G16_SFLOAT, RG16F, RG16F, SWIZ_XY01, 16, true), + FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, false), + FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, false), + + FORMAT(R32G32_SFLOAT, RG32F, RG32F, SWIZ_XY01, 32, false), + FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, false), + FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, false), + + /* Color, 1 channel */ + FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, true), + FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, true), + FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, false), + FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, false), + + FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, true), + FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, true), + FORMAT(R16_SFLOAT, R16F, R16F, SWIZ_X001, 16, true), + FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, false), + FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, false), + + FORMAT(R32_SFLOAT, R32F, R32F, SWIZ_X001, 32, false), + FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, false), + FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, false), + + /* Color, packed */ + FORMAT(R4G4B4A4_UNORM_PACK16, ABGR4444, RGBA4, SWIZ_XYZW, 16, true), + FORMAT(B4G4R4A4_UNORM_PACK16, ABGR4444, RGBA4, SWIZ_ZYXW, 16, true), /* Swap RB */ + FORMAT(R5G6B5_UNORM_PACK16, BGR565, RGB565, SWIZ_XYZ1, 16, true), + FORMAT(R5G5B5A1_UNORM_PACK16, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, true), + FORMAT(A1R5G5B5_UNORM_PACK16, RGBA5551, A1_RGB5, SWIZ_ZYXW, 16, true), /* Swap RB */ + FORMAT(A8B8G8R8_UNORM_PACK32, RGBA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 UNORM */ + FORMAT(A8B8G8R8_SNORM_PACK32, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true), /* RGBA8 SNORM */ + FORMAT(A8B8G8R8_UINT_PACK32, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, false), /* RGBA8 UINT */ + FORMAT(A8B8G8R8_SINT_PACK32, RGBA8I, RGBA8I, SWIZ_XYZW, 16, false), /* RGBA8 SINT */ + FORMAT(A8B8G8R8_SRGB_PACK32, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 sRGB */ + 
FORMAT(A2B10G10R10_UNORM_PACK32,RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, true), + FORMAT(A2B10G10R10_UINT_PACK32, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, false), + FORMAT(E5B9G9R9_UFLOAT_PACK32, NO, RGB9_E5, SWIZ_XYZ1, 16, true), + FORMAT(B10G11R11_UFLOAT_PACK32, R11F_G11F_B10F,R11F_G11F_B10F, SWIZ_XYZ1, 16, true), + + /* Depth */ + FORMAT(D16_UNORM, D16, DEPTH_COMP16, SWIZ_X001, 32, false), + FORMAT(D32_SFLOAT, D32F, DEPTH_COMP32F, SWIZ_X001, 32, false), + FORMAT(X8_D24_UNORM_PACK32, D24S8, DEPTH24_X8, SWIZ_X001, 32, false), + + /* Depth + Stencil */ + FORMAT(D24_UNORM_S8_UINT, D24S8, DEPTH24_X8, SWIZ_X001, 32, false), + + /* Compressed: ETC2 / EAC */ + FORMAT(ETC2_R8G8B8_UNORM_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true), + FORMAT(ETC2_R8G8B8_SRGB_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true), + FORMAT(ETC2_R8G8B8A1_UNORM_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true), + FORMAT(ETC2_R8G8B8A1_SRGB_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true), + FORMAT(ETC2_R8G8B8A8_UNORM_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true), + FORMAT(ETC2_R8G8B8A8_SRGB_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true), + FORMAT(EAC_R11_UNORM_BLOCK, NO, R11_EAC, SWIZ_X001, 16, true), + FORMAT(EAC_R11_SNORM_BLOCK, NO, SIGNED_R11_EAC, SWIZ_X001, 16, true), + FORMAT(EAC_R11G11_UNORM_BLOCK, NO, RG11_EAC, SWIZ_XY01, 16, true), + FORMAT(EAC_R11G11_SNORM_BLOCK, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, true), + + /* Compressed: BC1-3 */ + FORMAT(BC1_RGB_UNORM_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true), + FORMAT(BC1_RGB_SRGB_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true), + FORMAT(BC1_RGBA_UNORM_BLOCK, NO, BC1, SWIZ_XYZW, 16, true), + FORMAT(BC1_RGBA_SRGB_BLOCK, NO, BC1, SWIZ_XYZW, 16, true), + FORMAT(BC2_UNORM_BLOCK, NO, BC2, SWIZ_XYZW, 16, true), + FORMAT(BC2_SRGB_BLOCK, NO, BC2, SWIZ_XYZW, 16, true), + FORMAT(BC3_UNORM_BLOCK, NO, BC3, SWIZ_XYZW, 16, true), + FORMAT(BC3_SRGB_BLOCK, NO, BC3, SWIZ_XYZW, 16, true), + + /* Compressed: ASTC */ + FORMAT(ASTC_4x4_UNORM_BLOCK, NO, ASTC_4X4, SWIZ_XYZW, 16, true), + FORMAT(ASTC_4x4_SRGB_BLOCK, NO, ASTC_4X4, SWIZ_XYZW, 16, true), + FORMAT(ASTC_5x4_UNORM_BLOCK, NO, ASTC_5X4, SWIZ_XYZW, 16, true), + FORMAT(ASTC_5x4_SRGB_BLOCK, NO, ASTC_5X4, SWIZ_XYZW, 16, true), + FORMAT(ASTC_5x5_UNORM_BLOCK, NO, ASTC_5X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_5x5_SRGB_BLOCK, NO, ASTC_5X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_6x5_UNORM_BLOCK, NO, ASTC_6X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_6x5_SRGB_BLOCK, NO, ASTC_6X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_6x6_UNORM_BLOCK, NO, ASTC_6X6, SWIZ_XYZW, 16, true), + FORMAT(ASTC_6x6_SRGB_BLOCK, NO, ASTC_6X6, SWIZ_XYZW, 16, true), + FORMAT(ASTC_8x5_UNORM_BLOCK, NO, ASTC_8X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_8x5_SRGB_BLOCK, NO, ASTC_8X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_8x6_UNORM_BLOCK, NO, ASTC_8X6, SWIZ_XYZW, 16, true), + FORMAT(ASTC_8x6_SRGB_BLOCK, NO, ASTC_8X6, SWIZ_XYZW, 16, true), + FORMAT(ASTC_8x8_UNORM_BLOCK, NO, ASTC_8X8, SWIZ_XYZW, 16, true), + FORMAT(ASTC_8x8_SRGB_BLOCK, NO, ASTC_8X8, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x5_UNORM_BLOCK, NO, ASTC_10X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x5_SRGB_BLOCK, NO, ASTC_10X5, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x6_UNORM_BLOCK, NO, ASTC_10X6, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x6_SRGB_BLOCK, NO, ASTC_10X6, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x8_UNORM_BLOCK, NO, ASTC_10X8, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x8_SRGB_BLOCK, NO, ASTC_10X8, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x10_UNORM_BLOCK, NO, ASTC_10X10, SWIZ_XYZW, 16, true), + FORMAT(ASTC_10x10_SRGB_BLOCK, NO, ASTC_10X10, SWIZ_XYZW, 16, true), + 
FORMAT(ASTC_12x10_UNORM_BLOCK, NO, ASTC_12X10, SWIZ_XYZW, 16, true), + FORMAT(ASTC_12x10_SRGB_BLOCK, NO, ASTC_12X10, SWIZ_XYZW, 16, true), + FORMAT(ASTC_12x12_UNORM_BLOCK, NO, ASTC_12X12, SWIZ_XYZW, 16, true), + FORMAT(ASTC_12x12_SRGB_BLOCK, NO, ASTC_12X12, SWIZ_XYZW, 16, true), +}; + +const struct v3dv_format * +v3dX(get_format)(VkFormat format) +{ + if (format < ARRAY_SIZE(format_table) && format_table[format].supported) + return &format_table[format]; + else + return NULL; +} + +void +v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, + uint32_t *type, + uint32_t *bpp) +{ + switch (format) { + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: + case V3D_OUTPUT_IMAGE_FORMAT_RGB8: + case V3D_OUTPUT_IMAGE_FORMAT_RG8: + case V3D_OUTPUT_IMAGE_FORMAT_R8: + case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444: + case V3D_OUTPUT_IMAGE_FORMAT_BGR565: + case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555: + *type = V3D_INTERNAL_TYPE_8; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I: + case V3D_OUTPUT_IMAGE_FORMAT_RG8I: + case V3D_OUTPUT_IMAGE_FORMAT_R8I: + *type = V3D_INTERNAL_TYPE_8I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI: + case V3D_OUTPUT_IMAGE_FORMAT_RG8UI: + case V3D_OUTPUT_IMAGE_FORMAT_R8UI: + *type = V3D_INTERNAL_TYPE_8UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8: + case V3D_OUTPUT_IMAGE_FORMAT_SRGB: + case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: + case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: + /* Note that sRGB RTs are stored in the tile buffer at 16F, + * and the conversion to sRGB happens at tilebuffer load/store. + */ + *type = V3D_INTERNAL_TYPE_16F; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG16F: + case V3D_OUTPUT_IMAGE_FORMAT_R16F: + *type = V3D_INTERNAL_TYPE_16F; + /* Use 64bpp to make sure the TLB doesn't throw away the alpha + * channel before alpha test happens. 
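+       *
+       * A minimal usage illustration of this helper (the values follow
+       * directly from this case):
+       *
+       *    uint32_t type, bpp;
+       *    v3dX(get_internal_type_bpp_for_output_format)(
+       *       V3D_OUTPUT_IMAGE_FORMAT_R16F, &type, &bpp);
+       *
+       * yields type == V3D_INTERNAL_TYPE_16F and bpp == V3D_INTERNAL_BPP_64,
+       * even though the stored image is only 16 bits per pixel.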
+ */ + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I: + *type = V3D_INTERNAL_TYPE_16I; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG16I: + case V3D_OUTPUT_IMAGE_FORMAT_R16I: + *type = V3D_INTERNAL_TYPE_16I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI: + case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI: + *type = V3D_INTERNAL_TYPE_16UI; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG16UI: + case V3D_OUTPUT_IMAGE_FORMAT_R16UI: + *type = V3D_INTERNAL_TYPE_16UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_128; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_R32I: + *type = V3D_INTERNAL_TYPE_32I; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_128; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_R32UI: + *type = V3D_INTERNAL_TYPE_32UI; + *bpp = V3D_INTERNAL_BPP_32; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_128; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_RG32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_64; + break; + + case V3D_OUTPUT_IMAGE_FORMAT_R32F: + *type = V3D_INTERNAL_TYPE_32F; + *bpp = V3D_INTERNAL_BPP_32; + break; + + default: + /* Provide some default values, as we'll be called at RB + * creation time, even if an RB with this format isn't supported. + */ + *type = V3D_INTERNAL_TYPE_8; + *bpp = V3D_INTERNAL_BPP_32; + break; + } +} + +bool +v3dX(format_supports_tlb_resolve)(const struct v3dv_format *format) +{ + uint32_t type, bpp; + v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, &type, &bpp); + return type == V3D_INTERNAL_TYPE_8 || type == V3D_INTERNAL_TYPE_16F; +} + +bool +v3dX(format_supports_blending)(const struct v3dv_format *format) +{ + /* Hardware blending is only supported on render targets that are configured + * 4x8-bit unorm, 2x16-bit float or 4x16-bit float. 
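+    *
+    * For example, per the mapping in the helper above: RGBA8 resolves to
+    * internal type 8 at 32bpp and RG16F/RGBA16F resolve to 16F at 64bpp,
+    * so all of those blend; RGBA32F resolves to 32F and does not. A caller
+    * sketch:
+    *
+    *    const struct v3dv_format *f =
+    *       v3dX(get_format)(VK_FORMAT_R8G8B8A8_UNORM);
+    *    assert(f && v3dX(format_supports_blending)(f));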
+    */
+   uint32_t type, bpp;
+   v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, &type, &bpp);
+   switch (type) {
+   case V3D_INTERNAL_TYPE_8:
+      return bpp == V3D_INTERNAL_BPP_32;
+   case V3D_INTERNAL_TYPE_16F:
+      return bpp == V3D_INTERNAL_BPP_32 || bpp == V3D_INTERNAL_BPP_64;
+   default:
+      return false;
+   }
+}
+
+bool
+v3dX(tfu_supports_tex_format)(uint32_t tex_format)
+{
+   switch (tex_format) {
+   case TEXTURE_DATA_FORMAT_R8:
+   case TEXTURE_DATA_FORMAT_R8_SNORM:
+   case TEXTURE_DATA_FORMAT_RG8:
+   case TEXTURE_DATA_FORMAT_RG8_SNORM:
+   case TEXTURE_DATA_FORMAT_RGBA8:
+   case TEXTURE_DATA_FORMAT_RGBA8_SNORM:
+   case TEXTURE_DATA_FORMAT_RGB565:
+   case TEXTURE_DATA_FORMAT_RGBA4:
+   case TEXTURE_DATA_FORMAT_RGB5_A1:
+   case TEXTURE_DATA_FORMAT_RGB10_A2:
+   case TEXTURE_DATA_FORMAT_R16:
+   case TEXTURE_DATA_FORMAT_R16_SNORM:
+   case TEXTURE_DATA_FORMAT_RG16:
+   case TEXTURE_DATA_FORMAT_RG16_SNORM:
+   case TEXTURE_DATA_FORMAT_RGBA16:
+   case TEXTURE_DATA_FORMAT_RGBA16_SNORM:
+   case TEXTURE_DATA_FORMAT_R16F:
+   case TEXTURE_DATA_FORMAT_RG16F:
+   case TEXTURE_DATA_FORMAT_RGBA16F:
+   case TEXTURE_DATA_FORMAT_R11F_G11F_B10F:
+   case TEXTURE_DATA_FORMAT_R4:
+   case TEXTURE_DATA_FORMAT_RGB9_E5:
+   case TEXTURE_DATA_FORMAT_R32F:
+   case TEXTURE_DATA_FORMAT_RG32F:
+   case TEXTURE_DATA_FORMAT_RGBA32F:
+   case TEXTURE_DATA_FORMAT_RGB8_ETC2:
+   case TEXTURE_DATA_FORMAT_RGB8_PUNCHTHROUGH_ALPHA1:
+   case TEXTURE_DATA_FORMAT_RGBA8_ETC2_EAC:
+   case TEXTURE_DATA_FORMAT_R11_EAC:
+   case TEXTURE_DATA_FORMAT_SIGNED_R11_EAC:
+   case TEXTURE_DATA_FORMAT_RG11_EAC:
+   case TEXTURE_DATA_FORMAT_SIGNED_RG11_EAC:
+      return true;
+   default:
+      return false;
+   }
+}
+
+uint8_t
+v3dX(get_internal_depth_type)(VkFormat format)
+{
+   switch (format) {
+   case VK_FORMAT_D16_UNORM:
+      return V3D_INTERNAL_TYPE_DEPTH_16;
+   case VK_FORMAT_D32_SFLOAT:
+      return V3D_INTERNAL_TYPE_DEPTH_32F;
+   case VK_FORMAT_X8_D24_UNORM_PACK32:
+   case VK_FORMAT_D24_UNORM_S8_UINT:
+      return V3D_INTERNAL_TYPE_DEPTH_24;
+   default:
+      unreachable("Invalid depth format");
+      break;
+   }
+}
+
+void
+v3dX(get_internal_type_bpp_for_image_aspects)(VkFormat vk_format,
+                                              VkImageAspectFlags aspect_mask,
+                                              uint32_t *internal_type,
+                                              uint32_t *internal_bpp)
+{
+   const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
+                                         VK_IMAGE_ASPECT_STENCIL_BIT;
+
+   /* We can't store depth/stencil pixel formats to a raster format, so
+    * instead we load our depth/stencil aspects to a compatible color
+    * format.
+    */
+   /* FIXME: pre-compute this at image creation time? */
+   if (aspect_mask & ds_aspects) {
+      switch (vk_format) {
+      case VK_FORMAT_D16_UNORM:
+         *internal_type = V3D_INTERNAL_TYPE_16UI;
+         *internal_bpp = V3D_INTERNAL_BPP_64;
+         break;
+      case VK_FORMAT_D32_SFLOAT:
+         *internal_type = V3D_INTERNAL_TYPE_32F;
+         *internal_bpp = V3D_INTERNAL_BPP_128;
+         break;
+      case VK_FORMAT_X8_D24_UNORM_PACK32:
+      case VK_FORMAT_D24_UNORM_S8_UINT:
+         /* Use RGBA8 format so we can relocate the X/S bits in the
+          * appropriate place to match Vulkan expectations. See the comment
+          * on the tile load command for more details.
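+          *
+          * Summarizing the switch above, the depth/stencil aspects map to
+          * color-compatible layouts as (a restatement, not additional
+          * hardware state):
+          *
+          *    D16_UNORM        -> 16UI at V3D_INTERNAL_BPP_64
+          *    D32_SFLOAT       -> 32F  at V3D_INTERNAL_BPP_128
+          *    X8_D24 / D24S8   -> 8UI  at V3D_INTERNAL_BPP_32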
+ */ + *internal_type = V3D_INTERNAL_TYPE_8UI; + *internal_bpp = V3D_INTERNAL_BPP_32; + break; + default: + assert(!"unsupported format"); + break; + } + } else { + const struct v3dv_format *format = v3dX(get_format)(vk_format); + v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, + internal_type, internal_bpp); + } +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_image.c b/lib/mesa/src/broadcom/vulkan/v3dvx_image.c new file mode 100644 index 000000000..a9aa0fb97 --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_image.c @@ -0,0 +1,198 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "v3dv_private.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +#include "vk_format_info.h" + +/* + * This method translates pipe_swizzle to the swizzle values used at the + * packet TEXTURE_SHADER_STATE + * + * FIXME: C&P from v3d, common place? + */ +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} + +/* + * Packs and ensure bo for the shader state (the latter can be temporal). + */ +static void +pack_texture_shader_state_helper(struct v3dv_device *device, + struct v3dv_image_view *image_view, + bool for_cube_map_array_storage) +{ + assert(!for_cube_map_array_storage || + image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY); + const uint32_t index = for_cube_map_array_storage ? 1 : 0; + + assert(image_view->vk.image); + const struct v3dv_image *image = (struct v3dv_image *) image_view->vk.image; + + assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT || + image->vk.samples == VK_SAMPLE_COUNT_4_BIT); + const uint32_t msaa_scale = image->vk.samples == VK_SAMPLE_COUNT_1_BIT ? 
1 : 2; + + v3dvx_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) { + + tex.level_0_is_strictly_uif = + (image->slices[0].tiling == V3D_TILING_UIF_XOR || + image->slices[0].tiling == V3D_TILING_UIF_NO_XOR); + + tex.level_0_xor_enable = (image->slices[0].tiling == V3D_TILING_UIF_XOR); + + if (tex.level_0_is_strictly_uif) + tex.level_0_ub_pad = image->slices[0].ub_pad; + + /* FIXME: v3d never sets uif_xor_disable, but uses it on the following + * check so let's set the default value + */ + tex.uif_xor_disable = false; + if (tex.uif_xor_disable || + tex.level_0_is_strictly_uif) { + tex.extended = true; + } + + tex.base_level = image_view->vk.base_mip_level; + tex.max_level = image_view->vk.base_mip_level + + image_view->vk.level_count - 1; + + tex.swizzle_r = translate_swizzle(image_view->swizzle[0]); + tex.swizzle_g = translate_swizzle(image_view->swizzle[1]); + tex.swizzle_b = translate_swizzle(image_view->swizzle[2]); + tex.swizzle_a = translate_swizzle(image_view->swizzle[3]); + + tex.texture_type = image_view->format->tex_type; + + if (image->vk.image_type == VK_IMAGE_TYPE_3D) { + tex.image_depth = image->vk.extent.depth; + } else { + tex.image_depth = image_view->vk.layer_count; + } + + /* Empirical testing with CTS shows that when we are sampling from cube + * arrays we want to set image depth to layers / 6, but not when doing + * image load/store. + */ + if (image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY && + !for_cube_map_array_storage) { + assert(tex.image_depth % 6 == 0); + tex.image_depth /= 6; + } + + tex.image_height = image->vk.extent.height * msaa_scale; + tex.image_width = image->vk.extent.width * msaa_scale; + + /* On 4.x, the height of a 1D texture is redefined to be the + * upper 14 bits of the width (which is only usable with txf). + */ + if (image->vk.image_type == VK_IMAGE_TYPE_1D) { + tex.image_height = tex.image_width >> 14; + } + tex.image_width &= (1 << 14) - 1; + tex.image_height &= (1 << 14) - 1; + + tex.array_stride_64_byte_aligned = image->cube_map_stride / 64; + + tex.srgb = vk_format_is_srgb(image_view->vk.format); + + /* At this point we don't have the job. That's the reason the first + * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to + * add the bo to the job. This also means that we need to add manually + * the image bo to the job using the texture. 
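+       *
+       * A consumer sketch (the helper name v3dv_job_add_bo is assumed from
+       * the rest of the driver; the exact call sites live in the command
+       * buffer code): before submitting any job that reads this prepacked
+       * state, something equivalent to
+       *
+       *    v3dv_job_add_bo(job, image->mem->bo);
+       *
+       * must run so the BO ends up in the job's BO list.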
+ */ + const uint32_t base_offset = + image->mem->bo->offset + + v3dv_layer_offset(image, 0, image_view->vk.base_array_layer); + tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); + } +} + +void +v3dX(pack_texture_shader_state)(struct v3dv_device *device, + struct v3dv_image_view *iview) +{ + pack_texture_shader_state_helper(device, iview, false); + if (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) + pack_texture_shader_state_helper(device, iview, true); +} + +void +v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, + struct v3dv_buffer_view *buffer_view) +{ + assert(buffer_view->buffer); + const struct v3dv_buffer *buffer = buffer_view->buffer; + + v3dvx_pack(buffer_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) { + tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + + tex.image_depth = 1; + + /* On 4.x, the height of a 1D texture is redefined to be the upper 14 + * bits of the width (which is only usable with txf) (or in other words, + * we are providing a 28 bit field for size, but split on the usual + * 14bit height/width). + */ + tex.image_width = buffer_view->num_elements; + tex.image_height = tex.image_width >> 14; + tex.image_width &= (1 << 14) - 1; + tex.image_height &= (1 << 14) - 1; + + tex.texture_type = buffer_view->format->tex_type; + tex.srgb = vk_format_is_srgb(buffer_view->vk_format); + + /* At this point we don't have the job. That's the reason the first + * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to + * add the bo to the job. This also means that we need to add manually + * the image bo to the job using the texture. + */ + const uint32_t base_offset = + buffer->mem->bo->offset + + buffer->mem_offset + + buffer_view->offset; + + tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset); + } +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c b/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c new file mode 100644 index 000000000..2f79e4e9c --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c @@ -0,0 +1,1357 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "v3dv_private.h" +#include "v3dv_meta_common.h" + +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +#include "vk_format_info.h" + +struct rcl_clear_info { + const union v3dv_clear_value *clear_value; + struct v3dv_image *image; + VkImageAspectFlags aspects; + uint32_t level; +}; + +static struct v3dv_cl * +emit_rcl_prologue(struct v3dv_job *job, + struct v3dv_meta_framebuffer *fb, + const struct rcl_clear_info *clear_info) +{ + const struct v3dv_frame_tiling *tiling = &job->frame_tiling; + + struct v3dv_cl *rcl = &job->rcl; + v3dv_cl_ensure_space_with_branch(rcl, 200 + + tiling->layers * 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + if (job->cmd_buffer->state.oom) + return NULL; + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { + config.early_z_disable = true; + config.image_width_pixels = tiling->width; + config.image_height_pixels = tiling->height; + config.number_of_render_targets = 1; + config.multisample_mode_4x = tiling->msaa; + config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; + config.internal_depth_type = fb->internal_depth_type; + } + + if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) { + uint32_t clear_pad = 0; + if (clear_info->image) { + const struct v3dv_image *image = clear_info->image; + const struct v3d_resource_slice *slice = + &image->slices[clear_info->level]; + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + int uif_block_height = v3d_utile_height(image->cpp) * 2; + + uint32_t implicit_padded_height = + align(tiling->height, uif_block_height) / uif_block_height; + + if (slice->padded_height_of_output_image_in_uif_blocks - + implicit_padded_height >= 15) { + clear_pad = slice->padded_height_of_output_image_in_uif_blocks; + } + } + } + + const uint32_t *color = &clear_info->clear_value->color[0]; + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) { + clear.clear_color_low_32_bits = color[0]; + clear.clear_color_next_24_bits = color[1] & 0x00ffffff; + clear.render_target_number = 0; + }; + + if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) { + clear.clear_color_mid_low_32_bits = + ((color[1] >> 24) | (color[2] << 8)); + clear.clear_color_mid_high_24_bits = + ((color[2] >> 24) | ((color[3] & 0xffff) << 8)); + clear.render_target_number = 0; + }; + } + + if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) { + cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) { + clear.uif_padded_height_in_uif_blocks = clear_pad; + clear.clear_color_high_16_bits = color[3] >> 16; + clear.render_target_number = 0; + }; + } + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { + rt.render_target_0_internal_bpp = tiling->internal_bpp; + rt.render_target_0_internal_type = fb->internal_type; + rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { + clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f; + clear.stencil_clear_value = clear_info ? 
clear_info->clear_value->s : 0; + }; + + cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + return rcl; +} + +static void +emit_frame_setup(struct v3dv_job *job, + uint32_t min_layer, + const union v3dv_clear_value *clear_value) +{ + v3dv_return_if_oom(NULL, job); + + const struct v3dv_frame_tiling *tiling = &job->frame_tiling; + + struct v3dv_cl *rcl = &job->rcl; + + const uint32_t tile_alloc_offset = + 64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y; + cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset); + } + + cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { + config.number_of_bin_tile_lists = 1; + config.total_frame_width_in_tiles = tiling->draw_tiles_x; + config.total_frame_height_in_tiles = tiling->draw_tiles_y; + + config.supertile_width_in_tiles = tiling->supertile_width; + config.supertile_height_in_tiles = tiling->supertile_height; + + config.total_frame_width_in_supertiles = + tiling->frame_width_in_supertiles; + config.total_frame_height_in_supertiles = + tiling->frame_height_in_supertiles; + } + + /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do + * it here. + */ + for (int i = 0; i < 2; i++) { + cl_emit(rcl, TILE_COORDINATES, coords); + cl_emit(rcl, END_OF_LOADS, end); + cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + if (clear_value && i == 0) { + cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) { + clear.clear_z_stencil_buffer = true; + clear.clear_all_render_targets = true; + } + } + cl_emit(rcl, END_OF_TILE_MARKER, end); + } + + cl_emit(rcl, FLUSH_VCD_CACHE, flush); +} + +static void +emit_supertile_coordinates(struct v3dv_job *job, + struct v3dv_meta_framebuffer *framebuffer) +{ + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl *rcl = &job->rcl; + + const uint32_t min_y = framebuffer->min_y_supertile; + const uint32_t max_y = framebuffer->max_y_supertile; + const uint32_t min_x = framebuffer->min_x_supertile; + const uint32_t max_x = framebuffer->max_x_supertile; + + for (int y = min_y; y <= max_y; y++) { + for (int x = min_x; x <= max_x; x++) { + cl_emit(rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = x; + coords.row_number_in_supertiles = y; + } + } + } +} + +static void +emit_linear_load(struct v3dv_cl *cl, + uint32_t buffer, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t stride, + uint32_t format) +{ + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = buffer; + load.address = v3dv_cl_address(bo, offset); + load.input_image_format = format; + load.memory_format = V3D_TILING_RASTER; + load.height_in_ub_or_stride = stride; + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static void +emit_linear_store(struct v3dv_cl *cl, + uint32_t buffer, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t stride, + bool msaa, + uint32_t format) +{ + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = RENDER_TARGET_0; + store.address = v3dv_cl_address(bo, offset); + store.clear_buffer_being_stored = false; + store.output_image_format = format; + store.memory_format = V3D_TILING_RASTER; + store.height_in_ub_or_stride = stride; + store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES : + V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +/* This chooses a tile buffer format that is appropriate for the copy operation. 
+ * Typically, this is the image render target type, however, if we are copying + * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so + * we need to load and store to/from a tile color buffer using a compatible + * color format. + */ +static uint32_t +choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer, + VkImageAspectFlags aspect, + bool for_store, + bool is_copy_to_buffer, + bool is_copy_from_buffer) +{ + if (is_copy_to_buffer || is_copy_from_buffer) { + switch (framebuffer->vk_format) { + case VK_FORMAT_D16_UNORM: + return V3D_OUTPUT_IMAGE_FORMAT_R16UI; + case VK_FORMAT_D32_SFLOAT: + return V3D_OUTPUT_IMAGE_FORMAT_R32F; + case VK_FORMAT_X8_D24_UNORM_PACK32: + return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + case VK_FORMAT_D24_UNORM_S8_UINT: + /* When storing the stencil aspect of a combined depth/stencil image + * to a buffer, the Vulkan spec states that the output buffer must + * have packed stencil values, so we choose an R8UI format for our + * store outputs. For the load input we still want RGBA8UI since the + * source image contains 4 channels (including the 3 channels + * containing the 24-bit depth value). + * + * When loading the stencil aspect of a combined depth/stencil image + * from a buffer, we read packed 8-bit stencil values from the buffer + * that we need to put into the LSB of the 32-bit format (the R + * channel), so we use R8UI. For the store, if we used R8UI then we + * would write 8-bit stencil values consecutively over depth channels, + * so we need to use RGBA8UI. This will write each stencil value in + * its correct position, but will overwrite depth values (channels G + * B,A) with undefined values. To fix this, we will have to restore + * the depth aspect from the Z tile buffer, which we should pre-load + * from the image before the store). + */ + if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) { + return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + } else { + assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT); + if (is_copy_to_buffer) { + return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI : + V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + } else { + assert(is_copy_from_buffer); + return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI : + V3D_OUTPUT_IMAGE_FORMAT_R8UI; + } + } + default: /* Color formats */ + return framebuffer->format->rt_type; + break; + } + } else { + return framebuffer->format->rt_type; + } +} + +static inline bool +format_needs_rb_swap(struct v3dv_device *device, + VkFormat format) +{ + const uint8_t *swizzle = v3dv_get_format_swizzle(device, format); + return swizzle[0] == PIPE_SWIZZLE_Z; +} + +static void +emit_image_load(struct v3dv_device *device, + struct v3dv_cl *cl, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_image *image, + VkImageAspectFlags aspect, + uint32_t layer, + uint32_t mip_level, + bool is_copy_to_buffer, + bool is_copy_from_buffer) +{ + uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + + /* For image to/from buffer copies we always load to and store from RT0, + * even for depth/stencil aspects, because the hardware can't do raster + * stores or loads from/to the depth/stencil tile buffers. + */ + bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer || + aspect == VK_IMAGE_ASPECT_COLOR_BIT; + + const struct v3d_resource_slice *slice = &image->slices[mip_level]; + cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { + load.buffer_to_load = load_to_color_tlb ? 
+ RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); + + load.address = v3dv_cl_address(image->mem->bo, layer_offset); + + load.input_image_format = choose_tlb_format(framebuffer, aspect, false, + is_copy_to_buffer, + is_copy_from_buffer); + load.memory_format = slice->tiling; + + /* When copying depth/stencil images to a buffer, for D24 formats Vulkan + * expects the depth value in the LSB bits of each 32-bit pixel. + * Unfortunately, the hardware seems to put the S8/X8 bits there and the + * depth bits on the MSB. To work around that we can reverse the channel + * order and then swap the R/B channels to get what we want. + * + * NOTE: reversing and swapping only gets us the behavior we want if the + * operations happen in that exact order, which seems to be the case when + * done on the tile buffer load operations. On the store, it seems the + * order is not the same. The order on the store is probably reversed so + * that reversing and swapping on both the load and the store preserves + * the original order of the channels in memory. + * + * Notice that we only need to do this when copying to a buffer, where + * depth and stencil aspects are copied as separate regions and + * the spec expects them to be tightly packed. + */ + bool needs_rb_swap = false; + bool needs_chan_reverse = false; + if (is_copy_to_buffer && + (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || + (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && + (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { + needs_rb_swap = true; + needs_chan_reverse = true; + } else if (!is_copy_from_buffer && !is_copy_to_buffer && + (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { + /* This is not a raw data copy (i.e. we are clearing the image), + * so we need to make sure we respect the format swizzle. + */ + needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); + } + + load.r_b_swap = needs_rb_swap; + load.channel_reverse = needs_chan_reverse; + + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + load.height_in_ub_or_stride = + slice->padded_height_of_output_image_in_uif_blocks; + } else if (slice->tiling == V3D_TILING_RASTER) { + load.height_in_ub_or_stride = slice->stride; + } + + if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) + load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else + load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static void +emit_image_store(struct v3dv_device *device, + struct v3dv_cl *cl, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_image *image, + VkImageAspectFlags aspect, + uint32_t layer, + uint32_t mip_level, + bool is_copy_to_buffer, + bool is_copy_from_buffer, + bool is_multisample_resolve) +{ + uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer); + + bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer || + aspect == VK_IMAGE_ASPECT_COLOR_BIT; + + const struct v3d_resource_slice *slice = &image->slices[mip_level]; + cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = store_from_color_tlb ? 
+ RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect); + + store.address = v3dv_cl_address(image->mem->bo, layer_offset); + store.clear_buffer_being_stored = false; + + /* See rationale in emit_image_load() */ + bool needs_rb_swap = false; + bool needs_chan_reverse = false; + if (is_copy_from_buffer && + (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 || + (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT && + (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) { + needs_rb_swap = true; + needs_chan_reverse = true; + } else if (!is_copy_from_buffer && !is_copy_to_buffer && + (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) { + needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format); + } + + store.r_b_swap = needs_rb_swap; + store.channel_reverse = needs_chan_reverse; + + store.output_image_format = choose_tlb_format(framebuffer, aspect, true, + is_copy_to_buffer, + is_copy_from_buffer); + store.memory_format = slice->tiling; + if (slice->tiling == V3D_TILING_UIF_NO_XOR || + slice->tiling == V3D_TILING_UIF_XOR) { + store.height_in_ub_or_stride = + slice->padded_height_of_output_image_in_uif_blocks; + } else if (slice->tiling == V3D_TILING_RASTER) { + store.height_in_ub_or_stride = slice->stride; + } + + if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) + store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; + else if (is_multisample_resolve) + store.decimate_mode = V3D_DECIMATE_MODE_4X; + else + store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; + } +} + +static void +emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + uint32_t layer_offset, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + /* Load image to TLB */ + assert((image->vk.image_type != VK_IMAGE_TYPE_3D && + layer_offset < region->imageSubresource.layerCount) || + layer_offset < image->vk.extent.depth); + + const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ? + region->imageSubresource.baseArrayLayer + layer_offset : + region->imageOffset.z + layer_offset; + + emit_image_load(job->device, cl, framebuffer, image, + region->imageSubresource.aspectMask, + image_layer, + region->imageSubresource.mipLevel, + true, false); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + /* Store TLB to buffer */ + uint32_t width, height; + if (region->bufferRowLength == 0) + width = region->imageExtent.width; + else + width = region->bufferRowLength; + + if (region->bufferImageHeight == 0) + height = region->imageExtent.height; + else + height = region->bufferImageHeight; + + /* Handle copy from compressed format */ + width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format)); + height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format)); + + /* If we are storing stencil from a combined depth/stencil format the + * Vulkan spec states that the output buffer must have packed stencil + * values, where each stencil value is 1 byte. + */ + uint32_t cpp = + region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 
+ 1 : image->cpp; + uint32_t buffer_stride = width * cpp; + uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset + + height * buffer_stride * layer_offset; + + uint32_t format = choose_tlb_format(framebuffer, + region->imageSubresource.aspectMask, + true, true, false); + bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT; + + emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo, + buffer_offset, buffer_stride, msaa, format); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_copy_layer_to_buffer(struct v3dv_job *job, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t layer, + const VkBufferImageCopy2KHR *region) +{ + emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer, + image, layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + struct v3dv_meta_framebuffer *framebuffer, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, NULL); + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_resolve_image_layer_per_tile_list(struct v3dv_job *job, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_image *dst, + struct v3dv_image *src, + uint32_t layer_offset, + const VkImageResolve2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + assert((src->vk.image_type != VK_IMAGE_TYPE_3D && + layer_offset < region->srcSubresource.layerCount) || + layer_offset < src->vk.extent.depth); + + const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ? + region->srcSubresource.baseArrayLayer + layer_offset : + region->srcOffset.z + layer_offset; + + emit_image_load(job->device, cl, framebuffer, src, + region->srcSubresource.aspectMask, + src_layer, + region->srcSubresource.mipLevel, + false, false); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + assert((dst->vk.image_type != VK_IMAGE_TYPE_3D && + layer_offset < region->dstSubresource.layerCount) || + layer_offset < dst->vk.extent.depth); + + const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ? 
+ region->dstSubresource.baseArrayLayer + layer_offset : + region->dstOffset.z + layer_offset; + + emit_image_store(job->device, cl, framebuffer, dst, + region->dstSubresource.aspectMask, + dst_layer, + region->dstSubresource.mipLevel, + false, false, true); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_resolve_image_layer(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t layer, + const VkImageResolve2KHR *region) +{ + emit_resolve_image_layer_per_tile_list(job, framebuffer, + dst, src, layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct v3dv_meta_framebuffer *framebuffer, + const VkImageResolve2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, NULL); + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_resolve_image_layer(job, dst, src, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_copy_buffer_per_tile_list(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + uint32_t stride, + uint32_t format) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + emit_linear_store(cl, RENDER_TARGET_0, + dst, dst_offset, stride, false, format); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +void +v3dX(meta_emit_copy_buffer)(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t format, + uint32_t item_size) +{ + const uint32_t stride = job->frame_tiling.width * item_size; + emit_copy_buffer_per_tile_list(job, dst, src, + dst_offset, src_offset, + stride, format); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t format, + uint32_t item_size) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, NULL); + + v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset, + framebuffer, format, item_size); + + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_copy_image_layer_per_tile_list(struct v3dv_job *job, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_image *dst, + struct v3dv_image *src, + uint32_t layer_offset, + const VkImageCopy2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + 
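+
+   /* Like the other *_per_tile_list() emitters in this file, this writes a
+    * generic tile list into the job's indirect CL and branches to it from
+    * the RCL: TILE_COORDINATES_IMPLICIT, the load(s), END_OF_LOADS, a branch
+    * to the implicit tile list, the store(s), END_OF_TILE_MARKER and
+    * RETURN_FROM_SUB_LIST, bracketed by a START_ADDRESS_OF_GENERIC_TILE_LIST
+    * branch emitted into job->rcl.
+    */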
v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + assert((src->vk.image_type != VK_IMAGE_TYPE_3D && + layer_offset < region->srcSubresource.layerCount) || + layer_offset < src->vk.extent.depth); + + const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ? + region->srcSubresource.baseArrayLayer + layer_offset : + region->srcOffset.z + layer_offset; + + emit_image_load(job->device, cl, framebuffer, src, + region->srcSubresource.aspectMask, + src_layer, + region->srcSubresource.mipLevel, + false, false); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + assert((dst->vk.image_type != VK_IMAGE_TYPE_3D && + layer_offset < region->dstSubresource.layerCount) || + layer_offset < dst->vk.extent.depth); + + const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ? + region->dstSubresource.baseArrayLayer + layer_offset : + region->dstOffset.z + layer_offset; + + emit_image_store(job->device, cl, framebuffer, dst, + region->dstSubresource.aspectMask, + dst_layer, + region->dstSubresource.mipLevel, + false, false, false); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_copy_image_layer(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t layer, + const VkImageCopy2KHR *region) +{ + emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct v3dv_meta_framebuffer *framebuffer, + const VkImageCopy2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, NULL); + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_copy_image_layer(job, dst, src, framebuffer, layer, region); + cl_emit(rcl, END_OF_RENDERING, end); +} + +void +v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_image *dst, + uint32_t dst_mip_level, + uint32_t dst_layer, + struct v3dv_image *src, + uint32_t src_mip_level, + uint32_t src_layer, + uint32_t width, + uint32_t height, + const struct v3dv_format *format) +{ + const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level]; + const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level]; + + assert(dst->mem && dst->mem->bo); + const struct v3dv_bo *dst_bo = dst->mem->bo; + + assert(src->mem && src->mem->bo); + const struct v3dv_bo *src_bo = src->mem->bo; + + struct drm_v3d_submit_tfu tfu = { + .ios = (height << 16) | width, + .bo_handles = { + dst_bo->handle, + src_bo->handle != dst_bo->handle ? 
src_bo->handle : 0 + }, + }; + + const uint32_t src_offset = + src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer); + tfu.iia |= src_offset; + + uint32_t icfg; + if (src_slice->tiling == V3D_TILING_RASTER) { + icfg = V3D_TFU_ICFG_FORMAT_RASTER; + } else { + icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE + + (src_slice->tiling - V3D_TILING_LINEARTILE); + } + tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT; + + const uint32_t dst_offset = + dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer); + tfu.ioa |= dst_offset; + + tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE + + (dst_slice->tiling - V3D_TILING_LINEARTILE)) << + V3D_TFU_IOA_FORMAT_SHIFT; + tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT; + + switch (src_slice->tiling) { + case V3D_TILING_UIF_NO_XOR: + case V3D_TILING_UIF_XOR: + tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp)); + break; + case V3D_TILING_RASTER: + tfu.iis |= src_slice->stride / src->cpp; + break; + default: + break; + } + + /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the + * OPAD field for the destination (how many extra UIF blocks beyond + * those necessary to cover the height). + */ + if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR || + dst_slice->tiling == V3D_TILING_UIF_XOR) { + uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp); + uint32_t implicit_padded_height = align(height, uif_block_h); + uint32_t icfg = + (dst_slice->padded_height - implicit_padded_height) / uif_block_h; + tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT; + } + + v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu); +} + +static void +emit_clear_image_layer_per_tile_list(struct v3dv_job *job, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_image *image, + VkImageAspectFlags aspects, + uint32_t layer, + uint32_t level) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + emit_image_store(job->device, cl, framebuffer, image, aspects, + layer, level, false, false, false); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_clear_image_layers(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_meta_framebuffer *framebuffer, + VkImageAspectFlags aspects, + uint32_t min_layer, + uint32_t max_layer, + uint32_t level) +{ + for (uint32_t layer = min_layer; layer < max_layer; layer++) { + emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects, + layer, level); + emit_supertile_coordinates(job, framebuffer); + } +} + +void +v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_meta_framebuffer *framebuffer, + const union v3dv_clear_value *clear_value, + VkImageAspectFlags aspects, + uint32_t min_layer, + uint32_t max_layer, + uint32_t level) +{ + const struct rcl_clear_info clear_info = { + .clear_value = clear_value, + .image = image, + .aspects = aspects, + .level = level, + }; + + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, clear_value); + emit_clear_image_layers(job, image, framebuffer, aspects, + min_layer, 
max_layer, level); + cl_emit(rcl, END_OF_RENDERING, end); +} + +static void +emit_fill_buffer_per_tile_list(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t stride) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false, + V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI); + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_fill_buffer(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + struct v3dv_meta_framebuffer *framebuffer) +{ + const uint32_t stride = job->frame_tiling.width * 4; + emit_fill_buffer_per_tile_list(job, bo, offset, stride); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t data) +{ + const union v3dv_clear_value clear_value = { + .color = { data, 0, 0, 0 }, + }; + + const struct rcl_clear_info clear_info = { + .clear_value = &clear_value, + .image = NULL, + .aspects = VK_IMAGE_ASPECT_COLOR_BIT, + .level = 0, + }; + + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, &clear_value); + emit_fill_buffer(job, bo, offset, framebuffer); + cl_emit(rcl, END_OF_RENDERING, end); +} + + +static void +emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job, + struct v3dv_meta_framebuffer *framebuffer, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + uint32_t layer, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *cl = &job->indirect; + v3dv_cl_ensure_space(cl, 200, 1); + v3dv_return_if_oom(NULL, job); + + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl); + + cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); + + const VkImageSubresourceLayers *imgrsc = ®ion->imageSubresource; + assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) || + layer < image->vk.extent.depth); + + /* Load TLB from buffer */ + uint32_t width, height; + if (region->bufferRowLength == 0) + width = region->imageExtent.width; + else + width = region->bufferRowLength; + + if (region->bufferImageHeight == 0) + height = region->imageExtent.height; + else + height = region->bufferImageHeight; + + /* Handle copy to compressed format using a compatible format */ + width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format)); + height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format)); + + uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ? 
+ 1 : image->cpp; + uint32_t buffer_stride = width * cpp; + uint32_t buffer_offset = + buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer; + + uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask, + false, false, true); + + emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo, + buffer_offset, buffer_stride, format); + + /* Because we can't do raster loads/stores of Z/S formats we need to + * use a color tile buffer with a compatible RGBA color format instead. + * However, when we are uploading a single aspect to a combined + * depth/stencil image we have the problem that our tile buffer stores don't + * allow us to mask out the other aspect, so we always write all four RGBA + * channels to the image and we end up overwriting that other aspect with + * undefined values. To work around that, we first load the aspect we are + * not copying from the image memory into a proper Z/S tile buffer. Then we + * do our store from the color buffer for the aspect we are copying, and + * after that, we do another store from the Z/S tile buffer to restore the + * other aspect to its original value. + */ + if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + emit_image_load(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false); + } else { + assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); + emit_image_load(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_DEPTH_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false); + } + } + + cl_emit(cl, END_OF_LOADS, end); + + cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + /* Store TLB to image */ + emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, true, false); + + if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) { + if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + emit_image_store(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false, false); + } else { + assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT); + emit_image_store(job->device, cl, framebuffer, image, + VK_IMAGE_ASPECT_DEPTH_BIT, + imgrsc->baseArrayLayer + layer, imgrsc->mipLevel, + false, false, false); + } + } + + cl_emit(cl, END_OF_TILE_MARKER, end); + + cl_emit(cl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(cl); + } +} + +static void +emit_copy_buffer_to_layer(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t layer, + const VkBufferImageCopy2KHR *region) +{ + emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer, + layer, region); + emit_supertile_coordinates(job, framebuffer); +} + +void +v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + struct v3dv_meta_framebuffer *framebuffer, + const VkBufferImageCopy2KHR *region) +{ + struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL); + v3dv_return_if_oom(NULL, job); + + emit_frame_setup(job, 0, NULL); + for (int layer = 0; layer < job->frame_tiling.layers; layer++) + emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region); + 
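+
+   /* Each layer above emitted its own per-tile list and supertile
+    * coordinates; END_OF_RENDERING then closes the RCL for the whole job.
+    */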
cl_emit(rcl, END_OF_RENDERING, end); +} + +/* Figure out a TLB size configuration for a number of pixels to process. + * Beware that we can't "render" more than 4096x4096 pixels in a single job, + * if the pixel count is larger than this, the caller might need to split + * the job and call this function multiple times. + */ +static void +framebuffer_size_for_pixel_count(uint32_t num_pixels, + uint32_t *width, + uint32_t *height) +{ + assert(num_pixels > 0); + + const uint32_t max_dim_pixels = 4096; + const uint32_t max_pixels = max_dim_pixels * max_dim_pixels; + + uint32_t w, h; + if (num_pixels > max_pixels) { + w = max_dim_pixels; + h = max_dim_pixels; + } else { + w = num_pixels; + h = 1; + while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) { + w >>= 1; + h <<= 1; + } + } + assert(w <= max_dim_pixels && h <= max_dim_pixels); + assert(w * h <= num_pixels); + assert(w > 0 && h > 0); + + *width = w; + *height = h; +} + +struct v3dv_job * +v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_bo *dst, + uint32_t dst_offset, + struct v3dv_bo *src, + uint32_t src_offset, + const VkBufferCopy2KHR *region) +{ + const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; + const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; + + /* Select appropriate pixel format for the copy operation based on the + * size to copy and the alignment of the source and destination offsets. + */ + src_offset += region->srcOffset; + dst_offset += region->dstOffset; + uint32_t item_size = 4; + while (item_size > 1 && + (src_offset % item_size != 0 || dst_offset % item_size != 0)) { + item_size /= 2; + } + + while (item_size > 1 && region->size % item_size != 0) + item_size /= 2; + + assert(region->size % item_size == 0); + uint32_t num_items = region->size / item_size; + assert(num_items > 0); + + uint32_t format; + VkFormat vk_format; + switch (item_size) { + case 4: + format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI; + vk_format = VK_FORMAT_R8G8B8A8_UINT; + break; + case 2: + format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI; + vk_format = VK_FORMAT_R8G8_UINT; + break; + default: + format = V3D_OUTPUT_IMAGE_FORMAT_R8UI; + vk_format = VK_FORMAT_R8_UINT; + break; + } + + struct v3dv_job *job = NULL; + while (num_items > 0) { + job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); + if (!job) + return NULL; + + uint32_t width, height; + framebuffer_size_for_pixel_count(num_items, &width, &height); + + v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false); + + struct v3dv_meta_framebuffer framebuffer; + v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type, + &job->frame_tiling); + + v3dX(job_emit_binning_flush)(job); + + v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset, + &framebuffer, format, item_size); + + v3dv_cmd_buffer_finish_job(cmd_buffer); + + const uint32_t items_copied = width * height; + const uint32_t bytes_copied = items_copied * item_size; + num_items -= items_copied; + src_offset += bytes_copied; + dst_offset += bytes_copied; + } + + return job; +} + +void +v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_bo *bo, + uint32_t offset, + uint32_t size, + uint32_t data) +{ + assert(size > 0 && size % 4 == 0); + assert(offset + size <= bo->size); + + const uint32_t internal_bpp = V3D_INTERNAL_BPP_32; + const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI; + uint32_t num_items = size / 4; + + while (num_items > 0) { + struct v3dv_job *job = + v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL); + if 
(!job) + return; + + uint32_t width, height; + framebuffer_size_for_pixel_count(num_items, &width, &height); + + v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false); + + struct v3dv_meta_framebuffer framebuffer; + v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT, + internal_type, &job->frame_tiling); + + v3dX(job_emit_binning_flush)(job); + + v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data); + + v3dv_cmd_buffer_finish_job(cmd_buffer); + + const uint32_t items_copied = width * height; + const uint32_t bytes_copied = items_copied * 4; + num_items -= items_copied; + offset += bytes_copied; + } +} + +void +v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb, + VkFormat vk_format, + uint32_t internal_type, + const struct v3dv_frame_tiling *tiling) +{ + fb->internal_type = internal_type; + + /* Supertile coverage always starts at 0,0 */ + uint32_t supertile_w_in_pixels = + tiling->tile_width * tiling->supertile_width; + uint32_t supertile_h_in_pixels = + tiling->tile_height * tiling->supertile_height; + + fb->min_x_supertile = 0; + fb->min_y_supertile = 0; + fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels; + fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels; + + fb->vk_format = vk_format; + fb->format = v3dX(get_format)(vk_format); + + fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F; + if (vk_format_is_depth_or_stencil(vk_format)) + fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format); +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c b/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c new file mode 100644 index 000000000..8623a4537 --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c @@ -0,0 +1,654 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+#include "vk_format_info.h"
+
+static uint8_t
+blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
+{
+   switch (factor) {
+   case VK_BLEND_FACTOR_ZERO:
+   case VK_BLEND_FACTOR_ONE:
+   case VK_BLEND_FACTOR_SRC_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+   case VK_BLEND_FACTOR_DST_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+   case VK_BLEND_FACTOR_SRC_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+      return factor;
+   case VK_BLEND_FACTOR_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+      *needs_constants = true;
+      return factor;
+   case VK_BLEND_FACTOR_DST_ALPHA:
+      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
+                             V3D_BLEND_FACTOR_DST_ALPHA;
+   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
+                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
+   case VK_BLEND_FACTOR_SRC1_COLOR:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+   case VK_BLEND_FACTOR_SRC1_ALPHA:
+   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+      assert(!"Invalid blend factor: dual source blending not supported.");
+   default:
+      assert(!"Unknown blend factor.");
+   }
+
+   /* This should be handled by the switch above; it is only here to avoid
+    * an "end of non-void function" error.
+    */
+   unreachable("Unknown blend factor.");
+}
+
+static void
+pack_blend(struct v3dv_pipeline *pipeline,
+           const VkPipelineColorBlendStateCreateInfo *cb_info)
+{
+   /* By default, we are not enabling blending and all color channel writes are
+    * enabled. Color write enables are independent of whether blending is
+    * enabled or not.
+    *
+    * Vulkan specifies color write masks so that bits set correspond to
+    * enabled channels. Our hardware does it the other way around.
+    */
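+   /* Worked illustration (editor's addition; the values are hypothetical):
+    * for attachment i = 1 with colorWriteMask = R | B = 0x5, the
+    * per-attachment masking below computes
+    *
+    *    (~0x5 & 0xf) << (4 * 1) = 0xa << 4 = 0xa0
+    *
+    * so bits set in color_write_masks name the channels the hardware must
+    * NOT write (here G and A of render target 1).
+    */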
+   pipeline->blend.enables = 0;
+   pipeline->blend.color_write_masks = 0; /* All channels enabled */
+
+   if (!cb_info)
+      return;
+
+   assert(pipeline->subpass);
+   if (pipeline->subpass->color_count == 0)
+      return;
+
+   assert(pipeline->subpass->color_count == cb_info->attachmentCount);
+
+   pipeline->blend.needs_color_constants = false;
+   uint32_t color_write_masks = 0;
+   for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
+      const VkPipelineColorBlendAttachmentState *b_state =
+         &cb_info->pAttachments[i];
+
+      uint32_t attachment_idx =
+         pipeline->subpass->color_attachments[i].attachment;
+      if (attachment_idx == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
+
+      if (!b_state->blendEnable)
+         continue;
+
+      VkAttachmentDescription *desc =
+         &pipeline->pass->attachments[attachment_idx].desc;
+      const struct v3dv_format *format = v3dX(get_format)(desc->format);
+      bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
+
+      uint8_t rt_mask = 1 << i;
+      pipeline->blend.enables |= rt_mask;
+
+      v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
+         config.render_target_mask = rt_mask;
+
+         config.color_blend_mode = b_state->colorBlendOp;
+         config.color_blend_dst_factor =
+            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+         config.color_blend_src_factor =
+            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+
+         config.alpha_blend_mode = b_state->alphaBlendOp;
+         config.alpha_blend_dst_factor =
+            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+         config.alpha_blend_src_factor =
+            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
+                         &pipeline->blend.needs_color_constants);
+      }
+   }
+
+   pipeline->blend.color_write_masks = color_write_masks;
+}
+
+/* This requires that pack_blend() has been called before so we can set
+ * the overall blend enable bit in the CFG_BITS packet.
+ */
+static void
+pack_cfg_bits(struct v3dv_pipeline *pipeline,
+              const VkPipelineDepthStencilStateCreateInfo *ds_info,
+              const VkPipelineRasterizationStateCreateInfo *rs_info,
+              const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
+              const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
+
+   pipeline->msaa =
+      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
+
+   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
+      config.enable_forward_facing_primitive =
+         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
+
+      config.enable_reverse_facing_primitive =
+         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
+
+      /* Seems like the hardware is backwards regarding this setting... */
+      config.clockwise_primitives =
+         rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
+
+      config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;
+
+      /* This is required to pass line rasterization tests in CTS while
+       * exposing at least 4 bits of subpixel precision (the minimum
+       * requirement).
+       */
+      config.line_rasterization = 1; /* perp end caps */
+
+      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
+         config.direct3d_wireframe_triangles_mode = true;
+         config.direct3d_point_fill_mode =
+            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
+      }
+
+      config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
+
+      /* From the Vulkan spec:
+       *
+       *   "Provoking Vertex:
+       *
+       *       The vertex in a primitive from which flat shaded attribute
+       *       values are taken. This is generally the “first” vertex in the
+       *       primitive, and depends on the primitive topology."
+       *
+       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
+       * the last vertex by default.
+       */
+      if (pv_info) {
+         config.direct3d_provoking_vertex =
+            pv_info->provokingVertexMode ==
+               VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
+      } else {
+         config.direct3d_provoking_vertex = true;
+      }
+
+      config.blend_enable = pipeline->blend.enables != 0;
+
+      /* Disable depth/stencil if we don't have a D/S attachment */
+      bool has_ds_attachment =
+         pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
+
+      if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
+         config.z_updates_enable = ds_info->depthWriteEnable;
+         config.depth_test_function = ds_info->depthCompareOp;
+      } else {
+         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
+      }
+
+      /* EZ state will be updated at draw time based on bound pipeline state */
+      config.early_z_updates_enable = false;
+      config.early_z_enable = false;
+
+      config.stencil_enable =
+         ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;
+
+      pipeline->z_updates_enable = config.z_updates_enable;
+   };
+}
+
+static uint32_t
+translate_stencil_op(VkStencilOp op)
+{
+   switch (op) {
+   case VK_STENCIL_OP_KEEP:
+      return V3D_STENCIL_OP_KEEP;
+   case VK_STENCIL_OP_ZERO:
+      return V3D_STENCIL_OP_ZERO;
+   case VK_STENCIL_OP_REPLACE:
+      return V3D_STENCIL_OP_REPLACE;
+   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+      return V3D_STENCIL_OP_INCR;
+   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+      return V3D_STENCIL_OP_DECR;
+   case VK_STENCIL_OP_INVERT:
+      return V3D_STENCIL_OP_INVERT;
+   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+      return V3D_STENCIL_OP_INCWRAP;
+   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+      return V3D_STENCIL_OP_DECWRAP;
+   default:
+      unreachable("bad stencil op");
+   }
+}
+
+static void
+pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
+                        uint8_t *stencil_cfg,
+                        bool is_front,
+                        bool is_back,
+                        const VkStencilOpState *stencil_state)
+{
+   /* From the Vulkan spec:
+    *
+    *   "Reference is an integer reference value that is used in the unsigned
+    *    stencil comparison. The reference value used by stencil comparison
+    *    must be within the range [0,2^s-1], where s is the number of bits in
+    *    the stencil framebuffer attachment, otherwise the reference value is
+    *    considered undefined."
+    *
+    * In our case, 's' is always 8, so we clamp to that to keep our packing
+    * functions from asserting in debug mode if they see larger values.
+    *
+    * If we have dynamic state we need to make sure we set the corresponding
+    * state bits to 0, since cl_emit_with_prepacked ORs the new value with
+    * the old.
+    */
+   const uint8_t write_mask =
+      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
+         0 : stencil_state->writeMask & 0xff;
+
+   const uint8_t compare_mask =
+      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
+         0 : stencil_state->compareMask & 0xff;
+
+   const uint8_t reference =
+      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_REFERENCE ?
+         0 : stencil_state->reference & 0xff;
+
+   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
+      config.front_config = is_front;
+      config.back_config = is_back;
+      config.stencil_write_mask = write_mask;
+      config.stencil_test_mask = compare_mask;
+      config.stencil_test_function = stencil_state->compareOp;
+      config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
+      config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
+      config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
+      config.stencil_ref_value = reference;
+   }
+}
+
+static void
+pack_stencil_cfg(struct v3dv_pipeline *pipeline,
+                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
+{
+   assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
+
+   if (!ds_info || !ds_info->stencilTestEnable)
+      return;
+
+   if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
+      return;
+
+   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
+                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
+                                           V3DV_DYNAMIC_STENCIL_REFERENCE;
+
+   /* If front != back or we have dynamic stencil state we can't emit a single
+    * packet for both faces.
+    */
+   bool needs_front_and_back = false;
+   if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
+       memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
+      needs_front_and_back = true;
+
+   /* If the front and back configurations are the same we can emit both with
+    * a single packet.
+    */
+   pipeline->emit_stencil_cfg[0] = true;
+   if (!needs_front_and_back) {
+      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
+                              true, true, &ds_info->front);
+   } else {
+      pipeline->emit_stencil_cfg[1] = true;
+      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
+                              true, false, &ds_info->front);
+      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
+                              false, true, &ds_info->back);
+   }
+}
+
+void
+v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
+                          const VkPipelineColorBlendStateCreateInfo *cb_info,
+                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
+                          const VkPipelineRasterizationStateCreateInfo *rs_info,
+                          const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
+                          const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+   pack_blend(pipeline, cb_info);
+   pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ms_info);
+   pack_stencil_cfg(pipeline, ds_info);
+}
+
+static void
+pack_shader_state_record(struct v3dv_pipeline *pipeline)
+{
+   assert(sizeof(pipeline->shader_state_record) ==
+          cl_packet_length(GL_SHADER_STATE_RECORD));
+
+   struct v3d_fs_prog_data *prog_data_fs =
+      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
+
+   struct v3d_vs_prog_data *prog_data_vs =
+      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
+
+   struct v3d_vs_prog_data *prog_data_vs_bin =
+      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
+
+   /* Note: we are not packing addresses, as we need the job (see
+    * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
+    * point, as they depend on dynamic info that can be set after the
+    * pipeline is created (like the viewport). They will need to be filled
+    * in later, so we are doing a partial prepacking.
+    */
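+   /* Editor's sketch of the prepack-then-OR idea mentioned above and in
+    * pack_single_stencil_cfg() (illustrative only, not the real emit path):
+    * at draw time the prepacked bytes are combined with the dynamic state
+    * roughly as
+    *
+    *    for (int b = 0; b < cl_packet_length(GL_SHADER_STATE_RECORD); b++)
+    *       out[b] = prepacked[b] | dynamic[b];
+    *
+    * which is why any field that will be provided dynamically has to be
+    * packed as zero here.
+    */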
+   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
+      shader.enable_clipping = true;
+
+      if (!pipeline->has_gs) {
+         shader.point_size_in_shaded_vertex_data =
+            pipeline->topology == PIPE_PRIM_POINTS;
+      } else {
+         struct v3d_gs_prog_data *prog_data_gs =
+            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
+         shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
+      }
+
+      /* Must be set if the shader modifies Z, discards, or modifies
+       * the sample mask. For any of these cases, the fragment
+       * shader needs to write the Z value (even if it just discards).
+       */
+      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
+      /* Set if the EZ test must be disabled (due to shader side
+       * effects and the early_z flag not being present in the
+       * shader).
+       */
+      shader.turn_off_early_z_test = prog_data_fs->disable_ez;
+
+      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+         prog_data_fs->uses_center_w;
+
+      /* The description for gl_SampleID states that if a fragment shader reads
+       * it, then we should automatically activate per-sample shading. However,
+       * the Vulkan spec also states that if a framebuffer has no attachments:
+       *
+       *    "The subpass continues to use the width, height, and layers of the
+       *     framebuffer to define the dimensions of the rendering area, and the
+       *     rasterizationSamples from each pipeline’s
+       *     VkPipelineMultisampleStateCreateInfo to define the number of
+       *     samples used in rasterization."
+       *
+       * So in this scenario, if the pipeline doesn't enable multiple samples
+       * but the fragment shader accesses gl_SampleID we would be requested
+       * to do per-sample shading in single sample rasterization mode, which
+       * is pointless, so just disable it in that case.
+       */
+      shader.enable_sample_rate_shading =
+         pipeline->sample_rate_shading ||
+         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
+
+      shader.any_shader_reads_hardware_written_primitive_id = false;
+
+      shader.do_scoreboard_wait_on_first_thread_switch =
+         prog_data_fs->lock_scoreboard_on_first_thrsw;
+      shader.disable_implicit_point_line_varyings =
+         !prog_data_fs->uses_implicit_point_line_varyings;
+
+      shader.number_of_varyings_in_fragment_shader =
+         prog_data_fs->num_inputs;
+
+      shader.coordinate_shader_propagate_nans = true;
+      shader.vertex_shader_propagate_nans = true;
+      shader.fragment_shader_propagate_nans = true;
+
+      /* Note: see previous note about addresses */
+      /* shader.coordinate_shader_code_address */
+      /* shader.vertex_shader_code_address */
+      /* shader.fragment_shader_code_address */
+
+      /* FIXME: Use combined input/output size flag in the common case (also
+       * on v3d, see v3dx_draw).
+       */
+      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
+         prog_data_vs_bin->separate_segments;
+      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
+         prog_data_vs->separate_segments;
+
+      shader.coordinate_shader_input_vpm_segment_size =
+         prog_data_vs_bin->separate_segments ?
+            prog_data_vs_bin->vpm_input_size : 1;
+      shader.vertex_shader_input_vpm_segment_size =
+         prog_data_vs->separate_segments ?
+            prog_data_vs->vpm_input_size : 1;
+
+      shader.coordinate_shader_output_vpm_segment_size =
+         prog_data_vs_bin->vpm_output_size;
+      shader.vertex_shader_output_vpm_segment_size =
+         prog_data_vs->vpm_output_size;
+
+      /* Note: see previous note about addresses */
+      /* shader.coordinate_shader_uniforms_address */
+      /* shader.vertex_shader_uniforms_address */
+      /* shader.fragment_shader_uniforms_address */
+
+      shader.min_coord_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg_bin.As;
+      shader.min_vertex_shader_input_segments_required_in_play =
+         pipeline->vpm_cfg.As;
+
+      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         pipeline->vpm_cfg_bin.Ve;
+      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+         pipeline->vpm_cfg.Ve;
+
+      shader.coordinate_shader_4_way_threadable =
+         prog_data_vs_bin->base.threads == 4;
+      shader.vertex_shader_4_way_threadable =
+         prog_data_vs->base.threads == 4;
+      shader.fragment_shader_4_way_threadable =
+         prog_data_fs->base.threads == 4;
+
+      shader.coordinate_shader_start_in_final_thread_section =
+         prog_data_vs_bin->base.single_seg;
+      shader.vertex_shader_start_in_final_thread_section =
+         prog_data_vs->base.single_seg;
+      shader.fragment_shader_start_in_final_thread_section =
+         prog_data_fs->base.single_seg;
+
+      shader.vertex_id_read_by_coordinate_shader =
+         prog_data_vs_bin->uses_vid;
+      shader.base_instance_id_read_by_coordinate_shader =
+         prog_data_vs_bin->uses_biid;
+      shader.instance_id_read_by_coordinate_shader =
+         prog_data_vs_bin->uses_iid;
+      shader.vertex_id_read_by_vertex_shader =
+         prog_data_vs->uses_vid;
+      shader.base_instance_id_read_by_vertex_shader =
+         prog_data_vs->uses_biid;
+      shader.instance_id_read_by_vertex_shader =
+         prog_data_vs->uses_iid;
+
+      /* Note: see previous note about addresses */
+      /* shader.address_of_default_attribute_values */
+   }
+}
+
+static void
+pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
+{
+   assert(sizeof(pipeline->vcm_cache_size) ==
+          cl_packet_length(VCM_CACHE_SIZE));
+
+   v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
+      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
+      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
+   }
+}
+
+/* As defined in the GL_SHADER_STATE_ATTRIBUTE_RECORD */
+static uint8_t
+get_attr_type(const struct util_format_description *desc)
+{
+   uint32_t r_size = desc->channel[0].size;
+   uint8_t attr_type = ATTRIBUTE_FLOAT;
+
+   switch (desc->channel[0].type) {
+   case UTIL_FORMAT_TYPE_FLOAT:
+      if (r_size == 32) {
+         attr_type = ATTRIBUTE_FLOAT;
+      } else {
+         assert(r_size == 16);
+         attr_type = ATTRIBUTE_HALF_FLOAT;
+      }
+      break;
+
+   case UTIL_FORMAT_TYPE_SIGNED:
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+      switch (r_size) {
+      case 32:
+         attr_type = ATTRIBUTE_INT;
+         break;
+      case 16:
+         attr_type = ATTRIBUTE_SHORT;
+         break;
+      case 10:
+         attr_type = ATTRIBUTE_INT2_10_10_10;
+         break;
+      case 8:
+         attr_type = ATTRIBUTE_BYTE;
+         break;
+      default:
+         fprintf(stderr,
+                 "format %s unsupported\n",
+                 desc->name);
+         attr_type = ATTRIBUTE_BYTE;
+         abort();
+      }
+      break;
+
+   default:
+      fprintf(stderr,
+              "format %s unsupported\n",
+              desc->name);
+      abort();
+   }
+
+   return attr_type;
+}
+
+static void
+pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
+                                   uint32_t index,
+                                   const VkVertexInputAttributeDescription *vi_desc)
+{
+   const uint32_t packet_length =
+      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+
+   const struct util_format_description *desc =
vk_format_description(vi_desc->format); + + uint32_t binding = vi_desc->binding; + + v3dvx_pack(&pipeline->vertex_attrs[index * packet_length], + GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { + + /* vec_size == 0 means 4 */ + attr.vec_size = desc->nr_channels & 3; + attr.signed_int_type = (desc->channel[0].type == + UTIL_FORMAT_TYPE_SIGNED); + attr.normalized_int_type = desc->channel[0].normalized; + attr.read_as_int_uint = desc->channel[0].pure_integer; + + attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor, + 0xffff); + attr.stride = pipeline->vb[binding].stride; + attr.type = get_attr_type(desc); + } +} + +void +v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *vi_info, + const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info) +{ + pack_shader_state_record(pipeline); + pack_vcm_cache_size(pipeline); + + pipeline->vb_count = vi_info->vertexBindingDescriptionCount; + for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->vb[desc->binding].stride = desc->stride; + pipeline->vb[desc->binding].instance_divisor = desc->inputRate; + } + + if (vd_info) { + for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) { + const VkVertexInputBindingDivisorDescriptionEXT *desc = + &vd_info->pVertexBindingDivisors[i]; + + pipeline->vb[desc->binding].instance_divisor = desc->divisor; + } + } + + pipeline->va_count = 0; + struct v3d_vs_prog_data *prog_data_vs = + pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs; + + for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &vi_info->pVertexAttributeDescriptions[i]; + uint32_t location = desc->location + VERT_ATTRIB_GENERIC0; + + /* We use a custom driver_location_map instead of + * nir_find_variable_with_location because if we were able to get the + * shader variant from the cache, we would not have the nir shader + * available. + */ + uint32_t driver_location = + prog_data_vs->driver_location_map[location]; + + if (driver_location != -1) { + assert(driver_location < MAX_VERTEX_ATTRIBS); + pipeline->va[driver_location].offset = desc->offset; + pipeline->va[driver_location].binding = desc->binding; + pipeline->va[driver_location].vk_format = desc->format; + + pack_shader_state_attribute_record(pipeline, driver_location, desc); + + pipeline->va_count++; + } + } +} diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_private.h b/lib/mesa/src/broadcom/vulkan/v3dvx_private.h new file mode 100644 index 000000000..ab134225a --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_private.h @@ -0,0 +1,314 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* This file generates the per-v3d-version function prototypes. It must only + * be included from v3dv_private.h. + */ + +#ifndef V3DV_PRIVATE_H +#error This file is included by means other than v3dv_private.h +#endif + +/* Used at v3dv_cmd_buffer */ +void +v3dX(job_emit_binning_flush)(struct v3dv_job *job); + +void +v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect); + +void +v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(job_emit_binning_prolog)(struct v3dv_job *job, + const struct v3dv_frame_tiling *tiling, + uint32_t layers); + +void +v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary, + uint32_t cmd_buffer_count, + const VkCommandBuffer *cmd_buffers); + +void +v3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer); + + +void +v3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_draw_info *info); + + +void +v3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer); + +void +v3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +void +v3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_buffer *buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +void +v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_buffer *buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +void +v3dX(get_hw_clear_color)(const VkClearColorValue *color, + uint32_t internal_type, + uint32_t internal_size, + uint32_t *hw_color); + +void +v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer, + int rt, + uint32_t *rt_bpp, + uint32_t *rt_type, + uint32_t *rt_clamp); + +/* Used at v3dv_device */ + +void +v3dX(pack_sampler_state)(struct v3dv_sampler *sampler, + const VkSamplerCreateInfo *pCreateInfo, + const VkSamplerCustomBorderColorCreateInfoEXT *bc_info); + +void +v3dX(framebuffer_compute_internal_bpp_msaa)(const struct 
v3dv_framebuffer *framebuffer, + const struct v3dv_subpass *subpass, + uint8_t *max_bpp, bool *msaa); + +#ifdef DEBUG +void +v3dX(device_check_prepacked_sizes)(void); +#endif + +/* Used at v3dv_format */ +const struct v3dv_format * +v3dX(get_format)(VkFormat); + +void +v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, + uint32_t *type, + uint32_t *bpp); + +bool +v3dX(format_supports_tlb_resolve)(const struct v3dv_format *format); + +bool +v3dX(format_supports_blending)(const struct v3dv_format *format); + +bool +v3dX(tfu_supports_tex_format)(uint32_t tex_format); + +/* Used at v3dv_image */ + +void +v3dX(pack_texture_shader_state)(struct v3dv_device *device, + struct v3dv_image_view *iview); + +void +v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device, + struct v3dv_buffer_view *buffer_view); + +/* Used at v3dv_meta_* */ + +uint32_t +v3dX(zs_buffer_from_aspect_bits)(VkImageAspectFlags aspects); + +uint8_t +v3dX(get_internal_depth_type)(VkFormat format); + +struct v3dv_meta_framebuffer; + +void +v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job, + struct v3dv_buffer *buffer, + struct v3dv_image *image, + struct v3dv_meta_framebuffer *framebuffer, + const VkBufferImageCopy2KHR *region); + +void +v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct v3dv_meta_framebuffer *framebuffer, + const VkImageResolve2KHR *region); + +void +v3dX(meta_emit_copy_buffer)(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t format, + uint32_t item_size); + +void +v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job, + struct v3dv_bo *dst, + struct v3dv_bo *src, + uint32_t dst_offset, + uint32_t src_offset, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t format, + uint32_t item_size); + +void +v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job, + struct v3dv_image *dst, + struct v3dv_image *src, + struct v3dv_meta_framebuffer *framebuffer, + const VkImageCopy2KHR *region); + +void +v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_image *dst, + uint32_t dst_mip_level, + uint32_t dst_layer, + struct v3dv_image *src, + uint32_t src_mip_level, + uint32_t src_layer, + uint32_t width, + uint32_t height, + const struct v3dv_format *format); + +void +v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_meta_framebuffer *framebuffer, + const union v3dv_clear_value *clear_value, + VkImageAspectFlags aspects, + uint32_t min_layer, + uint32_t max_layer, + uint32_t level); + +void +v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job, + struct v3dv_bo *bo, + uint32_t offset, + struct v3dv_meta_framebuffer *framebuffer, + uint32_t data); + +void +v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job, + struct v3dv_image *image, + struct v3dv_buffer *buffer, + struct v3dv_meta_framebuffer *framebuffer, + const VkBufferImageCopy2KHR *region); + +void +v3dX(get_internal_type_bpp_for_image_aspects)(VkFormat vk_format, + VkImageAspectFlags aspect_mask, + uint32_t *internal_type, + uint32_t *internal_bpp); + +struct v3dv_job * +v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_bo *dst, + uint32_t dst_offset, + struct v3dv_bo *src, + uint32_t src_offset, + const VkBufferCopy2KHR *region); + +void +v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_bo *bo, + uint32_t offset, + 
uint32_t size, + uint32_t data); + +void +v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb, + VkFormat vk_format, + uint32_t internal_type, + const struct v3dv_frame_tiling *tiling); + +/* Used at v3dv_pipeline */ +void +v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *cb_info, + const VkPipelineDepthStencilStateCreateInfo *ds_info, + const VkPipelineRasterizationStateCreateInfo *rs_info, + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info, + const VkPipelineMultisampleStateCreateInfo *ms_info); +void +v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline, + const VkPipelineVertexInputStateCreateInfo *vi_info, + const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info); +/* Used at v3dv_queue */ +void +v3dX(job_emit_noop)(struct v3dv_job *job); + +/* Used at v3dv_descriptor_set, and other descriptor set utils */ +uint32_t v3dX(descriptor_bo_size)(VkDescriptorType type); + +uint32_t v3dX(max_descriptor_bo_size)(void); + +uint32_t v3dX(combined_image_sampler_texture_state_offset)(void); + +uint32_t v3dX(combined_image_sampler_sampler_state_offset)(void); diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c b/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c new file mode 100644 index 000000000..38f9efbfa --- /dev/null +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c @@ -0,0 +1,108 @@ +/* + * Copyright © 2021 Raspberry Pi + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "v3dv_private.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +void +v3dX(job_emit_noop)(struct v3dv_job *job) +{ + v3dv_job_start_frame(job, 1, 1, 1, true, 1, V3D_INTERNAL_BPP_32, false); + v3dX(job_emit_binning_flush)(job); + + struct v3dv_cl *rcl = &job->rcl; + v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 * + cl_packet_length(SUPERTILE_COORDINATES)); + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) { + config.early_z_disable = true; + config.image_width_pixels = 1; + config.image_height_pixels = 1; + config.number_of_render_targets = 1; + config.multisample_mode_4x = false; + config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32; + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) { + rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32; + rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8; + rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE; + } + + cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) { + clear.z_clear_value = 1.0f; + clear.stencil_clear_value = 0; + }; + + cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) { + init.use_auto_chained_tile_lists = true; + init.size_of_first_block_in_chained_tile_lists = + TILE_ALLOCATION_BLOCK_SIZE_64B; + } + + cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) { + list.address = v3dv_cl_address(job->tile_alloc, 0); + } + + cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) { + config.number_of_bin_tile_lists = 1; + config.total_frame_width_in_tiles = 1; + config.total_frame_height_in_tiles = 1; + config.supertile_width_in_tiles = 1; + config.supertile_height_in_tiles = 1; + config.total_frame_width_in_supertiles = 1; + config.total_frame_height_in_supertiles = 1; + } + + struct v3dv_cl *icl = &job->indirect; + v3dv_cl_ensure_space(icl, 200, 1); + struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl); + + cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords); + + cl_emit(icl, END_OF_LOADS, end); + + cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch); + + cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) { + store.buffer_to_store = NONE; + } + + cl_emit(icl, END_OF_TILE_MARKER, end); + + cl_emit(icl, RETURN_FROM_SUB_LIST, ret); + + cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) { + branch.start = tile_list_start; + branch.end = v3dv_cl_get_address(icl); + } + + cl_emit(rcl, SUPERTILE_COORDINATES, coords) { + coords.column_number_in_supertiles = 0; + coords.row_number_in_supertiles = 0; + } + + cl_emit(rcl, END_OF_RENDERING, end); +} diff --git a/lib/mesa/src/broadcom/vulkan/vk_format_info.h b/lib/mesa/src/broadcom/vulkan/vk_format_info.h index 3490ededf..da85cb5b5 100644 --- a/lib/mesa/src/broadcom/vulkan/vk_format_info.h +++ b/lib/mesa/src/broadcom/vulkan/vk_format_info.h @@ -50,6 +50,24 @@ vk_format_is_uint(VkFormat format) } static inline bool +vk_format_is_unorm(VkFormat format) +{ + return util_format_is_unorm(vk_format_to_pipe_format(format)); +} + +static inline bool +vk_format_is_snorm(VkFormat format) +{ + return util_format_is_snorm(vk_format_to_pipe_format(format)); +} + +static inline bool +vk_format_is_float(VkFormat format) +{ + return util_format_is_float(vk_format_to_pipe_format(format)); +} + +static inline bool vk_format_is_srgb(VkFormat format) { return util_format_is_srgb(vk_format_to_pipe_format(format)); |
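The buffer copy and fill paths above turn a linear byte range into a series of
2D "render" jobs: meta_copy_buffer() first picks the widest texel size (4, 2
or 1 bytes, i.e. RGBA8UI, RG8UI or R8UI) that the offsets and copy size
allow, and each loop iteration then shapes the remaining items into a
width x height grid no larger than 4096x4096. The following self-contained
sketch mirrors just that arithmetic so the chunking can be checked on the
host. It is an editor's illustration of the logic above, not driver code;
the helper names are invented for the example and nothing here touches the
hardware.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of framebuffer_size_for_pixel_count(): shape num_pixels into a
 * W x H grid with W, H <= 4096 and W * H <= num_pixels, halving the width
 * while it is even and more than twice the height so the result stays
 * roughly balanced.
 */
static void
size_for_pixel_count(uint32_t num_pixels, uint32_t *width, uint32_t *height)
{
   assert(num_pixels > 0);
   const uint32_t max_dim = 4096;
   uint32_t w, h;
   if (num_pixels > max_dim * max_dim) {
      w = h = max_dim;
   } else {
      w = num_pixels;
      h = 1;
      while (w > max_dim || ((w % 2) == 0 && w > 2 * h)) {
         w >>= 1;
         h <<= 1;
      }
   }
   *width = w;
   *height = h;
}

/* Mirror of the item-size selection in meta_copy_buffer(): the largest
 * power-of-two texel size (4, 2 or 1 bytes) that divides both offsets and
 * the copy size.
 */
static uint32_t
item_size_for_copy(uint32_t src_offset, uint32_t dst_offset, uint32_t size)
{
   uint32_t item_size = 4;
   while (item_size > 1 &&
          (src_offset % item_size != 0 || dst_offset % item_size != 0))
      item_size /= 2;
   while (item_size > 1 && size % item_size != 0)
      item_size /= 2;
   return item_size;
}

int
main(void)
{
   /* A 64 MB copy whose offsets are only 2-byte aligned falls back to
    * 2-byte items (RG8UI).
    */
   const uint32_t size = 64 * 1024 * 1024;
   uint32_t item_size = item_size_for_copy(2, 6, size);
   uint32_t num_items = size / item_size;

   /* Walk the same loop as meta_copy_buffer(), printing each job; this
    * copy needs two full 4096x4096 jobs.
    */
   while (num_items > 0) {
      uint32_t w, h;
      size_for_pixel_count(num_items, &w, &h);
      printf("job: %" PRIu32 " x %" PRIu32 " items of %" PRIu32 " bytes\n",
             w, h, item_size);
      num_items -= w * h;
   }
   return 0;
}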