author    Jonathan Gray <jsg@cvs.openbsd.org>    2022-02-24 01:57:18 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>    2022-02-24 01:57:18 +0000
commit    b24b5b9049e889ee4eb39b565bcc8d48bd45ab48 (patch)
tree      658ca4e6b41655f49463c85edbaeda48979c394c /lib/mesa/src/broadcom
parent    57768bbb154c2879d34ec20e401b19472e77aaf7 (diff)
Import Mesa 21.3.7
Diffstat (limited to 'lib/mesa/src/broadcom')
-rw-r--r--  lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml | 49
-rw-r--r--  lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt | 153
-rw-r--r--  lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml | 25
-rw-r--r--  lib/mesa/src/broadcom/ci/gitlab-ci.yml | 113
-rw-r--r--  lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt | 330
-rw-r--r--  lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt | 11
-rw-r--r--  lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt | 40
-rw-r--r--  lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt | 5
-rw-r--r--  lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt | 21
-rw-r--r--  lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt | 1611
-rw-r--r--  lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt | 39
-rw-r--r--  lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt | 46
-rw-r--r--  lib/mesa/src/broadcom/cle/v3d_decoder.c | 6
-rw-r--r--  lib/mesa/src/broadcom/cle/v3d_packet_v33.xml | 12
-rw-r--r--  lib/mesa/src/broadcom/clif/clif_dump.c | 6
-rw-r--r--  lib/mesa/src/broadcom/clif/clif_dump.h | 2
-rw-r--r--  lib/mesa/src/broadcom/clif/clif_private.h | 5
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_debug.c | 89
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_debug.h | 9
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_limits.h | 2
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_tiling.c | 492
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_tiling.h | 80
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_util.c | 88
-rw-r--r--  lib/mesa/src/broadcom/common/v3d_util.h | 37
-rw-r--r--  lib/mesa/src/broadcom/compiler/nir_to_vir.c | 520
-rw-r--r--  lib/mesa/src/broadcom/compiler/qpu_schedule.c | 67
-rw-r--r--  lib/mesa/src/broadcom/compiler/v3d_compiler.h | 112
-rw-r--r--  lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c | 57
-rw-r--r--  lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c | 37
-rw-r--r--  lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c | 6
-rw-r--r--  lib/mesa/src/broadcom/compiler/vir.c | 476
-rw-r--r--  lib/mesa/src/broadcom/compiler/vir_live_variables.c | 155
-rw-r--r--  lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c | 9
-rw-r--r--  lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c | 11
-rw-r--r--  lib/mesa/src/broadcom/compiler/vir_register_allocate.c | 215
-rw-r--r--  lib/mesa/src/broadcom/compiler/vir_to_qpu.c | 15
-rw-r--r--  lib/mesa/src/broadcom/meson.build | 21
-rw-r--r--  lib/mesa/src/broadcom/qpu/qpu_disasm.h | 6
-rw-r--r--  lib/mesa/src/broadcom/qpu/qpu_instr.c | 6
-rw-r--r--  lib/mesa/src/broadcom/qpu/qpu_instr.h | 3
-rw-r--r--  lib/mesa/src/broadcom/qpu/qpu_pack.c | 263
-rw-r--r--  lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c | 1
-rw-r--r--  lib/mesa/src/broadcom/simulator/v3d_simulator.c | 175
-rw-r--r--  lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp | 12
-rw-r--r--  lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h | 4
-rw-r--r--  lib/mesa/src/broadcom/simulator/v3dx_simulator.c | 280
-rw-r--r--  lib/mesa/src/broadcom/simulator/v3dx_simulator.h | 6
-rw-r--r--  lib/mesa/src/broadcom/vulkan/meson.build | 82
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_bo.c | 1
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_bo.h | 1
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_cl.c | 11
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_cl.h | 14
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c | 2676
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c | 404
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_device.c | 980
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_formats.c | 416
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_image.c | 530
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_limits.h | 23
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c | 1226
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h | 74
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c | 2678
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_pass.c | 87
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c | 1669
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c | 191
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_private.h | 710
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_query.c | 121
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_queue.c | 598
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c | 268
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dv_wsi.c | 238
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c | 2281
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c | 98
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_device.c | 368
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_formats.c | 465
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_image.c | 198
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c | 1357
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c | 654
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_private.h | 314
-rw-r--r--  lib/mesa/src/broadcom/vulkan/v3dvx_queue.c | 108
-rw-r--r--  lib/mesa/src/broadcom/vulkan/vk_format_info.h | 18
79 files changed, 15600 insertions(+), 8987 deletions(-)
diff --git a/lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml b/lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml
new file mode 100644
index 000000000..659a4ca9c
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/deqp-v3d-rpi4-gles.toml
@@ -0,0 +1,49 @@
+[[deqp]]
+deqp = "/deqp/modules/gles31/deqp-gles31"
+caselists = [ "/deqp/mustpass/gles31-master.txt" ]
+deqp_args = [
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-surface-width=256",
+ "--deqp-visibility=hidden",
+]
+version_check = "GL ES 3.1.*git"
+renderer_check = "V3D"
+
+[[deqp]]
+deqp = "/deqp/modules/gles3/deqp-gles3"
+caselists = [ "/deqp/mustpass/gles3-master.txt" ]
+deqp_args = [
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-surface-width=256",
+ "--deqp-visibility=hidden",
+]
+
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = [ "/deqp/mustpass/gles2-master.txt" ]
+deqp_args = [
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-surface-width=256",
+ "--deqp-visibility=hidden",
+]
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [
+ "/deqp/mustpass/gles31-khr-master.txt",
+ "/deqp/mustpass/gles3-khr-master.txt",
+ "/deqp/mustpass/gles2-khr-master.txt",
+]
+deqp_args = [
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-surface-width=256",
+ "--deqp-visibility=hidden",
+]
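
[Editor's note: the four [[deqp]] tables above form a deqp-runner suite description — one dEQP binary per table, with its caselists and invocation arguments. A minimal sketch of reading one, assuming Python 3.11+ for tomllib; the file name is illustrative, and the real runner does more (sharding, version_check/renderer_check enforcement) than shown here.]

    import tomllib

    # Load a deqp-runner suite file and print one command line per caselist.
    with open("deqp-v3d-rpi4-gles.toml", "rb") as f:
        suite = tomllib.load(f)

    for entry in suite["deqp"]:                      # each [[deqp]] table
        cmd = [entry["deqp"], *entry.get("deqp_args", [])]
        for caselist in entry["caselists"]:
            print(" ".join(cmd + ["--deqp-caselist-file", caselist]))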
diff --git a/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt b/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt
index 7a673b01f..6379afbe3 100644
--- a/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt
+++ b/lib/mesa/src/broadcom/ci/deqp-v3dv-rpi4-fails.txt
@@ -1,148 +1,5 @@
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.general_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a2b10g10r10_uint_pack32.a2b10g10r10_uint_pack32.optimal_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.general_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_sint_pack32.a8b8g8r8_sint_pack32.optimal_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.general_optimal_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_general_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_general_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_general_linear_stripes_z,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_linear_stripes_x,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_linear_stripes_y,Fail
-dEQP-VK.api.copy_and_blit.core.blit_image.all_formats.color.3d.a8b8g8r8_uint_pack32.a8b8g8r8_uint_pack32.optimal_optimal_linear_stripes_z,Fail
-dEQP-VK.pipeline.logic_op.r16_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r16_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r16g16_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.copy_inverted,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.equivalent,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.invert,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.nand,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.nor,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.or_inverted,Fail
-dEQP-VK.pipeline.logic_op.r16g16b16a16_uint.or_reverse,Fail
-dEQP-VK.pipeline.logic_op.r32_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r32_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r32g32_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r8_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.and,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.and_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.and_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.clear,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.copy_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.equivalent,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.invert,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.nand,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.no_op,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.nor,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.or,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.or_inverted,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.or_reverse,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.set,Crash
-dEQP-VK.pipeline.logic_op.r8g8_uint.xor,Crash
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.copy_inverted,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.equivalent,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.invert,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.nand,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.nor,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.or_inverted,Fail
-dEQP-VK.pipeline.logic_op.r8g8b8a8_uint.or_reverse,Fail
-dEQP-VK.spirv_assembly.instruction.compute.vector_shuffle.vector_shuffle,Fail
-dEQP-VK.synchronization.basic.binary_semaphore.chain,Fail
-dEQP-VK.ycbcr.query.levels.geometry.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.levels.tess_control.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.levels.tess_eval.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.size_lod.geometry.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.size_lod.tess_control.r8g8b8a8_unorm,Crash
-dEQP-VK.ycbcr.query.size_lod.tess_eval.r8g8b8a8_unorm,Crash
+# This seems to fail due to the test error threshold being insufficient
+dEQP-VK.geometry.input.basic_primitive.line_strip_adjacency,Fail
+
+# CTS bug; fix submitted
+dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_single_buffer_geom,Fail
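
[Editor's note: files like this one are expectation lists — one "test-name,Status" pair per line (the statuses seen here are Fail and Crash), with '#' comments and blank lines ignored. A minimal parsing sketch under that assumption; parse_expectations is a hypothetical helper, not part of this commit.]

    def parse_expectations(path):
        # Map test name -> expected status (e.g. "Fail", "Crash").
        expected = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                name, _, status = line.rpartition(",")
                expected[name] = status
        return expected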
diff --git a/lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml b/lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml
new file mode 100644
index 000000000..218cb1835
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/deqp-vc4-rpi3-gles.toml
@@ -0,0 +1,25 @@
+[[deqp]]
+deqp = "/deqp/modules/gles2/deqp-gles2"
+caselists = [ "/deqp/mustpass/gles2-master.txt" ]
+tests_per_group = 250
+deqp_args = [
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-surface-width=256",
+ "--deqp-visibility=hidden",
+]
+version_check = "GL ES 2.0.*git"
+renderer_check = "VC4"
+
+[[deqp]]
+deqp = "/deqp/external/openglcts/modules/glcts"
+caselists = [ "/deqp/mustpass/gles2-khr-master.txt" ]
+tests_per_group = 250
+deqp_args = [
+ "--deqp-gl-config-name=rgba8888d24s8ms0",
+ "--deqp-surface-height=256",
+ "--deqp-surface-type=pbuffer",
+ "--deqp-surface-width=256",
+ "--deqp-visibility=hidden",
+]
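
[Editor's note: this suite sets tests_per_group = 250, halving deqp-runner's default group size of 500; per the comment removed from gitlab-ci.yml below, vc4 is slow enough that a full default group can trip the serial watchdog. A sketch of the grouping this parameter controls; chunk_caselist is hypothetical.]

    def chunk_caselist(tests, tests_per_group=250):
        # One deqp invocation per chunk, so a slow device reports
        # progress often enough not to look hung to the watchdog.
        for i in range(0, len(tests), tests_per_group):
            yield tests[i:i + tests_per_group]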
diff --git a/lib/mesa/src/broadcom/ci/gitlab-ci.yml b/lib/mesa/src/broadcom/ci/gitlab-ci.yml
index c3d28777b..4f70ef1e1 100644
--- a/lib/mesa/src/broadcom/ci/gitlab-ci.yml
+++ b/lib/mesa/src/broadcom/ci/gitlab-ci.yml
@@ -2,32 +2,38 @@
extends:
- .baremetal-test-armhf
- .vc4-rules
- - .use-arm_test
+ - .use-debian/arm_test
variables:
BM_BOOTFS: /boot/raspberrypi_armhf
- BM_KERNEL_MODULES: vc4
BM_ROOTFS: /rootfs-armhf
GPU_VERSION: vc4-rpi3
- DEQP_EXPECTED_RENDERER: VC4
+ HWCI_KERNEL_MODULES: vc4
+ FLAKES_CHANNEL: "#videocore-ci"
script:
- ./install/bare-metal/poe-powered.sh
needs:
- - job: arm_test
+ - job: debian/arm_test
artifacts: false
- - meson-armhf
+ - debian-armhf
tags:
- igalia-rpi3
-vc4-rpi3-gles2:armhf:
+vc4-rpi3-gles:armhf:
extends:
- .vc4-rpi3-test:armhf
- parallel: 4
+ parallel: 2
variables:
- BARE_METAL_TEST_SCRIPT: "/install/deqp-runner.sh"
- DEQP_VER: gles2
- # The vc4s are so slow that it takes about a minute to get through the
- # default 500 tests in a group, triggering the serial watchdog.
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
+ DEQP_SUITE: vc4-rpi3-gles
+
+vc4-rpi3-egl:armhf:
+ extends:
+ - .vc4-rpi3-test:armhf
+ variables:
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
+ HWCI_START_XORG: 1
DEQP_RUNNER_OPTIONS: "--tests-per-group 250"
+ DEQP_VER: egl
.vc4-rpi3-piglit:armhf:
extends:
@@ -35,9 +41,9 @@ vc4-rpi3-gles2:armhf:
- .vc4-rpi3-test:armhf
- .test-manual
variables:
- BARE_METAL_TEST_SCRIPT: "/install/piglit/piglit-runner.sh"
+ HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh"
BM_POE_TIMEOUT: 180
- BM_START_XORG: 1
+ HWCI_START_XORG: 1
PIGLIT_PLATFORM: mixed_glx_egl
vc4-rpi3-piglit-quick_gl:armhf:
@@ -60,89 +66,72 @@ vc4-rpi3-piglit-quick_shader:armhf:
extends:
- .baremetal-test-armhf
- .v3d-rules
- - .use-arm_test
+ - .use-debian/arm_test
variables:
- BARE_METAL_TEST_SCRIPT: "/install/deqp-runner.sh"
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
BM_BOOTFS: /boot/raspberrypi_armhf
- BM_KERNEL_MODULES: v3d,vc4
BM_POE_TIMEOUT: 300
BM_ROOTFS: /rootfs-armhf
- DEQP_EXPECTED_RENDERER: V3D
+ FLAKES_CHANNEL: "#videocore-ci"
GPU_VERSION: v3d-rpi4
+ HWCI_KERNEL_MODULES: v3d,vc4
script:
- ./install/bare-metal/poe-powered.sh
needs:
- - arm_test
- - meson-armhf
+ - debian/arm_test
+ - debian-armhf
tags:
- igalia-rpi4
-v3d-rpi4-gles31:armhf:
- extends:
- - .v3d-rpi4-test:armhf
- parallel: 2
- variables:
- DEQP_VER: gles31
-
-v3d-rpi4-gles3:armhf:
+v3d-rpi4-gles:armhf:
extends:
- .v3d-rpi4-test:armhf
- parallel: 4
+ parallel: 8
variables:
- DEQP_VER: gles3
+ DEQP_SUITE: v3d-rpi4-gles
-v3d-rpi4-gles2:armhf:
+v3d-rpi4-egl:armhf:
extends:
- .v3d-rpi4-test:armhf
variables:
- DEQP_VER: gles2
+ HWCI_START_XORG: 1
+ DEQP_VER: egl
-.v3d-rpi4-piglit:armhf:
+v3d-rpi4-piglit:armhf:
extends:
- .piglit-test
- .v3d-rpi4-test:armhf
- - .test-manual
+ parallel: 4
variables:
- BARE_METAL_TEST_SCRIPT: "/install/piglit/piglit-runner.sh"
- BM_START_XORG: 1
+ HWCI_TEST_SCRIPT: "/install/piglit/piglit-runner.sh"
+ HWCI_START_XORG: 1
PIGLIT_PLATFORM: mixed_glx_egl
+ PIGLIT_PROFILES: all
-v3d-rpi4-piglit-quick_gl:armhf:
+v3dv-rpi4-vk:arm64:
extends:
- - .v3d-rpi4-piglit:armhf
- parallel: 2
- variables:
- PIGLIT_PROFILES: quick_gl
-
-v3d-rpi4-piglit-quick_shader:armhf:
- extends:
- - .v3d-rpi4-piglit:armhf
- variables:
- PIGLIT_PROFILES: quick_shader
-
-v3dv-rpi4-vk:armhf:
- extends:
- - .baremetal-test-armhf
- - .use-arm_test
+ - .baremetal-test
+ - .use-debian/arm_test
- .v3dv-rules
- parallel: 6
+ parallel: 8
variables:
- BARE_METAL_TEST_SCRIPT: "/install/deqp-runner.sh"
- BM_BOOTFS: /boot/raspberrypi_armhf
- BM_KERNEL_MODULES: v3d,vc4
+ HWCI_TEST_SCRIPT: "/install/deqp-runner.sh"
+ BM_BOOTFS: /boot/raspberrypi_arm64
BM_POE_TIMEOUT: 300
- BM_ROOTFS: /rootfs-armhf
- CPU: arm7hlf
- DEQP_EXPECTED_RENDERER: "V3D 4.2"
- DEQP_FRACTION: 7
+ BM_ROOTFS: /rootfs-arm64
+ DEQP_EXPECTED_RENDERER: "V3D.4.2"
+ DEQP_FRACTION: 5
DEQP_VER: vk
+ FLAKES_CHANNEL: "#videocore-ci"
GPU_VERSION: v3dv-rpi4
- VK_CPU: arm7hlf
+ HWCI_KERNEL_MODULES: v3d,vc4
+ MINIO_ARTIFACT_NAME: mesa-arm64
VK_DRIVER: broadcom
script:
- ./install/bare-metal/poe-powered.sh
needs:
- - arm_test
- - meson-armhf
+ - debian/arm_test
+ - job: debian-arm64
+ artifacts: false
tags:
- igalia-rpi4
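
[Editor's note: the new DEQP_SUITE values map onto the suite files added in this commit — DEQP_SUITE: vc4-rpi3-gles pairs with deqp-vc4-rpi3-gles.toml, and DEQP_SUITE: v3d-rpi4-gles with deqp-v3d-rpi4-gles.toml. A sketch of that resolution, assuming the deqp-runner.sh harness simply prefixes "deqp-" and the ci directory; the path layout is an assumption, not taken from this commit.]

    import os

    def suite_path(deqp_suite, ci_dir="lib/mesa/src/broadcom/ci"):
        # DEQP_SUITE=vc4-rpi3-gles -> .../ci/deqp-vc4-rpi3-gles.toml
        return os.path.join(ci_dir, f"deqp-{deqp_suite}.toml")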
diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt
new file mode 100644
index 000000000..c0d90c2d2
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-fails.txt
@@ -0,0 +1,330 @@
+glx@glx-make-current,Crash
+glx@glx-multi-window-single-context,Fail
+glx@glx-multithread-buffer,Fail
+glx@glx-query-drawable-glx_fbconfig_id-window,Fail
+glx@glx-swap-pixmap-bad,Fail
+glx@glx-visuals-depth -pixmap,Crash
+glx@glx-visuals-stencil -pixmap,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+shaders@glsl-bug-110796,Fail
+spec@!opengl 1.0@gl-1.0-bitmap-heart-dance,Fail
+spec@!opengl 1.0@gl-1.0-dlist-bitmap,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@gl-1.0-spot-light,Fail
+spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=2,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=2,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=4,Fail
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2,Fail
+spec@!opengl 1.1@getteximage-depth,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT16,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT24,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT32,Fail
+spec@!opengl 1.1@getteximage-depth@GL_TEXTURE_1D_ARRAY-GL_DEPTH_COMPONENT,Fail
+spec@!opengl 1.1@getteximage-formats,Fail
+spec@!opengl 1.1@linestipple,Fail
+spec@!opengl 1.1@linestipple@Factor 2x,Fail
+spec@!opengl 1.1@linestipple@Factor 3x,Fail
+spec@!opengl 1.1@linestipple@Line loop,Fail
+spec@!opengl 1.1@linestipple@Line strip,Fail
+spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail
+spec@!opengl 1.1@point-line-no-cull,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@texwrap formats bordercolor,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_INTENSITY12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_INTENSITY16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE12_ALPHA12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE12_ALPHA4- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_LUMINANCE16_ALPHA16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGB12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGB16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGBA12- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor@GL_RGBA16- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_INTENSITY12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_INTENSITY16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE12_ALPHA12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE12_ALPHA4- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_LUMINANCE16_ALPHA16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGB16- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA12- swizzled- border color only,Fail
+spec@!opengl 1.1@texwrap formats bordercolor-swizzled@GL_RGBA16- swizzled- border color only,Fail
+spec@!opengl 1.1@windowoverlap,Fail
+spec@!opengl 1.4@gl-1.4-polygon-offset,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail
+spec@!opengl 2.0@max-samplers,Fail
+spec@!opengl 2.0@max-samplers border,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render-fog,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render-sanity,Fail
+spec@arb_color_buffer_float@gl_rgba32f-render-sanity-fog,Fail
+spec@arb_compute_shader@minmax,Fail
+spec@arb_copy_buffer@targets,Fail
+spec@arb_depth_buffer_float@fbo-generatemipmap-formats,Fail
+spec@arb_depth_buffer_float@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32F,Fail
+spec@arb_depth_buffer_float@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32F NPOT,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH32F_STENCIL8- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor@GL_DEPTH_COMPONENT32F- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH32F_STENCIL8- swizzled- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32F- swizzled- border color only,Fail
+spec@arb_depth_buffer_float@texwrap formats,Fail
+spec@arb_depth_buffer_float@texwrap formats@GL_DEPTH32F_STENCIL8- NPOT,Fail
+spec@arb_depth_buffer_float@texwrap formats@GL_DEPTH_COMPONENT32F- NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT16,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT16 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT24 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT32 NPOT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT,Fail
+spec@arb_depth_texture@fbo-generatemipmap-formats@GL_DEPTH_COMPONENT NPOT,Fail
+spec@arb_depth_texture@texwrap formats bordercolor,Fail
+spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT16- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT24- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor@GL_DEPTH_COMPONENT32- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT16- swizzled- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT24- swizzled- border color only,Fail
+spec@arb_depth_texture@texwrap formats bordercolor-swizzled@GL_DEPTH_COMPONENT32- swizzled- border color only,Fail
+spec@arb_depth_texture@texwrap formats,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32- NPOT,Fail
+spec@arb_framebuffer_object@fbo-drawbuffers-none use_frag_out,Fail
+spec@arb_pixel_buffer_object@pbo-getteximage,Fail
+spec@arb_pixel_buffer_object@texsubimage array pbo,Fail
+spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail
+spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
+spec@arb_shader_storage_buffer_object@compiler@atomicmin-swizzle.vert,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgrad,Fail
+spec@arb_shader_texture_lod@execution@arb_shader_texture_lod-texgradcube,Fail
+spec@arb_texture_float@fbo-blending-formats,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_ALPHA32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY16F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_INTENSITY32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE16F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_LUMINANCE_ALPHA32F_ARB,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_RGB16F,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_RGB32F,Fail
+spec@arb_texture_float@fbo-blending-formats@GL_RGBA32F,Fail
+spec@arb_texture_float@texwrap formats bordercolor,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_ALPHA32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_INTENSITY32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_LUMINANCE_ALPHA32F_ARB- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_RGB32F- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor@GL_RGBA32F- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_ALPHA32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_INTENSITY32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_LUMINANCE_ALPHA32F_ARB- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGB32F- swizzled- border color only,Fail
+spec@arb_texture_float@texwrap formats bordercolor-swizzled@GL_RGBA32F- swizzled- border color only,Fail
+spec@arb_texture_rectangle@1-1-linear-texture,Fail
+spec@arb_texture_rg@fbo-blending-formats-float,Fail
+spec@arb_texture_rg@fbo-blending-formats-float@GL_R32F,Fail
+spec@arb_texture_rg@fbo-blending-formats-float@GL_RG32F,Fail
+spec@arb_texture_rg@texwrap formats bordercolor,Fail
+spec@arb_texture_rg@texwrap formats bordercolor@GL_R16- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor@GL_RG16- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_R16- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats bordercolor-swizzled@GL_RG16- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor@GL_R32F- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor@GL_RG32F- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_R32F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float bordercolor-swizzled@GL_RG32F- swizzled- border color only,Fail
+spec@arb_texture_rg@texwrap formats-float,Fail
+spec@arb_texture_rg@texwrap formats-float@GL_R32F- NPOT,Fail
+spec@arb_texture_rg@texwrap formats-float@GL_RG32F- NPOT,Fail
+spec@arb_transform_feedback2@change objects while paused (gles3),Fail
+spec@egl 1.4@egl-copy-buffers,Crash
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl_ext_protected_content@conformance,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+spec@egl_mesa_configless_context@basic,Fail
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
+spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
+spec@ext_framebuffer_object@getteximage-formats init-by-clear-and-render,Fail
+spec@ext_framebuffer_object@getteximage-formats init-by-rendering,Fail
+spec@ext_gpu_shader4@execution@texelfetch@fs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@fs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@fs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@fs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@fs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@fs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@vs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@vs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetchoffset@vs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@vs-texelfetch-isampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@vs-texelfetch-sampler1darray,Fail
+spec@ext_gpu_shader4@execution@texelfetch@vs-texelfetch-usampler1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture() 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture() 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture(bias) 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture(bias) 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texture() cubeshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegrad 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegrad 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegradoffset 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturegradoffset 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelod 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelod 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelodoffset 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4texturelodoffset 1darrayshadow,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4textureoffset 1darray,Fail
+spec@ext_gpu_shader4@tex-miplevel-selection gpu4textureoffset 1darrayshadow,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor@GL_DEPTH24_STENCIL8- border color only,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor-swizzled,Fail
+spec@ext_packed_depth_stencil@texwrap formats bordercolor-swizzled@GL_DEPTH24_STENCIL8- swizzled- border color only,Fail
+spec@ext_packed_depth_stencil@texwrap formats,Fail
+spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8- NPOT,Fail
+spec@ext_packed_float@query-rgba-signed-components,Fail
+spec@ext_texture_array@array-texture,Fail
+spec@ext_texture_array@fbo-generatemipmap-array rgb9_e5,Fail
+spec@ext_texture_array@fbo-generatemipmap-array,Fail
+spec@ext_texture_array@texsubimage array,Fail
+spec@ext_texture_integer@getteximage-clamping gl_arb_texture_rg,Fail
+spec@ext_texture_integer@getteximage-clamping,Fail
+spec@ext_texture_lod_bias@lodbias,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_ALPHA16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_INTENSITY16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_LUMINANCE16_ALPHA16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_LUMINANCE16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_R16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_RG16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_RGB16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor@GL_RGBA16_SNORM- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_ALPHA16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_INTENSITY16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_LUMINANCE16_ALPHA16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_LUMINANCE16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_R16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RG16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGB16_SNORM- swizzled- border color only,Fail
+spec@ext_texture_snorm@texwrap formats bordercolor-swizzled@GL_RGBA16_SNORM- swizzled- border color only,Fail
+spec@arb_texture_storage@texture-storage@cube array texture,Fail
+spec@glsl-1.10@execution@glsl-fs-inline-explosion,Crash
+spec@glsl-1.10@execution@glsl-vs-inline-explosion,Crash
+spec@glsl-1.20@compiler@invalid-vec4-array-to-vec3-array-conversion.vert,Fail
+spec@glsl-1.20@execution@clipping@vs-clip-vertex-primitives,Fail
+spec@glsl-1.20@execution@fs-underflow-mul-compare-zero,Fail
+spec@intel_performance_query@intel_performance_query-issue_2235,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp,Fail
+spec@khr_texture_compression_astc@sliced-3d-miptree-gles srgb-fp@sRGB decode full precision,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-snorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-r16-unorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-snorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rg16-unorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-snorm-cube.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d-array.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d-array.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-2d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-3d.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-3d.vert,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-cube.frag,Fail
+spec@nv_image_formats@compiler@declaration-disallow-rgba16-unorm-cube.vert,Fail
+spec@nv_read_depth@read_depth_gles3,Fail
+spec@oes_egl_image_external_essl3@oes_egl_image_external_essl3,Crash
+spec@oes_shader_io_blocks@compiler@layout-location-aliasing.vert,Fail
diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt
new file mode 100644
index 000000000..a17f2c79c
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-flakes.txt
@@ -0,0 +1,11 @@
+dEQP-GLES31.functional.compute.shared_var.basic_type.ivec3_highp
+dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.highp_mat2
+KHR-GLES31.core.shader_image_load_store.basic-glsl-earlyFragTests
+
+glx@glx_arb_sync_control@swapbuffersmsc-divisor-zero
+glx@glx_arb_sync_control@waitformsc
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=4
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-32f_24_8_rev samples=2
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=4
+spec@arb_occlusion_query@occlusion_query_order
+spec@egl_chromium_sync_control@conformance
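
[Editor's note: flake lists like this one name tests with known-unstable results; an unexpected result for a listed test is reported (the FLAKES_CHANNEL "#videocore-ci" variable in gitlab-ci.yml above names an IRC channel for that) rather than failing the job. A triage sketch under that assumption, reusing parse_expectations from the earlier sketch.]

    def classify(name, status, expected, flakes):
        if status == expected.get(name, "Pass"):
            return "ok"
        if name in flakes:
            return "flake"        # reported to FLAKES_CHANNEL, job still passes
        return "unexpected"       # fails the pipeline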
diff --git a/lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt b/lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt
new file mode 100644
index 000000000..e6b1076a5
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3d-rpi4-skips.txt
@@ -0,0 +1,40 @@
+# Slow tests (> 1 minute to run)
+spec@!opengl 1.1@streaming-texture-leak
+spec@!opengl 1.2@tex3d-maxsize
+spec@ext_texture_env_combine@texture-env-combine
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion
+spec@!opengl 1.0@gl-1.0-blend-func
+
+# Extensions not supported
+spec@arb_gpu_shader_fp64.*
+spec@arb_gpu_shader_gpu5.*
+spec@arb_gpu_shader_int64.*
+spec@arb_tessellation_shader.*
+spec@arb_texture_cube_map.*
+spec@glsl-1.30.*
+spec@glsl-1.40.*
+spec@glsl-1.50.*
+spec@glsl-3.*
+spec@glsl-4.*
+spec@glsl-es-3.20.*
+# Slow tests (> 1 minute to run)
+spec@!opengl 1.1@streaming-texture-leak
+spec@!opengl 1.2@tex3d-maxsize
+spec@ext_texture_env_combine@texture-env-combine
+spec@glsl-1.10@execution@loops@glsl-fs-unroll-explosion
+spec@glsl-1.10@execution@loops@glsl-vs-unroll-explosion
+spec@!opengl 1.0@gl-1.0-blend-func
+
+# Extensions not supported
+spec@arb_gpu_shader_fp64.*
+spec@arb_gpu_shader_gpu5.*
+spec@arb_gpu_shader_int64.*
+spec@arb_tessellation_shader.*
+spec@arb_texture_cube_map.*
+spec@glsl-1.30.*
+spec@glsl-1.40.*
+spec@glsl-1.50.*
+spec@glsl-3.*
+spec@glsl-4.*
+spec@glsl-es-3.20.*
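
[Editor's note: unlike the exact-name fails/flakes lists, skip entries such as spec@glsl-1.30.* are patterns matched against test names, so whole unsupported extensions can be excluded up front. A minimal sketch treating them as Python regexes — an assumption about the matcher, not taken from this commit.]

    import re

    def load_skips(path):
        with open(path) as f:
            return [re.compile(line.strip()) for line in f
                    if line.strip() and not line.startswith("#")]

    def filter_caselist(tests, skip_patterns):
        # Drop any test whose name matches a skip pattern.
        return [t for t in tests if not any(p.search(t) for p in skip_patterns)]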
diff --git a/lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt b/lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt
new file mode 100644
index 000000000..0d22f002d
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3dv-rpi4-flakes.txt
@@ -0,0 +1,5 @@
+dEQP-VK.api.external.fence.opaque_fd.reset_permanent
+dEQP-VK.api.external.fence.opaque_fd.reset_temporary
+dEQP-VK.api.external.fence.opaque_fd.signal_export_import_wait_permanent
+dEQP-VK.ssbo.layout.instance_array_basic_type.std430.uvec4
+dEQP-VK.wsi.display.get_display_plane_capabilities
diff --git a/lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt b/lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt
new file mode 100644
index 000000000..bf6a82c19
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/v3dv-rpi4-skips.txt
@@ -0,0 +1,21 @@
+# Broadcom waivers
+dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero
+dEQP-VK.rasterization.depth_bias.d32_sfloat
+
+# Timeout tests (> 1 minute to run)
+dEQP-VK.api.object_management.max_concurrent.query_pool
+dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite
+dEQP-VK.memory.mapping.dedicated_alloc.buffer.full.variable.implicit_unmap
+dEQP-VK.memory.mapping.dedicated_alloc.image.full.variable.implicit_unmap
+dEQP-VK.memory.mapping.suballocation.full.variable.implicit_unmap
+dEQP-VK.spirv_assembly.instruction.graphics.spirv_ids_abuse.lots_ids_geom
+dEQP-VK.spirv_assembly.instruction.graphics.spirv_ids_abuse.lots_ids_vert
+dEQP-VK.ssbo.layout.random.all_shared_buffer.5
+dEQP-VK.ssbo.layout.random.arrays_of_arrays.13
+dEQP-VK.ssbo.layout.random.nested_structs_arrays.0
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_linear_linear_mipmap_linear_clamp
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_linear_linear_mipmap_linear_repeat
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_nearest_linear_mipmap_linear_clamp
+dEQP-VK.texture.explicit_lod.2d.sizes.128x128_nearest_linear_mipmap_linear_repeat
+dEQP-VK.ubo.random.all_out_of_order_offsets.45
+dEQP-VK.ubo.random.all_shared_buffer.48
diff --git a/lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt b/lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt
new file mode 100644
index 000000000..d0833cd4f
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/vc4-rpi3-fails.txt
@@ -0,0 +1,1611 @@
+KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component16,Fail
+KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component24,Fail
+KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_short_depth_component16,Fail
+
+# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3133
+KHR-GLES2.texture_3d.copy_sub_image.negative,Fail
+KHR-GLES2.texture_3d.copy_sub_image.rgba,Fail
+
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_linear_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_mipmap_nearest_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.linear_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_linear_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_linear_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_mipmap_nearest_nearest_repeat_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_clamp_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_mirror_repeat_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_clamp_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_clamp_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_clamp_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_mirror_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_mirror_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_mirror_repeat,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_repeat_clamp,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_repeat_mirror,Fail
+KHR-GLES2.texture_3d.filtering.combinations.nearest_nearest_repeat_repeat_repeat,Fail
+
+# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3134
+KHR-GLES2.texture_3d.filtering.combinations.negative,Fail
+
+KHR-GLES2.texture_3d.filtering.formats.rgba8_linear,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_nearest,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.formats.rgba8_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.128x32x64_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.32x64x16_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_linear_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_linear_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_nearest,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_nearest_mipmap_linear,Fail
+KHR-GLES2.texture_3d.filtering.sizes.63x63x63_nearest_mipmap_nearest,Fail
+KHR-GLES2.texture_3d.framebuffer_texture.rgba,Fail
+KHR-GLES2.texture_3d.sub_image.rgba8,Fail
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_context.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgb888_window,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.multi_thread.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.color_clears.single_context.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.create_context.no_config,Fail
+dEQP-EGL.functional.render.multi_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.render.multi_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_context.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgb888_window,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.render.multi_thread.gles2.rgba8888_window,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgb888_pbuffer,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgb888_window,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgba8888_pbuffer,Crash
+dEQP-EGL.functional.render.single_context.gles2.rgba8888_window,Crash
+dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_center,Fail
+dEQP-GLES2.functional.clipping.line.wide_line_clip_viewport_corner,Fail
+dEQP-GLES2.functional.depth_stencil_clear.depth_stencil_masked,Fail
+dEQP-GLES2.functional.draw.draw_arrays.line_loop.multiple_attributes,Fail
+dEQP-GLES2.functional.draw.draw_arrays.line_loop.single_attribute,Fail
+dEQP-GLES2.functional.fbo.render.texsubimage.after_render_tex2d_rgba,Fail
+dEQP-GLES2.functional.fbo.render.texsubimage.between_render_tex2d_rgba,Fail
+dEQP-GLES2.functional.negative_api.vertex_array.vertex_attrib,Fail
+dEQP-GLES2.functional.negative_api.vertex_array.vertex_attribv,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_linear_linear_mirror_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_linear_linear_repeat_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_nearest_linear_mirror_rgba8888,Fail
+dEQP-GLES2.functional.texture.filtering.2d.nearest_mipmap_nearest_linear_repeat_rgba8888,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.linear_linear_repeat_non_square,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_clamp_non_square,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_mirror_non_square,Fail
+dEQP-GLES2.functional.texture.mipmap.2d.basic.nearest_linear_repeat_non_square,Fail
+dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.2d_rgba,Fail
+dEQP-GLES2.functional.texture.specification.basic_copytexsubimage2d.cube_rgba,Fail
+dEQP-GLES2.functional.texture.wrap.clamp_clamp_nearest_npot_etc1,Fail
+
+glx@glx-copy-sub-buffer samples=2,Crash
+glx@glx-copy-sub-buffer samples=4,Crash
+glx@glx-make-current,Crash
+glx@glx-multithread-buffer,Fail
+glx@glx-query-drawable-glx_fbconfig_id-window,Fail
+glx@glx-swap-pixmap-bad,Fail
+glx@glx-visuals-depth -pixmap,Crash
+glx@glx-visuals-depth,Crash
+glx@glx-visuals-stencil -pixmap,Crash
+glx@glx-visuals-stencil,Crash
+glx@glx_arb_create_context_es2_profile@invalid opengl es version,Fail
+glx@glx_arb_create_context_no_error@no error,Fail
+glx@glx_ext_import_context@free context,Fail
+glx@glx_ext_import_context@get context id,Fail
+glx@glx_ext_import_context@get current display,Fail
+glx@glx_ext_import_context@import context- multi process,Fail
+glx@glx_ext_import_context@import context- single process,Fail
+glx@glx_ext_import_context@imported context has same context id,Fail
+glx@glx_ext_import_context@make current- multi process,Fail
+glx@glx_ext_import_context@make current- single process,Fail
+glx@glx_ext_import_context@query context info,Fail
+shaders@glsl-arb-fragment-coord-conventions,Fail
+shaders@glsl-bug-110796,Fail
+shaders@glsl-max-vertex-attrib,Fail
+shaders@glsl-predication-on-large-array,Fail
+spec@!opengl 1.0@gl-1.0-bitmap-heart-dance,Fail
+spec@!opengl 1.0@gl-1.0-dlist-bitmap,Crash
+spec@!opengl 1.0@gl-1.0-drawbuffer-modes,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-const,Fail
+spec@!opengl 1.0@gl-1.0-edgeflag-quads,Fail
+spec@!opengl 1.0@gl-1.0-logicop,Crash
+spec@!opengl 1.0@gl-1.0-no-op-paths,Fail
+spec@!opengl 1.0@gl-1.0-scissor-offscreen,Fail
+spec@!opengl 1.0@gl-1.0-user-clip-all-planes,Fail
+spec@!opengl 1.1@clipflat,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glBegin/End(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawArrays(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_POLYGON)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUADS)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_QUAD_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLES)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_FAN)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CCW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_FILL)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: center top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: left top PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right bottom PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right middle PV: FIRST,Fail
+spec@!opengl 1.1@clipflat@glDrawElements(GL_TRIANGLE_STRIP)- glFrontFace(GL_CW)- glPolygonMode(GL_LINE)- quadrant: right top PV: FIRST,Fail
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-blit samples=4,Crash
+spec@!opengl 1.1@depthstencil-default_fb-clear samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-clear samples=4,Crash
+spec@!opengl 1.1@depthstencil-default_fb-clear,Fail
+spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-copypixels samples=4,Crash
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-24_8 samples=4,Crash
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=4,Crash
+spec@!opengl 1.1@depthstencil-default_fb-readpixels-24_8 samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-readpixels-24_8 samples=4,Crash
+spec@!opengl 1.1@depthstencil-default_fb-readpixels-float-and-ushort samples=2,Crash
+spec@!opengl 1.1@depthstencil-default_fb-readpixels-float-and-ushort samples=4,Crash
+spec@!opengl 1.1@draw-pixels,Fail
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_line_loop,Fail
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_polygon,Crash
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_quad_strip,Crash
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_quads,Crash
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 varray gl_triangle_fan,Fail
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_line_loop,Fail
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_polygon,Crash
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_quad_strip,Crash
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_quads,Crash
+spec@!opengl 1.1@gl-1.1-drawarrays-vertex-count 100000 vbo gl_triangle_fan,Fail
+spec@!opengl 1.1@line-flat-clip-color,Fail
+spec@!opengl 1.1@linestipple,Fail
+spec@!opengl 1.1@linestipple@Baseline,Fail
+spec@!opengl 1.1@linestipple@Factor 2x,Fail
+spec@!opengl 1.1@linestipple@Factor 3x,Fail
+spec@!opengl 1.1@linestipple@Line loop,Fail
+spec@!opengl 1.1@linestipple@Line strip,Fail
+spec@!opengl 1.1@linestipple@Restarting lines within a single Begin-End block,Fail
+spec@!opengl 1.1@polygon-mode,Fail
+spec@!opengl 1.1@polygon-mode-offset,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 0: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 1: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 2: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 3: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on bottom edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on left edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 4: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 5: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected blue pixel in center,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on right edge,Fail
+spec@!opengl 1.1@polygon-mode-offset@config 6: Expected white pixel on top edge,Fail
+spec@!opengl 1.1@read-front clear-front-first samples=2,Crash
+spec@!opengl 1.1@read-front clear-front-first samples=4,Crash
+spec@!opengl 1.1@read-front samples=2,Crash
+spec@!opengl 1.1@read-front samples=4,Crash
+spec@!opengl 1.1@tex-upside-down-miptree,Fail
+spec@!opengl 1.1@texsubimage-unpack,Fail
+spec@!opengl 1.1@texwrap 2d proj,Fail
+spec@!opengl 1.1@texwrap 2d proj@GL_RGBA8- NPOT- projected,Fail
+spec@!opengl 1.1@texwrap 2d proj@GL_RGBA8- projected,Fail
+spec@!opengl 1.1@texwrap 2d proj@GL_RGBA8- swizzled- projected,Fail
+spec@!opengl 1.1@texwrap 2d,Fail
+spec@!opengl 1.1@texwrap 2d@GL_RGBA8,Fail
+spec@!opengl 1.1@texwrap 2d@GL_RGBA8- NPOT,Fail
+spec@!opengl 1.1@texwrap 2d@GL_RGBA8- swizzled,Fail
+spec@!opengl 1.1@texwrap formats,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB10,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB10- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB10- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB10_A2,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB10_A2- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB10_A2- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB12,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB12- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB12- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB16,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB16- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB16- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB5,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB5- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB5- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB5_A1,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB5_A1- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB5_A1- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB8,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB8- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGB8- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA12,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA12- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA12- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA16,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA16- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA16- swizzled,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA8,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA8- NPOT,Fail
+spec@!opengl 1.1@texwrap formats@GL_RGBA8- swizzled,Fail
+spec@!opengl 1.1@windowoverlap,Fail
+spec@!opengl 1.2@copyteximage 3d,Fail
+spec@!opengl 1.2@getteximage-targets 3d,Fail
+spec@!opengl 1.2@lodclamp,Fail
+spec@!opengl 1.2@lodclamp-between,Fail
+spec@!opengl 1.2@lodclamp-between-max,Fail
+spec@!opengl 1.2@mipmap-setup,Fail
+spec@!opengl 1.2@tex3d,Fail
+spec@!opengl 1.2@tex3d-maxsize,Fail
+spec@!opengl 1.2@texwrap 3d proj,Fail
+spec@!opengl 1.2@texwrap 3d proj@GL_RGBA8- NPOT- projected,Fail
+spec@!opengl 1.2@texwrap 3d proj@GL_RGBA8- projected,Fail
+spec@!opengl 1.2@texwrap 3d proj@GL_RGBA8- swizzled- projected,Fail
+spec@!opengl 1.2@texwrap 3d,Fail
+spec@!opengl 1.2@texwrap 3d@GL_RGBA8,Fail
+spec@!opengl 1.2@texwrap 3d@GL_RGBA8- NPOT,Fail
+spec@!opengl 1.2@texwrap 3d@GL_RGBA8- swizzled,Fail
+spec@!opengl 1.3@tex3d-depth1,Fail
+spec@!opengl 1.4@gl-1.4-polygon-offset,Fail
+spec@!opengl 1.4@tex-miplevel-selection,Fail
+spec@!opengl 1.4@tex-miplevel-selection-lod,Fail
+spec@!opengl 1.4@tex-miplevel-selection-lod-bias,Fail
+spec@!opengl 1.5@depth-tex-compare,Fail
+spec@!opengl 2.0@attrib-assignments,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag,Fail
+spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail
+spec@!opengl 2.0@occlusion-query-discard,Fail
+spec@!opengl 2.0@tex3d-npot,Fail
+spec@!opengl 2.1@minmax,Fail
+spec@!opengl 2.1@pbo,Fail
+spec@!opengl 2.1@pbo@test_polygon_stip,Fail
+spec@!opengl 2.1@polygon-stipple-fs,Fail
+spec@arb_arrays_of_arrays@execution@glsl-arrays-copy-size-mismatch,Fail
+spec@arb_depth_texture@depth-level-clamp,Fail
+spec@arb_depth_texture@texwrap formats,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT16- swizzled,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT24- swizzled,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32- NPOT,Fail
+spec@arb_depth_texture@texwrap formats@GL_DEPTH_COMPONENT32- swizzled,Fail
+spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index,Crash
+spec@arb_draw_elements_base_vertex@arb_draw_elements_base_vertex-negative-index-user_varrays,Crash
+spec@arb_es2_compatibility@texwrap formats,Fail
+spec@arb_es2_compatibility@texwrap formats@GL_RGB565,Fail
+spec@arb_es2_compatibility@texwrap formats@GL_RGB565- NPOT,Fail
+spec@arb_es2_compatibility@texwrap formats@GL_RGB565- swizzled,Fail
+spec@arb_fragment_coord_conventions@fp-arb-fragment-coord-conventions-integer,Fail
+spec@arb_fragment_coord_conventions@fp-arb-fragment-coord-conventions-none,Fail
+spec@arb_fragment_program@fp-indirections2,Fail
+spec@arb_fragment_program@minmax,Fail
+spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_depth24_stencil8,Fail
+spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index1,Fail
+spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index16,Fail
+spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index4,Fail
+spec@arb_framebuffer_object@arb_framebuffer_object-depth-stencil-blit stencil gl_stencil_index8,Fail
+spec@arb_framebuffer_object@fbo-attachments-blit-scaled-linear,Fail
+spec@arb_framebuffer_object@fbo-blit-stretch,Fail
+spec@arb_framebuffer_object@fbo-generatemipmap-3d,Fail
+spec@arb_framebuffer_object@fbo-mipmap-copypix,Fail
+spec@arb_framebuffer_object@framebuffer-blit-levels draw stencil,Fail
+spec@arb_framebuffer_object@framebuffer-blit-levels read stencil,Fail
+spec@arb_framebuffer_object@mixed-buffer-sizes,Fail
+spec@arb_framebuffer_object@same-attachment-glframebuffertexture2d-gl_depth_stencil_attachment,Fail
+spec@arb_framebuffer_srgb@arb_framebuffer_srgb-srgb_conformance,Fail
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample disabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample disabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample enabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb downsample enabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa disabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa disabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa enabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer linear_to_srgb msaa enabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample disabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample disabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample enabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear downsample enabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa disabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa disabled render,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa enabled clear,Crash
+spec@arb_framebuffer_srgb@blit renderbuffer srgb_to_linear msaa enabled render,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample disabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample disabled render,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample enabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb downsample enabled render,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa disabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa disabled render,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa enabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture linear_to_srgb msaa enabled render,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample disabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample disabled render,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample enabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear downsample enabled render,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa disabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa disabled render,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa enabled clear,Crash
+spec@arb_framebuffer_srgb@blit texture srgb_to_linear msaa enabled render,Crash
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_ALPHA_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_BLUE_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_DEPTH_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_GREEN_SIZE,Fail
+spec@arb_internalformat_query2@all internalformat_<x>_size pname checks@GL_INTERNALFORMAT_RED_SIZE,Fail
+spec@arb_internalformat_query2@api error checks,Fail
+spec@arb_internalformat_query2@max dimensions related pname checks,Fail
+spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_COMBINED_DIMENSIONS,Fail
+spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_DEPTH,Fail
+spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_HEIGHT,Fail
+spec@arb_internalformat_query2@max dimensions related pname checks@GL_MAX_WIDTH,Fail
+spec@arb_occlusion_query2@render,Fail
+spec@arb_occlusion_query@occlusion_query,Fail
+spec@arb_occlusion_query@occlusion_query_conform,Fail
+spec@arb_occlusion_query@occlusion_query_meta_fragments,Fail
+spec@arb_occlusion_query@occlusion_query_meta_save,Fail
+spec@arb_pixel_buffer_object@fbo-pbo-readpixels-small,Fail
+spec@arb_pixel_buffer_object@pbo-getteximage,Fail
+spec@arb_pixel_buffer_object@texsubimage-unpack pbo,Fail
+spec@arb_point_sprite@arb_point_sprite-mipmap,Fail
+spec@arb_provoking_vertex@arb-provoking-vertex-render,Fail
+spec@arb_sampler_objects@sampler-objects,Fail
+spec@arb_shader_texture_lod@execution@glsl-fs-texturelod-01,Fail
+spec@arb_texture_multisample@arb_texture_multisample-teximage-3d-multisample,Fail
+spec@arb_texture_rectangle@1-1-linear-texture,Fail
+spec@arb_texture_rectangle@copyteximage rect samples=2,Crash
+spec@arb_texture_rectangle@copyteximage rect samples=4,Crash
+spec@arb_texture_rectangle@texrect-many,Crash
+spec@arb_texture_storage@texture-storage,Fail
+spec@arb_texture_storage@texture-storage@3D mipmapped ,Fail
+spec@arb_texture_storage@texture-storage@3D non-mipmapped ,Fail
+spec@arb_vertex_program@minmax,Fail
+spec@egl 1.4@egl-copy-buffers,Crash
+spec@egl 1.4@eglterminate then unbind context,Fail
+spec@egl 1.4@largest possible eglcreatepbuffersurface and then glclear,Fail
+spec@egl_ext_protected_content@conformance,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_depth_component24,Fail
+spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,Fail
+spec@egl_khr_surfaceless_context@viewport,Fail
+spec@egl_mesa_configless_context@basic,Fail
+spec@ext_direct_state_access@multi-texture,Crash
+spec@ext_direct_state_access@multi-texture@MultiTexImage3DEXT,Fail
+spec@ext_direct_state_access@multi-texture@MultiTexSubImage1DEXT,Fail
+spec@ext_direct_state_access@textures,Fail
+spec@ext_direct_state_access@textures@CopyTextureSubImage3DEXT + display list GL_COMPILE,Fail
+spec@ext_direct_state_access@textures@CopyTextureSubImage3DEXT + display list GL_COMPILE_AND_EXECUTE,Fail
+spec@ext_direct_state_access@textures@CopyTextureSubImage3DEXT,Fail
+spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_1D + glTex* + display list GL_COMPILE,Fail
+spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_1D + glTex* + display list GL_COMPILE_AND_EXECUTE,Fail
+spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_1D + glTex*,Fail
+spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_3D + glTex* + display list GL_COMPILE,Fail
+spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_3D + glTex* + display list GL_COMPILE_AND_EXECUTE,Fail
+spec@ext_direct_state_access@textures@GL_PROXY_TEXTURE_3D + glTex*,Fail
+spec@ext_direct_state_access@textures@TextureImage3DEXT + display list GL_COMPILE,Fail
+spec@ext_direct_state_access@textures@TextureImage3DEXT + display list GL_COMPILE_AND_EXECUTE,Fail
+spec@ext_direct_state_access@textures@TextureImage3DEXT,Fail
+spec@ext_direct_state_access@textures@TextureSubImage2DEXT + display list GL_COMPILE,Fail
+spec@ext_direct_state_access@textures@TextureSubImage2DEXT + display list GL_COMPILE_AND_EXECUTE,Fail
+spec@ext_direct_state_access@textures@TextureSubImage2DEXT,Fail
+spec@ext_direct_state_access@textures@TextureSubImage3DEXT + display list GL_COMPILE,Fail
+spec@ext_direct_state_access@textures@TextureSubImage3DEXT + display list GL_COMPILE_AND_EXECUTE,Fail
+spec@ext_direct_state_access@textures@TextureSubImage3DEXT,Fail
+spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
+spec@ext_framebuffer_multisample@blit-flipped 2 x,Crash
+spec@ext_framebuffer_multisample@blit-flipped 2 y,Crash
+spec@ext_framebuffer_multisample@blit-flipped 4 x,Crash
+spec@ext_framebuffer_multisample@blit-flipped 4 y,Crash
+spec@ext_framebuffer_multisample@blit-mismatched-formats,Fail
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 downsample,Crash
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 msaa,Crash
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 2 upsample,Crash
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 downsample,Crash
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 msaa,Crash
+spec@ext_framebuffer_multisample@clip-and-scissor-blit 4 upsample,Crash
+spec@ext_framebuffer_multisample@enable-flag,Crash
+spec@ext_framebuffer_multisample@interpolation 2 centroid-edges,Fail
+spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
+spec@ext_framebuffer_multisample@line-smooth 2,Crash
+spec@ext_framebuffer_multisample@line-smooth 4,Crash
+spec@ext_framebuffer_multisample@multisample-blit 2 color linear,Crash
+spec@ext_framebuffer_multisample@multisample-blit 2 color,Crash
+spec@ext_framebuffer_multisample@multisample-blit 2 depth,Crash
+spec@ext_framebuffer_multisample@multisample-blit 2 stencil,Crash
+spec@ext_framebuffer_multisample@multisample-blit 4 color linear,Crash
+spec@ext_framebuffer_multisample@multisample-blit 4 color,Crash
+spec@ext_framebuffer_multisample@multisample-blit 4 depth,Crash
+spec@ext_framebuffer_multisample@multisample-blit 4 stencil,Crash
+spec@ext_framebuffer_multisample@no-color 2 depth combined,Crash
+spec@ext_framebuffer_multisample@no-color 2 depth single,Crash
+spec@ext_framebuffer_multisample@no-color 2 depth-computed combined,Crash
+spec@ext_framebuffer_multisample@no-color 2 depth-computed single,Crash
+spec@ext_framebuffer_multisample@no-color 2 stencil combined,Crash
+spec@ext_framebuffer_multisample@no-color 2 stencil single,Crash
+spec@ext_framebuffer_multisample@no-color 4 depth combined,Crash
+spec@ext_framebuffer_multisample@no-color 4 depth single,Crash
+spec@ext_framebuffer_multisample@no-color 4 depth-computed combined,Crash
+spec@ext_framebuffer_multisample@no-color 4 depth-computed single,Crash
+spec@ext_framebuffer_multisample@no-color 4 stencil combined,Crash
+spec@ext_framebuffer_multisample@no-color 4 stencil single,Crash
+spec@ext_framebuffer_multisample@point-smooth 2,Crash
+spec@ext_framebuffer_multisample@point-smooth 4,Crash
+spec@ext_framebuffer_multisample@polygon-smooth 2,Crash
+spec@ext_framebuffer_multisample@polygon-smooth 4,Crash
+spec@ext_framebuffer_multisample@sample-alpha-to-coverage 2 color,Fail
+spec@ext_framebuffer_multisample@sample-alpha-to-coverage 2 depth,Crash
+spec@ext_framebuffer_multisample@sample-alpha-to-coverage 4 color,Fail
+spec@ext_framebuffer_multisample@sample-alpha-to-coverage 4 depth,Crash
+spec@ext_framebuffer_multisample@sample-coverage 2 inverted,Crash
+spec@ext_framebuffer_multisample@sample-coverage 2 non-inverted,Crash
+spec@ext_framebuffer_multisample@sample-coverage 4 inverted,Crash
+spec@ext_framebuffer_multisample@sample-coverage 4 non-inverted,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 2 color downsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 2 color msaa,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 2 color upsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 2 depth downsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 2 depth msaa,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 2 depth upsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 4 color downsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 4 color msaa,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 4 color upsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 4 depth downsample,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 4 depth msaa,Crash
+spec@ext_framebuffer_multisample@unaligned-blit 4 depth upsample,Crash
+spec@ext_framebuffer_multisample@upsample 2 color linear,Crash
+spec@ext_framebuffer_multisample@upsample 2 color,Crash
+spec@ext_framebuffer_multisample@upsample 2 depth,Crash
+spec@ext_framebuffer_multisample@upsample 2 stencil,Crash
+spec@ext_framebuffer_multisample@upsample 4 color linear,Crash
+spec@ext_framebuffer_multisample@upsample 4 color,Crash
+spec@ext_framebuffer_multisample@upsample 4 depth,Crash
+spec@ext_framebuffer_multisample@upsample 4 stencil,Crash
+spec@ext_framebuffer_multisample_blit_scaled@negative-blit-scaled,Crash
+spec@ext_framebuffer_object@fbo-3d,Fail
+spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
+spec@ext_framebuffer_object@fbo-depth-sample-compare,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index1-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index16-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index4-blit,Fail
+spec@ext_framebuffer_object@fbo-stencil-gl_stencil_index8-blit,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-export,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail
+spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail
+spec@ext_occlusion_query_boolean@any-samples,Fail
+spec@ext_packed_depth_stencil@depth_stencil texture,Fail
+spec@ext_packed_depth_stencil@fbo-depthstencil-gl_depth24_stencil8-clear,Fail
+spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-blit,Fail
+spec@ext_packed_depth_stencil@texwrap formats,Fail
+spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8,Fail
+spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8- NPOT,Fail
+spec@ext_packed_depth_stencil@texwrap formats@GL_DEPTH24_STENCIL8- swizzled,Fail
+spec@ext_provoking_vertex@provoking-vertex,Fail
+spec@ext_texture_format_bgra8888@api-errors,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SLUMINANCE8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SLUMINANCE8_ALPHA8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor-swizzled@GL_SRGB8_ALPHA8- swizzled- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor@GL_SLUMINANCE8- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor@GL_SLUMINANCE8_ALPHA8- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8- border color only,Fail
+spec@ext_texture_srgb@texwrap formats bordercolor@GL_SRGB8_ALPHA8- border color only,Fail
+spec@ext_texture_srgb@texwrap formats,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8_ALPHA8,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8_ALPHA8- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SLUMINANCE8_ALPHA8- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8- swizzled,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- NPOT,Fail
+spec@ext_texture_srgb@texwrap formats@GL_SRGB8_ALPHA8- swizzled,Fail
+spec@glsl-1.10@built-in constants,Fail
+spec@glsl-1.10@built-in constants@gl_MaxVertexAttribs,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-cos-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-cos-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-cos-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-cos-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp2-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp2-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-exp2-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log2-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log2-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-log2-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-pow-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-pow-vec2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-pow-vec3-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-pow-vec4-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-sin-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-sin-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-sin-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-sin-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-tan-float,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-tan-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-tan-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@fs-tan-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-cos-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-cos-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-cos-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-cos-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp2-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp2-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-exp2-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log2-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log2-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-log2-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-pow-float-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-pow-vec2-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-pow-vec3-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-pow-vec4-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-sin-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-sin-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-sin-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-sin-vec4,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-tan-float,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-tan-vec2,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-tan-vec3,Fail
+spec@glsl-1.10@execution@built-in-functions@vs-tan-vec4,Fail
+spec@glsl-1.10@execution@fs-texture-select,Fail
+spec@glsl-1.10@execution@glsl-fs-convolution-2,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-sampler-numbering-2,Fail
+spec@glsl-1.10@execution@samplers@glsl-fs-sampler-numbering-3,Fail
+spec@glsl-1.10@execution@samplers@in-parameter-array,Fail
+spec@glsl-1.10@execution@texture3d,Fail
+spec@glsl-1.20@built-in constants,Fail
+spec@glsl-1.20@built-in constants@gl_MaxVertexAttribs,Fail
+spec@glsl-1.20@execution@fs-nan-builtin-max,Fail
+spec@glsl-1.20@execution@fs-nan-builtin-min,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 1d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 1dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 2d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 2dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() 3d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture() cube,Crash
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 1d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 1dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 2d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 2dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) 3d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:texture(bias) cube,Crash
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1d_projvec4,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 1dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 2d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 2d_projvec4,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 2dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj 3d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1d_projvec4,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 1dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 2d,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 2d_projvec4,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 2dshadow,Fail
+spec@glsl-1.20@execution@tex-miplevel-selection gl2:textureproj(bias) 3d,Fail
+spec@glsl-1.20@execution@variable-indexing@fs-temp-array-mat4-index-col-row-wr,Fail
+spec@glsl-1.20@execution@variable-indexing@vs-temp-array-mat4-index-col-row-wr,Fail
+spec@glsl-1.20@execution@vs-nan-builtin-max,Fail
+spec@glsl-1.20@execution@vs-nan-builtin-min,Fail
+spec@intel_performance_query@intel_performance_query-issue_2235,Fail
+spec@khr_texture_compression_astc@basic-gles,Fail
+spec@khr_texture_compression_astc@miptree-gl ldr,Fail
+spec@khr_texture_compression_astc@miptree-gl ldr@LDR Profile,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-fp@sRGB decode full precision,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-sd,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb-sd@sRGB skip decode,Fail
+spec@khr_texture_compression_astc@miptree-gl srgb@sRGB decode,Fail
+spec@khr_texture_compression_astc@miptree-gles ldr,Fail
+spec@khr_texture_compression_astc@miptree-gles ldr@LDR Profile,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb,Fail
+spec@khr_texture_compression_astc@miptree-gles srgb-fp,Fail
+spec@oes_compressed_etc1_rgb8_texture@miptree,Fail
diff --git a/lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt b/lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt
new file mode 100644
index 000000000..895a2f767
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/vc4-rpi3-flakes.txt
@@ -0,0 +1,39 @@
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_and_pos_x_and_neg_x_neg_y_neg_z
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_and_pos_x_and_pos_y_pos_z
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_neg_x_and_pos_x_neg_y_pos_z_and_neg_x_pos_y_neg_z
+dEQP-GLES2.functional.clipping.triangle_vertex.clip_three.clip_pos_x_and_neg_x_neg_y_pos_z_and_neg_x_pos_y_neg_z
+dEQP-GLES2.functional.draw.random.51
+dEQP-GLES2.functional.fragment_ops.blend.rgb_func_alpha_func.src.one_minus_src_alpha_constant_color
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_direct_write_dynamic_loop_subscript_read_vertex
+dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.basic_mediump_int_vertex
+dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.conditional_continue_vertex
+dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.function_call_inout_vertex
+dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.function_call_return_vertex
+dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.nested_sequence_vertex
+dEQP-GLES2.functional.shaders.loops.while_constant_iterations.select_iteration_count_vertex
+dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.function_call_return_vertex
+dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.infinite_with_conditional_break_vertex
+dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.post_increment_vertex
+dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.single_iteration_vertex
+dEQP-GLES2.functional.shaders.operator.unary_operator.pre_decrement_result.mediump_vec3_fragment
+dEQP-GLES2.functional.shaders.random.exponential.fragment.51
+dEQP-GLES2.functional.shaders.random.texture.fragment.129
+dEQP-GLES2.functional.shaders.return.output_write_in_func_never_vertex
+dEQP-GLES2.functional.texture.filtering.2d.linear_linear_clamp_rgb888_pot
+dEQP-GLES2.functional.texture.filtering.cube.linear_mipmap_linear_nearest_mirror_rgba8888
+dEQP-GLES2.functional.texture.filtering.cube.nearest_linear_mirror_rgba8888_pot
+dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_linear_linear_clamp_rgba8888
+dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_linear_nearest_repeat_l8
+dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_nearest_linear_clamp_rgba8888
+dEQP-GLES2.functional.texture.filtering.cube.nearest_mipmap_nearest_linear_mirror_rgba8888
+dEQP-GLES2.functional.texture.mipmap.cube.generate.rgb565_fastest
+dEQP-GLES2.functional.texture.size.cube.256x256_rgb888
+
+glx@glx-multi-window-single-context
+shaders@glsl-vs-loop
+shaders@glsl-vs-loop-nested
+spec@arb_framebuffer_srgb@blit renderbuffer srgb single_sampled enabled clear
+spec@egl_chromium_sync_control@conformance
+spec@ext_packed_depth_stencil@fbo-stencil-gl_depth24_stencil8-readpixels
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=2
+spec@!opengl 1.1@depthstencil-default_fb-drawpixels-float-and-ushort samples=4
diff --git a/lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt b/lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt
new file mode 100644
index 000000000..692eaff24
--- /dev/null
+++ b/lib/mesa/src/broadcom/ci/vc4-rpi3-skips.txt
@@ -0,0 +1,46 @@
+# Note: skip lists for CI are just lists of lines that, when
+# non-empty and not starting with '#', are regex-matched to
+# delete lines from the test list. Be careful.
+
+# This is causing a binning memory overflow problem
+dEQP-GLES2.functional.fragment_ops.scissor.outside_render_line
+
+# These are very slow
+dEQP-GLES2.functional.uniform_api.random.3
+dEQP-GLES2.functional.uniform_api.random.79
+
+# Conformance issue: VC4 needs to report a shader link failure for
+# dynamic loops in the VS.
+#
+# The issue is that the HW doesn't have an exec mask at dispatch
+# for the VS, so the shouldn't-be-exec channels have undefined
+# contents and may cause infinite loops, leading to GPU hangs. The
+# process of GPU hang reset causes flakes in whatever other jobs are
+# running simultaneously, so we can't even leave these in the flakes
+# list for tracking.
+dEQP-GLES2.functional.shaders.loops.*dynamic.*vertex
+
+# Timeout tests (> 1 minute to run)
+KHR-GLES2.texture_3d.filtering.sizes.3x7x5_linear_mipmap_linear
+KHR-GLES2.texture_3d.filtering.sizes.4x8x8_linear_mipmap_linear
+
+# Slow tests (> 1 minute to run)
+spec@ext_framebuffer_multisample@accuracy
+glx@glx-multithread-texture
+spec@arb_internalformat_query2@all internalformat_<x>_type pname checks
+spec@!opengl 1.1@streaming-texture-leak
+spec@!opengl 1.0@gl-1.0-blend-func
+shaders@glsl-predication-on-large-array
+
+# Extensions not supported
+spec@arb_gpu_shader_fp64.*
+spec@arb_gpu_shader5.*
+spec@arb_gpu_shader_int64.*
+spec@arb_tessellation_shader.*
+spec@arb_texture_cube_map.*
+spec@glsl-1.30.*
+spec@glsl-1.40.*
+spec@glsl-1.50.*
+spec@glsl-3.*
+spec@glsl-4.*
+spec@glsl-es-3.*
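The note at the top of this skip list describes the matching rule: any non-empty, non-'#' line is treated as a regular expression, and tests whose names match are deleted from the run list. The real CI runner is a separate tool, so the following is only an illustrative sketch of that filtering in C using POSIX regex.h; main() and the sample test names are assumptions, though the pattern is taken from the list above.

#include <regex.h>
#include <stdio.h>

/* Keep a test only if it does not match the skip pattern; this mirrors
 * the "regex match deletes lines from the test list" rule above. */
static int
keep_test(const char *skip_pattern, const char *test_name)
{
        regex_t re;
        if (regcomp(&re, skip_pattern, REG_EXTENDED | REG_NOSUB) != 0)
                return 1; /* unparsable pattern: keep the test */
        int matched = regexec(&re, test_name, 0, NULL, 0) == 0;
        regfree(&re);
        return !matched;
}

int
main(void)
{
        /* Pattern from the skip list above. */
        const char *skip = "dEQP-GLES2.functional.shaders.loops.*dynamic.*vertex";
        const char *tests[] = {
                "dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.basic_mediump_int_vertex",
                "dEQP-GLES2.functional.shaders.loops.for_constant_iterations.basic_lowp_int_fragment",
        };
        for (int i = 0; i < 2; i++)
                if (keep_test(skip, tests[i]))
                        printf("%s\n", tests[i]); /* only the second survives */
        return 0;
}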
diff --git a/lib/mesa/src/broadcom/cle/v3d_decoder.c b/lib/mesa/src/broadcom/cle/v3d_decoder.c
index 364419074..97dd8ce84 100644
--- a/lib/mesa/src/broadcom/cle/v3d_decoder.c
+++ b/lib/mesa/src/broadcom/cle/v3d_decoder.c
@@ -674,11 +674,11 @@ v3d_spec_load(const struct v3d_device_info *devinfo)
for (int i = 0; i < ARRAY_SIZE(genxml_files_table); i++) {
if (i != 0) {
- assert(genxml_files_table[i - 1].gen_10 <
- genxml_files_table[i].gen_10);
+ assert(genxml_files_table[i - 1].ver_10 <
+ genxml_files_table[i].ver_10);
}
- if (genxml_files_table[i].gen_10 <= devinfo->ver) {
+ if (genxml_files_table[i].ver_10 <= devinfo->ver) {
text_offset = genxml_files_table[i].offset;
text_length = genxml_files_table[i].length;
}
diff --git a/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml b/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml
index 2fdc685ae..de80a6b64 100644
--- a/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml
+++ b/lib/mesa/src/broadcom/cle/v3d_packet_v33.xml
@@ -950,11 +950,7 @@
<field name="Double-buffer in non-ms mode" size="1" start="15" type="bool"/>
<field name="Multisample Mode (4x)" size="1" start="14" type="bool"/>
- <field name="Maximum BPP of all render targets" size="2" start="12" type="uint">
- <value name="Render target maximum 32bpp" value="0"/>
- <value name="Render target maximum 64bpp" value="1"/>
- <value name="Render target maximum 128bpp" value="2"/>
- </field>
+ <field name="Maximum BPP of all render targets" size="2" start="12" type="Internal BPP"/>
<field name="Number of Render Targets" size="4" start="8" type="uint" minus_one="true"/>
@@ -992,11 +988,7 @@
<field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
<field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
- <field name="Maximum BPP of all render targets" size="2" start="40" type="uint">
- <value name="Render target maximum 32bpp" value="0"/>
- <value name="Render target maximum 64bpp" value="1"/>
- <value name="Render target maximum 128bpp" value="2"/>
- </field>
+ <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP"/>
<field name="Image Height (pixels)" size="16" start="24" type="uint"/>
<field name="Image Width (pixels)" size="16" start="8" type="uint"/>
diff --git a/lib/mesa/src/broadcom/clif/clif_dump.c b/lib/mesa/src/broadcom/clif/clif_dump.c
index bf84c0b96..0aaa6b6ad 100644
--- a/lib/mesa/src/broadcom/clif/clif_dump.c
+++ b/lib/mesa/src/broadcom/clif/clif_dump.c
@@ -52,7 +52,7 @@ clif_dump_add_address_to_worklist(struct clif_dump *clif,
struct clif_dump *
clif_dump_init(const struct v3d_device_info *devinfo,
- FILE *out, bool pretty)
+ FILE *out, bool pretty, bool nobin)
{
struct clif_dump *clif = rzalloc(NULL, struct clif_dump);
@@ -60,6 +60,7 @@ clif_dump_init(const struct v3d_device_info *devinfo,
clif->out = out;
clif->spec = v3d_spec_load(devinfo);
clif->pretty = pretty;
+ clif->nobin = nobin;
list_inithead(&clif->worklist);
@@ -238,6 +239,9 @@ static void
clif_dump_binary(struct clif_dump *clif, struct clif_bo *bo,
uint32_t start, uint32_t end)
{
+ if (clif->pretty && clif->nobin)
+ return;
+
if (start == end)
return;
diff --git a/lib/mesa/src/broadcom/clif/clif_dump.h b/lib/mesa/src/broadcom/clif/clif_dump.h
index 8de3a2cbe..63f3ae77d 100644
--- a/lib/mesa/src/broadcom/clif/clif_dump.h
+++ b/lib/mesa/src/broadcom/clif/clif_dump.h
@@ -32,7 +32,7 @@ struct clif_dump;
struct drm_v3d_submit_cl;
struct clif_dump *clif_dump_init(const struct v3d_device_info *devinfo,
- FILE *output, bool pretty);
+ FILE *output, bool pretty, bool nobin);
void clif_dump(struct clif_dump *clif, const struct drm_v3d_submit_cl *submit);
void clif_dump_destroy(struct clif_dump *clif);
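A hedged usage sketch of the extended entry point; the wrapper function and include path are assumptions, not taken from a real caller. Note that nobin only takes effect together with pretty, per the clif_dump_binary() early return in the hunk above.

#include <stdio.h>
#include "clif/clif_dump.h"

static void
dump_submit(const struct v3d_device_info *devinfo,
            const struct drm_v3d_submit_cl *submit)
{
        /* pretty=true, nobin=true: binary blocks are skipped. */
        struct clif_dump *clif = clif_dump_init(devinfo, stderr, true, true);
        clif_dump(clif, submit);
        clif_dump_destroy(clif);
}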
diff --git a/lib/mesa/src/broadcom/clif/clif_private.h b/lib/mesa/src/broadcom/clif/clif_private.h
index 597d0b506..d96bfd12d 100644
--- a/lib/mesa/src/broadcom/clif/clif_private.h
+++ b/lib/mesa/src/broadcom/clif/clif_private.h
@@ -54,6 +54,11 @@ struct clif_dump {
* output.
*/
bool pretty;
+
+ /**
+ * Flag to not dump the binary resources.
+ */
+ bool nobin;
};
enum reloc_worklist_type {
diff --git a/lib/mesa/src/broadcom/common/v3d_debug.c b/lib/mesa/src/broadcom/common/v3d_debug.c
index 64a2426b9..508a2b7c7 100644
--- a/lib/mesa/src/broadcom/common/v3d_debug.c
+++ b/lib/mesa/src/broadcom/common/v3d_debug.c
@@ -34,33 +34,65 @@
#include "common/v3d_debug.h"
#include "util/macros.h"
-#include "util/debug.h"
+#include "util/u_debug.h"
#include "c11/threads.h"
uint32_t V3D_DEBUG = 0;
-static const struct debug_control debug_control[] = {
- { "cl", V3D_DEBUG_CL},
- { "clif", V3D_DEBUG_CLIF},
- { "qpu", V3D_DEBUG_QPU},
- { "vir", V3D_DEBUG_VIR},
- { "nir", V3D_DEBUG_NIR},
- { "tgsi", V3D_DEBUG_TGSI},
- { "shaderdb", V3D_DEBUG_SHADERDB},
- { "surface", V3D_DEBUG_SURFACE},
- { "perf", V3D_DEBUG_PERF},
- { "norast", V3D_DEBUG_NORAST},
- { "fs", V3D_DEBUG_FS},
- { "gs", V3D_DEBUG_GS},
- { "vs", V3D_DEBUG_VS},
- { "cs", V3D_DEBUG_CS},
- { "always_flush", V3D_DEBUG_ALWAYS_FLUSH},
- { "precompile", V3D_DEBUG_PRECOMPILE},
- { "ra", V3D_DEBUG_RA},
- { "dump_spirv", V3D_DEBUG_DUMP_SPIRV},
- { NULL, 0 }
+static const struct debug_named_value debug_control[] = {
+ { "cl", V3D_DEBUG_CL,
+ "Dump command list during creation" },
+ { "cl_nobin", V3D_DEBUG_CL_NO_BIN,
+ "Dump command listduring creation, excluding binary resources" },
+ { "clif", V3D_DEBUG_CLIF,
+ "Dump command list (CLIF format) during creation", },
+ { "qpu", V3D_DEBUG_QPU,
+ "Dump generated QPU instructions" },
+ { "vir", V3D_DEBUG_VIR,
+ "Dump VIR during program compile" },
+ { "nir", V3D_DEBUG_NIR,
+ "Dump NIR during program compile" },
+ { "tgsi", V3D_DEBUG_TGSI,
+ "Dump TGSI during program compile" },
+ { "shaderdb", V3D_DEBUG_SHADERDB,
+ "Dump program compile information for shader-db analysis" },
+ { "surface", V3D_DEBUG_SURFACE,
+ "Print resource layout information" },
+ { "perf", V3D_DEBUG_PERF,
+ "Print during runtime performance-related events" },
+ { "norast", V3D_DEBUG_NORAST,
+ "Skip actual hardware execution of commands" },
+ { "fs", V3D_DEBUG_FS,
+ "Dump fragment shaders" },
+ { "gs", V3D_DEBUG_GS,
+ "Dump geometry shaders" },
+ { "vs", V3D_DEBUG_VS,
+ "Dump vertex shaders" },
+ { "cs", V3D_DEBUG_CS,
+ "Dump computer shaders" },
+ { "always_flush", V3D_DEBUG_ALWAYS_FLUSH,
+ "Flush after each draw call" },
+ { "precompile", V3D_DEBUG_PRECOMPILE,
+ "Precompiles shader variant at shader state creation time" },
+ { "ra", V3D_DEBUG_RA,
+ "Dump register allocation failures" },
+ { "dump_spirv", V3D_DEBUG_DUMP_SPIRV,
+ "Dump SPIR-V code" },
+ { "tmu32", V3D_DEBUG_TMU_32BIT,
+ "Force 32-bit precision on all TMU operations" },
+ /* Forcing 16-bit TMU precision (tmu16 below) can lead to incorrect
+ * behavior for applications that require full 32-bit precision, but
+ * can improve performance for those that don't.
+ */
+ { "tmu16", V3D_DEBUG_TMU_16BIT,
+ "Force 16-bit precision on all TMU operations" },
+ { "noloopunroll", V3D_DEBUG_NO_LOOP_UNROLL,
+ "Disable loop unrolling" },
+ { NULL }
};
+DEBUG_GET_ONCE_FLAGS_OPTION(v3d_debug, "V3D_DEBUG", debug_control, 0)
+
uint32_t
v3d_debug_flag_for_shader_stage(gl_shader_stage stage)
{
@@ -76,20 +108,11 @@ v3d_debug_flag_for_shader_stage(gl_shader_stage stage)
return flags[stage];
}
-static void
-v3d_process_debug_variable_once(void)
-{
- V3D_DEBUG = parse_debug_string(getenv("V3D_DEBUG"), debug_control);
-
- if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
- V3D_DEBUG |= V3D_DEBUG_NORAST;
-}
-
void
v3d_process_debug_variable(void)
{
- static once_flag v3d_process_debug_variable_flag = ONCE_FLAG_INIT;
+ V3D_DEBUG = debug_get_option_v3d_debug();
- call_once(&v3d_process_debug_variable_flag,
- v3d_process_debug_variable_once);
+ if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
+ V3D_DEBUG |= V3D_DEBUG_NORAST;
}
diff --git a/lib/mesa/src/broadcom/common/v3d_debug.h b/lib/mesa/src/broadcom/common/v3d_debug.h
index efa269758..72d632568 100644
--- a/lib/mesa/src/broadcom/common/v3d_debug.h
+++ b/lib/mesa/src/broadcom/common/v3d_debug.h
@@ -59,6 +59,10 @@ extern uint32_t V3D_DEBUG;
#define V3D_DEBUG_PRECOMPILE (1 << 15)
#define V3D_DEBUG_RA (1 << 16)
#define V3D_DEBUG_DUMP_SPIRV (1 << 17)
+#define V3D_DEBUG_TMU_32BIT (1 << 18)
+#define V3D_DEBUG_TMU_16BIT (1 << 19)
+#define V3D_DEBUG_NO_LOOP_UNROLL (1 << 20)
+#define V3D_DEBUG_CL_NO_BIN (1 << 21)
#define V3D_DEBUG_SHADERS (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \
V3D_DEBUG_VIR | V3D_DEBUG_QPU | \
@@ -81,11 +85,6 @@ extern uint32_t V3D_DEBUG;
#define dbg_printf(...) fprintf(stderr, __VA_ARGS__)
#endif /* HAVE_ANDROID_PLATFORM */
-#define DBG(flag, ...) do { \
- if (unlikely(V3D_DEBUG & (flag))) \
- dbg_printf(__VA_ARGS__); \
-} while(0)
-
extern uint32_t v3d_debug_flag_for_shader_stage(gl_shader_stage stage);
extern void v3d_process_debug_variable(void);
diff --git a/lib/mesa/src/broadcom/common/v3d_limits.h b/lib/mesa/src/broadcom/common/v3d_limits.h
index a974ebc58..129e53e29 100644
--- a/lib/mesa/src/broadcom/common/v3d_limits.h
+++ b/lib/mesa/src/broadcom/common/v3d_limits.h
@@ -62,4 +62,6 @@
#define V3D_MAX_POINT_SIZE 512.0f
#define V3D_MAX_LINE_WIDTH 32
+#define V3D_MAX_BUFFER_RANGE (1 << 27)
+
#endif /* V3D_LIMITS_H */
diff --git a/lib/mesa/src/broadcom/common/v3d_tiling.c b/lib/mesa/src/broadcom/common/v3d_tiling.c
new file mode 100644
index 000000000..22f84811e
--- /dev/null
+++ b/lib/mesa/src/broadcom/common/v3d_tiling.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright © 2014-2017 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file v3d_tiling.c
+ *
+ * Handles information about the V3D tiling formats, and loading and storing
+ * from them.
+ */
+
+#include <stdint.h>
+#include "v3d_tiling.h"
+#include "broadcom/common/v3d_cpu_tiling.h"
+
+/** Return the width in pixels of a 64-byte microtile. */
+uint32_t
+v3d_utile_width(int cpp)
+{
+ switch (cpp) {
+ case 1:
+ case 2:
+ return 8;
+ case 4:
+ case 8:
+ return 4;
+ case 16:
+ return 2;
+ default:
+ unreachable("unknown cpp");
+ }
+}
+
+/** Return the height in pixels of a 64-byte microtile. */
+uint32_t
+v3d_utile_height(int cpp)
+{
+ switch (cpp) {
+ case 1:
+ return 8;
+ case 2:
+ case 4:
+ return 4;
+ case 8:
+ case 16:
+ return 2;
+ default:
+ unreachable("unknown cpp");
+ }
+}
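+
+/* Illustrative sketch (not part of the original change): for cpp = 4
+ * (32bpp), a utile is v3d_utile_width(4) x v3d_utile_height(4) = 4x4
+ * pixels, i.e. 4 * 4 * 4 = 64 bytes; cpp = 1 gives 8x8x1 and cpp = 16
+ * gives 2x2x16, all 64 bytes.
+ */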
+
+/**
+ * Returns the byte address for a given pixel within a utile.
+ *
+ * Utiles are 64b blocks of pixels in raster order, with 32bpp being a 4x4
+ * arrangement.
+ */
+static inline uint32_t
+v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
+{
+ uint32_t utile_w = v3d_utile_width(cpp);
+
+ assert(x < utile_w && y < v3d_utile_height(cpp));
+
+ return x * cpp + y * utile_w * cpp;
+}
+
+/**
+ * Returns the byte offset for a given pixel in a LINEARTILE layout.
+ *
+ * LINEARTILE is a single line of utiles in either the X or Y direction.
+ */
+static inline uint32_t
+v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
+{
+ uint32_t utile_w = v3d_utile_width(cpp);
+ uint32_t utile_h = v3d_utile_height(cpp);
+ uint32_t utile_index_x = x / utile_w;
+ uint32_t utile_index_y = y / utile_h;
+
+ assert(utile_index_x == 0 || utile_index_y == 0);
+
+ return (64 * (utile_index_x + utile_index_y) +
+ v3d_get_utile_pixel_offset(cpp,
+ x & (utile_w - 1),
+ y & (utile_h - 1)));
+}
+
+/**
+ * Returns the byte offset for a given pixel in a UBLINEAR layout.
+ *
+ * UBLINEAR is the layout where pixels are arranged in UIF blocks (2x2
+ * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
+ */
+static inline uint32_t
+v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
+ int ublinear_number)
+{
+ uint32_t utile_w = v3d_utile_width(cpp);
+ uint32_t utile_h = v3d_utile_height(cpp);
+ uint32_t ub_w = utile_w * 2;
+ uint32_t ub_h = utile_h * 2;
+ uint32_t ub_x = x / ub_w;
+ uint32_t ub_y = y / ub_h;
+
+ return (256 * (ub_y * ublinear_number +
+ ub_x) +
+ ((x & utile_w) ? 64 : 0) +
+ ((y & utile_h) ? 128 : 0) +
+ v3d_get_utile_pixel_offset(cpp,
+ x & (utile_w - 1),
+ y & (utile_h - 1)));
+}
+
+static inline uint32_t
+v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+ uint32_t x, uint32_t y)
+{
+ return v3d_get_ublinear_pixel_offset(cpp, x, y, 2);
+}
+
+static inline uint32_t
+v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+ uint32_t x, uint32_t y)
+{
+ return v3d_get_ublinear_pixel_offset(cpp, x, y, 1);
+}
+
+/**
+ * Returns the byte offset for a given pixel in a UIF layout.
+ *
+ * UIF is the general V3D tiling layout shared across 3D, media, and scanout.
+ * It stores pixels in UIF blocks (2x2 utiles), and UIF blocks are stored in
+ * 4x4 groups, and those 4x4 groups are then stored in raster order.
+ */
+static inline uint32_t
+v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
+ bool do_xor)
+{
+ uint32_t utile_w = v3d_utile_width(cpp);
+ uint32_t utile_h = v3d_utile_height(cpp);
+ uint32_t mb_width = utile_w * 2;
+ uint32_t mb_height = utile_h * 2;
+ uint32_t log2_mb_width = ffs(mb_width) - 1;
+ uint32_t log2_mb_height = ffs(mb_height) - 1;
+
+ /* Macroblock x, y */
+ uint32_t mb_x = x >> log2_mb_width;
+ uint32_t mb_y = y >> log2_mb_height;
+ /* x, y within the macroblock */
+ uint32_t mb_pixel_x = x - (mb_x << log2_mb_width);
+ uint32_t mb_pixel_y = y - (mb_y << log2_mb_height);
+
+ if (do_xor && (mb_x / 4) & 1)
+ mb_y ^= 0x10;
+
+ uint32_t mb_h = align(image_h, 1 << log2_mb_height) >> log2_mb_height;
+ uint32_t mb_id = ((mb_x / 4) * ((mb_h - 1) * 4)) + mb_x + mb_y * 4;
+
+ uint32_t mb_base_addr = mb_id * 256;
+
+ bool top = mb_pixel_y < utile_h;
+ bool left = mb_pixel_x < utile_w;
+
+ /* Docs have this in pixels, we do bytes here. */
+ uint32_t mb_tile_offset = (!top * 128 + !left * 64);
+
+ uint32_t utile_x = mb_pixel_x & (utile_w - 1);
+ uint32_t utile_y = mb_pixel_y & (utile_h - 1);
+
+ uint32_t mb_pixel_address = (mb_base_addr +
+ mb_tile_offset +
+ v3d_get_utile_pixel_offset(cpp,
+ utile_x,
+ utile_y));
+
+ return mb_pixel_address;
+}
+
+static inline uint32_t
+v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
+ uint32_t x, uint32_t y)
+{
+ return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true);
+}
+
+static inline uint32_t
+v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
+ uint32_t x, uint32_t y)
+{
+ return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false);
+}
+
+/* Loads/stores non-utile-aligned boxes by walking over the destination
+ * rectangle, computing the address on the GPU, and storing/loading a pixel at
+ * a time.
+ */
+static inline void
+v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, uint32_t image_h,
+ const struct pipe_box *box,
+ uint32_t (*get_pixel_offset)(uint32_t cpp,
+ uint32_t image_h,
+ uint32_t x, uint32_t y),
+ bool is_load)
+{
+ for (uint32_t y = 0; y < box->height; y++) {
+ void *cpu_row = cpu + y * cpu_stride;
+
+ for (int x = 0; x < box->width; x++) {
+ uint32_t pixel_offset = get_pixel_offset(cpp, image_h,
+ box->x + x,
+ box->y + y);
+
+ if (false) {
+ fprintf(stderr, "%3d,%3d -> %d\n",
+ box->x + x, box->y + y,
+ pixel_offset);
+ }
+
+ if (is_load) {
+ memcpy(cpu_row + x * cpp,
+ gpu + pixel_offset,
+ cpp);
+ } else {
+ memcpy(gpu + pixel_offset,
+ cpu_row + x * cpp,
+ cpp);
+ }
+ }
+ }
+}
+
+/* Breaks the image down into utiles and calls either the fast whole-utile
+ * load/store functions, or the unaligned fallback case.
+ */
+static inline void
+v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, uint32_t image_h,
+ const struct pipe_box *box,
+ uint32_t (*get_pixel_offset)(uint32_t cpp,
+ uint32_t image_h,
+ uint32_t x, uint32_t y),
+ bool is_load)
+{
+ uint32_t utile_w = v3d_utile_width(cpp);
+ uint32_t utile_h = v3d_utile_height(cpp);
+ uint32_t utile_gpu_stride = utile_w * cpp;
+ uint32_t x1 = box->x;
+ uint32_t y1 = box->y;
+ uint32_t x2 = box->x + box->width;
+ uint32_t y2 = box->y + box->height;
+ uint32_t align_x1 = align(x1, utile_w);
+ uint32_t align_y1 = align(y1, utile_h);
+ uint32_t align_x2 = x2 & ~(utile_w - 1);
+ uint32_t align_y2 = y2 & ~(utile_h - 1);
+
+ /* Load/store all the whole utiles first. */
+ for (uint32_t y = align_y1; y < align_y2; y += utile_h) {
+ void *cpu_row = cpu + (y - box->y) * cpu_stride;
+
+ for (uint32_t x = align_x1; x < align_x2; x += utile_w) {
+ void *utile_gpu = (gpu +
+ get_pixel_offset(cpp, image_h, x, y));
+ void *utile_cpu = cpu_row + (x - box->x) * cpp;
+
+ if (is_load) {
+ v3d_load_utile(utile_cpu, cpu_stride,
+ utile_gpu, utile_gpu_stride);
+ } else {
+ v3d_store_utile(utile_gpu, utile_gpu_stride,
+ utile_cpu, cpu_stride);
+ }
+ }
+ }
+
+ /* If there were no aligned utiles in the middle, load/store the whole
+ * thing unaligned.
+ */
+ if (align_y2 <= align_y1 ||
+ align_x2 <= align_x1) {
+ v3d_move_pixels_unaligned(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h,
+ box,
+ get_pixel_offset, is_load);
+ return;
+ }
+
+ /* Load/store the partial utiles. */
+ struct pipe_box partial_boxes[4] = {
+ /* Top */
+ {
+ .x = x1,
+ .width = x2 - x1,
+ .y = y1,
+ .height = align_y1 - y1,
+ },
+ /* Bottom */
+ {
+ .x = x1,
+ .width = x2 - x1,
+ .y = align_y2,
+ .height = y2 - align_y2,
+ },
+ /* Left */
+ {
+ .x = x1,
+ .width = align_x1 - x1,
+ .y = align_y1,
+ .height = align_y2 - align_y1,
+ },
+ /* Right */
+ {
+ .x = align_x2,
+ .width = x2 - align_x2,
+ .y = align_y1,
+ .height = align_y2 - align_y1,
+ },
+ };
+ for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) {
+ void *partial_cpu = (cpu +
+ (partial_boxes[i].y - y1) * cpu_stride +
+ (partial_boxes[i].x - x1) * cpp);
+
+ v3d_move_pixels_unaligned(gpu, gpu_stride,
+ partial_cpu, cpu_stride,
+ cpp, image_h,
+ &partial_boxes[i],
+ get_pixel_offset, is_load);
+ }
+}
+
+static inline void
+v3d_move_pixels_general(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ int cpp, uint32_t image_h,
+ const struct pipe_box *box,
+ uint32_t (*get_pixel_offset)(uint32_t cpp,
+ uint32_t image_h,
+ uint32_t x, uint32_t y),
+ bool is_load)
+{
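+ /* Each case below passes cpp as a literal so the inlined
+ * v3d_move_pixels_general_percpp() can constant-fold the utile math for
+ * that pixel size (presumably the reason for the per-cpp dispatch).
+ */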
+ switch (cpp) {
+ case 1:
+ v3d_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 1, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 2:
+ v3d_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 2, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 4:
+ v3d_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 4, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 8:
+ v3d_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 8, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ case 16:
+ v3d_move_pixels_general_percpp(gpu, gpu_stride,
+ cpu, cpu_stride,
+ 16, image_h, box,
+ get_pixel_offset,
+ is_load);
+ break;
+ }
+}
+
+static inline void
+v3d_move_tiled_image(void *gpu, uint32_t gpu_stride,
+ void *cpu, uint32_t cpu_stride,
+ enum v3d_tiling_mode tiling_format,
+ int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box,
+ bool is_load)
+{
+ switch (tiling_format) {
+ case V3D_TILING_UIF_XOR:
+ v3d_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ v3d_get_uif_xor_pixel_offset,
+ is_load);
+ break;
+ case V3D_TILING_UIF_NO_XOR:
+ v3d_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ v3d_get_uif_no_xor_pixel_offset,
+ is_load);
+ break;
+ case V3D_TILING_UBLINEAR_2_COLUMN:
+ v3d_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ v3d_get_ublinear_2_column_pixel_offset,
+ is_load);
+ break;
+ case V3D_TILING_UBLINEAR_1_COLUMN:
+ v3d_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ v3d_get_ublinear_1_column_pixel_offset,
+ is_load);
+ break;
+ case V3D_TILING_LINEARTILE:
+ v3d_move_pixels_general(gpu, gpu_stride,
+ cpu, cpu_stride,
+ cpp, image_h, box,
+ v3d_get_lt_pixel_offset,
+ is_load);
+ break;
+ default:
+ unreachable("Unsupported tiling format");
+ break;
+ }
+}
+
+/**
+ * Loads pixel data from the start (microtile-aligned) box in \p src to the
+ * start of \p dst according to the given tiling format.
+ */
+void
+v3d_load_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum v3d_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ v3d_move_tiled_image(src, src_stride,
+ dst, dst_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ true);
+}
+
+/**
+ * Stores pixel data from the start of \p src into a (microtile-aligned) box in
+ * \p dst according to the given tiling format.
+ */
+void
+v3d_store_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum v3d_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box)
+{
+ v3d_move_tiled_image(dst, dst_stride,
+ src, src_stride,
+ tiling_format,
+ cpp,
+ image_h,
+ box,
+ false);
+}
diff --git a/lib/mesa/src/broadcom/common/v3d_tiling.h b/lib/mesa/src/broadcom/common/v3d_tiling.h
new file mode 100644
index 000000000..08ae7cce8
--- /dev/null
+++ b/lib/mesa/src/broadcom/common/v3d_tiling.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef V3D_TILING_H
+#define V3D_TILING_H
+
+#include "util/u_box.h"
+
+/* A UIFblock is a 256-byte region of memory that's 256-byte aligned. These
+ * will be grouped in 4x4 blocks (left-to-right, then top-to-bottom) in a 4KB
+ * page. Those pages are then arranged left-to-right, top-to-bottom, to cover
+ * an image.
+ *
+ * The inside of a UIFblock, for packed pixels, will be split into 4 64-byte
+ * utiles. Utiles may be 8x8 (8bpp), 8x4 (16bpp), or 4x4 (32bpp).
+ */
+
+/**
+ * Tiling mode enum used for v3d_resource.c, which maps directly to the Memory
+ * Format field of render target and Z/Stencil config.
+ */
+enum v3d_tiling_mode {
+ /* Untiled resources. Not valid as texture inputs. */
+ V3D_TILING_RASTER,
+
+ /* Single line of u-tiles. */
+ V3D_TILING_LINEARTILE,
+
+ /* Departure from the standard 4-UIF block column format: a single
+ * column of UIF blocks.
+ */
+ V3D_TILING_UBLINEAR_1_COLUMN,
+
+ /* Departure from the standard 4-UIF block column format: two columns
+ * of UIF blocks.
+ */
+ V3D_TILING_UBLINEAR_2_COLUMN,
+
+ /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
+ * split 2x2 into utiles.
+ */
+ V3D_TILING_UIF_NO_XOR,
+
+ /* Same as V3D_TILING_UIF_NO_XOR, but with an XOR address swizzle
+ * applied to UIF blocks in odd 4-block columns.
+ */
+ V3D_TILING_UIF_XOR,
+};
+
+uint32_t v3d_utile_width(int cpp) ATTRIBUTE_CONST;
+uint32_t v3d_utile_height(int cpp) ATTRIBUTE_CONST;
+bool v3d_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
+void v3d_load_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum v3d_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box);
+void v3d_store_tiled_image(void *dst, uint32_t dst_stride,
+ void *src, uint32_t src_stride,
+ enum v3d_tiling_mode tiling_format, int cpp,
+ uint32_t image_h,
+ const struct pipe_box *box);
+
+#endif /* V3D_TILING_H */
diff --git a/lib/mesa/src/broadcom/common/v3d_util.c b/lib/mesa/src/broadcom/common/v3d_util.c
new file mode 100644
index 000000000..424656fd8
--- /dev/null
+++ b/lib/mesa/src/broadcom/common/v3d_util.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3d_util.h"
+#include "util/macros.h"
+
+/* Choose a number of workgroups per supergroup that maximizes
+ * lane occupancy. We can pack up to 16 workgroups into a supergroup.
+ */
+uint32_t
+v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
+ bool has_subgroups,
+ bool has_tsy_barrier,
+ uint32_t threads,
+ uint32_t num_wgs,
+ uint32_t wg_size)
+{
+ /* FIXME: subgroups may restrict supergroup packing. For now, we disable it
+ * completely if the shader uses subgroups.
+ */
+ if (has_subgroups)
+ return 1;
+
+ /* Compute maximum number of batches in a supergroup for this workgroup size.
+ * Each batch is 16 elements, and we can have up to 16 work groups in a
+ * supergroup:
+ *
+ * max_batches_per_sg = (wg_size * max_wgs_per_sg) / elements_per_batch
+ * since max_wgs_per_sg = 16 and elements_per_batch = 16, we get:
+ * max_batches_per_sg = wg_size
+ */
+ uint32_t max_batches_per_sg = wg_size;
+
+ /* QPU threads will stall at TSY barriers until the entire supergroup
+ * reaches the barrier. Limit the supergroup size to half the QPU threads
+ * available, so we can have at least 2 supergroups executing in parallel
+ * and we don't stall all our QPU threads when a supergroup hits a barrier.
+ */
+ if (has_tsy_barrier) {
+ uint32_t max_qpu_threads = devinfo->qpu_count * threads;
+ max_batches_per_sg = MIN2(max_batches_per_sg, max_qpu_threads / 2);
+ }
+ uint32_t max_wgs_per_sg = max_batches_per_sg * 16 / wg_size;
+
+ uint32_t best_wgs_per_sg = 1;
+ uint32_t best_unused_lanes = 16;
+ for (uint32_t wgs_per_sg = 1; wgs_per_sg <= max_wgs_per_sg; wgs_per_sg++) {
+ /* Don't try to pack more workgroups per supergroup than the total amount
+ * of workgroups dispatched.
+ */
+ if (wgs_per_sg > num_wgs)
+ return best_wgs_per_sg;
+
+ /* Compute wasted lanes for this configuration and keep track of the
+ * config with the least waste.
+ */
+ uint32_t unused_lanes = (16 - ((wgs_per_sg * wg_size) % 16)) & 0x0f;
+ if (unused_lanes == 0)
+ return wgs_per_sg;
+
+ if (unused_lanes < best_unused_lanes) {
+ best_wgs_per_sg = wgs_per_sg;
+ best_unused_lanes = unused_lanes;
+ }
+ }
+
+ return best_wgs_per_sg;
+}
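+
+/* Worked example (sketch): with wg_size = 24 and no subgroups or TSY
+ * barriers, wgs_per_sg = 1 wastes 8 lanes (24 % 16), while wgs_per_sg = 2
+ * packs 48 invocations into exactly three full 16-lane batches, so the
+ * loop above returns 2 (assuming num_wgs >= 2).
+ */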
diff --git a/lib/mesa/src/broadcom/common/v3d_util.h b/lib/mesa/src/broadcom/common/v3d_util.h
new file mode 100644
index 000000000..b9804f235
--- /dev/null
+++ b/lib/mesa/src/broadcom/common/v3d_util.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef V3D_UTIL_H
+#define V3D_UTIL_H
+
+#include "common/v3d_device_info.h"
+
+uint32_t
+v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
+ bool has_subgroups,
+ bool has_tsy_barrier,
+ uint32_t threads,
+ uint32_t num_wgs,
+ uint32_t wg_size);
+
+#endif
diff --git a/lib/mesa/src/broadcom/compiler/nir_to_vir.c b/lib/mesa/src/broadcom/compiler/nir_to_vir.c
index c70d12881..d0a89f1a7 100644
--- a/lib/mesa/src/broadcom/compiler/nir_to_vir.c
+++ b/lib/mesa/src/broadcom/compiler/nir_to_vir.c
@@ -68,6 +68,39 @@
#define V3D_TSY_DEC_SEMAPHORE 14
#define V3D_TSY_SET_QUORUM_FREE_ALL 15
+enum v3d_tmu_op_type
+{
+ V3D_TMU_OP_TYPE_REGULAR,
+ V3D_TMU_OP_TYPE_ATOMIC,
+ V3D_TMU_OP_TYPE_CACHE
+};
+
+static enum v3d_tmu_op_type
+v3d_tmu_get_type_from_op(uint32_t tmu_op, bool is_write)
+{
+ switch (tmu_op) {
+ case V3D_TMU_OP_WRITE_ADD_READ_PREFETCH:
+ case V3D_TMU_OP_WRITE_SUB_READ_CLEAR:
+ case V3D_TMU_OP_WRITE_XCHG_READ_FLUSH:
+ case V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH:
+ case V3D_TMU_OP_WRITE_UMIN_FULL_L1_CLEAR:
+ return is_write ? V3D_TMU_OP_TYPE_ATOMIC : V3D_TMU_OP_TYPE_CACHE;
+ case V3D_TMU_OP_WRITE_UMAX:
+ case V3D_TMU_OP_WRITE_SMIN:
+ case V3D_TMU_OP_WRITE_SMAX:
+ assert(is_write);
+ FALLTHROUGH;
+ case V3D_TMU_OP_WRITE_AND_READ_INC:
+ case V3D_TMU_OP_WRITE_OR_READ_DEC:
+ case V3D_TMU_OP_WRITE_XOR_READ_NOT:
+ return V3D_TMU_OP_TYPE_ATOMIC;
+ case V3D_TMU_OP_REGULAR:
+ return V3D_TMU_OP_TYPE_REGULAR;
+
+ default:
+ unreachable("Unknown tmu_op\n");
+ }
+}
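+
+/* For example, v3d_tmu_get_type_from_op(V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH,
+ * true) classifies a CMPXCHG write as V3D_TMU_OP_TYPE_ATOMIC, while the same
+ * op with is_write = false is a cache operation (V3D_TMU_OP_TYPE_CACHE).
+ */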
static void
ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
@@ -282,6 +315,8 @@ ntq_add_pending_tmu_flush(struct v3d_compile *c,
if (c->disable_tmu_pipelining)
ntq_flush_tmu(c);
+ else if (c->tmu.flush_count > 1)
+ c->pipelined_any_tmu = true;
}
enum emit_mode {
@@ -565,11 +600,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
&tmu_writes);
}
- /* The spec says that for atomics, the TYPE field is
- * ignored, but that doesn't seem to be the case for
- * CMPXCHG. Just use the number of tmud writes we did
- * to decide the type (or choose "32bit" for atomic
- * reads, which has been fine).
+ /* For atomics we use 32bit except for CMPXCHG, that we need
+ * to use VEC2. For the rest of the cases we use the number of
+ * tmud writes we did to decide the type. For cache operations
+ * the type is ignored.
*/
uint32_t config = 0;
if (mode == MODE_EMIT) {
@@ -580,6 +614,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
assert(tmu_writes > 0);
num_components = tmu_writes - 1;
}
+ bool is_atomic =
+ v3d_tmu_get_type_from_op(tmu_op, !is_load) ==
+ V3D_TMU_OP_TYPE_ATOMIC;
uint32_t perquad =
is_load && !vir_in_nonuniform_control_flow(c)
@@ -587,7 +624,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
: GENERAL_TMU_LOOKUP_PER_PIXEL;
config = 0xffffff00 | tmu_op << 3 | perquad;
- if (num_components == 1) {
+ if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
+ config |= GENERAL_TMU_LOOKUP_TYPE_VEC2;
+ } else if (is_atomic || num_components == 1) {
config |= GENERAL_TMU_LOOKUP_TYPE_32BIT_UI;
} else {
config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 +
@@ -1191,6 +1230,18 @@ out:
return V3D_QPU_COND_IFNA;
}
+static struct qreg
+ntq_emit_cond_to_bool(struct v3d_compile *c, enum v3d_qpu_cond cond)
+{
+ struct qreg result =
+ vir_MOV(c, vir_SEL(c, cond,
+ vir_uniform_ui(c, ~0),
+ vir_uniform_ui(c, 0)));
+ c->flags_temp = result.index;
+ c->flags_cond = cond;
+ return result;
+}
+
static void
ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
{
@@ -1354,11 +1405,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
enum v3d_qpu_cond cond;
ASSERTED bool ok = ntq_emit_comparison(c, instr, &cond);
assert(ok);
- result = vir_MOV(c, vir_SEL(c, cond,
- vir_uniform_ui(c, ~0),
- vir_uniform_ui(c, 0)));
- c->flags_temp = result.index;
- c->flags_cond = cond;
+ result = ntq_emit_cond_to_bool(c, cond);
break;
}
@@ -1438,11 +1485,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
case nir_op_uadd_carry:
vir_set_pf(c, vir_ADD_dest(c, vir_nop_reg(), src[0], src[1]),
V3D_QPU_PF_PUSHC);
- result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
- vir_uniform_ui(c, ~0),
- vir_uniform_ui(c, 0)));
- c->flags_temp = result.index;
- c->flags_cond = V3D_QPU_COND_IFA;
+ result = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA);
break;
case nir_op_pack_half_2x16_split:
@@ -1627,6 +1670,15 @@ vir_emit_tlb_color_write(struct v3d_compile *c, unsigned rt)
static void
emit_frag_end(struct v3d_compile *c)
{
+ /* If the shader has no non-TLB side effects and doesn't write Z
+ * we can promote it to enabling early_fragment_tests even
+ * if the user didn't.
+ */
+ if (c->output_position_index == -1 &&
+ !(c->s->info.num_images || c->s->info.num_ssbos)) {
+ c->s->info.fs.early_fragment_tests = true;
+ }
+
if (c->output_sample_mask_index != -1) {
vir_SETMSF_dest(c, vir_nop_reg(),
vir_AND(c,
@@ -1651,7 +1703,8 @@ emit_frag_end(struct v3d_compile *c)
}
struct qreg tlbu_reg = vir_magic_reg(V3D_QPU_WADDR_TLBU);
- if (c->output_position_index != -1) {
+ if (c->output_position_index != -1 &&
+ !c->s->info.fs.early_fragment_tests) {
struct qinst *inst = vir_MOV_dest(c, tlbu_reg,
c->outputs[c->output_position_index]);
uint8_t tlb_specifier = TLB_TYPE_DEPTH;
@@ -1711,17 +1764,22 @@ emit_frag_end(struct v3d_compile *c)
static inline void
vir_VPM_WRITE_indirect(struct v3d_compile *c,
struct qreg val,
- struct qreg vpm_index)
+ struct qreg vpm_index,
+ bool uniform_vpm_index)
{
assert(c->devinfo->ver >= 40);
- vir_STVPMV(c, vpm_index, val);
+ if (uniform_vpm_index)
+ vir_STVPMV(c, vpm_index, val);
+ else
+ vir_STVPMD(c, vpm_index, val);
}
static void
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index)
{
if (c->devinfo->ver >= 40) {
- vir_VPM_WRITE_indirect(c, val, vir_uniform_ui(c, vpm_index));
+ vir_VPM_WRITE_indirect(c, val,
+ vir_uniform_ui(c, vpm_index), true);
} else {
/* XXX: v3d33_vir_vpm_write_setup(c); */
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
@@ -1774,7 +1832,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
}
void
-v3d_optimize_nir(struct nir_shader *s)
+v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
{
bool progress;
unsigned lower_flrp =
@@ -1787,7 +1845,7 @@ v3d_optimize_nir(struct nir_shader *s)
NIR_PASS_V(s, nir_lower_vars_to_ssa);
NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
- NIR_PASS(progress, s, nir_lower_phis_to_scalar);
+ NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
NIR_PASS(progress, s, nir_copy_prop);
NIR_PASS(progress, s, nir_opt_remove_phis);
NIR_PASS(progress, s, nir_opt_dce);
@@ -1825,6 +1883,14 @@ v3d_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, nir_lower_undef_to_zero);
+
+ if (c && !c->disable_loop_unrolling &&
+ s->options->max_unroll_iterations > 0) {
+ bool local_progress = false;
+ NIR_PASS(local_progress, s, nir_opt_loop_unroll);
+ c->unrolled_any_loops |= local_progress;
+ progress |= local_progress;
+ }
} while (progress);
nir_move_options sink_opts =
@@ -1836,15 +1902,11 @@ v3d_optimize_nir(struct nir_shader *s)
}
static int
-driver_location_compare(const void *in_a, const void *in_b)
+driver_location_compare(const nir_variable *a, const nir_variable *b)
{
- const nir_variable *const *a = in_a;
- const nir_variable *const *b = in_b;
-
- if ((*a)->data.driver_location == (*b)->data.driver_location)
- return (*a)->data.location_frac - (*b)->data.location_frac;
-
- return (*a)->data.driver_location - (*b)->data.driver_location;
+ return a->data.driver_location == b->data.driver_location ?
+ a->data.location_frac - b->data.location_frac :
+ a->data.driver_location - b->data.driver_location;
}
static struct qreg
@@ -1984,49 +2046,36 @@ program_reads_point_coord(struct v3d_compile *c)
}
static void
-get_sorted_input_variables(struct v3d_compile *c,
- unsigned *num_entries,
- nir_variable ***vars)
-{
- *num_entries = 0;
- nir_foreach_shader_in_variable(var, c->s)
- (*num_entries)++;
-
- *vars = ralloc_array(c, nir_variable *, *num_entries);
-
- unsigned i = 0;
- nir_foreach_shader_in_variable(var, c->s)
- (*vars)[i++] = var;
-
- /* Sort the variables so that we emit the input setup in
- * driver_location order. This is required for VPM reads, whose data
- * is fetched into the VPM in driver_location (TGSI register index)
- * order.
- */
- qsort(*vars, *num_entries, sizeof(**vars), driver_location_compare);
-}
-
-static void
ntq_setup_gs_inputs(struct v3d_compile *c)
{
- nir_variable **vars;
- unsigned num_entries;
- get_sorted_input_variables(c, &num_entries, &vars);
-
- for (unsigned i = 0; i < num_entries; i++) {
- nir_variable *var = vars[i];
+ nir_sort_variables_with_modes(c->s, driver_location_compare,
+ nir_var_shader_in);
+ nir_foreach_shader_in_variable(var, c->s) {
/* All GS inputs are arrays with as many entries as vertices
* in the input primitive, but here we only care about the
* per-vertex input type.
*/
- const struct glsl_type *type = glsl_without_array(var->type);
+ assert(glsl_type_is_array(var->type));
+ const struct glsl_type *type = glsl_get_array_element(var->type);
unsigned array_len = MAX2(glsl_get_length(type), 1);
unsigned loc = var->data.driver_location;
resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
(loc + array_len) * 4);
+ if (var->data.compact) {
+ for (unsigned j = 0; j < array_len; j++) {
+ unsigned input_idx = c->num_inputs++;
+ unsigned loc_frac = var->data.location_frac + j;
+ unsigned loc = var->data.location + loc_frac / 4;
+ unsigned comp = loc_frac % 4;
+ c->input_slots[input_idx] =
+ v3d_slot_from_slot_and_component(loc, comp);
+ }
+ continue;
+ }
+
for (unsigned j = 0; j < array_len; j++) {
unsigned num_elements = glsl_get_vector_elements(type);
for (unsigned k = 0; k < num_elements; k++) {
@@ -2044,12 +2093,10 @@ ntq_setup_gs_inputs(struct v3d_compile *c)
static void
ntq_setup_fs_inputs(struct v3d_compile *c)
{
- nir_variable **vars;
- unsigned num_entries;
- get_sorted_input_variables(c, &num_entries, &vars);
+ nir_sort_variables_with_modes(c->s, driver_location_compare,
+ nir_var_shader_in);
- for (unsigned i = 0; i < num_entries; i++) {
- nir_variable *var = vars[i];
+ nir_foreach_shader_in_variable(var, c->s) {
unsigned var_len = glsl_count_vec4_slots(var->type, false, false);
unsigned loc = var->data.driver_location;
@@ -2062,6 +2109,14 @@ ntq_setup_fs_inputs(struct v3d_compile *c)
if (var->data.location == VARYING_SLOT_POS) {
emit_fragcoord_input(c, loc);
+ } else if (var->data.location == VARYING_SLOT_PRIMITIVE_ID &&
+ !c->fs_key->has_gs) {
+ /* If the fragment shader reads gl_PrimitiveID and we
+ * don't have a geometry shader in the pipeline to write
+ * it then we program the hardware to inject it as
+ * an implicit varying. Take it from there.
+ */
+ c->inputs[loc * 4] = c->primitive_id;
} else if (util_varying_is_point_coord(var->data.location,
c->fs_key->point_sprite_mask)) {
c->inputs[loc * 4 + 0] = c->point_x;
@@ -2342,8 +2397,16 @@ ntq_emit_load_uniform(struct v3d_compile *c, nir_intrinsic_instr *instr)
static void
ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
- /* XXX: Use ldvpmv (uniform offset) or ldvpmd (non-uniform offset)
- * and enable PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR.
+ /* XXX: Use ldvpmv (uniform offset) or ldvpmd (non-uniform offset).
+ *
+ * Right now the driver sets PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR even
+ * if we don't support non-uniform offsets because we also set the
+ * lower_all_io_to_temps option in the NIR compiler. This ensures that
+ * any indirect indexing on in/out variables is turned into indirect
+ * indexing on temporary variables instead, that we handle by lowering
+ * to scratch. If we implement non-uniform offset here we might be able
+ * to avoid the temp and scratch lowering, which involves copying from
+ * the input to the temp variable, possibly making code more optimal.
*/
unsigned offset =
nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0]);
@@ -2448,10 +2511,10 @@ emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
* different offsets in the VPM and we need to use the scatter write
* instruction to have a different offset for each lane.
*/
- if (nir_src_is_dynamically_uniform(instr->src[1]))
- vir_VPM_WRITE_indirect(c, val, offset);
- else
- vir_STVPMD(c, offset, val);
+ bool is_uniform_offset =
+ !vir_in_nonuniform_control_flow(c) &&
+ !nir_src_is_divergent(instr->src[1]);
+ vir_VPM_WRITE_indirect(c, val, offset, is_uniform_offset);
if (vir_in_nonuniform_control_flow(c)) {
struct qinst *last_inst =
@@ -2461,33 +2524,37 @@ emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
}
static void
+emit_store_output_vs(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ assert(c->s->info.stage == MESA_SHADER_VERTEX);
+ assert(instr->num_components == 1);
+
+ uint32_t base = nir_intrinsic_base(instr);
+ struct qreg val = ntq_get_src(c, instr->src[0], 0);
+
+ if (nir_src_is_const(instr->src[1])) {
+ vir_VPM_WRITE(c, val,
+ base + nir_src_as_uint(instr->src[1]));
+ } else {
+ struct qreg offset = vir_ADD(c,
+ ntq_get_src(c, instr->src[1], 1),
+ vir_uniform_ui(c, base));
+ bool is_uniform_offset =
+ !vir_in_nonuniform_control_flow(c) &&
+ !nir_src_is_divergent(instr->src[1]);
+ vir_VPM_WRITE_indirect(c, val, offset, is_uniform_offset);
+ }
+}
+
+static void
ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
- /* XXX perf: Use stvpmv with uniform non-constant offsets and
- * stvpmd with non-uniform offsets and enable
- * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
- */
- if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT)
ntq_emit_color_write(c, instr);
- } else if (c->s->info.stage == MESA_SHADER_GEOMETRY) {
+ else if (c->s->info.stage == MESA_SHADER_GEOMETRY)
emit_store_output_gs(c, instr);
- } else {
- assert(c->s->info.stage == MESA_SHADER_VERTEX);
- assert(instr->num_components == 1);
-
- uint32_t base = nir_intrinsic_base(instr);
- if (nir_src_is_const(instr->src[1])) {
- vir_VPM_WRITE(c,
- ntq_get_src(c, instr->src[0], 0),
- base + nir_src_as_uint(instr->src[1]));
- } else {
- vir_VPM_WRITE_indirect(c,
- ntq_get_src(c, instr->src[0], 0),
- vir_ADD(c,
- ntq_get_src(c, instr->src[1], 1),
- vir_uniform_ui(c, base)));
- }
- }
+ else
+ emit_store_output_vs(c, instr);
}
/**
@@ -2707,6 +2774,41 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
}
}
+static inline struct qreg
+emit_load_local_invocation_index(struct v3d_compile *c)
+{
+ return vir_SHR(c, c->cs_payload[1],
+ vir_uniform_ui(c, 32 - c->local_invocation_index_bits));
+}
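+
+/* Payload layout assumed above (sketch): the local invocation index lives
+ * in the top local_invocation_index_bits of cs_payload[1], so a right
+ * shift by (32 - bits) extracts it.
+ */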
+
+/* Various subgroup operations rely on the A flags, so this helper ensures
+ * that the A flags represent the currently active lanes in the subgroup.
+ */
+static void
+set_a_flags_for_subgroup(struct v3d_compile *c)
+{
+ /* MSF returns 0 for disabled lanes in compute shaders so
+ * PUSHZ will set A=1 for disabled lanes. We want the inverse
+ * of this but we don't have any means to negate the A flags
+ * directly, but we can do it by repeating the same operation
+ * with NORZ (A = ~A & ~Z).
+ */
+ assert(c->s->info.stage == MESA_SHADER_COMPUTE);
+ vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
+ vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ);
+
+ /* If we are under non-uniform control flow we also need to
+ * AND the A flags with the current execute mask.
+ */
+ if (vir_in_nonuniform_control_flow(c)) {
+ const uint32_t bidx = c->cur_block->index;
+ vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(),
+ c->execute,
+ vir_uniform_ui(c, bidx)),
+ V3D_QPU_UF_ANDZ);
+ }
+}
+
static void
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
@@ -2772,7 +2874,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_get_ssbo_size:
ntq_store_dest(c, &instr->dest, 0,
vir_uniform(c, QUNIFORM_GET_SSBO_SIZE,
- nir_src_as_uint(instr->src[0])));
+ nir_src_comp_as_uint(instr->src[0], 0)));
break;
case nir_intrinsic_get_ubo_size:
@@ -2830,11 +2932,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_load_helper_invocation:
vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
- struct qreg qdest = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
- vir_uniform_ui(c, ~0),
- vir_uniform_ui(c, 0)));
- c->flags_temp = qdest.index;
- c->flags_cond = V3D_QPU_COND_IFA;
+ struct qreg qdest = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA);
ntq_store_dest(c, &instr->dest, 0, qdest);
break;
@@ -2960,7 +3058,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_emit_thrsw(c);
break;
- case nir_intrinsic_load_num_work_groups:
+ case nir_intrinsic_load_num_workgroups:
for (int i = 0; i < 3; i++) {
ntq_store_dest(c, &instr->dest, i,
vir_uniform(c, QUNIFORM_NUM_WORK_GROUPS,
@@ -2968,27 +3066,49 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
}
break;
- case nir_intrinsic_load_local_invocation_index:
- ntq_store_dest(c, &instr->dest, 0,
- vir_SHR(c, c->cs_payload[1],
- vir_uniform_ui(c, 32 - c->local_invocation_index_bits)));
+ case nir_intrinsic_load_workgroup_id: {
+ struct qreg x = vir_AND(c, c->cs_payload[0],
+ vir_uniform_ui(c, 0xffff));
+
+ struct qreg y = vir_SHR(c, c->cs_payload[0],
+ vir_uniform_ui(c, 16));
+
+ struct qreg z = vir_AND(c, c->cs_payload[1],
+ vir_uniform_ui(c, 0xffff));
+
+ /* We only support dispatch base in Vulkan */
+ if (c->key->environment == V3D_ENVIRONMENT_VULKAN) {
+ x = vir_ADD(c, x,
+ vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 0));
+ y = vir_ADD(c, y,
+ vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 1));
+ z = vir_ADD(c, z,
+ vir_uniform(c, QUNIFORM_WORK_GROUP_BASE, 2));
+ }
+
+ ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, x));
+ ntq_store_dest(c, &instr->dest, 1, vir_MOV(c, y));
+ ntq_store_dest(c, &instr->dest, 2, vir_MOV(c, z));
break;
+ }
- case nir_intrinsic_load_work_group_id:
+ case nir_intrinsic_load_local_invocation_index:
ntq_store_dest(c, &instr->dest, 0,
- vir_AND(c, c->cs_payload[0],
- vir_uniform_ui(c, 0xffff)));
- ntq_store_dest(c, &instr->dest, 1,
- vir_SHR(c, c->cs_payload[0],
- vir_uniform_ui(c, 16)));
- ntq_store_dest(c, &instr->dest, 2,
- vir_AND(c, c->cs_payload[1],
- vir_uniform_ui(c, 0xffff)));
+ emit_load_local_invocation_index(c));
break;
- case nir_intrinsic_load_subgroup_id:
- ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
+ case nir_intrinsic_load_subgroup_id: {
+ /* This is basically the batch index, which is the Local
+ * Invocation Index divided by the SIMD width.
+ */
+ STATIC_ASSERT(util_is_power_of_two_nonzero(V3D_CHANNELS));
+ const uint32_t divide_shift = ffs(V3D_CHANNELS) - 1;
+ struct qreg lii = emit_load_local_invocation_index(c);
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_SHR(c, lii,
+ vir_uniform_ui(c, divide_shift)));
break;
+ }
case nir_intrinsic_load_per_vertex_input: {
/* The vertex shader writes all its used outputs into
@@ -3002,11 +3122,17 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
*
* col: vertex index, row = varying index
*/
+ assert(nir_src_is_const(instr->src[1]));
+ uint32_t location =
+ nir_intrinsic_io_semantics(instr).location +
+ nir_src_as_uint(instr->src[1]);
+ uint32_t component = nir_intrinsic_component(instr);
+
int32_t row_idx = -1;
for (int i = 0; i < c->num_inputs; i++) {
struct v3d_varying_slot slot = c->input_slots[i];
- if (v3d_slot_get_slot(slot) == nir_intrinsic_io_semantics(instr).location &&
- v3d_slot_get_component(slot) == nir_intrinsic_component(instr)) {
+ if (v3d_slot_get_slot(slot) == location &&
+ v3d_slot_get_component(slot) == component) {
row_idx = i;
break;
}
@@ -3033,6 +3159,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
* VPM output header. According to docs, we should read this
* using ldvpm(v,d)_in (See Table 71).
*/
+ assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
ntq_store_dest(c, &instr->dest, 0,
vir_LDVPMV_IN(c, vir_uniform_ui(c, 0)));
break;
@@ -3146,6 +3273,37 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break;
}
+ case nir_intrinsic_load_subgroup_size:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform_ui(c, V3D_CHANNELS));
+ break;
+
+ case nir_intrinsic_load_subgroup_invocation:
+ ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
+ break;
+
+ case nir_intrinsic_elect: {
+ set_a_flags_for_subgroup(c);
+ struct qreg first = vir_FLAFIRST(c);
+
+ /* Produce a boolean result from FLAFIRST */
+ vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(),
+ first, vir_uniform_ui(c, 1)),
+ V3D_QPU_PF_PUSHZ);
+ struct qreg result = ntq_emit_cond_to_bool(c, V3D_QPU_COND_IFA);
+ ntq_store_dest(c, &instr->dest, 0, result);
+ break;
+ }
+
+ case nir_intrinsic_load_num_subgroups:
+ unreachable("Should have been lowered");
+ break;
+
+ case nir_intrinsic_load_view_index:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_VIEW_INDEX, 0));
+ break;
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
@@ -3632,9 +3790,15 @@ nir_to_vir(struct v3d_compile *c)
c->payload_w_centroid = vir_MOV(c, vir_reg(QFILE_REG, 1));
c->payload_z = vir_MOV(c, vir_reg(QFILE_REG, 2));
- /* V3D 4.x can disable implicit point coordinate varyings if
- * they are not used.
- */
+ /* V3D 4.x can disable implicit varyings if they are not used */
+ c->fs_uses_primitive_id =
+ nir_find_variable_with_location(c->s, nir_var_shader_in,
+ VARYING_SLOT_PRIMITIVE_ID);
+ if (c->fs_uses_primitive_id && !c->fs_key->has_gs) {
+ c->primitive_id =
+ emit_fragment_varying(c, NULL, -1, 0, 0);
+ }
+
if (c->fs_key->is_points &&
(c->devinfo->ver < 40 || program_reads_point_coord(c))) {
c->point_x = emit_fragment_varying(c, NULL, -1, 0, 0);
@@ -3668,9 +3832,9 @@ nir_to_vir(struct v3d_compile *c)
/* Set up the division between gl_LocalInvocationIndex and
* wg_in_mem in the payload reg.
*/
- int wg_size = (c->s->info.cs.local_size[0] *
- c->s->info.cs.local_size[1] *
- c->s->info.cs.local_size[2]);
+ int wg_size = (c->s->info.workgroup_size[0] *
+ c->s->info.workgroup_size[1] *
+ c->s->info.workgroup_size[2]);
c->local_invocation_index_bits =
ffs(util_next_power_of_two(MAX2(wg_size, 64))) - 1;
assert(c->local_invocation_index_bits <= 8);
@@ -3678,9 +3842,9 @@ nir_to_vir(struct v3d_compile *c)
if (c->s->info.shared_size) {
struct qreg wg_in_mem = vir_SHR(c, c->cs_payload[1],
vir_uniform_ui(c, 16));
- if (c->s->info.cs.local_size[0] != 1 ||
- c->s->info.cs.local_size[1] != 1 ||
- c->s->info.cs.local_size[2] != 1) {
+ if (c->s->info.workgroup_size[0] != 1 ||
+ c->s->info.workgroup_size[1] != 1 ||
+ c->s->info.workgroup_size[2] != 1) {
int wg_bits = (16 -
c->local_invocation_index_bits);
int wg_mask = (1 << wg_bits) - 1;
@@ -3731,46 +3895,6 @@ nir_to_vir(struct v3d_compile *c)
}
}
-const nir_shader_compiler_options v3d_nir_options = {
- .lower_add_sat = true,
- .lower_all_io_to_temps = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
- .lower_bitfield_insert_to_shifts = true,
- .lower_bitfield_extract_to_shifts = true,
- .lower_bitfield_reverse = true,
- .lower_bit_count = true,
- .lower_cs_local_id_from_index = true,
- .lower_ffract = true,
- .lower_fmod = true,
- .lower_pack_unorm_2x16 = true,
- .lower_pack_snorm_2x16 = true,
- .lower_pack_unorm_4x8 = true,
- .lower_pack_snorm_4x8 = true,
- .lower_unpack_unorm_4x8 = true,
- .lower_unpack_snorm_4x8 = true,
- .lower_pack_half_2x16 = true,
- .lower_unpack_half_2x16 = true,
- .lower_fdiv = true,
- .lower_find_lsb = true,
- .lower_ffma16 = true,
- .lower_ffma32 = true,
- .lower_ffma64 = true,
- .lower_flrp32 = true,
- .lower_fpow = true,
- .lower_fsat = true,
- .lower_fsqrt = true,
- .lower_ifind_msb = true,
- .lower_isign = true,
- .lower_ldexp = true,
- .lower_mul_high = true,
- .lower_wpos_pntc = true,
- .lower_rotate = true,
- .lower_to_scalar = true,
- .has_fsub = true,
- .has_isub = true,
-};
-
/**
* When demoting a shader down to single-threaded, removes the THRSW
* instructions (one will still be inserted at v3d_vir_to_qpu() for the
@@ -3789,9 +3913,25 @@ vir_remove_thrsw(struct v3d_compile *c)
c->last_thrsw = NULL;
}
-void
-vir_emit_last_thrsw(struct v3d_compile *c)
+/**
+ * This makes sure we have a top-level last thread switch which signals the
+ * start of the last thread section, which may include adding a new thrsw
+ * instruction if needed. We don't allow spilling in the last thread section, so
+ * if we need to do any spills that inject additional thread switches later on,
+ * we ensure this thread switch will still be the last thread switch in the
+ * program, which makes last thread switch signalling a lot easier when we have
+ * spilling. If in the end we don't need to spill to compile the program and we
+ * injected a new thread switch instruction here only for that, we will
+ * eventually restore the previous last thread switch and remove the one we
+ * added here.
+ */
+static void
+vir_emit_last_thrsw(struct v3d_compile *c,
+ struct qinst **restore_last_thrsw,
+ bool *restore_scoreboard_lock)
{
+ *restore_last_thrsw = c->last_thrsw;
+
/* On V3D before 4.1, we need a TMU op to be outstanding when thread
* switching, so disable threads if we didn't do any TMU ops (each of
* which would have emitted a THRSW).
@@ -3800,7 +3940,7 @@ vir_emit_last_thrsw(struct v3d_compile *c)
c->threads = 1;
if (c->last_thrsw)
vir_remove_thrsw(c);
- return;
+ *restore_last_thrsw = NULL;
}
/* If we're threaded and the last THRSW was in conditional code, then
@@ -3823,8 +3963,34 @@ vir_emit_last_thrsw(struct v3d_compile *c)
vir_emit_thrsw(c);
}
+ /* If we have not inserted a last thread switch yet, do it now to ensure
+ * any potential spilling we do happens before this. If we don't spill
+ * in the end, we will restore the previous one.
+ */
+ if (*restore_last_thrsw == c->last_thrsw) {
+ if (*restore_last_thrsw)
+ (*restore_last_thrsw)->is_last_thrsw = false;
+ *restore_scoreboard_lock = c->lock_scoreboard_on_first_thrsw;
+ vir_emit_thrsw(c);
+ } else {
+ *restore_last_thrsw = c->last_thrsw;
+ }
+
+ assert(c->last_thrsw);
+ c->last_thrsw->is_last_thrsw = true;
+}
+
+static void
+vir_restore_last_thrsw(struct v3d_compile *c,
+ struct qinst *thrsw,
+ bool scoreboard_lock)
+{
+ assert(c->last_thrsw);
+ vir_remove_instruction(c, c->last_thrsw);
+ c->last_thrsw = thrsw;
if (c->last_thrsw)
c->last_thrsw->is_last_thrsw = true;
+ c->lock_scoreboard_on_first_thrsw = scoreboard_lock;
}
/* There's a flag in the shader for "center W is needed for reasons other than
@@ -3862,8 +4028,14 @@ v3d_nir_to_vir(struct v3d_compile *c)
nir_to_vir(c);
+ bool restore_scoreboard_lock = false;
+ struct qinst *restore_last_thrsw;
+
/* Emit the last THRSW before STVPM and TLB writes. */
- vir_emit_last_thrsw(c);
+ vir_emit_last_thrsw(c,
+ &restore_last_thrsw,
+ &restore_scoreboard_lock);
+
switch (c->s->info.stage) {
case MESA_SHADER_FRAGMENT:
@@ -3962,6 +4134,12 @@ v3d_nir_to_vir(struct v3d_compile *c)
vir_remove_thrsw(c);
}
+ /* If we didn't spill, then remove the last thread switch we injected
+ * artificially (if any) and restore the previous one.
+ */
+ if (!c->spills && c->last_thrsw != restore_last_thrsw)
+ vir_restore_last_thrsw(c, restore_last_thrsw, restore_scoreboard_lock);
+
if (c->spills &&
(V3D_DEBUG & (V3D_DEBUG_VIR |
v3d_debug_flag_for_shader_stage(c->s->info.stage)))) {
diff --git a/lib/mesa/src/broadcom/compiler/qpu_schedule.c b/lib/mesa/src/broadcom/compiler/qpu_schedule.c
index 8af2e8ef2..7b9891e86 100644
--- a/lib/mesa/src/broadcom/compiler/qpu_schedule.c
+++ b/lib/mesa/src/broadcom/compiler/qpu_schedule.c
@@ -492,7 +492,8 @@ struct choose_scoreboard {
int last_thrsw_tick;
int last_branch_tick;
int last_setmsf_tick;
- bool tlb_locked;
+ bool first_thrsw_emitted;
+ bool last_thrsw_emitted;
bool fixup_ldvary;
int ldvary_count;
};
@@ -576,10 +577,26 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
}
static bool
-pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard,
+scoreboard_is_locked(struct choose_scoreboard *scoreboard,
+ bool lock_scoreboard_on_first_thrsw)
+{
+ if (lock_scoreboard_on_first_thrsw) {
+ return scoreboard->first_thrsw_emitted &&
+ scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
+ }
+
+ return scoreboard->last_thrsw_emitted &&
+ scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
+}
+
+static bool
+pixel_scoreboard_too_soon(struct v3d_compile *c,
+ struct choose_scoreboard *scoreboard,
const struct v3d_qpu_instr *inst)
{
- return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
+ return qpu_inst_is_tlb(inst) &&
+ !scoreboard_is_locked(scoreboard,
+ c->lock_scoreboard_on_first_thrsw);
}
static bool
@@ -868,9 +885,9 @@ qpu_convert_add_to_mul(struct v3d_qpu_instr *inst)
inst->flags.mc = inst->flags.ac;
inst->flags.mpf = inst->flags.apf;
inst->flags.muf = inst->flags.auf;
- inst->flags.ac = V3D_QPU_PF_NONE;
+ inst->flags.ac = V3D_QPU_COND_NONE;
inst->flags.apf = V3D_QPU_PF_NONE;
- inst->flags.auf = V3D_QPU_PF_NONE;
+ inst->flags.auf = V3D_QPU_UF_NONE;
}
static bool
@@ -1053,12 +1070,12 @@ retry:
if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
continue;
- /* "A scoreboard wait must not occur in the first two
- * instructions of a fragment shader. This is either the
- * explicit Wait for Scoreboard signal or an implicit wait
- * with the first tile-buffer read or write instruction."
+ /* "Before doing a TLB access a scoreboard wait must have been
+ * done. This happens either on the first or last thread
+ * switch, depending on a setting (scb_wait_on_first_thrsw) in
+ * the shader state."
*/
- if (pixel_scoreboard_too_soon(scoreboard, inst))
+ if (pixel_scoreboard_too_soon(c, scoreboard, inst))
continue;
/* ldunif and ldvary both write r5, but ldunif does so a tick
@@ -1131,12 +1148,10 @@ retry:
continue;
}
- /* Don't merge in something that will lock the TLB.
- * Hopwefully what we have in inst will release some
- * other instructions, allowing us to delay the
- * TLB-locking instruction until later.
+ /* Don't merge TLB instructions before we have acquired
+ * the scoreboard lock.
*/
- if (!scoreboard->tlb_locked && qpu_inst_is_tlb(inst))
+ if (pixel_scoreboard_too_soon(c, scoreboard, inst))
continue;
/* When we succesfully pair up an ldvary we then try
@@ -1273,9 +1288,6 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
if (inst->sig.ldvary)
scoreboard->last_ldvary_tick = scoreboard->tick;
-
- if (qpu_inst_is_tlb(inst))
- scoreboard->tlb_locked = true;
}
static void
@@ -1490,6 +1502,11 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
return false;
}
+ if (v3d_qpu_sig_writes_address(c->devinfo, &inst->sig) &&
+ !inst->sig_magic) {
+ return false;
+ }
+
if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
return false;
@@ -1747,6 +1764,8 @@ emit_thrsw(struct v3d_compile *c,
merge_inst = inst;
}
+ scoreboard->first_thrsw_emitted = true;
+
/* If we're emitting the last THRSW (other than program end), then
* signal that to the HW by emitting two THRSWs in a row.
*/
@@ -1758,6 +1777,7 @@ emit_thrsw(struct v3d_compile *c,
struct qinst *second_inst =
(struct qinst *)merge_inst->link.next;
second_inst->qpu.sig.thrsw = true;
+ scoreboard->last_thrsw_emitted = true;
}
/* Make sure the thread end executes within the program lifespan */
@@ -1981,6 +2001,17 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
return false;
+ /* The implicit ldvary destination may not be written to by a signal
+ * in the instruction following ldvary. Since we are planning to move
+ * ldvary to the previous instruction, this means we need to check if
+ * the current instruction has any other signal that could create this
+ * conflict. The only other signal that can write to the implicit
+ * ldvary destination that is compatible with ldvary in the same
+ * instruction is ldunif.
+ */
+ if (inst->sig.ldunif)
+ return false;
+
/* The previous instruction can't write to the same destination as the
* ldvary.
*/
diff --git a/lib/mesa/src/broadcom/compiler/v3d_compiler.h b/lib/mesa/src/broadcom/compiler/v3d_compiler.h
index f229f414e..f728327f6 100644
--- a/lib/mesa/src/broadcom/compiler/v3d_compiler.h
+++ b/lib/mesa/src/broadcom/compiler/v3d_compiler.h
@@ -299,6 +299,11 @@ enum quniform_contents {
*/
QUNIFORM_NUM_WORK_GROUPS,
+ /* Base workgroup offset passed to vkCmdDispatchBase in the dimension
+ * selected by the data value.
+ */
+ QUNIFORM_WORK_GROUP_BASE,
+
/**
* Returns the offset of the scratch buffer for register spilling.
*/
@@ -320,6 +325,11 @@ enum quniform_contents {
* out-of-bounds accesses into the tile state during binning.
*/
QUNIFORM_FB_LAYERS,
+
+ /**
+ * Current value of gl_ViewIndex for Multiview rendering.
+ */
+ QUNIFORM_VIEW_INDEX,
};
static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
@@ -416,6 +426,19 @@ struct v3d_fs_key {
uint32_t point_sprite_mask;
struct pipe_rt_blend_state blend;
+
+ /* If the fragment shader reads gl_PrimitiveID then we have 2 scenarios:
+ *
+ * - If there is a geometry shader, then gl_PrimitiveID must be written
+ * by it and the fragment shader loads it as a regular explicit input
+ * varying. This is the only valid use case in GLES 3.1.
+ *
+ * - If there is not a geometry shader (allowed since GLES 3.2 and
+ * Vulkan 1.0), then gl_PrimitiveID must be implicitly written by
+ * hardware and is considered an implicit input varying in the
+ * fragment shader.
+ */
+ bool has_gs;
};
struct v3d_gs_key {
@@ -544,10 +567,10 @@ enum v3d_compilation_result {
struct v3d_compiler {
const struct v3d_device_info *devinfo;
struct ra_regs *regs;
- unsigned int reg_class_any[3];
- unsigned int reg_class_r5[3];
- unsigned int reg_class_phys[3];
- unsigned int reg_class_phys_or_acc[3];
+ struct ra_class *reg_class_any[3];
+ struct ra_class *reg_class_r5[3];
+ struct ra_class *reg_class_phys[3];
+ struct ra_class *reg_class_phys_or_acc[3];
};
/**
@@ -631,6 +654,9 @@ struct v3d_compile {
bool writes_z;
bool uses_implicit_point_line_varyings;
+ /* True if a fragment shader reads gl_PrimitiveID */
+ bool fs_uses_primitive_id;
+
/* If the fragment shader does anything that requires to force
* per-sample MSAA, such as reading gl_SampleID.
*/
@@ -646,12 +672,14 @@ struct v3d_compile {
* TMU spills.
*/
bool disable_tmu_pipelining;
+ bool pipelined_any_tmu;
/* Disable sorting of UBO loads with constant offset. This may
* increase the chances of being able to compile shaders with high
* register pressure.
*/
bool disable_constant_ubo_load_sorting;
+ bool sorted_any_ubo_loads;
/* Emits ldunif for each new uniform, even if the uniform was already
* emitted in the same block. Useful to compile shaders with high
@@ -660,6 +688,10 @@ struct v3d_compile {
*/
bool disable_ldunif_opt;
+ /* Disables loop unrolling to reduce register pressure. */
+ bool disable_loop_unrolling;
+ bool unrolled_any_loops;
+
/* Minimum number of threads we are willing to use to register allocate
* a shader with the current compilation strategy. This only prevents
* us from lowering the thread count to register allocate successfully,
@@ -668,6 +700,13 @@ struct v3d_compile {
*/
uint32_t min_threads_for_reg_alloc;
+ /* Whether TMU spills are allowed. If this is disabled it may cause
+ * register allocation to fail. We set this to favor other compilation
+ * strategies that can reduce register pressure and hopefully reduce or
+ * eliminate TMU spills in the shader.
+ */
+ bool tmu_spilling_allowed;
+
/* The UBO index and block used with the last unifa load, as well as the
* current unifa offset *after* emitting that load. This is used to skip
+ * unifa writes (and their 3 delay slots) when the next UBO load reads
@@ -683,7 +722,7 @@ struct v3d_compile {
struct qreg execute;
bool in_control_flow;
- struct qreg line_x, point_x, point_y;
+ struct qreg line_x, point_x, point_y, primitive_id;
/**
* Instance ID, which comes in before the vertex attribute payload if
@@ -710,6 +749,9 @@ struct v3d_compile {
struct qreg cs_shared_offset;
int local_invocation_index_bits;
+ /* If the shader uses subgroup functionality */
+ bool has_subgroups;
+
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
uint32_t vpm_output_size;
@@ -833,6 +875,8 @@ struct v3d_prog_data {
bool single_seg;
bool tmu_dirty_rcl;
+
+ bool has_control_barrier;
};
struct v3d_vs_prog_data {
@@ -895,11 +939,16 @@ struct v3d_gs_prog_data {
/* Number of GS invocations */
uint8_t num_invocations;
+
+ bool writes_psiz;
};
struct v3d_fs_prog_data {
struct v3d_prog_data base;
+ /* Whether the program reads gl_PrimitiveID */
+ bool uses_pid;
+
struct v3d_varying_slot input_slots[V3D_MAX_FS_INPUTS];
/* Array of flat shade flags.
@@ -927,19 +976,38 @@ struct v3d_compute_prog_data {
/* Size in bytes of the workgroup's shared space. */
uint32_t shared_size;
uint16_t local_size[3];
+ /* If the shader uses subgroup functionality */
+ bool has_subgroups;
+};
+
+struct vpm_config {
+ uint32_t As;
+ uint32_t Vc;
+ uint32_t Gs;
+ uint32_t Gd;
+ uint32_t Gv;
+ uint32_t Ve;
+ uint32_t gs_width;
};
+bool
+v3d_compute_vpm_config(struct v3d_device_info *devinfo,
+ struct v3d_vs_prog_data *vs_bin,
+ struct v3d_vs_prog_data *vs,
+ struct v3d_gs_prog_data *gs_bin,
+ struct v3d_gs_prog_data *gs,
+ struct vpm_config *vpm_cfg_bin,
+ struct vpm_config *vpm_cfg);
+
static inline bool
vir_has_uniform(struct qinst *inst)
{
return inst->uniform != ~0;
}
-extern const nir_shader_compiler_options v3d_nir_options;
-
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
void v3d_compiler_free(const struct v3d_compiler *compiler);
-void v3d_optimize_nir(struct nir_shader *s);
+void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
struct v3d_key *key,
@@ -981,6 +1049,7 @@ struct v3d_qpu_instr v3d_qpu_nop(void);
struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
+enum v3d_qpu_cond vir_get_cond(struct qinst *inst);
void vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf);
void vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_uf uf);
void vir_set_unpack(struct qinst *inst, int src,
@@ -988,7 +1057,6 @@ void vir_set_unpack(struct qinst *inst, int src,
void vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack);
struct qreg vir_get_temp(struct v3d_compile *c);
-void vir_emit_last_thrsw(struct v3d_compile *c);
void vir_calculate_live_intervals(struct v3d_compile *c);
int vir_get_nsrc(struct qinst *inst);
bool vir_has_side_effects(struct v3d_compile *c, struct qinst *inst);
@@ -1216,6 +1284,8 @@ VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH)
VIR_A_ALU1(FLBPUSH)
VIR_A_ALU1(FLPOP)
+VIR_A_ALU0(FLAFIRST)
+VIR_A_ALU0(FLNAFIRST)
VIR_A_ALU1(SETMSF)
VIR_A_ALU1(SETREVF)
VIR_A_ALU0(TIDX)
@@ -1345,30 +1415,6 @@ vir_TLB_COLOR_READ(struct v3d_compile *c)
return vir_emit_def(c, ldtlb);
}
-/*
-static inline struct qreg
-vir_LOAD_IMM(struct v3d_compile *c, uint32_t val)
-{
- return vir_emit_def(c, vir_inst(QOP_LOAD_IMM, c->undef,
- vir_reg(QFILE_LOAD_IMM, val), c->undef));
-}
-
-static inline struct qreg
-vir_LOAD_IMM_U2(struct v3d_compile *c, uint32_t val)
-{
- return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_U2, c->undef,
- vir_reg(QFILE_LOAD_IMM, val),
- c->undef));
-}
-static inline struct qreg
-vir_LOAD_IMM_I2(struct v3d_compile *c, uint32_t val)
-{
- return vir_emit_def(c, vir_inst(QOP_LOAD_IMM_I2, c->undef,
- vir_reg(QFILE_LOAD_IMM, val),
- c->undef));
-}
-*/
-
static inline struct qinst *
vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
{
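A hypothetical caller of the new v3d_compute_vpm_config entry point (Vulkan pipeline setup, say) would look roughly like this; everything here other than the prototype above is illustrative:

    static bool
    setup_vpm_configs(struct v3d_device_info *devinfo,
                      struct v3d_vs_prog_data *vs_bin,
                      struct v3d_vs_prog_data *vs,
                      struct v3d_gs_prog_data *gs_bin, /* NULL if no GS */
                      struct v3d_gs_prog_data *gs)     /* NULL if no GS */
    {
            struct vpm_config vpm_cfg_bin, vpm_cfg;

            /* This only fails when a GS program cannot be made to fit in
             * VPM memory, in which case pipeline creation should fail too.
             */
            if (!v3d_compute_vpm_config(devinfo, vs_bin, vs, gs_bin, gs,
                                        &vpm_cfg_bin, &vpm_cfg))
                    return false;

            /* ... pack vpm_cfg_bin / vpm_cfg into the shader state ... */
            return true;
    }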
diff --git a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c
index 655f74fd4..895b1a391 100644
--- a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_io.c
@@ -24,6 +24,8 @@
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
+#include "util/u_helpers.h"
+
/**
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
* intrinsics into something amenable to the V3D architecture.
@@ -325,6 +327,59 @@ v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_set_component(instr, (comp + 2) % 4);
}
+/* Sometimes the origin of gl_PointCoord is in the upper left rather than the
+ * lower left, so we need to flip it.
+ *
+ * This is only needed for Vulkan; Gallium uses lower_wpos_pntc.
+ */
+static void
+v3d_nir_lower_fragment_input(struct v3d_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
+
+ /* Gallium uses lower_wpos_pntc */
+ if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
+ return;
+
+ b->cursor = nir_after_instr(&intr->instr);
+
+ int comp = nir_intrinsic_component(intr);
+
+ nir_variable *input_var =
+ nir_find_variable_with_driver_location(c->s,
+ nir_var_shader_in,
+ nir_intrinsic_base(intr));
+
+ if (input_var && util_varying_is_point_coord(input_var->data.location,
+ c->fs_key->point_sprite_mask)) {
+ assert(intr->num_components == 1);
+
+ nir_ssa_def *result = &intr->dest.ssa;
+
+ switch (comp) {
+ case 0:
+ case 1:
+ if (!c->fs_key->is_points)
+ result = nir_imm_float(b, 0.0);
+ break;
+ case 2:
+ result = nir_imm_float(b, 0.0);
+ break;
+ case 3:
+ result = nir_imm_float(b, 1.0);
+ break;
+ }
+ if (c->fs_key->point_coord_upper_left && comp == 1)
+ result = nir_fsub(b, nir_imm_float(b, 1.0), result);
+ if (result != &intr->dest.ssa) {
+ nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+ result,
+ result->parent_instr);
+ }
+ }
+}
+
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
struct nir_instr *instr,
@@ -338,6 +393,8 @@ v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
case nir_intrinsic_load_input:
if (c->s->info.stage == MESA_SHADER_VERTEX)
v3d_nir_lower_vertex_input(c, b, intr);
+ else if (c->s->info.stage == MESA_SHADER_FRAGMENT)
+ v3d_nir_lower_fragment_input(c, b, intr);
break;
case nir_intrinsic_load_uniform:
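The effect of the fragment-input lowering on gl_PointCoord can be summed up per component. A scalar sketch (illustrative only; the real pass rewrites NIR SSA values, as above):

    #include <stdbool.h>

    /* What the lowering computes for gl_PointCoord component `comp`,
     * given the raw hardware varying value. The upper-left flip is
     * applied last, matching the order of the NIR rewrites.
     */
    static float
    lowered_point_coord(float raw, int comp, bool is_points, bool upper_left)
    {
            float v = raw;
            if (comp <= 1 && !is_points)
                    v = 0.0f;       /* .xy are only meaningful on points */
            else if (comp == 2)
                    v = 0.0f;       /* .z is always 0 */
            else if (comp == 3)
                    v = 1.0f;       /* .w is always 1 */
            if (upper_left && comp == 1)
                    v = 1.0f - v;   /* flip t for an upper-left origin */
            return v;
    }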
diff --git a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
index 2cd613b26..11782c734 100644
--- a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
+++ b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_logic_ops.c
@@ -202,12 +202,23 @@ v3d_get_format_swizzle_for_rt(struct v3d_compile *c, int rt)
}
static nir_ssa_def *
-v3d_nir_get_tlb_color(nir_builder *b, int rt, int sample)
+v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample)
{
- nir_ssa_def *color[4];
- for (int i = 0; i < 4; i++)
- color[i] = nir_load_tlb_color_v3d(b, 1, 32, nir_imm_int(b, rt), .base = sample, .component = i);
+ uint32_t num_components =
+ util_format_get_nr_components(c->fs_key->color_fmt[rt].format);
+ nir_ssa_def *color[4];
+ for (int i = 0; i < 4; i++) {
+ if (i < num_components) {
+ color[i] =
+ nir_load_tlb_color_v3d(b, 1, 32, nir_imm_int(b, rt),
+ .base = sample,
+ .component = i);
+ } else {
+ /* These will be DCEd */
+ color[i] = nir_imm_int(b, 0);
+ }
+ }
return nir_vec4(b, color[0], color[1], color[2], color[3]);
}
@@ -224,6 +235,22 @@ v3d_emit_logic_op_raw(struct v3d_compile *c, nir_builder *b,
nir_ssa_def *dst =
v3d_nir_get_swizzled_channel(b, dst_chans, fmt_swz[i]);
op_res[i] = v3d_logicop(b, c->fs_key->logicop_func, src, dst);
+
+ /* In Vulkan we configure our integer RTs to clamp, so we need
+ * to ignore result bits that don't fit in the destination RT
+ * component size.
+ */
+ if (c->key->environment == V3D_ENVIRONMENT_VULKAN) {
+ uint32_t bits =
+ util_format_get_component_bits(
+ c->fs_key->color_fmt[rt].format,
+ UTIL_FORMAT_COLORSPACE_RGB, i);
+ if (bits > 0 && bits < 32) {
+ nir_ssa_def *mask =
+ nir_imm_int(b, (1u << bits) - 1);
+ op_res[i] = nir_iand(b, op_res[i], mask);
+ }
+ }
}
nir_ssa_def *r[4];
@@ -257,7 +284,7 @@ static nir_ssa_def *
v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
nir_ssa_def *src, int rt, int sample)
{
- nir_ssa_def *dst = v3d_nir_get_tlb_color(b, rt, sample);
+ nir_ssa_def *dst = v3d_nir_get_tlb_color(b, c, rt, sample);
nir_ssa_def *src_chans[4], *dst_chans[4];
for (unsigned i = 0; i < 4; i++) {
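For the clamp mask this amounts to, for example, keeping 10 bits per color component on an RGB10_A2UI target. A self-contained sketch of the masking:

    #include <stdint.h>

    /* Mask a logic-op result down to the bits that fit the destination
     * RT component, mirroring the Vulkan clamp behaviour above.
     */
    static uint32_t
    clamp_logicop_result(uint32_t result, uint32_t component_bits)
    {
            if (component_bits > 0 && component_bits < 32)
                    result &= (1u << component_bits) - 1u;
            return result;
    }

    /* Example: clamp_logicop_result(0xffffffffu, 10) == 0x3ffu, while a
     * full 32-bit component passes through unchanged.
     */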
diff --git a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c
index e6a226b03..40f1cc23b 100644
--- a/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c
+++ b/lib/mesa/src/broadcom/compiler/v3d_nir_lower_robust_buffer_access.c
@@ -56,7 +56,7 @@ lower_load(struct v3d_compile *c,
nir_builder *b,
nir_intrinsic_instr *instr)
{
- uint32_t index = nir_src_as_uint(instr->src[0]);
+ uint32_t index = nir_src_comp_as_uint(instr->src[0], 0);
nir_intrinsic_op op;
if (instr->intrinsic == nir_intrinsic_load_ubo) {
@@ -75,7 +75,7 @@ lower_store(struct v3d_compile *c,
nir_builder *b,
nir_intrinsic_instr *instr)
{
- uint32_t index = nir_src_as_uint(instr->src[1]);
+ uint32_t index = nir_src_comp_as_uint(instr->src[1], 0);
rewrite_offset(b, instr, index, 2, nir_intrinsic_get_ssbo_size);
}
@@ -84,7 +84,7 @@ lower_atomic(struct v3d_compile *c,
nir_builder *b,
nir_intrinsic_instr *instr)
{
- uint32_t index = nir_src_as_uint(instr->src[0]);
+ uint32_t index = nir_src_comp_as_uint(instr->src[0], 0);
rewrite_offset(b, instr, index, 1, nir_intrinsic_get_ssbo_size);
}
diff --git a/lib/mesa/src/broadcom/compiler/vir.c b/lib/mesa/src/broadcom/compiler/vir.c
index 335a5a8e3..bf75a4da1 100644
--- a/lib/mesa/src/broadcom/compiler/vir.c
+++ b/lib/mesa/src/broadcom/compiler/vir.c
@@ -25,6 +25,7 @@
#include "v3d_compiler.h"
#include "util/u_prim.h"
#include "compiler/nir/nir_schedule.h"
+#include "compiler/nir/nir_builder.h"
int
vir_get_nsrc(struct qinst *inst)
@@ -242,6 +243,19 @@ vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
}
}
+enum v3d_qpu_cond
+vir_get_cond(struct qinst *inst)
+{
+ assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
+
+ if (vir_is_add(inst))
+ return inst->qpu.flags.ac;
+ else if (vir_is_mul(inst))
+ return inst->qpu.flags.mc;
+ else /* NOP */
+ return V3D_QPU_COND_NONE;
+}
+
void
vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf)
{
@@ -525,7 +539,10 @@ vir_compile_init(const struct v3d_compiler *compiler,
void *debug_output_data),
void *debug_output_data,
int program_id, int variant_id,
+ uint32_t max_threads,
uint32_t min_threads_for_reg_alloc,
+ bool tmu_spilling_allowed,
+ bool disable_loop_unrolling,
bool disable_constant_ubo_load_sorting,
bool disable_tmu_pipelining,
bool fallback_scheduler)
@@ -537,14 +554,17 @@ vir_compile_init(const struct v3d_compiler *compiler,
c->key = key;
c->program_id = program_id;
c->variant_id = variant_id;
- c->threads = 4;
+ c->threads = max_threads;
c->debug_output = debug_output;
c->debug_output_data = debug_output_data;
c->compilation_result = V3D_COMPILATION_SUCCEEDED;
c->min_threads_for_reg_alloc = min_threads_for_reg_alloc;
+ c->tmu_spilling_allowed = tmu_spilling_allowed;
c->fallback_scheduler = fallback_scheduler;
c->disable_tmu_pipelining = disable_tmu_pipelining;
c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting;
+ c->disable_loop_unrolling = V3D_DEBUG & V3D_DEBUG_NO_LOOP_UNROLL
+ ? true : disable_loop_unrolling;
s = nir_shader_clone(c, s);
c->s = s;
@@ -754,6 +774,9 @@ v3d_gs_set_prog_data(struct v3d_compile *c,
prog_data->out_prim_type = c->s->info.gs.output_primitive;
prog_data->num_invocations = c->s->info.gs.invocations;
+
+ prog_data->writes_psiz =
+ c->s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
}
static void
@@ -791,6 +814,7 @@ v3d_fs_set_prog_data(struct v3d_compile *c,
prog_data->lock_scoreboard_on_first_thrsw =
c->lock_scoreboard_on_first_thrsw;
prog_data->force_per_sample_msaa = c->force_per_sample_msaa;
+ prog_data->uses_pid = c->fs_uses_primitive_id;
}
static void
@@ -799,9 +823,11 @@ v3d_cs_set_prog_data(struct v3d_compile *c,
{
prog_data->shared_size = c->s->info.shared_size;
- prog_data->local_size[0] = c->s->info.cs.local_size[0];
- prog_data->local_size[1] = c->s->info.cs.local_size[1];
- prog_data->local_size[2] = c->s->info.cs.local_size[2];
+ prog_data->local_size[0] = c->s->info.workgroup_size[0];
+ prog_data->local_size[1] = c->s->info.workgroup_size[1];
+ prog_data->local_size[2] = c->s->info.workgroup_size[2];
+
+ prog_data->has_subgroups = c->has_subgroups;
}
static void
@@ -812,6 +838,7 @@ v3d_set_prog_data(struct v3d_compile *c,
prog_data->single_seg = !c->last_thrsw;
prog_data->spill_size = c->spill_size;
prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl;
+ prog_data->has_control_barrier = c->s->info.uses_control_barrier;
v3d_set_prog_data_uniforms(c, prog_data);
@@ -866,7 +893,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
- v3d_optimize_nir(c->s);
+ v3d_optimize_nir(c, c->s);
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */
@@ -900,7 +927,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
- v3d_optimize_nir(c->s);
+ v3d_optimize_nir(c, c->s);
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */
@@ -958,14 +985,6 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
/* The lowering pass can introduce new sysval reads */
nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s));
}
-
- /* If the shader has no non-TLB side effects, we can promote it to
- * enabling early_fragment_tests even if the user didn't.
- */
- if (!(c->s->info.num_images ||
- c->s->info.num_ssbos)) {
- c->s->info.fs.early_fragment_tests = true;
- }
}
static void
@@ -1330,11 +1349,10 @@ v3d_nir_sort_constant_ubo_loads_block(struct v3d_compile *c,
static bool
v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c)
{
- bool progress = false;
nir_foreach_function(function, s) {
if (function->impl) {
nir_foreach_block(block, function->impl) {
- progress |=
+ c->sorted_any_ubo_loads |=
v3d_nir_sort_constant_ubo_loads_block(c, block);
}
nir_metadata_preserve(function->impl,
@@ -1342,6 +1360,77 @@ v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c)
nir_metadata_dominance);
}
}
+ return c->sorted_any_ubo_loads;
+}
+
+static void
+lower_load_num_subgroups(struct v3d_compile *c,
+ nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ assert(c->s->info.stage == MESA_SHADER_COMPUTE);
+ assert(intr->intrinsic == nir_intrinsic_load_num_subgroups);
+
+ b->cursor = nir_after_instr(&intr->instr);
+ uint32_t num_subgroups =
+ DIV_ROUND_UP(c->s->info.workgroup_size[0] *
+ c->s->info.workgroup_size[1] *
+ c->s->info.workgroup_size[2], V3D_CHANNELS);
+ nir_ssa_def *result = nir_imm_int(b, num_subgroups);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
+ nir_instr_remove(&intr->instr);
+}
+
+static bool
+lower_subgroup_intrinsics(struct v3d_compile *c,
+ nir_block *block, nir_builder *b)
+{
+ bool progress = false;
+ nir_foreach_instr_safe(inst, block) {
+ if (inst->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr =
+ nir_instr_as_intrinsic(inst);
+ if (!intr)
+ continue;
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_num_subgroups:
+ lower_load_num_subgroups(c, b, intr);
+ progress = true;
+ FALLTHROUGH;
+ case nir_intrinsic_load_subgroup_id:
+ case nir_intrinsic_load_subgroup_size:
+ case nir_intrinsic_load_subgroup_invocation:
+ case nir_intrinsic_elect:
+ c->has_subgroups = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
+static bool
+v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c)
+{
+ bool progress = false;
+ nir_foreach_function(function, s) {
+ if (function->impl) {
+ nir_builder b;
+ nir_builder_init(&b, function->impl);
+
+ nir_foreach_block(block, function->impl)
+ progress |= lower_subgroup_intrinsics(c, block, &b);
+
+ nir_metadata_preserve(function->impl,
+ nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+ }
return progress;
}
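Since V3D executes 16 channels per QPU thread (V3D_CHANNELS), gl_NumSubgroups always folds to a compile-time constant here. A standalone example of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    #define V3D_CHANNELS 16 /* QPU SIMD width */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            /* An 8x8x1 workgroup has 64 invocations, so the lowering
             * above replaces load_num_subgroups with the constant 4.
             */
            uint32_t wg[3] = { 8, 8, 1 };
            uint32_t num_subgroups =
                    DIV_ROUND_UP(wg[0] * wg[1] * wg[2], V3D_CHANNELS);
            printf("num_subgroups = %u\n", num_subgroups); /* prints 4 */
            return 0;
    }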
@@ -1405,18 +1494,21 @@ v3d_attempt_compile(struct v3d_compile *c)
if (c->key->robust_buffer_access) {
/* v3d_nir_lower_robust_buffer_access assumes constant buffer
- * indices on ubo/ssbo intrinsics so run a copy propagation pass
- * before we run the lowering to warrant this. We also want to run
- * the lowering before v3d_optimize to clean-up redundant
- * get_buffer_size calls produced in the pass.
+ * indices on ubo/ssbo intrinsics, so run copy propagation and
+ * constant folding passes before the lowering to guarantee this.
+ * We also want to run the lowering before v3d_optimize to clean up
+ * redundant get_buffer_size calls produced in the pass.
*/
NIR_PASS_V(c->s, nir_copy_prop);
+ NIR_PASS_V(c->s, nir_opt_constant_folding);
NIR_PASS_V(c->s, v3d_nir_lower_robust_buffer_access, c);
}
NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
- v3d_optimize_nir(c->s);
+ NIR_PASS_V(c->s, v3d_nir_lower_subgroup_intrinsics, c);
+
+ v3d_optimize_nir(c, c->s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may
@@ -1505,6 +1597,83 @@ int v3d_shaderdb_dump(struct v3d_compile *c,
c->nop_count);
}
+/* This is a list of incremental changes to the compilation strategy
+ * that will be used to try to compile the shader successfully. The
+ * default strategy is to enable all optimizations, which gives the
+ * highest register pressure but is expected to produce the most
+ * optimal code. Subsequent strategies incrementally disable specific
+ * optimizations that are known to contribute to register pressure,
+ * so that the shader can be compiled successfully while meeting
+ * thread count requirements.
+ *
+ * V3D 4.1+ has a min thread count of 2, but we can use 1 here to
+ * cover previous hardware as well (meaning that we are not limiting
+ * register allocation to any particular thread count). This is fine
+ * because v3d_nir_to_vir will cap this to the actual minimum.
+ */
+struct v3d_compiler_strategy {
+ const char *name;
+ uint32_t max_threads;
+ uint32_t min_threads;
+ bool disable_loop_unrolling;
+ bool disable_ubo_load_sorting;
+ bool disable_tmu_pipelining;
+ bool tmu_spilling_allowed;
+} static const strategies[] = {
+ /*0*/ { "default", 4, 4, false, false, false, false },
+ /*1*/ { "disable loop unrolling", 4, 4, true, false, false, false },
+ /*2*/ { "disable UBO load sorting", 4, 4, true, true, false, false },
+ /*3*/ { "disable TMU pipelining", 4, 4, true, true, true, false },
+ /*4*/ { "lower thread count", 2, 1, false, false, false, false },
+ /*5*/ { "disable loop unrolling (ltc)", 2, 1, true, false, false, false },
+ /*6*/ { "disable UBO load sorting (ltc)", 2, 1, true, true, false, false },
+ /*7*/ { "disable TMU pipelining (ltc)", 2, 1, true, true, true, true },
+ /*8*/ { "fallback scheduler", 2, 1, true, true, true, true }
+};
+
+/**
+ * If a particular optimization didn't make any progress during a compile
+ * attempt, disabling it alone won't allow us to compile the shader
+ * successfully, since we'll end up with the same code. Detect these
+ * scenarios so we can avoid wasting time with useless compiles. We must
+ * also consider whether the strategy changes other aspects of the
+ * compilation process, like spilling, and not skip it in that case.
+ */
+static bool
+skip_compile_strategy(struct v3d_compile *c, uint32_t idx)
+{
+ /* We decide if we can skip a strategy based on the optimizations that
+ * were active in the previous strategy, so we should only be calling this
+ * for strategies after the first.
+ */
+ assert(idx > 0);
+
+ /* Don't skip a strategy that changes spilling behavior */
+ if (strategies[idx].tmu_spilling_allowed !=
+ strategies[idx - 1].tmu_spilling_allowed) {
+ return false;
+ }
+
+ switch (idx) {
+ /* Loop unrolling: skip if we didn't unroll any loops */
+ case 1:
+ case 5:
+ return !c->unrolled_any_loops;
+ /* UBO load sorting: skip if we didn't sort any loads */
+ case 2:
+ case 6:
+ return !c->sorted_any_ubo_loads;
+ /* TMU pipelining: skip if we didn't pipeline any TMU ops */
+ case 3:
+ case 7:
+ return !c->pipelined_any_tmu;
+ /* Lower thread count: skip if we already tried fewer than 4 threads */
+ case 4:
+ return c->threads < 4;
+ default:
+ return false;
+ }
+}
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
struct v3d_key *key,
struct v3d_prog_data **out_prog_data,
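Condensed, the rewritten loop in the next hunk consumes the strategy table like this (a sketch; the real code also reports each fallback through debug_output and V3D_DEBUG_PERF):

    struct v3d_compile *c = NULL;
    for (int i = 0; i < ARRAY_SIZE(strategies); i++) {
            /* Skip strategies that provably cannot change the outcome. */
            if (i > 0 && skip_compile_strategy(c, i))
                    continue;
            if (i > 0)
                    vir_compile_destroy(c);

            c = vir_compile_init(compiler, key, s,
                                 debug_output, debug_output_data,
                                 program_id, variant_id,
                                 strategies[i].max_threads,
                                 strategies[i].min_threads,
                                 strategies[i].tmu_spilling_allowed,
                                 strategies[i].disable_loop_unrolling,
                                 strategies[i].disable_ubo_load_sorting,
                                 strategies[i].disable_tmu_pipelining,
                                 i == ARRAY_SIZE(strategies) - 1);
            v3d_attempt_compile(c);

            /* Only register allocation failures warrant another try. */
            if (c->compilation_result !=
                V3D_COMPILATION_FAILED_REGISTER_ALLOCATION)
                    break;
    }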
@@ -1515,40 +1684,41 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
int program_id, int variant_id,
uint32_t *final_assembly_size)
{
- struct v3d_compile *c;
+ struct v3d_compile *c = NULL;
+ for (int i = 0; i < ARRAY_SIZE(strategies); i++) {
+ /* Fallback strategy */
+ if (i > 0) {
+ assert(c);
+ if (skip_compile_strategy(c, i))
+ continue;
- /* This is a list of incremental changes to the compilation strategy
- * that will be used to try to compile the shader successfully. The
- * default strategy is to enable all optimizations which will have
- * the highest register pressure but is expected to produce most
- * optimal code. Following strategies incrementally disable specific
- * optimizations that are known to contribute to register pressure
- * in order to be able to compile the shader successfully while meeting
- * thread count requirements.
- *
- * V3D 4.1+ has a min thread count of 2, but we can use 1 here to also
- * cover previous hardware as well (meaning that we are not limiting
- * register allocation to any particular thread count). This is fine
- * because v3d_nir_to_vir will cap this to the actual minimum.
- */
- struct v3d_compiler_strategy {
- const char *name;
- uint32_t min_threads_for_reg_alloc;
- } static const strategies[] = {
- { "default", 4 },
- { "disable UBO load sorting", 1 },
- { "disable TMU pipelining", 1 },
- { "fallback scheduler", 1 }
- };
+ char *debug_msg;
+ int ret = asprintf(&debug_msg,
+ "Falling back to strategy '%s' for %s",
+ strategies[i].name,
+ vir_get_stage_name(c));
+
+ if (ret >= 0) {
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))
+ fprintf(stderr, "%s\n", debug_msg);
+
+ c->debug_output(debug_msg, c->debug_output_data);
+ free(debug_msg);
+ }
+
+ vir_compile_destroy(c);
+ }
- for (int i = 0; i < ARRAY_SIZE(strategies); i++) {
c = vir_compile_init(compiler, key, s,
debug_output, debug_output_data,
program_id, variant_id,
- strategies[i].min_threads_for_reg_alloc,
- i > 0, /* Disable UBO load sorting */
- i > 1, /* Disable TMU pipelining */
- i > 2 /* Fallback_scheduler */);
+ strategies[i].max_threads,
+ strategies[i].min_threads,
+ strategies[i].tmu_spilling_allowed,
+ strategies[i].disable_loop_unrolling,
+ strategies[i].disable_ubo_load_sorting,
+ strategies[i].disable_tmu_pipelining,
+ i == ARRAY_SIZE(strategies) - 1);
v3d_attempt_compile(c);
@@ -1557,23 +1727,6 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
V3D_COMPILATION_FAILED_REGISTER_ALLOCATION) {
break;
}
-
- /* Fallback strategy */
- char *debug_msg;
- int ret = asprintf(&debug_msg,
- "Falling back to strategy '%s' for %s",
- strategies[i + 1].name,
- vir_get_stage_name(c));
-
- if (ret >= 0) {
- if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF))
- fprintf(stderr, "%s\n", debug_msg);
-
- c->debug_output(debug_msg, c->debug_output_data);
- free(debug_msg);
- }
-
- vir_compile_destroy(c);
}
if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) &&
@@ -1717,6 +1870,24 @@ try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif)
{
uint32_t count = 20;
struct qinst *prev_inst = NULL;
+ assert(c->cur_block);
+
+#ifdef DEBUG
+ /* We can only reuse a uniform if it was emitted in the same block,
+ * so callers must make sure the current instruction is being emitted
+ * in the current block.
+ */
+ bool found = false;
+ vir_for_each_inst(inst, c->cur_block) {
+ if (&inst->link == c->cursor.link) {
+ found = true;
+ break;
+ }
+ }
+
+ assert(found || &c->cur_block->instructions == c->cursor.link);
+#endif
+
list_for_each_entry_from_rev(struct qinst, inst, c->cursor.link->prev,
&c->cur_block->instructions, link) {
if ((inst->qpu.sig.ldunif || inst->qpu.sig.ldunifrf) &&
@@ -1817,3 +1988,174 @@ vir_get_stage_name(struct v3d_compile *c)
else
return gl_shader_stage_name(c->s->info.stage);
}
+
+static inline uint32_t
+compute_vpm_size_in_sectors(const struct v3d_device_info *devinfo)
+{
+ assert(devinfo->vpm_size > 0);
+ const uint32_t sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
+ return devinfo->vpm_size / sector_size;
+}
+
+/* Computes various parameters affecting VPM memory configuration for programs
+ * involving geometry shaders to ensure the program fits in memory and honors
+ * requirements described in section "VPM usage" of the programming manual.
+ */
+static bool
+compute_vpm_config_gs(struct v3d_device_info *devinfo,
+ struct v3d_vs_prog_data *vs,
+ struct v3d_gs_prog_data *gs,
+ struct vpm_config *vpm_cfg_out)
+{
+ const uint32_t A = vs->separate_segments ? 1 : 0;
+ const uint32_t Ad = vs->vpm_input_size;
+ const uint32_t Vd = vs->vpm_output_size;
+
+ const uint32_t vpm_size = compute_vpm_size_in_sectors(devinfo);
+
+ /* Try to fit program into our VPM memory budget by adjusting
+ * configurable parameters iteratively. We do this in two phases:
+ * the first phase tries to fit the program into the total available
+ * VPM memory. If we succeed at that, then the second phase attempts
+ * to fit the program into half of that budget so we can run bin and
+ * render programs in parallel.
+ */
+ struct vpm_config vpm_cfg[2];
+ struct vpm_config *final_vpm_cfg = NULL;
+ uint32_t phase = 0;
+
+ vpm_cfg[phase].As = 1;
+ vpm_cfg[phase].Gs = 1;
+ vpm_cfg[phase].Gd = gs->vpm_output_size;
+ vpm_cfg[phase].gs_width = gs->simd_width;
+
+ /* While there is a requirement that Vc >= [Vn / 16], this is
+ * always the case when tessellation is not present because in that
+ * case Vn can only be 6 at most (when input primitive is triangles
+ * with adjacency).
+ *
+ * We always choose Vc=2. We can't go lower than this due to GFXH-1744,
+ * and Broadcom has not found it worth it to increase it beyond this
+ * in general. Increasing Vc also increases VPM memory pressure, which
+ * can turn out to be detrimental to performance in some scenarios.
+ */
+ vpm_cfg[phase].Vc = 2;
+
+ /* Gv is a constraint on the hardware to not exceed the
+ * specified number of vertex segments per GS batch. If adding a
+ * new primitive to a GS batch would result in a range of more
+ * than Gv vertex segments being referenced by the batch, then
+ * the hardware will flush the batch and start a new one. This
+ * means that we can choose any value we want, we just need to
+ * be aware that larger values improve GS batch utilization
+ * at the expense of more VPM memory pressure (which can affect
+ * other performance aspects, such as GS dispatch width).
+ * We start with the largest value, and will reduce it if we
+ * find that total memory pressure is too high.
+ */
+ vpm_cfg[phase].Gv = 3;
+ do {
+ /* When GS is present in the absence of TES, we need to satisfy
+ * that Ve >= Gv. We go with the smallest value of Ve to avoid
+ * increasing memory pressure.
+ */
+ vpm_cfg[phase].Ve = vpm_cfg[phase].Gv;
+
+ uint32_t vpm_sectors =
+ A * vpm_cfg[phase].As * Ad +
+ (vpm_cfg[phase].Vc + vpm_cfg[phase].Ve) * Vd +
+ vpm_cfg[phase].Gs * vpm_cfg[phase].Gd;
+
+ /* Ideally we want to use no more than half of the available
+ * memory so we can execute a bin and render program in parallel
+ * without stalls. If we achieved that then we are done.
+ */
+ if (vpm_sectors <= vpm_size / 2) {
+ final_vpm_cfg = &vpm_cfg[phase];
+ break;
+ }
+
+ /* At the very least, we should not allocate more than the
+ * total available VPM memory. If we have a configuration that
+ * succeeds at this we save it and continue to see if we can
+ * meet the half-memory-use criteria too.
+ */
+ if (phase == 0 && vpm_sectors <= vpm_size) {
+ vpm_cfg[1] = vpm_cfg[0];
+ phase = 1;
+ }
+
+ /* Try lowering Gv */
+ if (vpm_cfg[phase].Gv > 0) {
+ vpm_cfg[phase].Gv--;
+ continue;
+ }
+
+ /* Try lowering GS dispatch width */
+ if (vpm_cfg[phase].gs_width > 1) {
+ do {
+ vpm_cfg[phase].gs_width >>= 1;
+ vpm_cfg[phase].Gd = align(vpm_cfg[phase].Gd, 2) / 2;
+ } while (vpm_cfg[phase].gs_width == 2);
+
+ /* Reset Gv to max after dropping dispatch width */
+ vpm_cfg[phase].Gv = 3;
+ continue;
+ }
+
+ /* We ran out of options to reduce memory pressure. If we
+ * are at phase 1 we have at least a valid configuration, so we
+ * use that.
+ */
+ if (phase == 1)
+ final_vpm_cfg = &vpm_cfg[0];
+ break;
+ } while (true);
+
+ if (!final_vpm_cfg)
+ return false;
+
+ assert(final_vpm_cfg);
+ assert(final_vpm_cfg->Gd <= 16);
+ assert(final_vpm_cfg->Gv < 4);
+ assert(final_vpm_cfg->Ve < 4);
+ assert(final_vpm_cfg->Vc >= 2 && final_vpm_cfg->Vc <= 4);
+ assert(final_vpm_cfg->gs_width == 1 ||
+ final_vpm_cfg->gs_width == 4 ||
+ final_vpm_cfg->gs_width == 8 ||
+ final_vpm_cfg->gs_width == 16);
+
+ *vpm_cfg_out = *final_vpm_cfg;
+ return true;
+}
+
+bool
+v3d_compute_vpm_config(struct v3d_device_info *devinfo,
+ struct v3d_vs_prog_data *vs_bin,
+ struct v3d_vs_prog_data *vs,
+ struct v3d_gs_prog_data *gs_bin,
+ struct v3d_gs_prog_data *gs,
+ struct vpm_config *vpm_cfg_bin,
+ struct vpm_config *vpm_cfg)
+{
+ assert(vs && vs_bin);
+ assert((gs != NULL) == (gs_bin != NULL));
+
+ if (!gs) {
+ vpm_cfg_bin->As = 1;
+ vpm_cfg_bin->Ve = 0;
+ vpm_cfg_bin->Vc = vs_bin->vcm_cache_size;
+
+ vpm_cfg->As = 1;
+ vpm_cfg->Ve = 0;
+ vpm_cfg->Vc = vs->vcm_cache_size;
+ } else {
+ if (!compute_vpm_config_gs(devinfo, vs_bin, gs_bin, vpm_cfg_bin))
+ return false;
+
+ if (!compute_vpm_config_gs(devinfo, vs, gs, vpm_cfg))
+ return false;
+ }
+
+ return true;
+}
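Concretely, with V3D_CHANNELS = 16 a sector is 16 * 4 * 8 = 512 bytes, so a hypothetical 16 KiB VPM yields 32 sectors and a phase-0 budget of 16. A standalone rendition of the sizing check:

    #include <stdint.h>
    #include <stdio.h>

    #define V3D_CHANNELS 16

    int main(void)
    {
            /* One sector: 8 rows of V3D_CHANNELS 32-bit words. */
            const uint32_t sector_size =
                    V3D_CHANNELS * (uint32_t)sizeof(uint32_t) * 8;
            const uint32_t vpm_size = 16 * 1024; /* hypothetical 16 KiB VPM */
            const uint32_t vpm_sectors = vpm_size / sector_size; /* 32 */

            /* Illustrative parameters: A = 1, As = 1, Ad = 2, Vc = 2,
             * Ve = 3, Vd = 4, Gs = 1, Gd = 8.
             */
            uint32_t used = 1 * 1 * 2 + (2 + 3) * 4 + 1 * 8; /* 30 */

            printf("%u of %u sectors -> %s\n", used, vpm_sectors,
                   used <= vpm_sectors / 2 ? "fits the half-memory target"
                                           : "keep reducing Gv/gs_width");
            return 0;
    }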
diff --git a/lib/mesa/src/broadcom/compiler/vir_live_variables.c b/lib/mesa/src/broadcom/compiler/vir_live_variables.c
index 48d0201dc..2fd6430a0 100644
--- a/lib/mesa/src/broadcom/compiler/vir_live_variables.c
+++ b/lib/mesa/src/broadcom/compiler/vir_live_variables.c
@@ -28,9 +28,12 @@
#include "util/register_allocate.h"
#include "v3d_compiler.h"
+/* Keeps track of conditional / partial writes in a block */
struct partial_update_state {
- struct qinst *insts[4];
- uint8_t channels;
+ /* Instruction doing a conditional or partial write */
+ struct qinst *inst;
+ /* Instruction that set the flags for the conditional write */
+ struct qinst *flags_inst;
};
static int
@@ -44,7 +47,8 @@ vir_reg_to_var(struct qreg reg)
static void
vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
- struct qreg src)
+ struct partial_update_state *partial_update_ht, struct qinst *inst,
+ struct qreg src, struct qinst *flags_inst)
{
int var = vir_reg_to_var(src);
if (var == -1)
@@ -57,39 +61,39 @@ vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
* use of a variable without having completely
* defined that variable within the block.
*/
- if (!BITSET_TEST(block->def, var))
- BITSET_SET(block->use, var);
-}
-
-static struct partial_update_state *
-get_partial_update_state(struct hash_table *partial_update_ht,
- struct qinst *inst)
-{
- struct hash_entry *entry =
- _mesa_hash_table_search(partial_update_ht,
- &inst->dst.index);
- if (entry)
- return entry->data;
-
- struct partial_update_state *state =
- rzalloc(partial_update_ht, struct partial_update_state);
-
- _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
+ if (!BITSET_TEST(block->def, var)) {
+ /* If this use of var is conditional and the condition
+ * and flags match those of a previous instruction
+ * in the same block partially defining var, then we
+ * consider var completely defined within the block.
+ */
+ if (BITSET_TEST(block->defout, var)) {
+ struct partial_update_state *state =
+ &partial_update_ht[var];
+ if (state->inst) {
+ if (vir_get_cond(inst) == vir_get_cond(state->inst) &&
+ flags_inst == state->flags_inst) {
+ return;
+ }
+ }
+ }
- return state;
+ BITSET_SET(block->use, var);
+ }
}
+/* The def[] bitset marks when an initialization in a
+ * block completely screens off previous updates of
+ * that variable.
+ */
static void
vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
- struct hash_table *partial_update_ht, struct qinst *inst)
+ struct partial_update_state *partial_update, struct qinst *inst,
+ struct qinst *flags_inst)
{
if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
return;
- /* The def[] bitset marks when an initialization in a
- * block completely screens off previous updates of
- * that variable.
- */
int var = vir_reg_to_var(inst->dst);
if (var == -1)
return;
@@ -115,62 +119,22 @@ vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
return;
}
- /* Finally, look at the condition code and packing and mark it as a
- * def. We need to make sure that we understand sequences
- * instructions like:
- *
- * mov.zs t0, t1
- * mov.zc t0, t2
+ /* Keep track of conditional writes.
*
- * or:
+ * Notice that the dst's live range for a conditional or partial write
+ * will get extended up the control flow to the top of the program until
+ * we find a full write, making register allocation more difficult, so
+ * we should try our best to keep track of these and figure out if a
+ * combination of them actually writes the entire register so we can
+ * stop that process early and reduce liveness.
*
- * mmov t0.8a, t1
- * mmov t0.8b, t2
- * mmov t0.8c, t3
- * mmov t0.8d, t4
- *
- * as defining the temp within the block, because otherwise dst's live
- * range will get extended up the control flow to the top of the
- * program.
+ * FIXME: Track partial updates via pack/unpack.
*/
- struct partial_update_state *state =
- get_partial_update_state(partial_update_ht, inst);
- uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
-
- if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
- inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
- state->channels |= mask;
- } else {
- for (int i = 0; i < 4; i++) {
- if (!(mask & (1 << i)))
- continue;
-
- /* XXXif (state->insts[i] &&
- state->insts[i]->cond ==
- qpu_cond_complement(inst->cond))
- state->channels |= 1 << i;
- else
- */
- state->insts[i] = inst;
- }
- }
-
- if (state->channels == 0xf)
- BITSET_SET(block->def, var);
-}
-
-static void
-sf_state_clear(struct hash_table *partial_update_ht)
-{
- hash_table_foreach(partial_update_ht, entry) {
- struct partial_update_state *state = entry->data;
-
- for (int i = 0; i < 4; i++) {
- if (state->insts[i] &&
- (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
- state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
- state->insts[i] = NULL;
- }
+ struct partial_update_state *state = &partial_update[var];
+ if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
+ inst->qpu.flags.mc != V3D_QPU_COND_NONE) {
+ state->inst = inst;
+ state->flags_inst = flags_inst;
}
}
@@ -184,23 +148,36 @@ sf_state_clear(struct hash_table *partial_update_ht)
static void
vir_setup_def_use(struct v3d_compile *c)
{
- struct hash_table *partial_update_ht =
- _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);
+ struct partial_update_state *partial_update =
+ rzalloc_array(c, struct partial_update_state, c->num_temps);
int ip = 0;
vir_for_each_block(block, c) {
block->start_ip = ip;
- _mesa_hash_table_clear(partial_update_ht, NULL);
+ memset(partial_update, 0,
+ sizeof(struct partial_update_state) * c->num_temps);
+
+ struct qinst *flags_inst = NULL;
vir_for_each_inst(inst, block) {
- for (int i = 0; i < vir_get_nsrc(inst); i++)
- vir_setup_use(c, block, ip, inst->src[i]);
+ for (int i = 0; i < vir_get_nsrc(inst); i++) {
+ vir_setup_use(c, block, ip, partial_update,
+ inst, inst->src[i], flags_inst);
+ }
- vir_setup_def(c, block, ip, partial_update_ht, inst);
+ vir_setup_def(c, block, ip, partial_update,
+ inst, flags_inst);
- if (false /* XXX inst->uf */)
- sf_state_clear(partial_update_ht);
+ if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
+ inst->qpu.flags.mpf != V3D_QPU_PF_NONE) {
+ flags_inst = inst;
+ }
+
+ if (inst->qpu.flags.auf != V3D_QPU_UF_NONE ||
+ inst->qpu.flags.muf != V3D_QPU_UF_NONE) {
+ flags_inst = NULL;
+ }
/* Payload registers: r0/1/2 contain W, centroid W,
* and Z at program start. Register allocation will
@@ -221,7 +198,7 @@ vir_setup_def_use(struct v3d_compile *c)
block->end_ip = ip;
}
- _mesa_hash_table_destroy(partial_update_ht, NULL);
+ ralloc_free(partial_update);
}
static bool
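The situation the new tracking recognizes, in illustrative pseudo-VIR (simplified syntax), together with the predicate it boils down to once the block's defout bit confirms a prior partial def:

    #include <stdbool.h>

    /* Pattern recognized by vir_setup_use (pseudo-VIR, simplified):
     *
     *   ...op.pushz...            <- flags_inst sets the flags
     *   mov.ifa  t3, a            <- partial (conditional) def of t3
     *   add.ifa  t4, t3, 1        <- use of t3 under the same condition
     *                                and the same flags_inst
     *
     * Whenever the use executes, the def above executed too, so t3 does
     * not have to be live into this block from its predecessors.
     */
    static bool
    use_screened_by_partial_def(int use_cond, int def_cond,
                                const void *use_flags_inst,
                                const void *def_flags_inst)
    {
            return use_cond == def_cond &&
                   use_flags_inst == def_flags_inst;
    }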
diff --git a/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c b/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c
index 55469402e..64c762c88 100644
--- a/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c
+++ b/lib/mesa/src/broadcom/compiler/vir_opt_dead_code.c
@@ -149,25 +149,30 @@ check_first_ldunifa(struct v3d_compile *c,
}
static bool
-increment_unifa_address(struct v3d_compile *c, struct qinst *unifa)
+increment_unifa_address(struct v3d_compile *c, struct qblock *block, struct qinst *unifa)
{
+ struct qblock *current_block = c->cur_block;
if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
unifa->qpu.alu.mul.op == V3D_QPU_M_MOV) {
c->cursor = vir_after_inst(unifa);
+ c->cur_block = block;
struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
vir_ADD_dest(c, unifa_reg, unifa->src[0], vir_uniform_ui(c, 4u));
vir_remove_instruction(c, unifa);
+ c->cur_block = current_block;
return true;
}
if (unifa->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
unifa->qpu.alu.add.op == V3D_QPU_A_ADD) {
c->cursor = vir_after_inst(unifa);
+ c->cur_block = block;
struct qreg unifa_reg = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
struct qreg tmp =
vir_ADD(c, unifa->src[1], vir_uniform_ui(c, 4u));
vir_ADD_dest(c, unifa_reg, unifa->src[0], tmp);
vir_remove_instruction(c, unifa);
+ c->cur_block = current_block;
return true;
}
@@ -271,7 +276,7 @@ vir_opt_dead_code(struct v3d_compile *c)
*/
if (is_first_ldunifa) {
assert(unifa);
- if (!increment_unifa_address(c, unifa))
+ if (!increment_unifa_address(c, block, unifa))
continue;
}
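Passing the block explicitly lets the pass keep c->cur_block consistent while it rewrites the address. The rewrite itself is just a 4-byte bump, since dropping the first (dead) ldunifa of a sequence means the surviving loads start one 32-bit word further in; a trivial sketch:

    /* Before:  mov unifa, base        After:  add unifa, base, 4
     *          ldunifa -> t0 (dead)           ldunifa -> t1
     *          ldunifa -> t1
     */
    static unsigned
    unifa_base_after_dropping_first_load(unsigned base)
    {
            return base + 4; /* each ldunifa consumes 4 bytes */
    }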
diff --git a/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c b/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c
index 8749f3cd6..4609ef9c3 100644
--- a/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c
+++ b/lib/mesa/src/broadcom/compiler/vir_opt_redundant_flags.c
@@ -107,9 +107,14 @@ vir_opt_redundant_flags_block(struct v3d_compile *c, struct qblock *block)
continue;
}
- /* Flags aren't preserved across a thrsw. */
- if (inst->qpu.sig.thrsw)
- last_flags = NULL;
+ /* On V3D < 4.2, flags aren't preserved across a thrsw; from 4.2
+ * on they survive thread switches, so we only invalidate the
+ * tracked flags on older hardware.
+ */
+ if (c->devinfo->ver < 42) {
+ if (inst->qpu.sig.thrsw)
+ last_flags = NULL;
+ }
if (inst->qpu.flags.apf != V3D_QPU_PF_NONE ||
inst->qpu.flags.mpf != V3D_QPU_PF_NONE) {
diff --git a/lib/mesa/src/broadcom/compiler/vir_register_allocate.c b/lib/mesa/src/broadcom/compiler/vir_register_allocate.c
index 41fc25729..08698b4ec 100644
--- a/lib/mesa/src/broadcom/compiler/vir_register_allocate.c
+++ b/lib/mesa/src/broadcom/compiler/vir_register_allocate.c
@@ -164,10 +164,8 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
}
for (unsigned i = 0; i < c->num_temps; i++) {
- int node = temp_to_node[i];
-
if (BITSET_TEST(c->spillable, i))
- ra_set_node_spill_cost(g, node, spill_costs[i]);
+ ra_set_node_spill_cost(g, temp_to_node[i], spill_costs[i]);
}
return ra_get_best_spill_node(g);
@@ -179,7 +177,12 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
void
v3d_setup_spill_base(struct v3d_compile *c)
{
- c->cursor = vir_before_block(vir_entry_block(c));
+ /* Setting up the spill base is done in the entry block, so switch
+ * both the block we emit into and the cursor.
+ */
+ struct qblock *current_block = c->cur_block;
+ c->cur_block = vir_entry_block(c);
+ c->cursor = vir_before_block(c->cur_block);
int start_num_temps = c->num_temps;
@@ -206,16 +209,16 @@ v3d_setup_spill_base(struct v3d_compile *c)
for (int i = start_num_temps; i < c->num_temps; i++)
BITSET_CLEAR(c->spillable, i);
+ /* Restore the current block. */
+ c->cur_block = current_block;
c->cursor = vir_after_block(c->cur_block);
}
-static void
+static struct qinst *
v3d_emit_spill_tmua(struct v3d_compile *c, uint32_t spill_offset)
{
- vir_ADD_dest(c, vir_reg(QFILE_MAGIC,
- V3D_QPU_WADDR_TMUA),
- c->spill_base,
- vir_uniform_ui(c, spill_offset));
+ return vir_ADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUA),
+ c->spill_base, vir_uniform_ui(c, spill_offset));
}
@@ -223,12 +226,17 @@ static void
v3d_emit_tmu_spill(struct v3d_compile *c, struct qinst *inst,
struct qinst *position, uint32_t spill_offset)
{
+ assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
+
c->cursor = vir_after_inst(position);
- inst->dst.index = c->num_temps++;
- vir_MOV_dest(c, vir_reg(QFILE_MAGIC,
- V3D_QPU_WADDR_TMUD),
- inst->dst);
- v3d_emit_spill_tmua(c, spill_offset);
+ inst->dst = vir_get_temp(c);
+ enum v3d_qpu_cond cond = vir_get_cond(inst);
+ struct qinst *tmp =
+ vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
+ inst->dst);
+ tmp->qpu.flags.mc = cond;
+ tmp = v3d_emit_spill_tmua(c, spill_offset);
+ tmp->qpu.flags.ac = cond;
vir_emit_thrsw(c);
vir_TMUWT(c);
c->spills++;
@@ -253,7 +261,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
}
struct qinst *last_thrsw = c->last_thrsw;
- assert(!last_thrsw || last_thrsw->is_last_thrsw);
+ assert(last_thrsw && last_thrsw->is_last_thrsw);
int start_num_temps = c->num_temps;
@@ -339,29 +347,13 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
spill_offset);
}
}
-
- /* If we didn't have a last-thrsw inserted by nir_to_vir and
- * we've been inserting thrsws, then insert a new last_thrsw
- * right before we start the vpm/tlb sequence for the last
- * thread segment.
- */
- if (!is_uniform && !last_thrsw && c->last_thrsw &&
- (v3d_qpu_writes_vpm(&inst->qpu) ||
- v3d_qpu_uses_tlb(&inst->qpu))) {
- c->cursor = vir_before_inst(inst);
- vir_emit_thrsw(c);
-
- last_thrsw = c->last_thrsw;
- last_thrsw->is_last_thrsw = true;
- }
}
}
/* Make sure c->last_thrsw is the actual last thrsw, not just one we
* inserted in our most recent unspill.
*/
- if (last_thrsw)
- c->last_thrsw = last_thrsw;
+ c->last_thrsw = last_thrsw;
/* Don't allow spilling of our spilling instructions. There's no way
* they can help get things colored.
@@ -372,27 +364,63 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
c->disable_ldunif_opt = had_disable_ldunif_opt;
}
+struct node_to_temp_map {
+ uint32_t temp;
+ uint32_t priority;
+};
+
struct v3d_ra_select_callback_data {
uint32_t next_acc;
uint32_t next_phys;
+ struct node_to_temp_map *map;
};
-static unsigned int
-v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data)
+/* Choosing accumulators improves the chances of merging QPU
+ * instructions, since a merged instruction may use at most 2 rf
+ * registers across its add and mul halves.
+ */
+static bool
+v3d_ra_favor_accum(struct v3d_ra_select_callback_data *v3d_ra,
+ BITSET_WORD *regs,
+ int priority)
{
- struct v3d_ra_select_callback_data *v3d_ra = data;
- int r5 = ACC_INDEX + 5;
+ /* Favor accumulators if we have fewer than this number of physical
+ * registers. Accumulators have more restrictions (like being
+ * invalidated through thrsw), so running out of physical registers
+ * even if we have accumulators available can lead to register
+ * allocation failures.
+ */
+ static const int available_rf_threshold = 5;
+ int available_rf = 0;
+ for (int i = 0; i < PHYS_COUNT; i++) {
+ if (BITSET_TEST(regs, PHYS_INDEX + i))
+ available_rf++;
+ if (available_rf >= available_rf_threshold)
+ break;
+ }
+ if (available_rf < available_rf_threshold)
+ return true;
- /* Choose r5 for our ldunifs if possible (nobody else can load to that
- * reg, and it keeps the QPU cond field free from being occupied by
- * ldunifrf).
+ /* Favor accumulators for short-lived temps (our priority represents
+ * liveness), to prevent long-lived temps from grabbing accumulators
+ * and preventing follow-up instructions from using them, potentially
+ * leading to large portions of the shader being unable to use
+ * accumulators and therefore merge instructions successfully.
*/
- if (BITSET_TEST(regs, r5))
- return r5;
+ static const int priority_threshold = 20;
+ if (priority <= priority_threshold)
+ return true;
+
+ return false;
+}
- /* Choose an accumulator if possible (I think it's lower power than
- * phys regs), but round-robin through them to give post-RA
- * instruction selection more options.
+static bool
+v3d_ra_select_accum(struct v3d_ra_select_callback_data *v3d_ra,
+ BITSET_WORD *regs,
+ unsigned int *out)
+{
+ /* Round-robin through our accumulators to give post-RA instruction
+ * selection more options.
*/
for (int i = 0; i < ACC_COUNT; i++) {
int acc_off = (v3d_ra->next_acc + i) % ACC_COUNT;
@@ -400,20 +428,61 @@ v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data)
if (BITSET_TEST(regs, acc)) {
v3d_ra->next_acc = acc_off + 1;
- return acc;
+ *out = acc;
+ return true;
}
}
+ return false;
+}
+
+static bool
+v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
+ BITSET_WORD *regs,
+ unsigned int *out)
+{
for (int i = 0; i < PHYS_COUNT; i++) {
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
int phys = PHYS_INDEX + phys_off;
if (BITSET_TEST(regs, phys)) {
v3d_ra->next_phys = phys_off + 1;
- return phys;
+ *out = phys;
+ return true;
}
}
+ return false;
+}
+
+static unsigned int
+v3d_ra_select_callback(unsigned int n, BITSET_WORD *regs, void *data)
+{
+ struct v3d_ra_select_callback_data *v3d_ra = data;
+ int r5 = ACC_INDEX + 5;
+
+ /* Choose r5 for our ldunifs if possible (nobody else can load to that
+ * reg, and it keeps the QPU cond field free from being occupied by
+ * ldunifrf).
+ */
+ if (BITSET_TEST(regs, r5))
+ return r5;
+
+ unsigned int reg;
+ if (v3d_ra_favor_accum(v3d_ra, regs, v3d_ra->map[n].priority) &&
+ v3d_ra_select_accum(v3d_ra, regs, &reg)) {
+ return reg;
+ }
+
+ if (v3d_ra_select_rf(v3d_ra, regs, &reg))
+ return reg;
+
+ /* If we ran out of physical registers try to assign an accumulator
+ * if we didn't favor that option earlier.
+ */
+ if (v3d_ra_select_accum(v3d_ra, regs, &reg))
+ return reg;
+
unreachable("RA must pass us at least one possible reg.");
}
@@ -426,44 +495,37 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
- true);
+ false);
if (!compiler->regs)
return false;
for (int threads = 0; threads < max_thread_index; threads++) {
compiler->reg_class_any[threads] =
- ra_alloc_reg_class(compiler->regs);
+ ra_alloc_contig_reg_class(compiler->regs, 1);
compiler->reg_class_r5[threads] =
- ra_alloc_reg_class(compiler->regs);
+ ra_alloc_contig_reg_class(compiler->regs, 1);
compiler->reg_class_phys_or_acc[threads] =
- ra_alloc_reg_class(compiler->regs);
+ ra_alloc_contig_reg_class(compiler->regs, 1);
compiler->reg_class_phys[threads] =
- ra_alloc_reg_class(compiler->regs);
+ ra_alloc_contig_reg_class(compiler->regs, 1);
for (int i = PHYS_INDEX;
i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_phys_or_acc[threads], i);
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_phys[threads], i);
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_any[threads], i);
+ ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
+ ra_class_add_reg(compiler->reg_class_phys[threads], i);
+ ra_class_add_reg(compiler->reg_class_any[threads], i);
}
for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_phys_or_acc[threads], i);
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_any[threads], i);
+ ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
+ ra_class_add_reg(compiler->reg_class_any[threads], i);
}
/* r5 can only store a single 32-bit value, so not much can
* use it.
*/
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_r5[threads],
+ ra_class_add_reg(compiler->reg_class_r5[threads],
ACC_INDEX + 5);
- ra_class_add_reg(compiler->regs,
- compiler->reg_class_any[threads],
+ ra_class_add_reg(compiler->reg_class_any[threads],
ACC_INDEX + 5);
}
@@ -472,11 +534,6 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
return true;
}
-struct node_to_temp_map {
- uint32_t temp;
- uint32_t priority;
-};
-
static int
node_to_temp_priority(const void *in_a, const void *in_b)
{
@@ -506,15 +563,15 @@ get_spill_batch_size(struct v3d_compile *c)
return 20;
}
-/* Don't emit spills using the TMU until we've dropped thread count first. Also,
- * don't spill if we have enabled any other optimization that can lead to
- * higher register pressure, such as TMU pipelining, we rather recompile without
- * the optimization in that case.
+/* Don't emit spills using the TMU until we've dropped thread count first. We
+ * may also disable spilling when certain optimizations that are known to
+ * increase register pressure are active, so we favor recompiling with those
+ * optimizations disabled instead of spilling.
*/
static inline bool
tmu_spilling_allowed(struct v3d_compile *c, int thread_index)
{
- return thread_index == 0 && c->disable_tmu_pipelining;
+ return thread_index == 0 && c->tmu_spilling_allowed;
}
#define CLASS_BIT_PHYS (1 << 0)
@@ -532,6 +589,7 @@ tmu_spilling_allowed(struct v3d_compile *c, int thread_index)
struct qpu_reg *
v3d_register_allocate(struct v3d_compile *c, bool *spilled)
{
+ uint32_t UNUSED start_num_temps = c->num_temps;
struct node_to_temp_map map[c->num_temps];
uint32_t temp_to_node[c->num_temps];
uint8_t class_bits[c->num_temps];
@@ -542,6 +600,7 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
* RF0-2.
*/
.next_phys = 3,
+ .map = map,
};
*spilled = false;
@@ -782,6 +841,12 @@ v3d_register_allocate(struct v3d_compile *c, bool *spilled)
return NULL;
}
+ /* Ensure that we are not accessing temp_to_node out of bounds. We
+ * should never trigger this assertion because `c->num_temps` only
+ * grows when we spill, in which case we return early and don't get
+ * here.
+ */
+ assert(start_num_temps == c->num_temps);
struct qpu_reg *temp_registers = calloc(c->num_temps,
sizeof(*temp_registers));
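The selection order the callback ends up implementing is: r5 for ldunif results, then accumulators when favored, then physical registers round-robin, then accumulators as a last resort. A condensed standalone sketch, with plain arrays standing in for the RA bitsets:

    #include <stdbool.h>

    /* avail[] covers accumulators [0, acc_count) followed by physical
     * registers [acc_count, acc_count + phys_count). The round-robin
     * cursors persist across calls so post-RA instruction selection
     * sees some variety.
     */
    static int
    select_reg(const bool *avail, int acc_count, int phys_count,
               int *next_acc, int *next_phys, bool favor_accum)
    {
            if (favor_accum) {
                    for (int i = 0; i < acc_count; i++) {
                            int a = (*next_acc + i) % acc_count;
                            if (avail[a]) {
                                    *next_acc = a + 1;
                                    return a;
                            }
                    }
            }

            for (int i = 0; i < phys_count; i++) {
                    int p = (*next_phys + i) % phys_count;
                    if (avail[acc_count + p]) {
                            *next_phys = p + 1;
                            return acc_count + p;
                    }
            }

            /* Out of physical registers: fall back to accumulators even
             * when they were not favored.
             */
            for (int i = 0; i < acc_count; i++) {
                    int a = (*next_acc + i) % acc_count;
                    if (avail[a]) {
                            *next_acc = a + 1;
                            return a;
                    }
            }
            return -1;
    }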
diff --git a/lib/mesa/src/broadcom/compiler/vir_to_qpu.c b/lib/mesa/src/broadcom/compiler/vir_to_qpu.c
index aa3354542..634b8961b 100644
--- a/lib/mesa/src/broadcom/compiler/vir_to_qpu.c
+++ b/lib/mesa/src/broadcom/compiler/vir_to_qpu.c
@@ -45,12 +45,6 @@ qpu_magic(enum v3d_qpu_waddr waddr)
return reg;
}
-static inline struct qpu_reg
-qpu_acc(int acc)
-{
- return qpu_magic(V3D_QPU_WADDR_R0 + acc);
-}
-
struct v3d_qpu_instr
v3d_qpu_nop(void)
{
@@ -219,8 +213,13 @@ v3d_generate_code_block(struct v3d_compile *c,
src[i] = qpu_magic(qinst->src[i].index);
break;
case QFILE_NULL:
+ /* QFILE_NULL is an undef, so we can load
+ * anything; use rf0.
+ */
+ src[i] = qpu_reg(0);
+ break;
case QFILE_LOAD_IMM:
- src[i] = qpu_acc(0);
+ assert(!"not reached");
break;
case QFILE_TEMP:
src[i] = temp_registers[index];
@@ -238,7 +237,7 @@ v3d_generate_code_block(struct v3d_compile *c,
temp = new_qpu_nop_before(qinst);
temp->qpu.sig.ldvpm = true;
- src[i] = qpu_acc(3);
+ src[i] = qpu_magic(V3D_QPU_WADDR_R3);
break;
}
}
diff --git a/lib/mesa/src/broadcom/meson.build b/lib/mesa/src/broadcom/meson.build
index f558aaca4..2e1145dd0 100644
--- a/lib/mesa/src/broadcom/meson.build
+++ b/lib/mesa/src/broadcom/meson.build
@@ -50,16 +50,35 @@ foreach ver : v3d_versions
)
endforeach
+v3d_args = ['-DV3D_BUILD_NEON']
+
+v3d_neon_c_args = []
+if host_machine.cpu_family() == 'arm'
+ v3d_neon_c_args = '-mfpu=neon'
+endif
+
+libv3d_neon = static_library(
+ 'v3d_neon',
+ 'common/v3d_tiling.c',
+ include_directories : [
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
+ ],
+ c_args : [v3d_args, v3d_neon_c_args],
+ gnu_symbol_visibility : 'hidden',
+ dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
+)
+
libbroadcom_v3d = static_library(
'libbroadcom_v3d',
[
- files('common/v3d_debug.c', 'common/v3d_device_info.c', 'clif/clif_dump.c'),
+ files('common/v3d_debug.c', 'common/v3d_device_info.c', 'clif/clif_dump.c', 'common/v3d_util.c'),
v3d_xml_pack,
],
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
link_whole : v3d_libs + per_version_libs,
+ link_with: [libv3d_neon],
build_by_default : false,
dependencies: [dep_valgrind, dep_thread],
)
diff --git a/lib/mesa/src/broadcom/qpu/qpu_disasm.h b/lib/mesa/src/broadcom/qpu/qpu_disasm.h
index efdf8ddb5..b02ec91d7 100644
--- a/lib/mesa/src/broadcom/qpu/qpu_disasm.h
+++ b/lib/mesa/src/broadcom/qpu/qpu_disasm.h
@@ -21,8 +21,8 @@
* IN THE SOFTWARE.
*/
-#ifndef VC5_QPU_DISASM_H
-#define VC5_QPU_DISASM_H
+#ifndef QPU_DISASM_H
+#define QPU_DISASM_H
#include "broadcom/common/v3d_device_info.h"
@@ -36,4 +36,4 @@ const char *v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst)
void v3d_qpu_dump(const struct v3d_device_info *devinfo, const
struct v3d_qpu_instr *instr);
-#endif /* VC5_QPU_DISASM_H */
+#endif /* QPU_DISASM_H */
diff --git a/lib/mesa/src/broadcom/qpu/qpu_instr.c b/lib/mesa/src/broadcom/qpu/qpu_instr.c
index 0bda9a42c..569c5fc40 100644
--- a/lib/mesa/src/broadcom/qpu/qpu_instr.c
+++ b/lib/mesa/src/broadcom/qpu/qpu_instr.c
@@ -137,6 +137,8 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
[V3D_QPU_A_TMUWT] = "tmuwt",
[V3D_QPU_A_VPMSETUP] = "vpmsetup",
[V3D_QPU_A_VPMWT] = "vpmwt",
+ [V3D_QPU_A_FLAFIRST] = "flafirst",
+ [V3D_QPU_A_FLNAFIRST] = "flnafirst",
[V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
[V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
[V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
@@ -406,6 +408,8 @@ static const uint8_t add_op_args[] = {
[V3D_QPU_A_BARRIERID] = D,
[V3D_QPU_A_TMUWT] = D,
[V3D_QPU_A_VPMWT] = D,
+ [V3D_QPU_A_FLAFIRST] = D,
+ [V3D_QPU_A_FLNAFIRST] = D,
[V3D_QPU_A_VPMSETUP] = D | A,
@@ -930,6 +934,8 @@ v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
case V3D_QPU_A_VFLNB:
case V3D_QPU_A_FLAPUSH:
case V3D_QPU_A_FLBPUSH:
+ case V3D_QPU_A_FLAFIRST:
+ case V3D_QPU_A_FLNAFIRST:
return true;
default:
break;
diff --git a/lib/mesa/src/broadcom/qpu/qpu_instr.h b/lib/mesa/src/broadcom/qpu/qpu_instr.h
index a87ed9ff3..4f165e939 100644
--- a/lib/mesa/src/broadcom/qpu/qpu_instr.h
+++ b/lib/mesa/src/broadcom/qpu/qpu_instr.h
@@ -94,7 +94,6 @@ enum v3d_qpu_waddr {
V3D_QPU_WADDR_R3 = 3,
V3D_QPU_WADDR_R4 = 4,
V3D_QPU_WADDR_R5 = 5,
- /* 6 is reserved, but note 3.2.2.8: "Result Writes" */
V3D_QPU_WADDR_NOP = 6,
V3D_QPU_WADDR_TLB = 7,
V3D_QPU_WADDR_TLBU = 8,
@@ -191,6 +190,8 @@ enum v3d_qpu_add_op {
V3D_QPU_A_TMUWT,
V3D_QPU_A_VPMSETUP,
V3D_QPU_A_VPMWT,
+ V3D_QPU_A_FLAFIRST,
+ V3D_QPU_A_FLNAFIRST,
V3D_QPU_A_LDVPMV_IN,
V3D_QPU_A_LDVPMV_OUT,
V3D_QPU_A_LDVPMD_IN,
diff --git a/lib/mesa/src/broadcom/qpu/qpu_pack.c b/lib/mesa/src/broadcom/qpu/qpu_pack.c
index 7502bbfb9..eee1e9f95 100644
--- a/lib/mesa/src/broadcom/qpu/qpu_pack.c
+++ b/lib/mesa/src/broadcom/qpu/qpu_pack.c
@@ -44,65 +44,65 @@
(((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
-#define VC5_QPU_OP_MUL_SHIFT 58
-#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58)
+#define V3D_QPU_OP_MUL_SHIFT 58
+#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58)
-#define VC5_QPU_SIG_SHIFT 53
-#define VC5_QPU_SIG_MASK QPU_MASK(57, 53)
+#define V3D_QPU_SIG_SHIFT 53
+#define V3D_QPU_SIG_MASK QPU_MASK(57, 53)
-#define VC5_QPU_COND_SHIFT 46
-#define VC5_QPU_COND_MASK QPU_MASK(52, 46)
-#define VC5_QPU_COND_SIG_MAGIC_ADDR (1 << 6)
+#define V3D_QPU_COND_SHIFT 46
+#define V3D_QPU_COND_MASK QPU_MASK(52, 46)
+#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6)
-#define VC5_QPU_MM QPU_MASK(45, 45)
-#define VC5_QPU_MA QPU_MASK(44, 44)
+#define V3D_QPU_MM QPU_MASK(45, 45)
+#define V3D_QPU_MA QPU_MASK(44, 44)
#define V3D_QPU_WADDR_M_SHIFT 38
#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)
-#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35
-#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)
+#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35
+#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)
#define V3D_QPU_WADDR_A_SHIFT 32
#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)
-#define VC5_QPU_BRANCH_COND_SHIFT 32
-#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)
+#define V3D_QPU_BRANCH_COND_SHIFT 32
+#define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)
-#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24
-#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)
+#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24
+#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)
-#define VC5_QPU_OP_ADD_SHIFT 24
-#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24)
+#define V3D_QPU_OP_ADD_SHIFT 24
+#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24)
-#define VC5_QPU_MUL_B_SHIFT 21
-#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21)
+#define V3D_QPU_MUL_B_SHIFT 21
+#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21)
-#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21
-#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)
+#define V3D_QPU_BRANCH_MSFIGN_SHIFT 21
+#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)
-#define VC5_QPU_MUL_A_SHIFT 18
-#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18)
+#define V3D_QPU_MUL_A_SHIFT 18
+#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)
-#define VC5_QPU_ADD_B_SHIFT 15
-#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15)
+#define V3D_QPU_ADD_B_SHIFT 15
+#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)
-#define VC5_QPU_BRANCH_BDU_SHIFT 15
-#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)
+#define V3D_QPU_BRANCH_BDU_SHIFT 15
+#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)
-#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14)
+#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14)
-#define VC5_QPU_ADD_A_SHIFT 12
-#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12)
+#define V3D_QPU_ADD_A_SHIFT 12
+#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12)
-#define VC5_QPU_BRANCH_BDI_SHIFT 12
-#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
+#define V3D_QPU_BRANCH_BDI_SHIFT 12
+#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
-#define VC5_QPU_RADDR_A_SHIFT 6
-#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6)
+#define V3D_QPU_RADDR_A_SHIFT 6
+#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)
-#define VC5_QPU_RADDR_B_SHIFT 0
-#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0)
+#define V3D_QPU_RADDR_B_SHIFT 0
+#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0)
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
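
The VC5_ to V3D_ renames above are purely mechanical: each *_SHIFT/*_MASK
pair feeds the same QPU_SET_FIELD/QPU_GET_FIELD mask-and-shift helpers as
before. As a hedged, self-contained restatement of that idiom for the
5-bit signal field in bits 57:53 (local stand-in names, not the real
macros):

    #include <stdint.h>

    #define SIG_SHIFT 53
    #define SIG_MASK  (((UINT64_C(1) << 5) - 1) << SIG_SHIFT)

    /* What QPU_SET_FIELD(value, V3D_QPU_SIG) boils down to. */
    static inline uint64_t set_sig(uint64_t inst, uint64_t sig)
    {
            return (inst & ~SIG_MASK) | ((sig << SIG_SHIFT) & SIG_MASK);
    }

    /* What QPU_GET_FIELD(inst, V3D_QPU_SIG) boils down to. */
    static inline uint32_t get_sig(uint64_t inst)
    {
            return (uint32_t)((inst & SIG_MASK) >> SIG_SHIFT);
    }

    /* get_sig(set_sig(0, 16)) == 16, the branch signal value packed below. */
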
@@ -207,9 +207,9 @@ static const struct v3d_qpu_sig v41_sig_map[] = {
[21] = { THRSW, LDVARY, WRTMUC },
[22] = { UCB, },
[23] = { ROT, },
- /* 24-30 reserved */
[24] = { LDUNIFA},
[25] = { LDUNIFARF },
+ /* 26-30 reserved */
[31] = { SMIMM, LDTMU, },
};
@@ -456,8 +456,15 @@ struct opcode_desc {
uint8_t mux_b_mask;
uint8_t mux_a_mask;
uint8_t op;
- /* 0 if it's the same across V3D versions, or a specific V3D version. */
- uint8_t ver;
+
+ /* first_ver == 0 if the opcode is the same across all V3D versions.
+ * first_ver == X, last_ver == 0 if it applies to all V3D versions
+ * starting from X.
+ * first_ver == X, last_ver == Y if it applies to all V3D versions
+ * in the range X through Y.
+ */
+ uint8_t first_ver;
+ uint8_t last_ver;
};
static const struct opcode_desc add_ops[] = {
@@ -519,8 +526,10 @@ static const struct opcode_desc add_ops[] = {
{ 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
{ 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
{ 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
-
+ { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
+ { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
{ 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
+
{ 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
{ 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
{ 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
@@ -576,9 +585,23 @@ static const struct opcode_desc mul_ops[] = {
{ 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
+/* Returns true if op_desc should be filtered out for devinfo->ver based
+ * on op_desc->first_ver and op_desc->last_ver. See the notes about
+ * first_ver/last_ver in the struct opcode_desc comments.
+ */
+static bool
+opcode_invalid_in_version(const struct v3d_device_info *devinfo,
+ const struct opcode_desc *op_desc)
+{
+ return (op_desc->first_ver != 0 && devinfo->ver < op_desc->first_ver) ||
+ (op_desc->last_ver != 0 && devinfo->ver > op_desc->last_ver);
+}
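
A self-contained restatement of the bounds check, inverted to "valid" for
readability; the zero cases are the open-ended bounds described in the
struct opcode_desc comment:

    #include <stdbool.h>
    #include <stdint.h>

    struct ver_range { uint8_t first_ver, last_ver; };

    static bool opcode_valid_in_version(struct ver_range r, uint8_t ver)
    {
            return (r.first_ver == 0 || ver >= r.first_ver) &&
                   (r.last_ver == 0 || ver <= r.last_ver);
    }

    /* { 0, 0 }   -> valid on every version
     * { 41, 0 }  -> valid from 4.1 on (e.g. FLAFIRST/FLNAFIRST below)
     * { 33, 40 } -> valid on 3.3 through 4.0 only */
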
+
static const struct opcode_desc *
-lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
- uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
+lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
+ const struct opcode_desc *opcodes,
+ size_t num_opcodes, uint32_t opcode,
+ uint32_t mux_a, uint32_t mux_b)
{
for (int i = 0; i < num_opcodes; i++) {
const struct opcode_desc *op_desc = &opcodes[i];
@@ -587,6 +610,9 @@ lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
opcode > op_desc->opcode_last)
continue;
+ if (opcode_invalid_in_version(devinfo, op_desc))
+ continue;
+
if (!(op_desc->mux_b_mask & (1 << mux_b)))
continue;
@@ -716,9 +742,9 @@ static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
struct v3d_qpu_instr *instr)
{
- uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
- uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
- uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
+ uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
+ uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
+ uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
uint32_t map_op = op;
@@ -731,8 +757,9 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
map_op = (map_op - 253 + 245);
const struct opcode_desc *desc =
- lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
- map_op, mux_a, mux_b);
+ lookup_opcode_from_packed(devinfo, add_ops, ARRAY_SIZE(add_ops),
+ map_op, mux_a, mux_b);
+
if (!desc)
return false;
@@ -846,7 +873,7 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
instr->alu.add.magic_write = false;
- if (packed_inst & VC5_QPU_MA) {
+ if (packed_inst & V3D_QPU_MA) {
switch (instr->alu.add.op) {
case V3D_QPU_A_LDVPMV_IN:
instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
@@ -870,14 +897,15 @@ static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
struct v3d_qpu_instr *instr)
{
- uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
- uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
- uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);
+ uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
+ uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
+ uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);
{
const struct opcode_desc *desc =
- lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
- op, mux_a, mux_b);
+ lookup_opcode_from_packed(devinfo, mul_ops,
+ ARRAY_SIZE(mul_ops),
+ op, mux_a, mux_b);
if (!desc)
return false;
@@ -933,11 +961,31 @@ v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
instr->alu.mul.a = mux_a;
instr->alu.mul.b = mux_b;
instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
- instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;
+ instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
return true;
}
+static const struct opcode_desc *
+lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
+ const struct opcode_desc *opcodes, size_t num_opcodes,
+ uint8_t op)
+{
+ for (int i = 0; i < num_opcodes; i++) {
+ const struct opcode_desc *op_desc = &opcodes[i];
+
+ if (op_desc->op != op)
+ continue;
+
+ if (opcode_invalid_in_version(devinfo, op_desc))
+ continue;
+
+ return op_desc;
+ }
+
+ return NULL;
+}
+
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
@@ -946,18 +994,14 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
uint32_t mux_a = instr->alu.add.a;
uint32_t mux_b = instr->alu.add.b;
int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
- const struct opcode_desc *desc;
+ const struct opcode_desc *desc =
+ lookup_opcode_from_instr(devinfo, add_ops, ARRAY_SIZE(add_ops),
+ instr->alu.add.op);
- int opcode;
- for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
- desc++) {
- if (desc->op == instr->alu.add.op)
- break;
- }
- if (desc == &add_ops[ARRAY_SIZE(add_ops)])
+ if (!desc)
return false;
- opcode = desc->opcode_first;
+ uint32_t opcode = desc->opcode_first;
/* If an operation doesn't use an arg, its mux values may be used to
* identify the operation type.
@@ -995,7 +1039,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
case V3D_QPU_A_LDVPMD_OUT:
case V3D_QPU_A_LDVPMG_OUT:
assert(!instr->alu.add.magic_write);
- *packed_instr |= VC5_QPU_MA;
+ *packed_instr |= V3D_QPU_MA;
break;
default:
@@ -1145,12 +1189,12 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
break;
}
- *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
- *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
- *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
+ *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
+ *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
+ *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
*packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
if (instr->alu.add.magic_write && !no_magic_write)
- *packed_instr |= VC5_QPU_MA;
+ *packed_instr |= V3D_QPU_MA;
return true;
}
@@ -1162,14 +1206,12 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
uint32_t mux_a = instr->alu.mul.a;
uint32_t mux_b = instr->alu.mul.b;
int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
- const struct opcode_desc *desc;
- for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
- desc++) {
- if (desc->op == instr->alu.mul.op)
- break;
- }
- if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
+ const struct opcode_desc *desc =
+ lookup_opcode_from_instr(devinfo, mul_ops, ARRAY_SIZE(mul_ops),
+ instr->alu.mul.op);
+
+ if (!desc)
return false;
uint32_t opcode = desc->opcode_first;
@@ -1253,13 +1295,13 @@ v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
break;
}
- *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
- *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
+ *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
+ *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);
- *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
+ *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
*packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
if (instr->alu.mul.magic_write)
- *packed_instr |= VC5_QPU_MM;
+ *packed_instr |= V3D_QPU_MM;
return true;
}
@@ -1272,14 +1314,14 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
instr->type = V3D_QPU_INSTR_TYPE_ALU;
if (!v3d_qpu_sig_unpack(devinfo,
- QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
+ QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
&instr->sig))
return false;
- uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
+ uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
- instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
- instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
+ instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
+ instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
instr->flags.ac = V3D_QPU_COND_NONE;
instr->flags.mc = V3D_QPU_COND_NONE;
@@ -1292,8 +1334,8 @@ v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
return false;
}
- instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
- instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
+ instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
+ instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
return false;
@@ -1311,7 +1353,7 @@ v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
{
instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
- uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
+ uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
if (cond == 0)
instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
@@ -1320,31 +1362,31 @@ v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
else
return false;
- uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
+ uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
if (msfign == 3)
return false;
instr->branch.msfign = msfign;
- instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
+ instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
- instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
+ instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
if (instr->branch.ub) {
instr->branch.bdu = QPU_GET_FIELD(packed_instr,
- VC5_QPU_BRANCH_BDU);
+ V3D_QPU_BRANCH_BDU);
}
instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
- VC5_QPU_RADDR_A);
+ V3D_QPU_RADDR_A);
instr->branch.offset = 0;
instr->branch.offset +=
QPU_GET_FIELD(packed_instr,
- VC5_QPU_BRANCH_ADDR_LOW) << 3;
+ V3D_QPU_BRANCH_ADDR_LOW) << 3;
instr->branch.offset +=
QPU_GET_FIELD(packed_instr,
- VC5_QPU_BRANCH_ADDR_HIGH) << 24;
+ V3D_QPU_BRANCH_ADDR_HIGH) << 24;
return true;
}
@@ -1354,10 +1396,10 @@ v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
uint64_t packed_instr,
struct v3d_qpu_instr *instr)
{
- if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
+ if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
} else {
- uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
+ uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
if ((sig & 24) == 16) {
return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
@@ -1376,11 +1418,11 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
uint32_t sig;
if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
return false;
- *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
+ *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
- *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
- *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
+ *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
+ *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
return false;
@@ -1400,13 +1442,13 @@ v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
flags = instr->sig_addr;
if (instr->sig_magic)
- flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
+ flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
} else {
if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
return false;
}
- *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
+ *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
} else {
if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
return false;
@@ -1420,38 +1462,39 @@ v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *instr,
uint64_t *packed_instr)
{
- *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
+ *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
*packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
V3D_QPU_BRANCH_COND_A0),
- VC5_QPU_BRANCH_COND);
+ V3D_QPU_BRANCH_COND);
}
*packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
- VC5_QPU_BRANCH_MSFIGN);
+ V3D_QPU_BRANCH_MSFIGN);
*packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
- VC5_QPU_BRANCH_BDI);
+ V3D_QPU_BRANCH_BDI);
if (instr->branch.ub) {
- *packed_instr |= VC5_QPU_BRANCH_UB;
+ *packed_instr |= V3D_QPU_BRANCH_UB;
*packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
- VC5_QPU_BRANCH_BDU);
+ V3D_QPU_BRANCH_BDU);
}
switch (instr->branch.bdi) {
case V3D_QPU_BRANCH_DEST_ABS:
case V3D_QPU_BRANCH_DEST_REL:
*packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
- VC5_QPU_BRANCH_MSFIGN);
+ V3D_QPU_BRANCH_MSFIGN);
*packed_instr |= QPU_SET_FIELD((instr->branch.offset &
~0xff000000) >> 3,
- VC5_QPU_BRANCH_ADDR_LOW);
+ V3D_QPU_BRANCH_ADDR_LOW);
*packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
- VC5_QPU_BRANCH_ADDR_HIGH);
+ V3D_QPU_BRANCH_ADDR_HIGH);
+ break;
default:
break;
}
@@ -1459,7 +1502,7 @@ v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
*packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
- VC5_QPU_RADDR_A);
+ V3D_QPU_RADDR_A);
}
return true;
diff --git a/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c b/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c
index 5922b409a..e6b1918b8 100644
--- a/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c
+++ b/lib/mesa/src/broadcom/qpu/tests/qpu_disasm.c
@@ -162,6 +162,7 @@ main(int argc, char **argv)
&instr.alu.add.b);
swap_pack(&instr.alu.add.a_unpack,
&instr.alu.add.b_unpack);
+ break;
default:
break;
}
diff --git a/lib/mesa/src/broadcom/simulator/v3d_simulator.c b/lib/mesa/src/broadcom/simulator/v3d_simulator.c
index 8d43bf6d5..494e5bb44 100644
--- a/lib/mesa/src/broadcom/simulator/v3d_simulator.c
+++ b/lib/mesa/src/broadcom/simulator/v3d_simulator.c
@@ -24,10 +24,10 @@
/**
* @file v3d_simulator.c
*
- * Implements VC5 simulation on top of a non-VC5 GEM fd.
+ * Implements V3D simulation on top of a non-V3D GEM fd.
*
- * This file's goal is to emulate the VC5 ioctls' behavior in the kernel on
- * top of the simpenrose software simulator. Generally, VC5 driver BOs have a
+ * This file's goal is to emulate the V3D ioctls' behavior in the kernel on
+ * top of the simpenrose software simulator. Generally, V3D driver BOs have a
* GEM-side copy of their contents and a simulator-side memory area that the
* GEM contents get copied into during simulation. Once simulation is done,
* the simulator's data is copied back out to the GEM BOs, so that rendering
@@ -40,8 +40,8 @@
* outside of this file still call ioctls directly on the fd).
*
* Another limitation is that BO import doesn't work unless the underlying
- * window system's BO size matches what VC5 is going to use, which of course
- * doesn't work out in practice. This means that for now, only DRI3 (VC5
+ * window system's BO size matches what V3D is going to use, which of course
+ * doesn't work out in practice. This means that for now, only DRI3 (V3D
* makes the winsys BOs) is supported, not DRI2 (window system makes the winsys
* BOs).
*/
@@ -79,7 +79,7 @@ static struct v3d_simulator_state {
/* Base hardware address of the heap. */
uint32_t mem_base;
/* Size of the heap. */
- size_t mem_size;
+ uint32_t mem_size;
struct mem_block *heap;
struct mem_block *overflow;
@@ -87,6 +87,9 @@ static struct v3d_simulator_state {
/** Mapping from GEM fd to struct v3d_simulator_file * */
struct hash_table *fd_map;
+ /** Last performance monitor ID. */
+ uint32_t last_perfid;
+
struct util_dynarray bin_oom;
int refcount;
} sim_state = {
@@ -100,6 +103,11 @@ struct v3d_simulator_file {
/** Mapping from GEM handle to struct v3d_simulator_bo * */
struct hash_table *bo_map;
+ /** Dynamic array with performance monitors */
+ struct v3d_simulator_perfmon **perfmons;
+ uint32_t perfmons_size;
+ uint32_t active_perfid;
+
struct mem_block *gmp;
void *gmp_vaddr;
@@ -121,12 +129,34 @@ struct v3d_simulator_bo {
int handle;
};
+struct v3d_simulator_perfmon {
+ uint32_t ncounters;
+ uint8_t counters[DRM_V3D_MAX_PERF_COUNTERS];
+ uint64_t values[DRM_V3D_MAX_PERF_COUNTERS];
+};
+
static void *
int_to_key(int key)
{
return (void *)(uintptr_t)key;
}
+#define PERFMONS_ALLOC_SIZE 100
+
+static uint32_t
+perfmons_next_id(struct v3d_simulator_file *sim_file) {
+ sim_state.last_perfid++;
+ if (sim_state.last_perfid > sim_file->perfmons_size) {
+ sim_file->perfmons_size += PERFMONS_ALLOC_SIZE;
+ sim_file->perfmons = reralloc(sim_file,
+ sim_file->perfmons,
+ struct v3d_simulator_perfmon *,
+ sim_file->perfmons_size);
+ }
+
+ return sim_state.last_perfid;
+}
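
A hedged sketch of the ID-allocation pattern above, with Mesa's reralloc
swapped for plain realloc and the locking and error handling elided: IDs
grow monotonically and the slot array is extended in fixed chunks, so
most allocations touch no memory.

    #include <stdint.h>
    #include <stdlib.h>

    #define CHUNK 100   /* mirrors PERFMONS_ALLOC_SIZE */

    struct id_table { void **slots; uint32_t size; uint32_t last_id; };

    static uint32_t table_next_id(struct id_table *t)
    {
            t->last_id++;
            if (t->last_id > t->size) {
                    t->size += CHUNK;
                    t->slots = realloc(t->slots, t->size * sizeof(*t->slots));
            }
            return t->last_id;   /* caller stores its object at slots[id - 1] */
    }
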
+
static struct v3d_simulator_file *
v3d_get_simulator_file_for_fd(int fd)
{
@@ -357,6 +387,46 @@ v3d_simulator_unpin_bos(struct v3d_simulator_file *file,
return 0;
}
+static struct v3d_simulator_perfmon *
+v3d_get_simulator_perfmon(int fd, uint32_t perfid)
+{
+ if (!perfid || perfid > sim_state.last_perfid)
+ return NULL;
+
+ struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+
+ mtx_lock(&sim_state.mutex);
+ assert(perfid <= file->perfmons_size);
+ struct v3d_simulator_perfmon *perfmon = file->perfmons[perfid - 1];
+ mtx_unlock(&sim_state.mutex);
+
+ return perfmon;
+}
+
+static void
+v3d_simulator_perfmon_switch(int fd, uint32_t perfid)
+{
+ struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+ struct v3d_simulator_perfmon *perfmon;
+
+ if (perfid == file->active_perfid)
+ return;
+
+ perfmon = v3d_get_simulator_perfmon(fd, file->active_perfid);
+ if (perfmon)
+ v3d41_simulator_perfmon_stop(sim_state.v3d,
+ perfmon->ncounters,
+ perfmon->values);
+
+ perfmon = v3d_get_simulator_perfmon(fd, perfid);
+ if (perfmon)
+ v3d41_simulator_perfmon_start(sim_state.v3d,
+ perfmon->ncounters,
+ perfmon->counters);
+
+ file->active_perfid = perfid;
+}
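
The switch is a drain-then-program handover: the active monitor's
hardware counters are accumulated into its values[] before the next
monitor is programmed. A minimal sketch, with the two hw_* helpers
standing in for the v3d41_simulator_perfmon_stop()/start() calls:

    #include <stdint.h>
    #include <stddef.h>

    struct perfmon { uint32_t ncounters; uint64_t values[32]; };

    extern uint64_t hw_read_counter(uint32_t i);               /* stand-in */
    extern void hw_program_counters(const struct perfmon *pm); /* stand-in */

    static void perfmon_switch(struct perfmon **active, struct perfmon *next)
    {
            if (next == *active)
                    return;
            if (*active) {   /* drain: accumulate, never overwrite */
                    for (uint32_t i = 0; i < (*active)->ncounters; i++)
                            (*active)->values[i] += hw_read_counter(i);
            }
            if (next)
                    hw_program_counters(next);
            *active = next;
    }
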
+
static int
v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
{
@@ -369,6 +439,9 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
mtx_lock(&sim_state.submit_lock);
bin_fd = fd;
+
+ v3d_simulator_perfmon_switch(fd, submit->perfmon_id);
+
if (sim_state.ver >= 41)
v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
else
@@ -402,9 +475,9 @@ void v3d_simulator_open_from_handle(int fd, int handle, uint32_t size)
}
/**
- * Simulated ioctl(fd, DRM_VC5_CREATE_BO) implementation.
+ * Simulated ioctl(fd, DRM_V3D_CREATE_BO) implementation.
*
- * Making a VC5 BO is just a matter of making a corresponding BO on the host.
+ * Making a V3D BO is just a matter of making a corresponding BO on the host.
*/
static int
v3d_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args)
@@ -447,7 +520,7 @@ v3d_simulator_create_bo_ioctl(int fd, struct drm_v3d_create_bo *args)
}
/**
- * Simulated ioctl(fd, DRM_VC5_MMAP_BO) implementation.
+ * Simulated ioctl(fd, DRM_V3D_MMAP_BO) implementation.
*
* We've already grabbed the mmap offset when we created the sim bo, so just
* return it.
@@ -530,6 +603,8 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
for (int i = 0; i < args->bo_handle_count; i++)
v3d_simulator_copy_in_handle(file, bo_handles[i]);
+ v3d_simulator_perfmon_switch(fd, args->perfmon_id);
+
if (sim_state.ver >= 41)
ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
file->gmp->ofs);
@@ -542,6 +617,79 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
return ret;
}
+static int
+v3d_simulator_perfmon_create_ioctl(int fd, struct drm_v3d_perfmon_create *args)
+{
+ struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+
+ if (args->ncounters == 0 ||
+ args->ncounters > DRM_V3D_MAX_PERF_COUNTERS)
+ return -EINVAL;
+
+ struct v3d_simulator_perfmon *perfmon = rzalloc(file,
+ struct v3d_simulator_perfmon);
+
+ perfmon->ncounters = args->ncounters;
+ for (int i = 0; i < args->ncounters; i++) {
+ if (args->counters[i] >= V3D_PERFCNT_NUM) {
+ ralloc_free(perfmon);
+ return -EINVAL;
+ } else {
+ perfmon->counters[i] = args->counters[i];
+ }
+ }
+
+ mtx_lock(&sim_state.mutex);
+ args->id = perfmons_next_id(file);
+ file->perfmons[args->id - 1] = perfmon;
+ mtx_unlock(&sim_state.mutex);
+
+ return 0;
+}
+
+static int
+v3d_simulator_perfmon_destroy_ioctl(int fd, struct drm_v3d_perfmon_destroy *args)
+{
+ struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+ struct v3d_simulator_perfmon *perfmon =
+ v3d_get_simulator_perfmon(fd, args->id);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ mtx_lock(&sim_state.mutex);
+ file->perfmons[args->id - 1] = NULL;
+ mtx_unlock(&sim_state.mutex);
+
+ ralloc_free(perfmon);
+
+ return 0;
+}
+
+static int
+v3d_simulator_perfmon_get_values_ioctl(int fd, struct drm_v3d_perfmon_get_values *args)
+{
+ struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+
+ mtx_lock(&sim_state.submit_lock);
+
+ /* Stop the perfmon if it is still active */
+ if (args->id == file->active_perfid)
+ v3d_simulator_perfmon_switch(fd, 0);
+
+ mtx_unlock(&sim_state.submit_lock);
+
+ struct v3d_simulator_perfmon *perfmon =
+ v3d_get_simulator_perfmon(fd, args->id);
+
+ if (!perfmon)
+ return -EINVAL;
+
+ memcpy((void *)args->values_ptr, perfmon->values, perfmon->ncounters * sizeof(uint64_t));
+
+ return 0;
+}
+
int
v3d_simulator_ioctl(int fd, unsigned long request, void *args)
{
@@ -575,6 +723,15 @@ v3d_simulator_ioctl(int fd, unsigned long request, void *args)
case DRM_IOCTL_V3D_SUBMIT_CSD:
return v3d_simulator_submit_csd_ioctl(fd, args);
+ case DRM_IOCTL_V3D_PERFMON_CREATE:
+ return v3d_simulator_perfmon_create_ioctl(fd, args);
+
+ case DRM_IOCTL_V3D_PERFMON_DESTROY:
+ return v3d_simulator_perfmon_destroy_ioctl(fd, args);
+
+ case DRM_IOCTL_V3D_PERFMON_GET_VALUES:
+ return v3d_simulator_perfmon_get_values_ioctl(fd, args);
+
case DRM_IOCTL_GEM_OPEN:
case DRM_IOCTL_GEM_FLINK:
return drmIoctl(fd, request, args);
diff --git a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp
index 15db767d5..88e439255 100644
--- a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp
+++ b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.cpp
@@ -46,7 +46,7 @@ struct v3d_hw *v3d_hw_auto_new(void *in_params)
}
-uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p)
+uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, uint32_t *size, void **p)
{
return hw->get_mem(size, p);
}
@@ -56,11 +56,6 @@ bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size)
return hw->alloc_mem(min_size) == V3D_HW_ALLOC_SUCCESS;
}
-bool v3d_hw_has_gca(struct v3d_hw *hw)
-{
- return hw->has_gca();
-}
-
uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg)
{
return hw->read_reg(reg);
@@ -89,5 +84,10 @@ v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status))
hw->set_isr(isr);
}
+uint32_t v3d_hw_get_hub_core()
+{
+ return V3D_HW_HUB_CORE;
+}
+
}
#endif /* USE_V3D_SIMULATOR */
diff --git a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h
index b20ea2484..05b2a3361 100644
--- a/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h
+++ b/lib/mesa/src/broadcom/simulator/v3d_simulator_wrapper.h
@@ -31,14 +31,14 @@ extern "C" {
#endif
struct v3d_hw *v3d_hw_auto_new(void *params);
-uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, size_t *size, void **p);
+uint32_t v3d_hw_get_mem(const struct v3d_hw *hw, uint32_t *size, void **p);
bool v3d_hw_alloc_mem(struct v3d_hw *hw, size_t min_size);
-bool v3d_hw_has_gca(struct v3d_hw *hw);
uint32_t v3d_hw_read_reg(struct v3d_hw *hw, uint32_t reg);
void v3d_hw_write_reg(struct v3d_hw *hw, uint32_t reg, uint32_t val);
void v3d_hw_tick(struct v3d_hw *hw);
int v3d_hw_get_version(struct v3d_hw *hw);
void v3d_hw_set_isr(struct v3d_hw *hw, void (*isr)(uint32_t status));
+uint32_t v3d_hw_get_hub_core();
#ifdef __cplusplus
}
diff --git a/lib/mesa/src/broadcom/simulator/v3dx_simulator.c b/lib/mesa/src/broadcom/simulator/v3dx_simulator.c
index cbf257859..07bbbe2f8 100644
--- a/lib/mesa/src/broadcom/simulator/v3dx_simulator.c
+++ b/lib/mesa/src/broadcom/simulator/v3dx_simulator.c
@@ -24,7 +24,7 @@
/**
* @file v3dx_simulator.c
*
- * Implements the actual HW interaction betweeh the GL driver's VC5 simulator and the simulator.
+ * Implements the actual HW interaction between the GL driver's V3D simulator and the simulator.
*
* The register headers between V3D versions will have conflicting defines, so
* all register interactions appear in this file and are compiled per V3D version
@@ -41,6 +41,7 @@
#include "v3d_simulator_wrapper.h"
#include "util/macros.h"
+#include "util/bitscan.h"
#include "drm-uapi/v3d_drm.h"
#define HW_REGISTER_RO(x) (x)
@@ -57,9 +58,6 @@
static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
- if (!v3d_hw_has_gca(v3d))
- return;
-
#if V3D_VERSION < 40
uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
@@ -80,6 +78,12 @@ v3d_invalidate_l2c(struct v3d_hw *v3d)
V3D_CTL_0_L2CACTL_L2CENA_SET);
}
+enum v3d_l2t_cache_flush_mode {
+ V3D_CACHE_FLUSH_MODE_FLUSH,
+ V3D_CACHE_FLUSH_MODE_CLEAR,
+ V3D_CACHE_FLUSH_MODE_CLEAN,
+};
+
/* Invalidates texture L2 cachelines */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
@@ -88,7 +92,23 @@ v3d_invalidate_l2t(struct v3d_hw *v3d)
V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
V3D_WRITE(V3D_CTL_0_L2TCACTL,
V3D_CTL_0_L2TCACTL_L2TFLS_SET |
- (0 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
+ (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
+}
+
+/*
+ * Wait for l2tcactl, used for flushes.
+ *
+ * FIXME: for a multicore scenario we should pass the core here. The
+ * whole wrapper assumes a single core, so it would be better to
+ * handle this there.
+ */
+static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
+ uint32_t ctrl)
+{
+ assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET)));
+
+ while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
+ v3d_hw_tick(v3d);
+ }
}
/* Flushes dirty texture cachelines from the L1 write combiner */
@@ -98,7 +118,13 @@ v3d_flush_l1td(struct v3d_hw *v3d)
V3D_WRITE(V3D_CTL_0_L2TCACTL,
V3D_CTL_0_L2TCACTL_TMUWCF_SET);
- assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
+ /* Note: the kernel (and previous versions of the simulator wrapper)
+ * waits on V3D_CTL_0_L2TCACTL_L2TFLS_SET here, as with the l2t flush.
+ * We believe waiting on TMUWCF makes more sense, but we still need to
+ * confirm which one is correct. So far things work fine on the
+ * simulator this way.
+ */
+ v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}
/* Flushes dirty texture L2 cachelines */
@@ -109,9 +135,9 @@ v3d_flush_l2t(struct v3d_hw *v3d)
V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
V3D_WRITE(V3D_CTL_0_L2TCACTL,
V3D_CTL_0_L2TCACTL_L2TFLS_SET |
- (2 << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
+ (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
- assert(!(V3D_READ(V3D_CTL_0_L2TCACTL) & V3D_CTL_0_L2TCACTL_L2TFLS_SET));
+ v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
}
/* Invalidates the slice caches. These are read-only caches. */
@@ -184,6 +210,8 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
struct drm_v3d_submit_csd *args,
uint32_t gmp_ofs)
{
+ int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
+ V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
g_gmp_ofs = gmp_ofs;
v3d_reload_gmp(v3d);
@@ -198,9 +226,13 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
/* CFG0 kicks off the job */
V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
- while (V3D_READ(V3D_CSD_0_STATUS) &
- (V3D_CSD_0_STATUS_HAVE_CURRENT_DISPATCH_SET |
- V3D_CSD_0_STATUS_HAVE_QUEUED_DISPATCH_SET)) {
+ /* Now we wait for the dispatch to finish. The safest way is to check
+ * whether NUM_COMPLETED_JOBS has increased. Note that, despite its
+ * name, that register field counts completed dispatches.
+ */
+ while ((V3D_READ(V3D_CSD_0_STATUS) &
+ V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
v3d_hw_tick(v3d);
}
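
A minimal restatement of the completion-wait idiom this hunk adopts (the
accessors are placeholders): snapshot the wrapping completed-jobs count
before kicking the job, then tick until it moves, instead of polling
busy bits that may already have cleared by the first read.

    #include <stdint.h>

    extern uint32_t read_completed_count(void); /* masked counter field */
    extern void kick_job(void);
    extern void hw_tick(void);

    static void submit_and_wait(void)
    {
            uint32_t before = read_completed_count();
            kick_job();
            while (read_completed_count() == before)
                    hw_tick();
    }
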
@@ -234,6 +266,9 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
args->value = 1;
return 0;
+ case DRM_V3D_PARAM_SUPPORTS_PERFMON:
+ args->value = V3D_VERSION >= 41;
+ return 0;
}
if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
@@ -241,44 +276,139 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
return 0;
}
- fprintf(stderr, "Unknown DRM_IOCTL_VC5_GET_PARAM(%lld)\n",
+ fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
(long long)args->value);
abort();
}
static struct v3d_hw *v3d_isr_hw;
+
+static void
+v3d_isr_core(struct v3d_hw *v3d,
+ unsigned core)
+{
+ /* FIXME: so far we are assuming just one core, and using only the _0_
+ * registers. If we add multi-core support to the simulator, we would
+ * need to pass the core as a parameter and choose the proper registers.
+ */
+ assert(core == 0);
+ uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
+ V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);
+
+ if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
+ uint32_t size = 256 * 1024;
+ uint32_t offset = v3d_simulator_get_spill(size);
+
+ v3d_reload_gmp(v3d);
+
+ V3D_WRITE(V3D_PTB_0_BPOA, offset);
+ V3D_WRITE(V3D_PTB_0_BPOS, size);
+ return;
+ }
+
+ if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
+ fprintf(stderr, "GMP violation at 0x%08x\n",
+ V3D_READ(V3D_GMP_VIO_ADDR));
+ abort();
+ } else {
+ fprintf(stderr,
+ "Unexpected ISR with core status 0x%08x\n",
+ core_status);
+ }
+ abort();
+}
+
+static void
+handle_mmu_interruptions(struct v3d_hw *v3d,
+ uint32_t hub_status)
+{
+ bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
+ bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
+ bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;
+
+ if (!(pti || cap || wrv))
+ return;
+
+ const char *client = "?";
+ uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
+ uint32_t va_width = 30;
+
+#if V3D_VERSION >= 41
+ static const char *const v3d41_axi_ids[] = {
+ "L2T",
+ "PTB",
+ "PSE",
+ "TLB",
+ "CLE",
+ "TFU",
+ "MMU",
+ "GMP",
+ };
+
+ axi_id = axi_id >> 5;
+ if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
+ client = v3d41_axi_ids[axi_id];
+
+ uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
+
+ va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
+ >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
+#endif
+ /* Only the top bits (final number depends on the gen) of the virtual
+ * address are reported in the MMU VIO_ADDR register.
+ */
+ uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) <<
+ (va_width - 32));
+
+ /* Difference from the kernel: here we are going to abort after
+ * logging, so we don't bother with some of the things the kernel
+ * does, like restoring the MMU ctrl bits.
+ */
+
+ fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
+ client, axi_id, (long long) vio_addr,
+ wrv ? ", write violation" : "",
+ pti ? ", pte invalid" : "",
+ cap ? ", cap exceeded" : "");
+
+ abort();
+}
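
Worked arithmetic for the VIO_ADDR reconstruction above, assuming a
40-bit VA width for illustration: the low va_width - 32 = 8 bits are
unrecoverable, so the reported address is 256-byte granular.

    #include <stdint.h>

    static uint64_t reconstruct_vio_addr(uint32_t reported, uint32_t va_width)
    {
            return (uint64_t)reported << (va_width - 32);
    }

    /* reconstruct_vio_addr(0x00123456, 40) == 0x12345600 */
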
+
+static void
+v3d_isr_hub(struct v3d_hw *v3d)
+{
+ uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);
+
+ /* Acknowledge the interrupts we're handling here */
+ V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);
+
+ if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
+ /* FIXME: we have not been able to trigger this interrupt. We leave
+ * the unreachable here so we notice if it ever fires in the future.
+ * In any case, this path would only produce a debug log.
+ */
+ unreachable("TFU Conversion Complete interrupt not handled");
+ }
+
+ handle_mmu_interruptions(v3d, hub_status);
+}
+
static void
v3d_isr(uint32_t hub_status)
{
struct v3d_hw *v3d = v3d_isr_hw;
+ uint32_t mask = hub_status;
- /* Check the per-core bits */
- if (hub_status & (1 << 0)) {
- uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
- V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);
-
- if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
- uint32_t size = 256 * 1024;
- uint32_t offset = v3d_simulator_get_spill(size);
-
- v3d_reload_gmp(v3d);
-
- V3D_WRITE(V3D_PTB_0_BPOA, offset);
- V3D_WRITE(V3D_PTB_0_BPOS, size);
- return;
- }
-
- if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
- fprintf(stderr, "GMP violation at 0x%08x\n",
- V3D_READ(V3D_GMP_VIO_ADDR));
- abort();
- } else {
- fprintf(stderr,
- "Unexpected ISR with core status 0x%08x\n",
- core_status);
- }
- abort();
+ /* Check the hub_status bits */
+ while (mask) {
+ unsigned core = u_bit_scan(&mask);
+
+ if (core == v3d_hw_get_hub_core())
+ v3d_isr_hub(v3d);
+ else
+ v3d_isr_core(v3d, core);
}
return;
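
u_bit_scan() peels the lowest set bit off the mask and returns its
index; an equivalent self-contained model of the dispatch loop above,
using the GCC/Clang ctz builtin:

    #include <stdint.h>

    static unsigned bit_scan(uint32_t *mask)
    {
            unsigned i = (unsigned)__builtin_ctz(*mask); /* mask must be non-zero */
            *mask &= *mask - 1;                          /* clear that bit */
            return i;
    }

    /* while (mask) { unsigned core = bit_scan(&mask); ... } visits each
     * asserted status bit exactly once, lowest index first. */
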
@@ -299,11 +429,24 @@ v3dX(simulator_init_regs)(struct v3d_hw *v3d)
V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif
+ /* FIXME: the kernel captures some additional core interrupts here,
+ * for tracing. Perhaps we should evaluate to do the same here and add
+ * some debug options.
+ */
uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
+ uint32_t hub_interrupts =
+ (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET | /* write violation */
+ V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET | /* page table invalid */
+ V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET | /* CAP exceeded */
+ V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
+
+ V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
+ V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
+
v3d_isr_hw = v3d;
v3d_hw_set_isr(v3d, v3d_isr);
}
@@ -313,6 +456,12 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
struct drm_v3d_submit_cl *submit,
uint32_t gmp_ofs)
{
+ int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
+ V3D_CLE_0_BFC_BMFCT_SET);
+
+ int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
+ V3D_CLE_0_RFC_RMFCT_SET);
+
g_gmp_ofs = gmp_ofs;
v3d_reload_gmp(v3d);
@@ -336,8 +485,8 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
* scheduler implements this using the GPU scheduler blocking on the
* bin fence completing. (We don't use HW semaphores).
*/
- while (V3D_READ(V3D_CLE_0_CT0CA) !=
- V3D_READ(V3D_CLE_0_CT0EA)) {
+ while ((V3D_READ(V3D_CLE_0_BFC) &
+ V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
v3d_hw_tick(v3d);
}
@@ -346,12 +495,55 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);
- while (V3D_READ(V3D_CLE_0_CT1CA) !=
- V3D_READ(V3D_CLE_0_CT1EA) ||
- V3D_READ(V3D_CLE_1_CT1CA) !=
- V3D_READ(V3D_CLE_1_CT1EA)) {
+ while ((V3D_READ(V3D_CLE_0_RFC) &
+ V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
v3d_hw_tick(v3d);
}
}
+#if V3D_VERSION >= 41
+#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
+#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
+#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
+#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
+ V3D_PCTR_0_SRC_N_SHIFT(x) + 6))
+#endif
+
+void
+v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
+ uint32_t ncounters,
+ uint8_t *events)
+{
+#if V3D_VERSION >= 41
+ int i, j;
+ uint32_t source;
+ uint32_t mask = BITFIELD_RANGE(0, ncounters);
+
+ for (i = 0; i < ncounters; i+=4) {
+ source = i / 4;
+ uint32_t channels = 0;
+ for (j = 0; j < 4 && (i + j) < ncounters; j++)
+ channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
+ V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
+ }
+ V3D_WRITE(V3D_PCTR_0_CLR, mask);
+ V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
+ V3D_WRITE(V3D_PCTR_0_EN, mask);
+#endif
+}
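
The SRC-register programming packs four event selectors per 32-bit
register, one byte-wide lane each (the live field is 7 bits wide, hence
the shift through shift+6 mask above). A self-contained model of one
register's worth of packing:

    #include <stdint.h>

    static uint32_t pack_src_reg(const uint8_t *events, uint32_t remaining)
    {
            uint32_t channels = 0;
            for (uint32_t j = 0; j < 4 && j < remaining; j++)
                    channels |= (uint32_t)events[j] << (j * 8);
            return channels;
    }

    /* pack_src_reg((const uint8_t[]){13, 14, 15, 16}, 4) == 0x100f0e0d */
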
+
+void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
+ uint32_t ncounters,
+ uint64_t *values)
+{
+#if V3D_VERSION >= 41
+ int i;
+
+ for (i = 0; i < ncounters; i++)
+ values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));
+
+ V3D_WRITE(V3D_PCTR_0_EN, 0);
+#endif
+}
+
#endif /* USE_V3D_SIMULATOR */
diff --git a/lib/mesa/src/broadcom/simulator/v3dx_simulator.h b/lib/mesa/src/broadcom/simulator/v3dx_simulator.h
index 2c623d79a..145ae59c2 100644
--- a/lib/mesa/src/broadcom/simulator/v3dx_simulator.h
+++ b/lib/mesa/src/broadcom/simulator/v3dx_simulator.h
@@ -44,3 +44,9 @@ int v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
int v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
struct drm_v3d_submit_csd *args,
uint32_t gmp_offset);
+void v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
+ uint32_t ncounters,
+ uint8_t *events);
+void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
+ uint32_t ncounters,
+ uint64_t *values);
diff --git a/lib/mesa/src/broadcom/vulkan/meson.build b/lib/mesa/src/broadcom/vulkan/meson.build
index 88bee8c13..a1cc58637 100644
--- a/lib/mesa/src/broadcom/vulkan/meson.build
+++ b/lib/mesa/src/broadcom/vulkan/meson.build
@@ -25,32 +25,11 @@ v3dv_entrypoints = custom_target(
command : [
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'v3dv',
+ '--device-prefix', 'ver42',
],
depend_files : vk_entrypoints_gen_depend_files,
)
-v3dv_extensions_c = custom_target(
- 'v3dv_extensions.c',
- input : ['v3dv_extensions_gen.py', vk_api_xml],
- output : 'v3dv_extensions.c',
- command : [
- prog_python, '@INPUT0@', '--xml', '@INPUT1@',
- '--out-c', '@OUTPUT@',
- ],
- depend_files : [files('v3dv_extensions.py'), vk_extensions_gen],
-)
-
-v3dv_extensions_h = custom_target(
- 'v3dv_extensions.h',
- input : ['v3dv_extensions_gen.py', vk_api_xml],
- output : 'v3dv_extensions.h',
- command : [
- prog_python, '@INPUT0@', '--xml', '@INPUT1@',
- '--out-h', '@OUTPUT@',
- ],
- depend_files : [files('v3dv_extensions.py'), vk_extensions_gen],
-)
-
libv3dv_files = files(
'v3dv_bo.c',
'v3dv_cl.c',
@@ -71,15 +50,27 @@ libv3dv_files = files(
'v3dv_query.c',
'v3dv_queue.c',
'v3dv_uniforms.c',
- 'v3dv_util.c',
'v3dv_wsi.c',
- 'v3d_tiling.c',
+)
+
+files_per_version = files(
+ 'v3dvx_cmd_buffer.c',
+ 'v3dvx_descriptor_set.c',
+ 'v3dvx_device.c',
+ 'v3dvx_formats.c',
+ 'v3dvx_image.c',
+ 'v3dvx_meta_common.c',
+ 'v3dvx_pipeline.c',
+ 'v3dvx_queue.c',
)
# The vulkan driver only supports version >= 42, which is the version present in
# Rpi4. We need to explicitly set it as we are reusing pieces from the GL v3d
# driver.
-v3dv_flags = ['-DV3D_VERSION=42']
+v3d_versions = ['42']
+
+v3dv_flags = []
dep_v3dv3 = dependency('v3dv3', required : false)
if dep_v3dv3.found()
@@ -94,39 +85,43 @@ v3dv_deps = [
idep_nir,
idep_nir_headers,
idep_vulkan_util,
+ idep_vulkan_wsi,
]
if with_platform_x11
v3dv_deps += dep_xcb_dri3
- v3dv_flags += [
- '-DVK_USE_PLATFORM_XCB_KHR',
- '-DVK_USE_PLATFORM_XLIB_KHR',
- ]
- libv3dv_files += files('v3dv_wsi_x11.c')
endif
if with_platform_wayland
v3dv_deps += [dep_wayland_client, dep_wl_protocols]
- v3dv_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR'
- libv3dv_files += files('v3dv_wsi_wayland.c')
libv3dv_files += [wayland_drm_client_protocol_h, wayland_drm_protocol_c]
endif
-if system_has_kms_drm and not with_platform_android
- v3dv_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR'
- libv3dv_files += files('v3dv_wsi_display.c')
-endif
+per_version_libs = []
+foreach ver : v3d_versions
+ per_version_libs += static_library(
+ 'v3dv-v' + ver,
+ [files_per_version, v3d_xml_pack, v3dv_entrypoints[0]],
+ include_directories : [
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_broadcom,
+ inc_compiler, inc_util,
+ ],
+ c_args : [v3dv_flags, '-DV3D_VERSION=' + ver],
+ gnu_symbol_visibility : 'hidden',
+ dependencies : [v3dv_deps],
+)
+endforeach
libvulkan_broadcom = shared_library(
'vulkan_broadcom',
- [libv3dv_files, v3dv_entrypoints, v3dv_extensions_c, v3dv_extensions_h, sha1_h],
+ [libv3dv_files, v3dv_entrypoints, sha1_h],
include_directories : [
- inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom, inc_compiler, inc_util, inc_vulkan_wsi,
+ inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_broadcom, inc_compiler, inc_util,
],
link_with : [
libbroadcom_cle,
libbroadcom_v3d,
- libvulkan_wsi,
+ per_version_libs,
],
dependencies : v3dv_deps,
c_args : v3dv_flags,
@@ -150,14 +145,15 @@ endif
broadcom_icd = custom_target(
'broadcom_icd',
- input : 'v3dv_icd.py',
+ input : [vk_icd_gen, vk_api_xml],
output : 'broadcom_icd.@0@.json'.format(host_machine.cpu()),
command : [
- prog_python, '@INPUT@',
- '--lib-path', join_paths(get_option('prefix'), get_option('libdir')),
+ prog_python, '@INPUT0@',
+ '--api-version', '1.0', '--xml', '@INPUT1@',
+ '--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
+ 'libvulkan_broadcom.so'),
'--out', '@OUTPUT@',
],
- depend_files : files('v3dv_extensions.py'),
build_by_default : true,
install_dir : with_vulkan_icd_dir,
install : true,
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_bo.c b/lib/mesa/src/broadcom/vulkan/v3dv_bo.c
index 459032990..71679ceec 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_bo.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_bo.c
@@ -184,6 +184,7 @@ v3dv_bo_init(struct v3dv_bo *bo,
bool private)
{
bo->handle = handle;
+ bo->handle_bit = 1ull << (handle % 64);
bo->size = size;
bo->offset = offset;
bo->map = NULL;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_bo.h b/lib/mesa/src/broadcom/vulkan/v3dv_bo.h
index fd6754c48..ab2b8c735 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_bo.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_bo.h
@@ -30,6 +30,7 @@ struct v3dv_bo {
struct list_head list_link;
uint32_t handle;
+ uint64_t handle_bit;
uint32_t size;
uint32_t offset;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c
index e9674b6c5..ed11f53c4 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c
@@ -22,6 +22,13 @@
*/
#include "v3dv_private.h"
+
+/* We don't expect that the packets we use in this file change across hw
+ * versions, so we just explicitly set the V3D_VERSION and include v3dx_pack
+ * here.
+ */
+#define V3D_VERSION 33
+#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
void
@@ -72,10 +79,10 @@ cl_alloc_bo(struct v3dv_cl *cl, uint32_t space, bool use_branch)
cl_emit(cl, BRANCH, branch) {
branch.address = v3dv_cl_address(bo, 0);
}
+ } else {
+ v3dv_job_add_bo_unchecked(cl->job, bo);
}
- v3dv_job_add_bo(cl->job, bo);
-
cl->bo = bo;
cl->base = cl->bo->map;
cl->size = cl->bo->size;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.h b/lib/mesa/src/broadcom/vulkan/v3dv_cl.h
index a6a38b4aa..68d5acd45 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.h
@@ -46,6 +46,16 @@ struct v3dv_cl_reloc {
uint32_t offset;
};
+static inline void
+pack_emit_reloc(void *cl, const void *reloc) {}
+
+#define __gen_user_data struct v3dv_cl
+#define __gen_address_type struct v3dv_cl_reloc
+#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
+ (reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+#define __gen_unpack_address(cl, s, e) __unpack_address(cl, s, e)
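
The __gen_address_offset() macro resolves a relocation to a device
address: the BO's lifelong offset plus the relocation's offset within
it, or a bare offset when no BO is attached. The same computation as a
function, with minimal stand-in types:

    #include <stdint.h>
    #include <stddef.h>

    struct bo    { uint32_t offset; };
    struct reloc { struct bo *bo; uint32_t offset; };

    static uint32_t address_of(const struct reloc *r)
    {
            return (r->bo ? r->bo->offset : 0) + r->offset;
    }
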
+
struct v3dv_cl {
void *base;
struct v3dv_job *job;
@@ -194,7 +204,7 @@ void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space);
* Helper function called by the XML-generated pack functions for filling in
* an address field in shader records.
*
- * Since we have a private address space as of VC5, our BOs can have lifelong
+ * Since we have a private address space as of V3D, our BOs can have lifelong
* offsets, and all the kernel needs to know is which BOs need to be paged in
* for this exec.
*/
@@ -213,7 +223,7 @@ cl_pack_emit_reloc(struct v3dv_cl *cl, const struct v3dv_cl_reloc *reloc)
#define cl_emit_prepacked(cl, packet) \
cl_emit_prepacked_sized(cl, packet, sizeof(*(packet)))
-#define v3dv_pack(packed, packet, name) \
+#define v3dvx_pack(packed, packet, name) \
for (struct cl_packet_struct(packet) name = { \
cl_packet_header(packet) \
}, \
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
index 6cb9de28a..ff914e048 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -22,8 +22,6 @@
*/
#include "v3dv_private.h"
-#include "broadcom/cle/v3dx_pack.h"
-#include "util/half_float.h"
#include "util/u_pack_color.h"
#include "vk_format_info.h"
#include "vk_util.h"
@@ -57,6 +55,7 @@ const struct v3dv_dynamic_state default_dynamic_state = {
.slope_factor = 0.0f,
},
.line_width = 1.0f,
+ .color_write_enable = (1ull << (4 * V3D_MAX_DRAW_BUFFERS)) - 1,
};
void
@@ -65,17 +64,26 @@ v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
if (!bo)
return;
- if (_mesa_set_search(job->bos, bo))
- return;
+ if (job->bo_handle_mask & bo->handle_bit) {
+ if (_mesa_set_search(job->bos, bo))
+ return;
+ }
_mesa_set_add(job->bos, bo);
job->bo_count++;
+ job->bo_handle_mask |= bo->handle_bit;
}
-static void
-cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer);
+void
+v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo)
+{
+ assert(bo);
+ _mesa_set_add(job->bos, bo);
+ job->bo_count++;
+ job->bo_handle_mask |= bo->handle_bit;
+}
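
The handle_bit/bo_handle_mask pair added in v3dv_bo.{c,h} works as a
one-word Bloom filter over handle % 64: a clear bit proves the BO was
never added to the job, so the hash-set probe above runs only for
genuine members and mod-64 collisions. The fast-path test reduces to:

    #include <stdbool.h>
    #include <stdint.h>

    static bool job_may_contain_bo(uint64_t bo_handle_mask, uint32_t handle)
    {
            /* false => definitely absent; true => possibly present,
             * confirm with the _mesa_set_search() lookup. */
            return (bo_handle_mask & (UINT64_C(1) << (handle % 64))) != 0;
    }
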
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateCommandPool(VkDevice _device,
const VkCommandPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -90,7 +98,7 @@ v3dv_CreateCommandPool(VkDevice _device,
pool = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
VK_OBJECT_TYPE_COMMAND_POOL);
if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (pAllocator)
pool->alloc = *pAllocator;
@@ -114,7 +122,7 @@ cmd_buffer_init(struct v3dv_cmd_buffer *cmd_buffer,
* buffer reset that would reset the loader's dispatch table for the
* command buffer, and any other relevant info from vk_object_base
*/
- const uint32_t base_size = sizeof(struct vk_object_base);
+ const uint32_t base_size = sizeof(struct vk_command_buffer);
uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size;
memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size);
@@ -142,12 +150,20 @@ cmd_buffer_create(struct v3dv_device *device,
VkCommandBuffer *pCommandBuffer)
{
struct v3dv_cmd_buffer *cmd_buffer;
- cmd_buffer = vk_object_zalloc(&device->vk,
- &pool->alloc,
- sizeof(*cmd_buffer),
- VK_OBJECT_TYPE_COMMAND_BUFFER);
+ cmd_buffer = vk_zalloc2(&device->vk.alloc,
+ &pool->alloc,
+ sizeof(*cmd_buffer),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ VkResult result;
+ result = vk_command_buffer_init(&cmd_buffer->vk, &device->vk);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
+ return result;
+ }
cmd_buffer_init(cmd_buffer, device, pool, level);
@@ -332,18 +348,9 @@ cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
{
list_del(&cmd_buffer->pool_link);
cmd_buffer_free_resources(cmd_buffer);
- vk_object_free(&cmd_buffer->device->vk, &cmd_buffer->pool->alloc, cmd_buffer);
-}
-
-void
-v3dv_job_emit_binning_flush(struct v3dv_job *job)
-{
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, FLUSH, flush);
+ vk_command_buffer_finish(&cmd_buffer->vk);
+ vk_free2(&cmd_buffer->device->vk.alloc, &cmd_buffer->pool->alloc,
+ cmd_buffer);
}
static bool
@@ -402,6 +409,13 @@ cmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx];
struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx];
+ /* Don't merge if the subpasses have different view masks, since in that
+ * case the framebuffer setup is different and we need to emit different
+ * RCLs.
+ */
+ if (subpass->view_mask != prev_subpass->view_mask)
+ return false;
+
/* Because the list of subpass attachments can include VK_ATTACHMENT_UNUSED,
* we need to check that for each subpass all its used attachments are
* used by the other subpass.
@@ -517,6 +531,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
uint32_t width,
uint32_t height,
uint32_t layers,
+ bool allocate_tile_state_for_all_layers,
uint32_t render_target_count,
uint8_t max_internal_bpp,
bool msaa)
@@ -532,6 +547,16 @@ v3dv_job_start_frame(struct v3dv_job *job,
v3dv_cl_ensure_space_with_branch(&job->bcl, 256);
v3dv_return_if_oom(NULL, job);
+ /* We only need to allocate tile state for all layers if the binner
+ * writes primitives to layers other than the first. This can only be
+ * done using layered rendering (writing gl_Layer from a geometry shader),
+ * so for other cases of multilayered framebuffers (typically with
+ * meta copy/clear operations) that won't use layered rendering, we only
+ * need one layer's worth of tile state for the binner.
+ */
+ if (!allocate_tile_state_for_all_layers)
+ layers = 1;
+
/* The PTB will request the tile alloc initial size per tile at start
* of tile binning.
*/
@@ -561,7 +586,7 @@ v3dv_job_start_frame(struct v3dv_job *job,
return;
}
- v3dv_job_add_bo(job, job->tile_alloc);
+ v3dv_job_add_bo_unchecked(job, job->tile_alloc);
const uint32_t tsda_per_tile_size = 256;
const uint32_t tile_state_size = tiling->layers *
@@ -574,33 +599,12 @@ v3dv_job_start_frame(struct v3dv_job *job,
return;
}
- v3dv_job_add_bo(job, job->tile_state);
+ v3dv_job_add_bo_unchecked(job, job->tile_state);
- /* This must go before the binning mode configuration. It is
- * required for layered framebuffers to work.
- */
- cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
- config.number_of_layers = layers;
- }
-
- cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
- config.width_in_pixels = tiling->width;
- config.height_in_pixels = tiling->height;
- config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
- config.multisample_mode_4x = tiling->msaa;
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- }
-
- /* There's definitely nothing in the VCD cache we want. */
- cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
-
- /* "Binning mode lists must have a Start Tile Binning item (6) after
- * any prefix state data before the binning list proper starts."
- */
- cl_emit(&job->bcl, START_TILE_BINNING, bin);
+ v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers);
- job->ez_state = VC5_EZ_UNDECIDED;
- job->first_ez_state = VC5_EZ_UNDECIDED;
+ job->ez_state = V3D_EZ_UNDECIDED;
+ job->first_ez_state = V3D_EZ_UNDECIDED;
}
static void
@@ -617,19 +621,9 @@ cmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer)
* any RCL commands of its own.
*/
if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0)
- cmd_buffer_emit_render_pass_rcl(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_render_pass_rcl)(cmd_buffer);
- v3dv_job_emit_binning_flush(cmd_buffer->state.job);
-}
-
-static void
-cmd_buffer_end_render_pass_secondary(struct v3dv_cmd_buffer *cmd_buffer)
-{
- assert(cmd_buffer->state.job);
- v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl,
- cl_packet_length(RETURN_FROM_SUB_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
- cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret);
+ v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job);
}
struct v3dv_job *
@@ -716,7 +710,7 @@ v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer)
cmd_buffer_end_render_pass_frame(cmd_buffer);
} else {
assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY);
- cmd_buffer_end_render_pass_secondary(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_end_render_pass_secondary)(cmd_buffer);
}
}
@@ -803,7 +797,7 @@ v3dv_job_init(struct v3dv_job *job,
v3dv_cl_init(job, &job->indirect);
- if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH))
job->always_flush = true;
}
@@ -821,6 +815,7 @@ v3dv_job_init(struct v3dv_job *job,
* bits.
*/
cmd_buffer->state.dirty = ~0;
+ cmd_buffer->state.dirty_descriptor_stages = ~0;
/* Honor inheritance of occlusion queries in secondaries if requested */
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
@@ -879,6 +874,7 @@ static VkResult
cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
VkCommandBufferResetFlags flags)
{
+ vk_command_buffer_reset(&cmd_buffer->vk);
if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) {
struct v3dv_device *device = cmd_buffer->device;
struct v3dv_cmd_pool *pool = cmd_buffer->pool;
@@ -902,7 +898,7 @@ cmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateCommandBuffers(VkDevice _device,
const VkCommandBufferAllocateInfo *pAllocateInfo,
VkCommandBuffer *pCommandBuffers)
@@ -930,7 +926,7 @@ v3dv_AllocateCommandBuffers(VkDevice _device,
return result;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_FreeCommandBuffers(VkDevice device,
VkCommandPool commandPool,
uint32_t commandBufferCount,
@@ -946,7 +942,7 @@ v3dv_FreeCommandBuffers(VkDevice device,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyCommandPool(VkDevice _device,
VkCommandPool commandPool,
const VkAllocationCallbacks *pAllocator)
@@ -965,7 +961,7 @@ v3dv_DestroyCommandPool(VkDevice _device,
vk_object_free(&device->vk, pAllocator, pool);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_TrimCommandPool(VkDevice device,
VkCommandPool commandPool,
VkCommandPoolTrimFlags flags)
@@ -1026,34 +1022,37 @@ cmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer)
struct v3dv_image_view *src_iview = fb->attachments[src_attachment_idx];
struct v3dv_image_view *dst_iview = fb->attachments[dst_attachment_idx];
- VkImageResolve region = {
+ VkImageResolve2KHR region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
.srcSubresource = {
VK_IMAGE_ASPECT_COLOR_BIT,
- src_iview->base_level,
- src_iview->first_layer,
- src_iview->last_layer - src_iview->first_layer + 1,
+ src_iview->vk.base_mip_level,
+ src_iview->vk.base_array_layer,
+ src_iview->vk.layer_count,
},
.srcOffset = { 0, 0, 0 },
.dstSubresource = {
VK_IMAGE_ASPECT_COLOR_BIT,
- dst_iview->base_level,
- dst_iview->first_layer,
- dst_iview->last_layer - dst_iview->first_layer + 1,
+ dst_iview->vk.base_mip_level,
+ dst_iview->vk.base_array_layer,
+ dst_iview->vk.layer_count,
},
.dstOffset = { 0, 0, 0 },
- .extent = src_iview->image->extent,
+ .extent = src_iview->vk.image->extent,
};
- VkImage src_image_handle =
- v3dv_image_to_handle((struct v3dv_image *) src_iview->image);
- VkImage dst_image_handle =
- v3dv_image_to_handle((struct v3dv_image *) dst_iview->image);
- v3dv_CmdResolveImage(cmd_buffer_handle,
- src_image_handle,
- VK_IMAGE_LAYOUT_GENERAL,
- dst_image_handle,
- VK_IMAGE_LAYOUT_GENERAL,
- 1, &region);
+ struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image;
+ struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image;
+ VkResolveImageInfo2KHR resolve_info = {
+ .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
+ .srcImage = v3dv_image_to_handle(src_image),
+ .srcImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .dstImage = v3dv_image_to_handle(dst_image),
+ .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .regionCount = 1,
+ .pRegions = &region,
+ };
+ v3dv_CmdResolveImage2KHR(cmd_buffer_handle, &resolve_info);
}
cmd_buffer->state.framebuffer = restore_fb;
@@ -1116,7 +1115,7 @@ cmd_buffer_begin_render_pass_secondary(
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
{
@@ -1149,7 +1148,7 @@ v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer,
VkCommandBufferResetFlags flags)
{
@@ -1157,7 +1156,7 @@ v3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer,
return cmd_buffer_reset(cmd_buffer, flags);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetCommandPool(VkDevice device,
VkCommandPool commandPool,
VkCommandPoolResetFlags flags)
@@ -1176,22 +1175,6 @@ v3dv_ResetCommandPool(VkDevice device,
}
static void
-emit_clip_window(struct v3dv_job *job, const VkRect2D *rect)
-{
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, CLIP_WINDOW, clip) {
- clip.clip_window_left_pixel_coordinate = rect->offset.x;
- clip.clip_window_bottom_pixel_coordinate = rect->offset.y;
- clip.clip_window_width_in_pixels = rect->extent.width;
- clip.clip_window_height_in_pixels = rect->extent.height;
- }
-}
-
-static void
cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer)
{
/* Render areas and scissor/viewport are only relevant inside render passes,
@@ -1206,7 +1189,7 @@ cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer)
*/
assert(cmd_buffer->state.framebuffer);
cmd_buffer->state.tile_aligned_render_area =
- v3dv_subpass_area_is_tile_aligned(rect,
+ v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, rect,
cmd_buffer->state.framebuffer,
cmd_buffer->state.pass,
cmd_buffer->state.subpass_idx);
@@ -1218,42 +1201,6 @@ cmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer)
}
}
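/* Illustrative sketch (not part of the driver): the tile-alignment check
 * that feeds state.tile_aligned_render_area boils down to rectangle
 * arithmetic. An edge is aligned if it sits on a tile boundary or touches
 * the framebuffer edge. The tile sizes below are hypothetical examples.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
example_area_is_tile_aligned(uint32_t x, uint32_t y, uint32_t w, uint32_t h,
                             uint32_t fb_w, uint32_t fb_h,
                             uint32_t tile_w, uint32_t tile_h)
{
   return (x % tile_w) == 0 && (y % tile_h) == 0 &&
          ((x + w) % tile_w == 0 || x + w >= fb_w) &&
          ((y + h) % tile_h == 0 || y + h >= fb_h);
}
/* e.g. a (0,0) 64x64 area on 64x64 tiles is aligned; (8,8) 32x32 is not,
 * which forces tile loads/stores to preserve pixels outside the area. */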
-void
-v3dv_get_hw_clear_color(const VkClearColorValue *color,
- uint32_t internal_type,
- uint32_t internal_size,
- uint32_t *hw_color)
-{
- union util_color uc;
- switch (internal_type) {
- case V3D_INTERNAL_TYPE_8:
- util_pack_color(color->float32, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- memcpy(hw_color, uc.ui, internal_size);
- break;
- case V3D_INTERNAL_TYPE_8I:
- case V3D_INTERNAL_TYPE_8UI:
- hw_color[0] = ((color->uint32[0] & 0xff) |
- (color->uint32[1] & 0xff) << 8 |
- (color->uint32[2] & 0xff) << 16 |
- (color->uint32[3] & 0xff) << 24);
- break;
- case V3D_INTERNAL_TYPE_16F:
- util_pack_color(color->float32, PIPE_FORMAT_R16G16B16A16_FLOAT, &uc);
- memcpy(hw_color, uc.ui, internal_size);
- break;
- case V3D_INTERNAL_TYPE_16I:
- case V3D_INTERNAL_TYPE_16UI:
- hw_color[0] = ((color->uint32[0] & 0xffff) | color->uint32[1] << 16);
- hw_color[1] = ((color->uint32[2] & 0xffff) | color->uint32[3] << 16);
- break;
- case V3D_INTERNAL_TYPE_32F:
- case V3D_INTERNAL_TYPE_32I:
- case V3D_INTERNAL_TYPE_32UI:
- memcpy(hw_color, color->uint32, internal_size);
- break;
- }
-}
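/* Worked example (illustration only) of the packing above: in the
 * V3D_INTERNAL_TYPE_16UI case, a clear color of { 0x1111, 0x2222, 0x3333,
 * 0x4444 } packs two channels per 32-bit word.
 */
#include <stdint.h>

static void
example_pack_16ui_clear(const uint32_t color[4], uint32_t hw_color[2])
{
   /* Same arithmetic as the 16I/16UI case above. */
   hw_color[0] = (color[0] & 0xffff) | (color[1] << 16);  /* 0x22221111 */
   hw_color[1] = (color[2] & 0xffff) | (color[3] << 16);  /* 0x44443333 */
}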
-
static void
cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t attachment_idx,
@@ -1265,18 +1212,19 @@ cmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer,
&cmd_buffer->state.pass->attachments[attachment_idx];
uint32_t internal_type, internal_bpp;
- const struct v3dv_format *format = v3dv_get_format(attachment->desc.format);
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
- &internal_type,
- &internal_bpp);
+ const struct v3dv_format *format =
+ v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format);
+
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format)
+ (format->rt_type, &internal_type, &internal_bpp);
uint32_t internal_size = 4 << internal_bpp;
struct v3dv_cmd_buffer_attachment_state *attachment_state =
&cmd_buffer->state.attachments[attachment_idx];
- v3dv_get_hw_clear_color(color, internal_type, internal_size,
- &attachment_state->clear_value.color[0]);
+ v3dv_X(cmd_buffer->device, get_hw_clear_color)
+ (color, internal_type, internal_size, &attachment_state->clear_value.color[0]);
attachment_state->vk_clear_value.color = *color;
}
@@ -1370,7 +1318,7 @@ cmd_buffer_ensure_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffe
assert(state->attachment_alloc_count >= pass->attachment_count);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
const VkRenderPassBeginInfo *pRenderPassBegin,
VkSubpassContents contents)
@@ -1394,7 +1342,7 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
    * to emit a new clip window to constrain it to the render area.
*/
uint32_t min_render_x = state->render_area.offset.x;
- uint32_t min_render_y = state->render_area.offset.x;
+ uint32_t min_render_y = state->render_area.offset.y;
uint32_t max_render_x = min_render_x + state->render_area.extent.width - 1;
uint32_t max_render_y = min_render_y + state->render_area.extent.height - 1;
uint32_t min_clip_x = state->clip_window.offset.x;
@@ -1410,7 +1358,7 @@ v3dv_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
v3dv_cmd_buffer_subpass_start(cmd_buffer, 0);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
@@ -1426,884 +1374,6 @@ v3dv_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
v3dv_cmd_buffer_subpass_start(cmd_buffer, state->subpass_idx + 1);
}
-void
-v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
- int rt,
- uint32_t *rt_bpp,
- uint32_t *rt_type,
- uint32_t *rt_clamp)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-
- assert(state->subpass_idx < state->pass->subpass_count);
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- if (rt >= subpass->color_count)
- return;
-
- struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
- const uint32_t attachment_idx = attachment->attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- return;
-
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- assert(attachment_idx < framebuffer->attachment_count);
- struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
- assert(iview->aspects & VK_IMAGE_ASPECT_COLOR_BIT);
-
- *rt_bpp = iview->internal_bpp;
- *rt_type = iview->internal_type;
- *rt_clamp = vk_format_is_int(iview->vk_format) ?
- V3D_RENDER_TARGET_CLAMP_INT : V3D_RENDER_TARGET_CLAMP_NONE;
-}
-
-static void
-cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- struct v3dv_image_view *iview,
- uint32_t layer,
- uint32_t buffer)
-{
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
- uint32_t layer_offset = v3dv_layer_offset(image,
- iview->base_level,
- iview->first_layer + layer);
-
- cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
- load.buffer_to_load = buffer;
- load.address = v3dv_cl_address(image->mem->bo, layer_offset);
-
- load.input_image_format = iview->format->rt_type;
- load.r_b_swap = iview->swap_rb;
- load.memory_format = slice->tiling;
-
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- load.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- load.height_in_ub_or_stride = slice->stride;
- }
-
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else
- load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
- }
-}
-
-static bool
-check_needs_load(const struct v3dv_cmd_buffer_state *state,
- VkImageAspectFlags aspect,
- uint32_t att_first_subpass_idx,
- VkAttachmentLoadOp load_op)
-{
- /* We call this with image->aspects & aspect, so 0 means the aspect we are
- * testing does not exist in the image.
- */
- if (!aspect)
- return false;
-
- /* Attachment load operations apply on the first subpass that uses the
- * attachment; otherwise we always need to load.
- */
- if (state->job->first_subpass > att_first_subpass_idx)
- return true;
-
- /* If the job is continuing a subpass started in another job, we always
- * need to load.
- */
- if (state->job->is_subpass_continue)
- return true;
-
- /* If the area is not aligned to tile boundaries, we always need to load */
- if (!state->tile_aligned_render_area)
- return true;
-
- /* The attachment load operation must be LOAD */
- return load_op == VK_ATTACHMENT_LOAD_OP_LOAD;
-}
-
-static bool
-check_needs_clear(const struct v3dv_cmd_buffer_state *state,
- VkImageAspectFlags aspect,
- uint32_t att_first_subpass_idx,
- VkAttachmentLoadOp load_op,
- bool do_clear_with_draw)
-{
- /* We call this with image->aspects & aspect, so 0 means the aspect we are
- * testing does not exist in the image.
- */
- if (!aspect)
- return false;
-
- /* If the aspect needs to be cleared with a draw call then we won't emit
- * the clear here.
- */
- if (do_clear_with_draw)
- return false;
-
- /* If this is resuming a subpass started with another job, then attachment
- * load operations don't apply.
- */
- if (state->job->is_subpass_continue)
- return false;
-
- /* If the render area is not aligned to tile boudaries we can't use the
- * TLB for a clear.
- */
- if (!state->tile_aligned_render_area)
- return false;
-
- /* If this job is running in a subpass other than the first subpass in
- * which this attachment is used then attachment load operations don't apply.
- */
- if (state->job->first_subpass != att_first_subpass_idx)
- return false;
-
- /* The attachment load operation must be CLEAR */
- return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR;
-}
-
-static bool
-check_needs_store(const struct v3dv_cmd_buffer_state *state,
- VkImageAspectFlags aspect,
- uint32_t att_last_subpass_idx,
- VkAttachmentStoreOp store_op)
-{
- /* We call this with image->aspects & aspect, so 0 means the aspect we are
- * testing does not exist in the image.
- */
- if (!aspect)
- return false;
-
- /* Attachment store operations only apply on the last subpass where the
- * attachment is used; in other subpasses we always need to store.
- */
- if (state->subpass_idx < att_last_subpass_idx)
- return true;
-
- /* Attachment store operations only apply on the last job we emit on the
- * last subpass where the attachment is used, otherwise we always need to
- * store.
- */
- if (!state->job->is_subpass_finish)
- return true;
-
- /* The attachment store operation must be STORE */
- return store_op == VK_ATTACHMENT_STORE_OP_STORE;
-}
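/* Sketch (illustration only, under the assumption of a single job covering
 * the attachment's only subpass with a tile-aligned render area): the three
 * predicates above then reduce to the plain load/store ops, with CLEAR
 * folded into the store's clear bit instead of emitting a load.
 */
#include <stdbool.h>

enum example_load_op  { EX_LOAD, EX_CLEAR, EX_DONT_CARE_LOAD };
enum example_store_op { EX_STORE, EX_DONT_CARE_STORE };

static void
example_single_job_ops(enum example_load_op load_op,
                       enum example_store_op store_op,
                       bool *load, bool *clear, bool *store)
{
   *load  = load_op == EX_LOAD;    /* check_needs_load()  */
   *clear = load_op == EX_CLEAR;   /* check_needs_clear() */
   *store = store_op == EX_STORE;  /* check_needs_store() */
}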
-
-static void
-cmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t layer)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- const struct v3dv_render_pass *pass = state->pass;
- const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
-
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t attachment_idx = subpass->color_attachments[i].attachment;
-
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- const struct v3dv_render_pass_attachment *attachment =
- &state->pass->attachments[attachment_idx];
-
- /* According to the Vulkan spec:
- *
- * "The load operation for each sample in an attachment happens before
- * any recorded command which accesses the sample in the first subpass
- * where the attachment is used."
- *
- * If the load operation is CLEAR, we must only clear once on the first
- * subpass that uses the attachment (and in that case we don't LOAD).
- * After that, we always want to load so we don't lose any rendering done
- * by a previous subpass to the same attachment. We also want to load
- * if the current job is continuing subpass work started by a previous
- * job, for the same reason.
- *
- * If the render area is not aligned to tile boundaries then we have
- * tiles which are partially covered by it. In this case, we need to
- * load the tiles so we can preserve the pixels that are outside the
- * render area for any such tiles.
- */
- bool needs_load = check_needs_load(state,
- VK_IMAGE_ASPECT_COLOR_BIT,
- attachment->first_subpass,
- attachment->desc.loadOp);
- if (needs_load) {
- struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
- cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview,
- layer, RENDER_TARGET_0 + i);
- }
- }
-
- uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_render_pass_attachment *ds_attachment =
- &state->pass->attachments[ds_attachment_idx];
-
- const VkImageAspectFlags ds_aspects =
- vk_format_aspects(ds_attachment->desc.format);
-
- const bool needs_depth_load =
- check_needs_load(state,
- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.loadOp);
-
- const bool needs_stencil_load =
- check_needs_load(state,
- ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.stencilLoadOp);
-
- if (needs_depth_load || needs_stencil_load) {
- struct v3dv_image_view *iview =
- framebuffer->attachments[ds_attachment_idx];
- /* From the Vulkan spec:
- *
- * "When an image view of a depth/stencil image is used as a
- * depth/stencil framebuffer attachment, the aspectMask is ignored
- * and both depth and stencil image subresources are used."
- *
- * So we ignore the aspects from the subresource range of the image
- * view for the depth/stencil attachment, but we still need to restrict
- * them to the aspects compatible with the render pass and the image.
- */
- const uint32_t zs_buffer =
- v3dv_zs_buffer(needs_depth_load, needs_stencil_load);
- cmd_buffer_render_pass_emit_load(cmd_buffer, cl,
- iview, layer, zs_buffer);
- }
- }
-
- cl_emit(cl, END_OF_LOADS, end);
-}
-
-static void
-cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t attachment_idx,
- uint32_t layer,
- uint32_t buffer,
- bool clear,
- bool is_multisample_resolve)
-{
- const struct v3dv_image_view *iview =
- cmd_buffer->state.framebuffer->attachments[attachment_idx];
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
- uint32_t layer_offset = v3dv_layer_offset(image,
- iview->base_level,
- iview->first_layer + layer);
-
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = buffer;
- store.address = v3dv_cl_address(image->mem->bo, layer_offset);
- store.clear_buffer_being_stored = clear;
-
- store.output_image_format = iview->format->rt_type;
- store.r_b_swap = iview->swap_rb;
- store.memory_format = slice->tiling;
-
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- store.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- store.height_in_ub_or_stride = slice->stride;
- }
-
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else if (is_multisample_resolve)
- store.decimate_mode = V3D_DECIMATE_MODE_4X;
- else
- store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
- }
-}
-
-static void
-cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t layer)
-{
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- bool has_stores = false;
- bool use_global_zs_clear = false;
- bool use_global_rt_clear = false;
-
- /* FIXME: separate stencil */
- uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_render_pass_attachment *ds_attachment =
- &state->pass->attachments[ds_attachment_idx];
-
- assert(state->job->first_subpass >= ds_attachment->first_subpass);
- assert(state->subpass_idx >= ds_attachment->first_subpass);
- assert(state->subpass_idx <= ds_attachment->last_subpass);
-
- /* From the Vulkan spec, VkImageSubresourceRange:
- *
- * "When an image view of a depth/stencil image is used as a
- * depth/stencil framebuffer attachment, the aspectMask is ignored
- * and both depth and stencil image subresources are used."
- *
- * So we ignore the aspects from the subresource range of the image
- * view for the depth/stencil attachment, but we still need to restrict
- * them to the aspects compatible with the render pass and the image.
- */
- const VkImageAspectFlags aspects =
- vk_format_aspects(ds_attachment->desc.format);
-
- /* Only clear once on the first subpass that uses the attachment */
- bool needs_depth_clear =
- check_needs_clear(state,
- aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.loadOp,
- subpass->do_depth_clear_with_draw);
-
- bool needs_stencil_clear =
- check_needs_clear(state,
- aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.stencilLoadOp,
- subpass->do_stencil_clear_with_draw);
-
- /* Skip the last store if it is not required */
- bool needs_depth_store =
- check_needs_store(state,
- aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->last_subpass,
- ds_attachment->desc.storeOp);
-
- bool needs_stencil_store =
- check_needs_store(state,
- aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
- ds_attachment->last_subpass,
- ds_attachment->desc.stencilStoreOp);
-
- /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
- * for depth/stencil.
- *
- * There used to be some confusion regarding the Clear Tile Buffers
- * Z/S bit also being broken, but we confirmed with Broadcom that this
- * is not the case, it was just that some other hardware bugs (that we
- * need to work around, such as GFXH-1461) could cause this bit to behave
- * incorrectly.
- *
- * There used to be another issue where the RTs bit in the Clear Tile
- * Buffers packet also cleared Z/S, but Broadcom confirmed this is
- * fixed since V3D 4.1.
- *
- * So if we have to emit a clear of depth or stencil we don't use
- * the per-buffer store clear bit, even if we need to store the buffers,
- * instead we always have to use the Clear Tile Buffers Z/S bit.
- * If we have configured the job to do early Z/S clearing, then we
- * don't want to emit any Clear Tile Buffers command at all here.
- *
- * Note that GFXH-1689 is not reproduced in the simulator, where
- * using the clear buffer bit in depth/stencil stores works fine.
- */
- use_global_zs_clear = !state->job->early_zs_clear &&
- (needs_depth_clear || needs_stencil_clear);
- if (needs_depth_store || needs_stencil_store) {
- const uint32_t zs_buffer =
- v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
- cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
- ds_attachment_idx, layer,
- zs_buffer, false, false);
- has_stores = true;
- }
- }
-
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t attachment_idx = subpass->color_attachments[i].attachment;
-
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- const struct v3dv_render_pass_attachment *attachment =
- &state->pass->attachments[attachment_idx];
-
- assert(state->job->first_subpass >= attachment->first_subpass);
- assert(state->subpass_idx >= attachment->first_subpass);
- assert(state->subpass_idx <= attachment->last_subpass);
-
- /* Only clear once on the first subpass that uses the attachment */
- bool needs_clear =
- check_needs_clear(state,
- VK_IMAGE_ASPECT_COLOR_BIT,
- attachment->first_subpass,
- attachment->desc.loadOp,
- false);
-
- /* Skip the last store if it is not required */
- bool needs_store =
- check_needs_store(state,
- VK_IMAGE_ASPECT_COLOR_BIT,
- attachment->last_subpass,
- attachment->desc.storeOp);
-
- /* If we need to resolve this attachment, emit that store first. Notice
- * that we must not request a tile buffer clear here in that case: the
- * clear happens after the store completes, so it would wipe the tile
- * buffer before we get to emit the actual color attachment store below.
- *
- * If the attachment doesn't support TLB resolves then we will have to
- * fall back to doing the resolve in a shader separately after this
- * job, so we will need to store the multisampled attachment even if that
- * wasn't requested by the client.
- */
- const bool needs_resolve =
- subpass->resolve_attachments &&
- subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED;
- if (needs_resolve && attachment->use_tlb_resolve) {
- const uint32_t resolve_attachment_idx =
- subpass->resolve_attachments[i].attachment;
- cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
- resolve_attachment_idx, layer,
- RENDER_TARGET_0 + i,
- false, true);
- has_stores = true;
- } else if (needs_resolve) {
- needs_store = true;
- }
-
- /* Emit the color attachment store if needed */
- if (needs_store) {
- cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
- attachment_idx, layer,
- RENDER_TARGET_0 + i,
- needs_clear && !use_global_rt_clear,
- false);
- has_stores = true;
- } else if (needs_clear) {
- use_global_rt_clear = true;
- }
- }
-
- /* We always need to emit at least one dummy store */
- if (!has_stores) {
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- }
-
- /* If we have any depth/stencil clears we can't use the per-buffer clear
- * bit and instead we have to emit a single clear of all tile buffers.
- */
- if (use_global_zs_clear || use_global_rt_clear) {
- cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = use_global_zs_clear;
- clear.clear_all_render_targets = use_global_rt_clear;
- }
- }
-}
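/* Minimal sketch (illustration only) of the GFXH-1689 decision above: a
 * depth/stencil clear must go through the Clear Tile Buffers Z/S bit rather
 * than the per-buffer store clear bit, unless the job already performs an
 * early Z/S clear, in which case no clear is emitted here at all.
 */
#include <stdbool.h>

static bool
example_use_global_zs_clear(bool early_zs_clear,
                            bool needs_depth_clear,
                            bool needs_stencil_clear)
{
   return !early_zs_clear && (needs_depth_clear || needs_stencil_clear);
}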
-
-static void
-cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t layer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- /* Emit the generic list in our indirect state -- the rcl will just
- * have pointers into it.
- */
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer);
-
- /* The binner starts out writing tiles assuming that the initial mode
- * is triangles, so make sure that's the case.
- */
- cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
- fmt.primitive_type = LIST_TRIANGLES;
- }
-
- /* The PTB assumes this value is 0, but the HW will not set it. */
- cl_emit(cl, SET_INSTANCEID, set) {
- set.instance_id = 0;
- }
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t layer)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
-
- struct v3dv_job *job = cmd_buffer->state.job;
- struct v3dv_cl *rcl = &job->rcl;
-
- /* If doing multicore binning, we would need to initialize each
- * core's tile list here.
- */
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
- const uint32_t tile_alloc_offset =
- 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
- cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
- config.number_of_bin_tile_lists = 1;
- config.total_frame_width_in_tiles = tiling->draw_tiles_x;
- config.total_frame_height_in_tiles = tiling->draw_tiles_y;
-
- config.supertile_width_in_tiles = tiling->supertile_width;
- config.supertile_height_in_tiles = tiling->supertile_height;
-
- config.total_frame_width_in_supertiles =
- tiling->frame_width_in_supertiles;
- config.total_frame_height_in_supertiles =
- tiling->frame_height_in_supertiles;
- }
-
- /* Start by clearing the tile buffer. */
- cl_emit(rcl, TILE_COORDINATES, coords) {
- coords.tile_column_number = 0;
- coords.tile_row_number = 0;
- }
-
- /* Emit an initial clear of the tile buffers. This is necessary
- * for any buffers that should be cleared (since clearing
- * normally happens at the *end* of the generic tile list), but
- * it's also nice to clear everything so the first tile doesn't
- * inherit any contents from some previous frame.
- *
- * Also, implement the GFXH-1742 workaround. There's a race in
- * the HW between the RCL updating the TLB's internal type/size
- * and the spawning of the QPU instances using the TLB's current
- * internal type/size. To make sure the QPUs get the right
- * state, we need 1 dummy store in between internal type/size
- * changes on V3D 3.x, and 2 dummy stores on 4.x.
- */
- for (int i = 0; i < 2; i++) {
- if (i > 0)
- cl_emit(rcl, TILE_COORDINATES, coords);
- cl_emit(rcl, END_OF_LOADS, end);
- cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
- cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = !job->early_zs_clear;
- clear.clear_all_render_targets = true;
- }
- }
- cl_emit(rcl, END_OF_TILE_MARKER, end);
- }
-
- cl_emit(rcl, FLUSH_VCD_CACHE, flush);
-
- cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer);
-
- uint32_t supertile_w_in_pixels =
- tiling->tile_width * tiling->supertile_width;
- uint32_t supertile_h_in_pixels =
- tiling->tile_height * tiling->supertile_height;
- const uint32_t min_x_supertile =
- state->render_area.offset.x / supertile_w_in_pixels;
- const uint32_t min_y_supertile =
- state->render_area.offset.y / supertile_h_in_pixels;
-
- uint32_t max_render_x = state->render_area.offset.x;
- if (state->render_area.extent.width > 0)
- max_render_x += state->render_area.extent.width - 1;
- uint32_t max_render_y = state->render_area.offset.y;
- if (state->render_area.extent.height > 0)
- max_render_y += state->render_area.extent.height - 1;
- const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
- const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
-
- for (int y = min_y_supertile; y <= max_y_supertile; y++) {
- for (int x = min_x_supertile; x <= max_x_supertile; x++) {
- cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
- coords.column_number_in_supertiles = x;
- coords.row_number_in_supertiles = y;
- }
- }
- }
-}
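/* Worked example (illustration only) of the supertile range math above:
 * with 64x64 tiles and 2x2-tile supertiles, one supertile covers 128x128
 * pixels. A render area at x=100 with width 200 then spans supertile
 * columns 100/128 = 0 through (100+200-1)/128 = 2, so three columns of
 * SUPERTILE_COORDINATES are emitted per covered row.
 */
#include <stdint.h>

static void
example_supertile_range(uint32_t offset, uint32_t extent,
                        uint32_t supertile_px,
                        uint32_t *min_st, uint32_t *max_st)
{
   *min_st = offset / supertile_px;
   *max_st = (extent > 0 ? offset + extent - 1 : offset) / supertile_px;
}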
-
-static void
-set_rcl_early_z_config(struct v3dv_job *job,
- bool *early_z_disable,
- uint32_t *early_z_test_and_update_direction)
-{
- /* If this is true then we have not emitted any draw calls in this job
- * and we don't get any benefits from early Z.
- */
- if (!job->decided_global_ez_enable) {
- assert(job->draw_count == 0);
- *early_z_disable = true;
- return;
- }
-
- switch (job->first_ez_state) {
- case VC5_EZ_UNDECIDED:
- case VC5_EZ_LT_LE:
- *early_z_disable = false;
- *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE;
- break;
- case VC5_EZ_GT_GE:
- *early_z_disable = false;
- *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE;
- break;
- case VC5_EZ_DISABLED:
- *early_z_disable = true;
- break;
- }
-}
-
-static void
-cmd_buffer_emit_render_pass_rcl(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
-
- /* We can't emit the RCL until we have a framebuffer, which we may not have
- * if we are recording a secondary command buffer. In that case, we will
- * have to wait until vkCmdExecuteCommands is called from a primary command
- * buffer.
- */
- if (!framebuffer) {
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
- return;
- }
-
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
-
- const uint32_t fb_layers = framebuffer->layers;
- v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
- MAX2(fb_layers, 1) * 256 *
- cl_packet_length(SUPERTILE_COORDINATES));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- assert(state->subpass_idx < state->pass->subpass_count);
- const struct v3dv_render_pass *pass = state->pass;
- const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
- struct v3dv_cl *rcl = &job->rcl;
-
- /* Common config must be the first TILE_RENDERING_MODE_CFG and
- * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
- * updates to the previous HW state.
- */
- bool do_early_zs_clear = false;
- const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
- config.image_width_pixels = framebuffer->width;
- config.image_height_pixels = framebuffer->height;
- config.number_of_render_targets = MAX2(subpass->color_count, 1);
- config.multisample_mode_4x = tiling->msaa;
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
-
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_image_view *iview =
- framebuffer->attachments[ds_attachment_idx];
- config.internal_depth_type = iview->internal_type;
-
- set_rcl_early_z_config(job,
- &config.early_z_disable,
- &config.early_z_test_and_update_direction);
-
- /* Early-Z/S clear can be enabled if the job is clearing and not
- * storing (or loading) depth. If a stencil aspect is also present
- * we have the same requirements for it; however, in this case we
- * can accept stencil loadOp DONT_CARE as well, so instead of
- * checking that stencil is cleared we check that it is not loaded.
- *
- * Early-Z/S clearing is independent of Early Z/S testing, so it is
- * possible to enable one but not the other so long as their
- * respective requirements are met.
- */
- struct v3dv_render_pass_attachment *ds_attachment =
- &pass->attachments[ds_attachment_idx];
-
- const VkImageAspectFlags ds_aspects =
- vk_format_aspects(ds_attachment->desc.format);
-
- bool needs_depth_clear =
- check_needs_clear(state,
- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.loadOp,
- subpass->do_depth_clear_with_draw);
-
- bool needs_depth_store =
- check_needs_store(state,
- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->last_subpass,
- ds_attachment->desc.storeOp);
-
- do_early_zs_clear = needs_depth_clear && !needs_depth_store;
- if (do_early_zs_clear &&
- vk_format_has_stencil(ds_attachment->desc.format)) {
- bool needs_stencil_load =
- check_needs_load(state,
- ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.stencilLoadOp);
-
- bool needs_stencil_store =
- check_needs_store(state,
- ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
- ds_attachment->last_subpass,
- ds_attachment->desc.stencilStoreOp);
-
- do_early_zs_clear = !needs_stencil_load && !needs_stencil_store;
- }
-
- config.early_depth_stencil_clear = do_early_zs_clear;
- } else {
- config.early_z_disable = true;
- }
- }
-
- /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers"
- * commands with the Z/S bit set, so keep track of whether we enabled this
- * in the job so we can skip these later.
- */
- job->early_zs_clear = do_early_zs_clear;
-
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t attachment_idx = subpass->color_attachments[i].attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct v3dv_image_view *iview =
- state->framebuffer->attachments[attachment_idx];
-
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
-
- const uint32_t *clear_color =
- &state->attachments[attachment_idx].clear_value.color[0];
-
- uint32_t clear_pad = 0;
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- int uif_block_height = v3d_utile_height(image->cpp) * 2;
-
- uint32_t implicit_padded_height =
- align(framebuffer->height, uif_block_height) / uif_block_height;
-
- if (slice->padded_height_of_output_image_in_uif_blocks -
- implicit_padded_height >= 15) {
- clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
- clear.clear_color_low_32_bits = clear_color[0];
- clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
- clear.render_target_number = i;
- };
-
- if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
- clear.clear_color_mid_low_32_bits =
- ((clear_color[1] >> 24) | (clear_color[2] << 8));
- clear.clear_color_mid_high_24_bits =
- ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
- clear.render_target_number = i;
- };
- }
-
- if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
- clear.uif_padded_height_in_uif_blocks = clear_pad;
- clear.clear_color_high_16_bits = clear_color[3] >> 16;
- clear.render_target_number = i;
- };
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
- v3dv_render_pass_setup_render_target(cmd_buffer, 0,
- &rt.render_target_0_internal_bpp,
- &rt.render_target_0_internal_type,
- &rt.render_target_0_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 1,
- &rt.render_target_1_internal_bpp,
- &rt.render_target_1_internal_type,
- &rt.render_target_1_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 2,
- &rt.render_target_2_internal_bpp,
- &rt.render_target_2_internal_type,
- &rt.render_target_2_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 3,
- &rt.render_target_3_internal_bpp,
- &rt.render_target_3_internal_type,
- &rt.render_target_3_clamp);
- }
-
- /* Ends rendering mode config. */
- if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value =
- state->attachments[ds_attachment_idx].clear_value.z;
- clear.stencil_clear_value =
- state->attachments[ds_attachment_idx].clear_value.s;
- };
- } else {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value = 1.0f;
- clear.stencil_clear_value = 0;
- };
- }
-
- /* Always set initial block size before the first branch, which needs
- * to match the value from binning mode config.
- */
- cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
- init.use_auto_chained_tile_lists = true;
- init.size_of_first_block_in_chained_tile_lists =
- TILE_ALLOCATION_BLOCK_SIZE_64B;
- }
-
- for (int layer = 0; layer < MAX2(1, fb_layers); layer++)
- cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer);
-
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
static void
cmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer)
{
@@ -2445,13 +1515,30 @@ cmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer,
uint8_t internal_bpp;
bool msaa;
- v3dv_framebuffer_compute_internal_bpp_msaa(framebuffer, subpass,
- &internal_bpp, &msaa);
+ v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa)
+ (framebuffer, subpass, &internal_bpp, &msaa);
+
+ /* From the Vulkan spec:
+ *
+ * "If the render pass uses multiview, then layers must be one and
+ * each attachment requires a number of layers that is greater than
+ * the maximum bit index set in the view mask in the subpasses in
+ * which it is used."
+ *
+ * So when multiview is enabled, we take the number of layers from the
+ * last bit set in the view mask.
+ */
+ uint32_t layers = framebuffer->layers;
+ if (subpass->view_mask != 0) {
+ assert(framebuffer->layers == 1);
+ layers = util_last_bit(subpass->view_mask);
+ }
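/* Illustration of the layer-count rule above: util_last_bit() returns one
 * plus the index of the highest set bit, so a view mask of 0b0110 (views 1
 * and 2 active) needs 3 layers. A standalone equivalent, assuming a
 * GCC/Clang builtin and handling the zero mask as a single layer:
 */
#include <stdint.h>

static uint32_t
example_multiview_layers(uint32_t view_mask)
{
   return view_mask ? 32 - __builtin_clz(view_mask) : 1;
}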
v3dv_job_start_frame(job,
framebuffer->width,
framebuffer->height,
- framebuffer->layers,
+ layers,
+ true,
subpass->color_count,
internal_bpp,
msaa);
@@ -2534,7 +1621,7 @@ v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer)
job->is_subpass_finish = true;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
@@ -2553,7 +1640,7 @@ v3dv_CmdEndRenderPass(VkCommandBuffer commandBuffer)
state->subpass_idx = -1;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
@@ -2580,44 +1667,6 @@ v3dv_EndCommandBuffer(VkCommandBuffer commandBuffer)
}
static void
-emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer);
-
-static void
-ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t slot_size,
- uint32_t used_count,
- uint32_t *alloc_count,
- void **ptr);
-
-static void
-cmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary,
- struct v3dv_cmd_buffer *secondary)
-{
- struct v3dv_cmd_buffer_state *p_state = &primary->state;
- struct v3dv_cmd_buffer_state *s_state = &secondary->state;
-
- const uint32_t total_state_count =
- p_state->query.end.used_count + s_state->query.end.used_count;
- ensure_array_state(primary,
- sizeof(struct v3dv_end_query_cpu_job_info),
- total_state_count,
- &p_state->query.end.alloc_count,
- (void **) &p_state->query.end.states);
- v3dv_return_if_oom(primary, NULL);
-
- for (uint32_t i = 0; i < s_state->query.end.used_count; i++) {
- const struct v3dv_end_query_cpu_job_info *s_qstate =
- &secondary->state.query.end.states[i];
-
- struct v3dv_end_query_cpu_job_info *p_qstate =
- &p_state->query.end.states[p_state->query.end.used_count++];
-
- p_qstate->pool = s_qstate->pool;
- p_qstate->query = s_qstate->query;
- }
-}
-
-static void
clone_bo_list(struct v3dv_cmd_buffer *cmd_buffer,
struct list_head *dst,
struct list_head *src)
@@ -2645,9 +1694,9 @@ clone_bo_list(struct v3dv_cmd_buffer *cmd_buffer,
* for jobs recorded in secondary command buffers when we want to execute
* them in primaries.
*/
-static struct v3dv_job *
-job_clone_in_cmd_buffer(struct v3dv_job *job,
- struct v3dv_cmd_buffer *cmd_buffer)
+struct v3dv_job *
+v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
+ struct v3dv_cmd_buffer *cmd_buffer)
{
struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc,
sizeof(struct v3dv_job), 8,
@@ -2676,163 +1725,6 @@ job_clone_in_cmd_buffer(struct v3dv_job *job,
return clone_job;
}
-static struct v3dv_job *
-cmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer,
- bool is_bcl_barrier)
-{
- assert(cmd_buffer->state.subpass_idx != -1);
- v3dv_cmd_buffer_finish_job(cmd_buffer);
- struct v3dv_job *job =
- v3dv_cmd_buffer_subpass_resume(cmd_buffer,
- cmd_buffer->state.subpass_idx);
- if (!job)
- return NULL;
-
- job->serialize = true;
- job->needs_bcl_sync = is_bcl_barrier;
- return job;
-}
-
-static void
-cmd_buffer_execute_inside_pass(struct v3dv_cmd_buffer *primary,
- uint32_t cmd_buffer_count,
- const VkCommandBuffer *cmd_buffers)
-{
- assert(primary->state.job);
-
- /* Emit occlusion query state if needed so the draw calls inside our
- * secondaries update the counters.
- */
- bool has_occlusion_query =
- primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY;
- if (has_occlusion_query)
- emit_occlusion_query(primary);
-
- /* FIXME: if our primary job tiling doesn't enable MSAA but any of the
- * pipelines used by the secondaries do, we need to re-start the primary
- * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed.
- */
- bool pending_barrier = false;
- bool pending_bcl_barrier = false;
- for (uint32_t i = 0; i < cmd_buffer_count; i++) {
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
-
- assert(secondary->usage_flags &
- VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
-
- list_for_each_entry(struct v3dv_job, secondary_job,
- &secondary->jobs, list_link) {
- if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
- /* If the job is a CL, then we branch to it from the primary BCL.
- * In this case the secondary's BCL is finished with a
- * RETURN_FROM_SUB_LIST command to return back to the primary BCL
- * once we are done executing it.
- */
- assert(v3dv_cl_offset(&secondary_job->rcl) == 0);
- assert(secondary_job->bcl.bo);
-
- /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */
- STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1);
- assert(v3dv_cl_offset(&secondary_job->bcl) >= 1);
- assert(*(((uint8_t *)secondary_job->bcl.next) - 1) ==
- V3D42_RETURN_FROM_SUB_LIST_opcode);
-
- /* If this secondary has any barriers (or we had any pending barrier
- * to apply), then we can't just branch to it from the primary, we
- * need to split the primary to create a new job that can consume
- * the barriers first.
- *
- * FIXME: in this case, maybe just copy the secondary BCL without
- * the RETURN_FROM_SUB_LIST into the primary job to skip the
- * branch?
- */
- struct v3dv_job *primary_job = primary->state.job;
- if (!primary_job || secondary_job->serialize || pending_barrier) {
- const bool needs_bcl_barrier =
- secondary_job->needs_bcl_sync || pending_bcl_barrier;
- primary_job =
- cmd_buffer_subpass_split_for_barrier(primary,
- needs_bcl_barrier);
- v3dv_return_if_oom(primary, NULL);
-
- /* Since we have created a new primary we need to re-emit
- * occlusion query state.
- */
- if (has_occlusion_query)
- emit_occlusion_query(primary);
- }
-
- /* Make sure our primary job has all required BO references */
- set_foreach(secondary_job->bos, entry) {
- struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
- v3dv_job_add_bo(primary_job, bo);
- }
-
- /* Emit required branch instructions. We expect each of these
- * to end with a corresponding 'return from sub list' item.
- */
- list_for_each_entry(struct v3dv_bo, bcl_bo,
- &secondary_job->bcl.bo_list, list_link) {
- v3dv_cl_ensure_space_with_branch(&primary_job->bcl,
- cl_packet_length(BRANCH_TO_SUB_LIST));
- v3dv_return_if_oom(primary, NULL);
- cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) {
- branch.address = v3dv_cl_address(bcl_bo, 0);
- }
- }
-
- primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl;
- } else if (secondary_job->type == V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS) {
- if (pending_barrier) {
- cmd_buffer_subpass_split_for_barrier(primary, pending_bcl_barrier);
- v3dv_return_if_oom(primary, NULL);
- }
-
- const struct v3dv_clear_attachments_cpu_job_info *info =
- &secondary_job->cpu.clear_attachments;
- v3dv_CmdClearAttachments(v3dv_cmd_buffer_to_handle(primary),
- info->attachment_count,
- info->attachments,
- info->rect_count,
- info->rects);
- } else {
- /* This is a regular job (CPU or GPU), so just finish the current
- * primary job (if any) and then add the secondary job to the
- * primary's job list right after it.
- */
- v3dv_cmd_buffer_finish_job(primary);
- job_clone_in_cmd_buffer(secondary_job, primary);
- if (pending_barrier) {
- secondary_job->serialize = true;
- if (pending_bcl_barrier)
- secondary_job->needs_bcl_sync = true;
- }
- }
-
- pending_barrier = false;
- pending_bcl_barrier = false;
- }
-
- /* If the secondary has recorded any vkCmdEndQuery commands, we need to
- * copy this state to the primary so it is processed properly when the
- * current primary job is finished.
- */
- cmd_buffer_copy_secondary_end_query_state(primary, secondary);
-
- /* If this secondary had any pending barrier state we will need that
- * barrier state consumed with whatever comes next in the primary.
- */
- assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
- pending_barrier = secondary->state.has_barrier;
- pending_bcl_barrier = secondary->state.has_bcl_barrier;
- }
-
- if (pending_barrier) {
- primary->state.has_barrier = true;
- primary->state.has_bcl_barrier |= pending_bcl_barrier;
- }
-}
-
static void
cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
uint32_t cmd_buffer_count,
@@ -2862,9 +1754,8 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
list_for_each_entry(struct v3dv_job, secondary_job,
&secondary->jobs, list_link) {
/* These can only happen inside a render pass */
- assert(secondary_job->type != V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS);
assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY);
- struct v3dv_job *job = job_clone_in_cmd_buffer(secondary_job, primary);
+ struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
if (!job)
return;
@@ -2892,7 +1783,7 @@ cmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer,
uint32_t commandBufferCount,
const VkCommandBuffer *pCommandBuffers)
@@ -2900,8 +1791,8 @@ v3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, primary, commandBuffer);
if (primary->state.pass != NULL) {
- cmd_buffer_execute_inside_pass(primary,
- commandBufferCount, pCommandBuffers);
+ v3dv_X(primary->device, cmd_buffer_execute_inside_pass)
+ (primary, commandBufferCount, pCommandBuffers);
} else {
cmd_buffer_execute_outside_pass(primary,
commandBufferCount, pCommandBuffers);
@@ -2993,131 +1884,15 @@ cmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer,
}
}
- cmd_buffer->state.dynamic.mask = dynamic_mask;
- cmd_buffer->state.dirty |= dirty;
-}
-
-static void
-job_update_ez_state(struct v3dv_job *job,
- struct v3dv_pipeline *pipeline,
- struct v3dv_cmd_buffer *cmd_buffer)
-{
- /* If first_ez_state is VC5_EZ_DISABLED it means that we have already
- * determined that we should disable EZ completely for all draw calls in
- * this job. This will cause us to disable EZ for the entire job in the
- * Tile Rendering Mode RCL packet and when we do that we need to make sure
- * we never emit a draw call in the job with EZ enabled in the CFG_BITS
- * packet, so ez_state must also be VC5_EZ_DISABLED.
- */
- if (job->first_ez_state == VC5_EZ_DISABLED) {
- assert(job->ez_state == VC5_EZ_DISABLED);
- return;
- }
-
- /* This is part of the pre draw call handling, so we should be inside a
- * render pass.
- */
- assert(cmd_buffer->state.pass);
-
- /* If this is the first time we update EZ state for this job we first check
- * if there is anything that requires disabling it completely for the entire
- * job (based on state that is not related to the current draw call and
- * pipeline state).
- */
- if (!job->decided_global_ez_enable) {
- job->decided_global_ez_enable = true;
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- assert(state->subpass_idx < state->pass->subpass_count);
- struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx];
- if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) {
- job->first_ez_state = VC5_EZ_DISABLED;
- job->ez_state = VC5_EZ_DISABLED;
- return;
- }
-
- /* GFXH-1918: the early-z buffer may load incorrect depth values
- * if the frame has odd width or height.
- *
- * So we need to disable EZ in this case.
- */
- const struct v3dv_render_pass_attachment *ds_attachment =
- &state->pass->attachments[subpass->ds_attachment.attachment];
-
- const VkImageAspectFlags ds_aspects =
- vk_format_aspects(ds_attachment->desc.format);
-
- bool needs_depth_load =
- check_needs_load(state,
- ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
- ds_attachment->first_subpass,
- ds_attachment->desc.loadOp);
-
- if (needs_depth_load) {
- struct v3dv_framebuffer *fb = state->framebuffer;
-
- if (!fb) {
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
- perf_debug("Loading depth aspect in a secondary command buffer "
- "without framebuffer info disables early-z tests.\n");
- job->first_ez_state = VC5_EZ_DISABLED;
- job->ez_state = VC5_EZ_DISABLED;
- return;
- }
-
- if (((fb->width % 2) != 0 || (fb->height % 2) != 0)) {
- perf_debug("Loading depth aspect for framebuffer with odd width "
- "or height disables early-Z tests.\n");
- job->first_ez_state = VC5_EZ_DISABLED;
- job->ez_state = VC5_EZ_DISABLED;
- return;
- }
+ if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
+ if (dest->color_write_enable != src->color_write_enable) {
+ dest->color_write_enable = src->color_write_enable;
+ dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
}
}
- /* Otherwise, we can decide to selectively enable or disable EZ for draw
- * calls using the CFG_BITS packet based on the bound pipeline state.
- */
-
- /* If the FS writes Z, then it may update against the chosen EZ direction */
- struct v3dv_shader_variant *fs_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
- if (fs_variant->prog_data.fs->writes_z) {
- job->ez_state = VC5_EZ_DISABLED;
- return;
- }
-
- switch (pipeline->ez_state) {
- case VC5_EZ_UNDECIDED:
- /* If the pipeline didn't pick a direction but didn't disable, then go
- * along with the current EZ state. This allows EZ optimization for Z
- * func == EQUAL or NEVER.
- */
- break;
-
- case VC5_EZ_LT_LE:
- case VC5_EZ_GT_GE:
- /* If the pipeline picked a direction, then it needs to match the current
- * direction if we've decided on one.
- */
- if (job->ez_state == VC5_EZ_UNDECIDED)
- job->ez_state = pipeline->ez_state;
- else if (job->ez_state != pipeline->ez_state)
- job->ez_state = VC5_EZ_DISABLED;
- break;
-
- case VC5_EZ_DISABLED:
- /* If the pipeline disables EZ because of a bad Z func or stencil
- * operation, then we can't do any more EZ in this frame.
- */
- job->ez_state = VC5_EZ_DISABLED;
- break;
- }
-
- if (job->first_ez_state == VC5_EZ_UNDECIDED &&
- job->ez_state != VC5_EZ_DISABLED) {
- job->first_ez_state = job->ez_state;
- }
+ cmd_buffer->state.dynamic.mask = dynamic_mask;
+ cmd_buffer->state.dirty |= dirty;
}
static void
@@ -3128,25 +1903,6 @@ bind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
if (cmd_buffer->state.gfx.pipeline == pipeline)
return;
- /* Enable always flush if we are blending to sRGB render targets. This
- * fixes test failures in:
- * dEQP-VK.pipeline.blend.format.r8g8b8a8_srgb.*
- *
- * FIXME: not sure why we need this. The tile buffer is always linear, with
- * conversion from/to sRGB happening on tile load/store operations. This
- * means that when we enable flushing the only difference is that we convert
- * to sRGB on the store after each draw call and we convert from sRGB on the
- * load before each draw call, but the blend happens in linear format in the
- * tile buffer anyway, which is the same scenario as if we didn't flush.
- */
- assert(pipeline->subpass);
- if (pipeline->subpass->has_srgb_rt && pipeline->blend.enables) {
- assert(cmd_buffer->state.job);
- cmd_buffer->state.job->always_flush = true;
- perf_debug("flushing draw calls for subpass %d because bound pipeline "
- "uses sRGB blending\n", cmd_buffer->state.subpass_idx);
- }
-
cmd_buffer->state.gfx.pipeline = pipeline;
cmd_buffer_bind_pipeline_static_state(cmd_buffer, &pipeline->dynamic_state);
@@ -3167,7 +1923,7 @@ bind_compute_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_PIPELINE;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
@@ -3223,7 +1979,7 @@ v3dv_viewport_compute_xform(const VkViewport *viewport,
scale[2] = min_abs_scale * (scale[2] < 0 ? -1.0f : 1.0f);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetViewport(VkCommandBuffer commandBuffer,
uint32_t firstViewport,
uint32_t viewportCount,
@@ -3256,7 +2012,7 @@ v3dv_CmdSetViewport(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetScissor(VkCommandBuffer commandBuffer,
uint32_t firstScissor,
uint32_t scissorCount,
@@ -3360,379 +2116,13 @@ emit_scissor(struct v3dv_cmd_buffer *cmd_buffer)
cmd_buffer->state.clip_window.extent.width = maxx - minx;
cmd_buffer->state.clip_window.extent.height = maxy - miny;
- emit_clip_window(cmd_buffer->state.job, &cmd_buffer->state.clip_window);
+ v3dv_X(cmd_buffer->device, job_emit_clip_window)
+ (cmd_buffer->state.job, &cmd_buffer->state.clip_window);
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_SCISSOR;
}
static void
-emit_viewport(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
- /* FIXME: right now we only support one viewport. viewports[0] works for
- * now; this would need to change if we allow multiple viewports.
- */
- float *vptranslate = dynamic->viewport.translate[0];
- float *vpscale = dynamic->viewport.scale[0];
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const uint32_t required_cl_size =
- cl_packet_length(CLIPPER_XY_SCALING) +
- cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) +
- cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) +
- cl_packet_length(VIEWPORT_OFFSET);
- v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
- clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
- clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
- }
-
- cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
- clip.viewport_z_offset_zc_to_zs = vptranslate[2];
- clip.viewport_z_scale_zc_to_zs = vpscale[2];
- }
- cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
- /* Vulkan's Z NDC is [0..1], unlike OpenGL, which is [-1, 1] */
- float z1 = vptranslate[2];
- float z2 = vptranslate[2] + vpscale[2];
- clip.minimum_zw = MIN2(z1, z2);
- clip.maximum_zw = MAX2(z1, z2);
- }
-
- cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
- vp.viewport_centre_x_coordinate = vptranslate[0];
- vp.viewport_centre_y_coordinate = vptranslate[1];
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
-}
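/* Worked example (illustration only) of the Z clipping planes above: a
 * VkViewport with minDepth 0.25 and maxDepth 0.75 gives vptranslate[2] =
 * 0.25 and vpscale[2] = 0.5, so z1 = 0.25 and z2 = 0.75. For a reversed-Z
 * viewport (minDepth 1.0, maxDepth 0.0) vpscale[2] is negative and the
 * MIN2/MAX2 pair keeps minimum_zw <= maximum_zw.
 */
static void
example_z_clip_planes(float min_depth, float max_depth,
                      float *min_zw, float *max_zw)
{
   float z1 = min_depth;                           /* vptranslate[2]   */
   float z2 = min_depth + (max_depth - min_depth); /* ... + vpscale[2] */
   *min_zw = z1 < z2 ? z1 : z2;
   *max_zw = z1 < z2 ? z2 : z1;
}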
-
-static void
-emit_stencil(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic;
-
- const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
- V3DV_DYNAMIC_STENCIL_WRITE_MASK |
- V3DV_DYNAMIC_STENCIL_REFERENCE;
-
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- 2 * cl_packet_length(STENCIL_CFG));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- bool emitted_stencil = false;
- for (uint32_t i = 0; i < 2; i++) {
- if (pipeline->emit_stencil_cfg[i]) {
- if (dynamic_state->mask & dynamic_stencil_states) {
- cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
- pipeline->stencil_cfg[i], config) {
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) {
- config.stencil_test_mask =
- i == 0 ? dynamic_state->stencil_compare_mask.front :
- dynamic_state->stencil_compare_mask.back;
- }
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) {
- config.stencil_write_mask =
- i == 0 ? dynamic_state->stencil_write_mask.front :
- dynamic_state->stencil_write_mask.back;
- }
- if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) {
- config.stencil_ref_value =
- i == 0 ? dynamic_state->stencil_reference.front :
- dynamic_state->stencil_reference.back;
- }
- }
- } else {
- cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]);
- }
-
- emitted_stencil = true;
- }
- }
-
- if (emitted_stencil) {
- const uint32_t dynamic_stencil_dirty_flags =
- V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
- V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
- V3DV_CMD_DIRTY_STENCIL_REFERENCE;
- cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags;
- }
-}
-
-static void
-emit_depth_bias(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- assert(pipeline);
-
- if (!pipeline->depth_bias.enabled)
- return;
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
- cl_emit(&job->bcl, DEPTH_OFFSET, bias) {
- bias.depth_offset_factor = dynamic->depth_bias.slope_factor;
- bias.depth_offset_units = dynamic->depth_bias.constant_factor;
- if (pipeline->depth_bias.is_z16)
- bias.depth_offset_units *= 256.0f;
- bias.limit = dynamic->depth_bias.depth_bias_clamp;
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS;
-}
-
-static void
-emit_line_width(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, LINE_WIDTH, line) {
- line.line_width = cmd_buffer->state.dynamic.line_width;
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH;
-}
-
-static void
-emit_sample_state(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- assert(pipeline);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, SAMPLE_STATE, state) {
- state.coverage = 1.0f;
- state.mask = pipeline->sample_mask;
- }
-}
-
-static void
-emit_blend(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- assert(pipeline);
-
- const uint32_t blend_packets_size =
- cl_packet_length(BLEND_ENABLES) +
- cl_packet_length(BLEND_CONSTANT_COLOR) +
- cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS +
- cl_packet_length(COLOR_WRITE_MASKS);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
- if (pipeline->blend.enables) {
- cl_emit(&job->bcl, BLEND_ENABLES, enables) {
- enables.mask = pipeline->blend.enables;
- }
- }
-
- for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
- if (pipeline->blend.enables & (1 << i))
- cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
- }
-
- cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
- mask.mask = pipeline->blend.color_write_masks;
- }
- }
-
- if (pipeline->blend.needs_color_constants &&
- cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) {
- struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
- cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
- color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]);
- color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]);
- color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]);
- color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]);
- }
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS;
- }
-}
-
-static void
-emit_flat_shade_flags(struct v3dv_job *job,
- int varying_offset,
- uint32_t varyings,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher)
-{
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(FLAT_SHADE_FLAGS));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
- flags.varying_offset_v0 = varying_offset;
- flags.flat_shade_flags_for_varyings_v024 = varyings;
- flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower;
- flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher;
- }
-}
-
-static void
-emit_noperspective_flags(struct v3dv_job *job,
- int varying_offset,
- uint32_t varyings,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher)
-{
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(NON_PERSPECTIVE_FLAGS));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
- flags.varying_offset_v0 = varying_offset;
- flags.non_perspective_flags_for_varyings_v024 = varyings;
- flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower;
- flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher;
- }
-}
-
-static void
-emit_centroid_flags(struct v3dv_job *job,
- int varying_offset,
- uint32_t varyings,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher)
-{
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(CENTROID_FLAGS));
- v3dv_return_if_oom(NULL, job);
-
- cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
- flags.varying_offset_v0 = varying_offset;
- flags.centroid_flags_for_varyings_v024 = varyings;
- flags.action_for_centroid_flags_of_lower_numbered_varyings = lower;
- flags.action_for_centroid_flags_of_higher_numbered_varyings = higher;
- }
-}
-
-static bool
-emit_varying_flags(struct v3dv_job *job,
- uint32_t num_flags,
- const uint32_t *flags,
- void (*flag_emit_callback)(struct v3dv_job *job,
- int varying_offset,
- uint32_t flags,
- enum V3DX(Varying_Flags_Action) lower,
- enum V3DX(Varying_Flags_Action) higher))
-{
- bool emitted_any = false;
- for (int i = 0; i < num_flags; i++) {
- if (!flags[i])
- continue;
-
- if (emitted_any) {
- flag_emit_callback(job, i, flags[i],
- V3D_VARYING_FLAGS_ACTION_UNCHANGED,
- V3D_VARYING_FLAGS_ACTION_UNCHANGED);
- } else if (i == 0) {
- flag_emit_callback(job, i, flags[i],
- V3D_VARYING_FLAGS_ACTION_UNCHANGED,
- V3D_VARYING_FLAGS_ACTION_ZEROED);
- } else {
- flag_emit_callback(job, i, flags[i],
- V3D_VARYING_FLAGS_ACTION_ZEROED,
- V3D_VARYING_FLAGS_ACTION_ZEROED);
- }
-
- emitted_any = true;
- }
-
- return emitted_any;
-}
-
-static void
-emit_varyings_state(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
-
- struct v3d_fs_prog_data *prog_data_fs =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
-
- const uint32_t num_flags =
- ARRAY_SIZE(prog_data_fs->flat_shade_flags);
- const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags;
- const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags;
- const uint32_t *centroid_flags = prog_data_fs->centroid_flags;
-
- if (!emit_varying_flags(job, num_flags, flat_shade_flags,
- emit_flat_shade_flags)) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
- }
-
- if (!emit_varying_flags(job, num_flags, noperspective_flags,
- emit_noperspective_flags)) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
- }
-
- if (!emit_varying_flags(job, num_flags, centroid_flags,
- emit_centroid_flags)) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
- }
-}
-
-static void
-emit_configuration_bits(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- assert(pipeline);
-
- job_update_ez_state(job, pipeline, cmd_buffer);
-
- v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
- config.early_z_enable = job->ez_state != VC5_EZ_DISABLED;
- config.early_z_updates_enable = config.early_z_enable &&
- pipeline->z_updates_enable;
- }
-}
-
-static void
update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t dirty_uniform_state)
{
@@ -3746,13 +2136,26 @@ update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
assert(pipeline);
- const bool dirty_descriptors_only =
- (cmd_buffer->state.dirty & dirty_uniform_state) ==
- V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
+ const bool has_new_pipeline = dirty_uniform_state & V3DV_CMD_DIRTY_PIPELINE;
+ const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT;
+ const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS;
+ const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
+ const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX;
+
+ /* VK_SHADER_STAGE_FRAGMENT_BIT */
+ const bool has_new_descriptors_fs =
+ has_new_descriptors &&
+ (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
- const bool needs_fs_update =
- !dirty_descriptors_only ||
- (pipeline->layout->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
+ const bool has_new_push_constants_fs =
+ has_new_push_constants &&
+ (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
+
+   const bool needs_fs_update = has_new_pipeline ||
+                                has_new_view_index ||
+                                has_new_push_constants_fs ||
+                                has_new_descriptors_fs;
if (needs_fs_update) {
struct v3dv_shader_variant *fs_variant =
@@ -3762,221 +2165,69 @@ update_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer,
v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant);
}
- const bool needs_vs_update =
- !dirty_descriptors_only ||
- (pipeline->layout->shader_stages & VK_SHADER_STAGE_VERTEX_BIT);
-
- if (needs_vs_update) {
- struct v3dv_shader_variant *vs_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
-
- struct v3dv_shader_variant *vs_bin_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
-
- cmd_buffer->state.uniforms.vs =
- v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant);
-
- cmd_buffer->state.uniforms.vs_bin =
- v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant);
- }
-}
-
-static void
-emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- struct v3dv_pipeline *pipeline = state->gfx.pipeline;
- assert(pipeline);
-
- struct v3d_vs_prog_data *prog_data_vs =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
- struct v3d_vs_prog_data *prog_data_vs_bin =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
- struct v3d_fs_prog_data *prog_data_fs =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
-
- /* Update the cache dirty flag based on the shader progs data */
- job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl;
- job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl;
- job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl;
-
- /* See GFXH-930 workaround below */
- uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);
-
- uint32_t shader_rec_offset =
- v3dv_cl_ensure_space(&job->indirect,
- cl_packet_length(GL_SHADER_STATE_RECORD) +
- num_elements_to_emit *
- cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
- 32);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_shader_variant *vs_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
- struct v3dv_shader_variant *vs_bin_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
- struct v3dv_shader_variant *fs_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
- struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo;
-
- struct v3dv_bo *default_attribute_values =
- pipeline->default_attribute_values != NULL ?
- pipeline->default_attribute_values :
- pipeline->device->default_attribute_float;
-
- cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
- pipeline->shader_state_record, shader) {
-
-      /* FIXME: we are setting these values here and during the
-       * prepacking because both cl_emit_with_prepacked and v3dv_pack
-       * assert on minimum values for them. It would be good to have
-       * v3dv_pack assert on the final value instead, if possible.
-       */
- shader.min_coord_shader_input_segments_required_in_play =
- pipeline->vpm_cfg_bin.As;
- shader.min_vertex_shader_input_segments_required_in_play =
- pipeline->vpm_cfg.As;
-
- shader.coordinate_shader_code_address =
- v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset);
- shader.vertex_shader_code_address =
- v3dv_cl_address(assembly_bo, vs_variant->assembly_offset);
- shader.fragment_shader_code_address =
- v3dv_cl_address(assembly_bo, fs_variant->assembly_offset);
-
- shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin;
- shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
- shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;
-
- shader.address_of_default_attribute_values =
- v3dv_cl_address(default_attribute_values, 0);
- }
-
- /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
- bool cs_loaded_any = false;
- const bool cs_uses_builtins = prog_data_vs_bin->uses_iid ||
- prog_data_vs_bin->uses_biid ||
- prog_data_vs_bin->uses_vid;
- const uint32_t packet_length =
- cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
-
- uint32_t emitted_va_count = 0;
- for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) {
- assert(i < MAX_VERTEX_ATTRIBS);
-
- if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED)
- continue;
-
- const uint32_t binding = pipeline->va[i].binding;
+ /* VK_SHADER_STAGE_GEOMETRY_BIT */
+ if (pipeline->has_gs) {
+ const bool has_new_descriptors_gs =
+ has_new_descriptors &&
+ (cmd_buffer->state.dirty_descriptor_stages &
+ VK_SHADER_STAGE_GEOMETRY_BIT);
- /* We store each vertex attribute in the array using its driver location
- * as index.
- */
- const uint32_t location = i;
-
- struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];
-
- cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
- &pipeline->vertex_attrs[i * packet_length], attr) {
-
- assert(c_vb->buffer->mem->bo);
- attr.address = v3dv_cl_address(c_vb->buffer->mem->bo,
- c_vb->buffer->mem_offset +
- pipeline->va[i].offset +
- c_vb->offset);
-
- attr.number_of_values_read_by_coordinate_shader =
- prog_data_vs_bin->vattr_sizes[location];
- attr.number_of_values_read_by_vertex_shader =
- prog_data_vs->vattr_sizes[location];
-
- /* GFXH-930: At least one attribute must be enabled and read by CS
- * and VS. If we have attributes being consumed by the VS but not
- * the CS, then set up a dummy load of the last attribute into the
- * CS's VPM inputs. (Since CS is just dead-code-elimination compared
- * to VS, we can't have CS loading but not VS).
- *
- * GFXH-1602: first attribute must be active if using builtins.
- */
- if (prog_data_vs_bin->vattr_sizes[location])
- cs_loaded_any = true;
-
- if (i == 0 && cs_uses_builtins && !cs_loaded_any) {
- attr.number_of_values_read_by_coordinate_shader = 1;
- cs_loaded_any = true;
- } else if (i == pipeline->va_count - 1 && !cs_loaded_any) {
- attr.number_of_values_read_by_coordinate_shader = 1;
- cs_loaded_any = true;
- }
+ const bool has_new_push_constants_gs =
+ has_new_push_constants &&
+ (cmd_buffer->state.dirty_push_constants_stages &
+ VK_SHADER_STAGE_GEOMETRY_BIT);
- attr.maximum_index = 0xffffff;
- }
+ const bool needs_gs_update = has_new_viewport ||
+ has_new_view_index ||
+ has_new_pipeline ||
+ has_new_push_constants_gs ||
+ has_new_descriptors_gs;
- emitted_va_count++;
- }
+ if (needs_gs_update) {
+ struct v3dv_shader_variant *gs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
- if (pipeline->va_count == 0) {
- /* GFXH-930: At least one attribute must be enabled and read
- * by CS and VS. If we have no attributes being consumed by
- * the shader, set up a dummy to be loaded into the VPM.
- */
- cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
- /* Valid address of data whose value will be unused. */
- attr.address = v3dv_cl_address(job->indirect.bo, 0);
+ struct v3dv_shader_variant *gs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
- attr.type = ATTRIBUTE_FLOAT;
- attr.stride = 0;
- attr.vec_size = 1;
+ cmd_buffer->state.uniforms.gs =
+ v3dv_write_uniforms(cmd_buffer, pipeline, gs_variant);
- attr.number_of_values_read_by_coordinate_shader = 1;
- attr.number_of_values_read_by_vertex_shader = 1;
+ cmd_buffer->state.uniforms.gs_bin =
+ v3dv_write_uniforms(cmd_buffer, pipeline, gs_bin_variant);
}
}
- if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- sizeof(pipeline->vcm_cache_size));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ /* VK_SHADER_STAGE_VERTEX_BIT */
+ const bool has_new_descriptors_vs =
+ has_new_descriptors &&
+ (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_VERTEX_BIT);
- cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size);
- }
+ const bool has_new_push_constants_vs =
+ has_new_push_constants &&
+ (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT);
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(GL_SHADER_STATE));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ const bool needs_vs_update = has_new_viewport ||
+ has_new_view_index ||
+ has_new_pipeline ||
+ has_new_push_constants_vs ||
+ has_new_descriptors_vs;
- cl_emit(&job->bcl, GL_SHADER_STATE, state) {
- state.address = v3dv_cl_address(job->indirect.bo,
- shader_rec_offset);
- state.number_of_attribute_arrays = num_elements_to_emit;
- }
-
- cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER |
- V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
- V3DV_CMD_DIRTY_PUSH_CONSTANTS);
-}
+ if (needs_vs_update) {
+ struct v3dv_shader_variant *vs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
-static void
-emit_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
+ struct v3dv_shader_variant *vs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
- v3dv_cl_ensure_space_with_branch(&job->bcl,
- cl_packet_length(OCCLUSION_QUERY_COUNTER));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ cmd_buffer->state.uniforms.vs =
+ v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant);
- cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
- if (cmd_buffer->state.query.active_query) {
- counter.address =
- v3dv_cl_address(cmd_buffer->state.query.active_query, 0);
- }
+ cmd_buffer->state.uniforms.vs_bin =
+ v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant);
}
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX;
}
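
The rework above replaces the old all-or-nothing descriptor check with per-stage gating. A minimal sketch of the predicate each stage effectively evaluates (helper name and the exact flag set are illustrative, not part of the patch; the VS/GS paths additionally fold in viewport changes):

static inline bool
stage_needs_uniform_update(uint32_t dirty,
                           VkShaderStageFlags dirty_descriptor_stages,
                           VkShaderStageFlags dirty_push_constant_stages,
                           VkShaderStageFlagBits stage)
{
   /* State shared by every stage always forces a uniform rewrite... */
   if (dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_VIEW_INDEX))
      return true;

   /* ...while descriptor and push-constant changes only do so when the
    * touched sets/ranges are actually visible to this stage. */
   if ((dirty & V3DV_CMD_DIRTY_DESCRIPTOR_SETS) &&
       (dirty_descriptor_stages & stage))
      return true;

   return (dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS) &&
          (dirty_push_constant_stages & stage);
}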
/* This stores command buffer state that we might be about to stomp for
@@ -4115,86 +2366,6 @@ v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
state->meta.has_descriptor_state = false;
}
-/* FIXME: C&P from v3dx_draw. Refactor to common place? */
-static uint32_t
-v3d_hw_prim_type(enum pipe_prim_type prim_type)
-{
- switch (prim_type) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_LINE_LOOP:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- return prim_type;
-
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY);
-
- default:
- unreachable("Unsupported primitive type");
- }
-}
-
-struct v3dv_draw_info {
- uint32_t vertex_count;
- uint32_t instance_count;
- uint32_t first_vertex;
- uint32_t first_instance;
-};
-
-static void
-cmd_buffer_emit_draw(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_draw_info *info)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- struct v3dv_pipeline *pipeline = state->gfx.pipeline;
-
- assert(pipeline);
-
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
-
- if (info->first_instance > 0) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
- base.base_instance = info->first_instance;
- base.base_vertex = 0;
- }
- }
-
- if (info->instance_count > 1) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
- prim.mode = hw_prim_type;
- prim.index_of_first_vertex = info->first_vertex;
- prim.number_of_instances = info->instance_count;
- prim.instance_length = info->vertex_count;
- }
- } else {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS));
- v3dv_return_if_oom(cmd_buffer, NULL);
- cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
- prim.mode = hw_prim_type;
- prim.length = info->vertex_count;
- prim.index_of_first_vertex = info->first_vertex;
- }
- }
-}
-
static struct v3dv_job *
cmd_buffer_pre_draw_split_job(struct v3dv_cmd_buffer *cmd_buffer)
{
@@ -4297,6 +2468,7 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
old_job->frame_tiling.width,
old_job->frame_tiling.height,
old_job->frame_tiling.layers,
+ true,
old_job->frame_tiling.render_target_count,
old_job->frame_tiling.internal_bpp,
true /* msaa */);
@@ -4304,35 +2476,8 @@ cmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer)
v3dv_job_destroy(old_job);
}
-static void
-emit_index_buffer(struct v3dv_cmd_buffer *cmd_buffer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
-   /* We flag all state as dirty when we create a new job, so make sure we
-    * have a valid index buffer before attempting to emit state for it.
-    */
- struct v3dv_buffer *ibuffer =
- v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer);
- if (ibuffer) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- const uint32_t offset = cmd_buffer->state.index_buffer.offset;
- cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
- ib.address = v3dv_cl_address(ibuffer->mem->bo,
- ibuffer->mem_offset + offset);
- ib.size = ibuffer->mem->bo->size;
- }
- }
-
- cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER;
-}
-
-static void
-cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
+void
+v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
{
assert(cmd_buffer->state.gfx.pipeline);
assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
@@ -4368,17 +2513,20 @@ cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
*dirty & (V3DV_CMD_DIRTY_PIPELINE |
V3DV_CMD_DIRTY_PUSH_CONSTANTS |
V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
- V3DV_CMD_DIRTY_VIEWPORT);
+ V3DV_CMD_DIRTY_VIEWPORT |
+ V3DV_CMD_DIRTY_VIEW_INDEX);
if (dirty_uniform_state)
update_gfx_uniform_state(cmd_buffer, dirty_uniform_state);
+ struct v3dv_device *device = cmd_buffer->device;
+
if (dirty_uniform_state || (*dirty & V3DV_CMD_DIRTY_VERTEX_BUFFER))
- emit_gl_shader_state(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_gl_shader_state)(cmd_buffer);
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE)) {
- emit_configuration_bits(cmd_buffer);
- emit_varyings_state(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_configuration_bits)(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_varyings_state)(cmd_buffer);
}
if (*dirty & (V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR)) {
@@ -4386,46 +2534,69 @@ cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
}
if (*dirty & V3DV_CMD_DIRTY_VIEWPORT) {
- emit_viewport(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_viewport)(cmd_buffer);
}
if (*dirty & V3DV_CMD_DIRTY_INDEX_BUFFER)
- emit_index_buffer(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_index_buffer)(cmd_buffer);
const uint32_t dynamic_stencil_dirty_flags =
V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
V3DV_CMD_DIRTY_STENCIL_REFERENCE;
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | dynamic_stencil_dirty_flags))
- emit_stencil(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_stencil)(cmd_buffer);
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS))
- emit_depth_bias(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer);
if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS))
- emit_blend(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer);
if (*dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY)
- emit_occlusion_query(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_occlusion_query)(cmd_buffer);
if (*dirty & V3DV_CMD_DIRTY_LINE_WIDTH)
- emit_line_width(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_line_width)(cmd_buffer);
if (*dirty & V3DV_CMD_DIRTY_PIPELINE)
- emit_sample_state(cmd_buffer);
+ v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer);
+
+ if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE))
+ v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer);
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE;
}
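
All of the emit_*() helpers deleted in this hunk now live behind v3dv_X(), the driver's per-hardware-version dispatch. A simplified sketch of the pattern (the real macro lives in the v3dv headers and covers every supported version):

/* Resolve a function pointer from the device's V3D version so that
 * version-specific packet emission can be compiled once per version
 * while the core command buffer code stays generic. */
#define v3dv_X(device, thing) ({                               \
   __typeof__(&v3d42_##thing) v3d_X_thing;                     \
   switch ((device)->devinfo.ver) {                            \
   case 42: v3d_X_thing = &v3d42_##thing; break;               \
   default: unreachable("Unsupported hardware generation");    \
   }                                                           \
   v3d_X_thing;                                                \
})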
+static inline void
+cmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t view_index)
+{
+ cmd_buffer->state.view_index = view_index;
+ cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX;
+}
+
static void
cmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_draw_info *info)
{
- cmd_buffer_emit_pre_draw(cmd_buffer);
- cmd_buffer_emit_draw(cmd_buffer, info);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+ return;
+ }
+
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info);
+ }
}
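
For reference, a self-contained sketch of the view-mask walk used by all the multiview draw paths added in this patch; u_bit_scan() comes from Mesa's util/bitscan.h and pops the lowest set bit, as mimicked here:

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for util/bitscan.h's u_bit_scan(). */
static int
u_bit_scan(uint32_t *mask)
{
   const int i = __builtin_ffs(*mask) - 1; /* index of lowest set bit */
   *mask &= *mask - 1;                     /* clear it */
   return i;
}

int
main(void)
{
   uint32_t view_mask = 0xa; /* views 1 and 3 enabled in the subpass */
   while (view_mask)
      printf("replaying draw for view %d\n", u_bit_scan(&view_mask));
   return 0;
}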
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDraw(VkCommandBuffer commandBuffer,
uint32_t vertexCount,
uint32_t instanceCount,
@@ -4445,7 +2616,7 @@ v3dv_CmdDraw(VkCommandBuffer commandBuffer,
cmd_buffer_draw(cmd_buffer, &info);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer,
uint32_t indexCount,
uint32_t instanceCount,
@@ -4458,56 +2629,26 @@ v3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- cmd_buffer_emit_pre_draw(cmd_buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
- uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
- uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size;
-
- if (vertexOffset != 0 || firstInstance != 0) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
- base.base_instance = firstInstance;
- base.base_vertex = vertexOffset;
- }
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
+ (cmd_buffer, indexCount, instanceCount,
+ firstIndex, vertexOffset, firstInstance);
+ return;
}
- if (instanceCount == 1) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDEXED_PRIM_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
- prim.index_type = index_type;
- prim.length = indexCount;
- prim.index_offset = index_offset;
- prim.mode = hw_prim_type;
- prim.enable_primitive_restarts = pipeline->primitive_restart;
- }
- } else if (instanceCount > 1) {
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
- prim.index_type = index_type;
- prim.index_offset = index_offset;
- prim.mode = hw_prim_type;
- prim.enable_primitive_restarts = pipeline->primitive_restart;
- prim.number_of_instances = instanceCount;
- prim.instance_length = indexCount;
- }
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed)
+ (cmd_buffer, indexCount, instanceCount,
+ firstIndex, vertexOffset, firstInstance);
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
@@ -4521,28 +2662,24 @@ v3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
- cmd_buffer_emit_pre_draw(cmd_buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
-
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ return;
+ }
- cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
- prim.mode = hw_prim_type;
- prim.number_of_draw_indirect_array_records = drawCount;
- prim.stride_in_multiples_of_4_bytes = stride >> 2;
- prim.address = v3dv_cl_address(buffer->mem->bo,
- buffer->mem_offset + offset);
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
@@ -4556,31 +2693,24 @@ v3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
- cmd_buffer_emit_pre_draw(cmd_buffer);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
- uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
- uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
-
- v3dv_cl_ensure_space_with_branch(
- &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST));
- v3dv_return_if_oom(cmd_buffer, NULL);
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (likely(!pass->multiview_enabled)) {
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
+ return;
+ }
- cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
- prim.index_type = index_type;
- prim.mode = hw_prim_type;
- prim.enable_primitive_restarts = pipeline->primitive_restart;
- prim.number_of_draw_indirect_indexed_records = drawCount;
- prim.stride_in_multiples_of_4_bytes = stride >> 2;
- prim.address = v3dv_cl_address(buffer->mem->bo,
- buffer->mem_offset + offset);
+ uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask;
+ while (view_mask) {
+ cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask));
+ v3dv_cmd_buffer_emit_pre_draw(cmd_buffer);
+ v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect)
+ (cmd_buffer, buffer, offset, drawCount, stride);
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags dstStageMask,
@@ -4616,7 +2746,7 @@ v3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
@@ -4651,6 +2781,9 @@ static uint32_t
get_index_size(VkIndexType index_type)
{
switch (index_type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return 1;
+ break;
case VK_INDEX_TYPE_UINT16:
return 2;
break;
@@ -4662,7 +2795,7 @@ get_index_size(VkIndexType index_type)
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
VkBuffer buffer,
VkDeviceSize offset,
@@ -4683,7 +2816,7 @@ v3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_INDEX_BUFFER;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t compareMask)
@@ -4698,7 +2831,7 @@ v3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t writeMask)
@@ -4713,7 +2846,7 @@ v3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer,
VkStencilFaceFlags faceMask,
uint32_t reference)
@@ -4728,7 +2861,7 @@ v3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer,
float depthBiasConstantFactor,
float depthBiasClamp,
@@ -4742,7 +2875,7 @@ v3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
float minDepthBounds,
float maxDepthBounds)
@@ -4752,7 +2885,7 @@ v3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
*/
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer,
float lineWidth)
{
@@ -4762,7 +2895,7 @@ v3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
@@ -4784,18 +2917,16 @@ v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
&cmd_buffer->state.compute.descriptor_state :
&cmd_buffer->state.gfx.descriptor_state;
+ VkShaderStageFlags dirty_stages = 0;
bool descriptor_state_changed = false;
for (uint32_t i = 0; i < descriptorSetCount; i++) {
V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]);
uint32_t index = firstSet + i;
+ descriptor_state->valid |= (1u << index);
if (descriptor_state->descriptor_sets[index] != set) {
descriptor_state->descriptor_sets[index] = set;
- descriptor_state_changed = true;
- }
-
- if (!(descriptor_state->valid & (1u << index))) {
- descriptor_state->valid |= (1u << index);
+ dirty_stages |= set->layout->shader_stages;
descriptor_state_changed = true;
}
@@ -4804,20 +2935,24 @@ v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
if (descriptor_state->dynamic_offsets[idx] != pDynamicOffsets[dyn_index]) {
descriptor_state->dynamic_offsets[idx] = pDynamicOffsets[dyn_index];
+ dirty_stages |= set->layout->shader_stages;
descriptor_state_changed = true;
}
}
}
if (descriptor_state_changed) {
- if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
+ if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
- else
+ cmd_buffer->state.dirty_descriptor_stages |= dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
+ } else {
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
+ cmd_buffer->state.dirty_descriptor_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+ }
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdPushConstants(VkCommandBuffer commandBuffer,
VkPipelineLayout layout,
VkShaderStageFlags stageFlags,
@@ -4833,9 +2968,10 @@ v3dv_CmdPushConstants(VkCommandBuffer commandBuffer,
memcpy((uint8_t *) cmd_buffer->push_constants_data + offset, pValues, size);
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS;
+ cmd_buffer->state.dirty_push_constants_stages |= stageFlags;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
const float blendConstants[4])
{
@@ -4853,6 +2989,26 @@ v3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS;
}
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer,
+ uint32_t attachmentCount,
+ const VkBool32 *pColorWriteEnables)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ uint32_t color_write_enable = 0;
+
+ for (uint32_t i = 0; i < attachmentCount; i++)
+ color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
+
+ if (state->dynamic.color_write_enable == color_write_enable)
+ return;
+
+ state->dynamic.color_write_enable = color_write_enable;
+
+ state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
+}
+
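
A worked example of the packing performed above: each attachment owns a 4-bit nibble (one bit per RGBA channel), so the per-attachment VkBool32 enables collapse into the single mask the hardware packet consumes. A standalone sketch:

#include <assert.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

int
main(void)
{
   const VkBool32 enables[3] = { VK_TRUE, VK_FALSE, VK_TRUE };
   uint32_t color_write_enable = 0;

   for (uint32_t i = 0; i < 3; i++)
      color_write_enable |= enables[i] ? (0xfu << (i * 4)) : 0;

   /* Attachments 0 and 2 keep all four channels, attachment 1 is masked. */
   assert(color_write_enable == 0xf0f);
   return 0;
}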
void
v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
@@ -4881,12 +3037,12 @@ v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-static void
-ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t slot_size,
- uint32_t used_count,
- uint32_t *alloc_count,
- void **ptr)
+void
+v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t slot_size,
+ uint32_t used_count,
+ uint32_t *alloc_count,
+ void **ptr)
{
if (used_count >= *alloc_count) {
const uint32_t prev_slot_count = *alloc_count;
@@ -4915,10 +3071,11 @@ v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
VkQueryControlFlags flags)
{
/* FIXME: we only support one active query for now */
- assert(cmd_buffer->state.query.active_query == NULL);
+ assert(cmd_buffer->state.query.active_query.bo == NULL);
assert(query < pool->query_count);
- cmd_buffer->state.query.active_query = pool->queries[query].bo;
+ cmd_buffer->state.query.active_query.bo = pool->queries[query].bo;
+ cmd_buffer->state.query.active_query.offset = pool->queries[query].offset;
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
@@ -4928,7 +3085,7 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t query)
{
assert(query < pool->query_count);
- assert(cmd_buffer->state.query.active_query != NULL);
+ assert(cmd_buffer->state.query.active_query.bo != NULL);
if (cmd_buffer->state.pass) {
      /* Queue the EndQuery in the command buffer state; we will create a CPU
       * job to flag these queries as available right after the
       * render pass job in which they have been recorded.
* render pass job in which they have been recorded.
*/
struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- ensure_array_state(cmd_buffer,
- sizeof(struct v3dv_end_query_cpu_job_info),
- state->query.end.used_count,
- &state->query.end.alloc_count,
- (void **) &state->query.end.states);
+ v3dv_cmd_buffer_ensure_array_state(cmd_buffer,
+ sizeof(struct v3dv_end_query_cpu_job_info),
+ state->query.end.used_count,
+ &state->query.end.alloc_count,
+ (void **) &state->query.end.states);
v3dv_return_if_oom(cmd_buffer, NULL);
struct v3dv_end_query_cpu_job_info *info =
@@ -4948,6 +3105,27 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
info->pool = pool;
info->query = query;
+
+ /* From the Vulkan spec:
+ *
+ * "If queries are used while executing a render pass instance that has
+ * multiview enabled, the query uses N consecutive query indices in
+ * the query pool (starting at query) where N is the number of bits set
+ * in the view mask in the subpass the query is used in. How the
+ * numerical results of the query are distributed among the queries is
+ * implementation-dependent."
+ *
+    * In our case, only the first query is used, but this means we still need
+    * to flag the other queries as available so we don't emit errors when
+    * applications attempt to retrieve values from them.
+ */
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (!pass->multiview_enabled) {
+ info->count = 1;
+ } else {
+ struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
+ info->count = util_bitcount(subpass->view_mask);
+ }
} else {
/* Otherwise, schedule the CPU job immediately */
struct v3dv_job *job =
@@ -4958,10 +3136,14 @@ v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
job->cpu.query_end.pool = pool;
job->cpu.query_end.query = query;
+
+ /* Multiview queries cannot cross subpass boundaries */
+ job->cpu.query_end.count = 1;
+
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
- cmd_buffer->state.query.active_query = NULL;
+ cmd_buffer->state.query.active_query.bo = NULL;
cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
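
The count recorded above follows directly from the spec text quoted in the comment: N is the number of set bits in the subpass view mask. A tiny illustration (util_bitcount() is Mesa's population count, mimicked here with the GCC builtin):

#include <assert.h>
#include <stdint.h>

/* Stand-in for util/u_math.h's util_bitcount(). */
static unsigned
util_bitcount(uint32_t mask)
{
   return __builtin_popcount(mask);
}

int
main(void)
{
   const uint32_t view_mask = 0x6; /* views 1 and 2 enabled */
   /* EndQuery must flag 2 consecutive queries as available. */
   assert(util_bitcount(view_mask) == 2);
   return 0;
}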
@@ -5019,7 +3201,7 @@ v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdSetEvent(VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
@@ -5045,7 +3227,7 @@ v3dv_CmdSetEvent(VkCommandBuffer commandBuffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResetEvent(VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
@@ -5071,7 +3253,7 @@ v3dv_CmdResetEvent(VkCommandBuffer commandBuffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdWaitEvents(VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent *pEvents,
@@ -5124,7 +3306,7 @@ v3dv_CmdWaitEvents(VkCommandBuffer commandBuffer,
list_addtail(&job->list_link, &cmd_buffer->jobs);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
VkPipelineStageFlagBits pipelineStage,
VkQueryPool queryPool,
@@ -5136,7 +3318,8 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
   /* If this is called inside a render pass, we need to finish the current
* job here...
*/
- if (cmd_buffer->state.pass)
+ struct v3dv_render_pass *pass = cmd_buffer->state.pass;
+ if (pass)
v3dv_cmd_buffer_finish_job(cmd_buffer);
struct v3dv_job *job =
@@ -5148,6 +3331,14 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
job->cpu.query_timestamp.pool = query_pool;
job->cpu.query_timestamp.query = query;
+ if (!pass || !pass->multiview_enabled) {
+ job->cpu.query_timestamp.count = 1;
+ } else {
+ struct v3dv_subpass *subpass =
+ &pass->subpasses[cmd_buffer->state.subpass_idx];
+ job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask);
+ }
+
list_addtail(&job->list_link, &cmd_buffer->jobs);
cmd_buffer->state.job = NULL;
@@ -5163,9 +3354,10 @@ cmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer)
assert(cmd_buffer->state.compute.pipeline->active_stages ==
VK_SHADER_STAGE_COMPUTE_BIT);
- uint32_t *dirty = &cmd_buffer->state.dirty;
- *dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE |
- V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS);
+ cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE |
+ V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS);
+ cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
+ cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
@@ -5230,6 +3422,9 @@ v3dv_cmd_buffer_rewrite_indirect_csd_job(
static struct v3dv_job *
cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t base_offset_x,
+ uint32_t base_offset_y,
+ uint32_t base_offset_z,
uint32_t group_count_x,
uint32_t group_count_y,
uint32_t group_count_z,
@@ -5258,6 +3453,10 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
job->csd.wg_count[1] = group_count_y;
job->csd.wg_count[2] = group_count_z;
+ job->csd.wg_base[0] = base_offset_x;
+ job->csd.wg_base[1] = base_offset_y;
+ job->csd.wg_base[2] = base_offset_z;
+
submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT;
submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT;
@@ -5265,20 +3464,32 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
const struct v3d_compute_prog_data *cpd =
cs_variant->prog_data.cs;
- const uint32_t wgs_per_sg = 1; /* FIXME */
+ const uint32_t num_wgs = group_count_x * group_count_y * group_count_z;
const uint32_t wg_size = cpd->local_size[0] *
cpd->local_size[1] *
cpd->local_size[2];
- submit->cfg[3] |= wgs_per_sg << V3D_CSD_CFG3_WGS_PER_SG_SHIFT;
- submit->cfg[3] |= ((DIV_ROUND_UP(wgs_per_sg * wg_size, 16) - 1) <<
- V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT);
+
+ uint32_t wgs_per_sg =
+ v3d_csd_choose_workgroups_per_supergroup(
+ &cmd_buffer->device->devinfo,
+ cs_variant->prog_data.cs->has_subgroups,
+ cs_variant->prog_data.cs->base.has_control_barrier,
+ cs_variant->prog_data.cs->base.threads,
+ num_wgs, wg_size);
+
+ uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16);
+ uint32_t whole_sgs = num_wgs / wgs_per_sg;
+ uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg;
+ uint32_t num_batches = batches_per_sg * whole_sgs +
+ DIV_ROUND_UP(rem_wgs * wg_size, 16);
+
+ submit->cfg[3] |= (wgs_per_sg & 0xf) << V3D_CSD_CFG3_WGS_PER_SG_SHIFT;
+ submit->cfg[3] |= (batches_per_sg - 1) << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT;
submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT;
if (wg_size_out)
*wg_size_out = wg_size;
- uint32_t batches_per_wg = DIV_ROUND_UP(wg_size, 16);
- submit->cfg[4] = batches_per_wg *
- (group_count_x * group_count_y * group_count_z) - 1;
+ submit->cfg[4] = num_batches - 1;
assert(submit->cfg[4] != ~0);
assert(pipeline->shared_data->assembly_bo);
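
To make the new cfg[3]/cfg[4] programming concrete, here is the batch math with hypothetical numbers; wgs_per_sg is really chosen by v3d_csd_choose_workgroups_per_supergroup, and 3 is just an assumed result for this example:

#include <assert.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int
main(void)
{
   const uint32_t num_wgs = 10;    /* workgroups dispatched in total */
   const uint32_t wg_size = 64;    /* invocations per workgroup */
   const uint32_t wgs_per_sg = 3;  /* assumed supergroup choice */

   const uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16);
   const uint32_t whole_sgs = num_wgs / wgs_per_sg;
   const uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg;
   const uint32_t num_batches = batches_per_sg * whole_sgs +
                                DIV_ROUND_UP(rem_wgs * wg_size, 16);

   assert(batches_per_sg == 12); /* ceil(192 / 16) */
   assert(num_batches == 40);    /* 12 * 3 full supergroups + 4 for the tail */
   /* cfg[4] is then programmed with num_batches - 1 == 39. */
   return 0;
}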
@@ -5302,7 +3513,7 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
}
}
- v3dv_job_add_bo(job, cs_assembly_bo);
+ v3dv_job_add_bo_unchecked(job, cs_assembly_bo);
struct v3dv_cl_reloc uniforms =
v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline,
cs_variant,
@@ -5316,6 +3527,9 @@ cmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer,
static void
cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t base_offset_x,
+ uint32_t base_offset_y,
+ uint32_t base_offset_z,
uint32_t group_count_x,
uint32_t group_count_y,
uint32_t group_count_z)
@@ -5325,6 +3539,9 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_job *job =
cmd_buffer_create_csd_job(cmd_buffer,
+ base_offset_x,
+ base_offset_y,
+ base_offset_z,
group_count_x,
group_count_y,
group_count_z,
@@ -5334,7 +3551,7 @@ cmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer,
cmd_buffer->state.job = NULL;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
uint32_t groupCountX,
uint32_t groupCountY,
@@ -5343,9 +3560,28 @@ v3dv_CmdDispatch(VkCommandBuffer commandBuffer,
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
cmd_buffer_emit_pre_dispatch(cmd_buffer);
- cmd_buffer_dispatch(cmd_buffer, groupCountX, groupCountY, groupCountZ);
+ cmd_buffer_dispatch(cmd_buffer, 0, 0, 0,
+ groupCountX, groupCountY, groupCountZ);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdDispatchBase(VkCommandBuffer commandBuffer,
+ uint32_t baseGroupX,
+ uint32_t baseGroupY,
+ uint32_t baseGroupZ,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ cmd_buffer_emit_pre_dispatch(cmd_buffer);
+ cmd_buffer_dispatch(cmd_buffer,
+ baseGroupX, baseGroupY, baseGroupZ,
+ groupCountX, groupCountY, groupCountZ);
}
+
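
vkCmdDispatchBase is the core Vulkan 1.1 entry point wired up above; per the spec, the base values offset gl_WorkGroupID rather than adding extra workgroups. A usage sketch (cmd_buf is a placeholder handle):

#include <vulkan/vulkan.h>

/* cmd_buf must be in the recording state with a compute pipeline bound. */
static void
record_offset_dispatch(VkCommandBuffer cmd_buf)
{
   /* 8x1x1 workgroups whose gl_WorkGroupID.x runs over [4, 12); the base
    * lands in job->csd.wg_base and the counts in job->csd.wg_count. */
   vkCmdDispatchBase(cmd_buf,
                     4, 0, 0,   /* baseGroupX/Y/Z */
                     8, 1, 1);  /* groupCountX/Y/Z */
}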
static void
cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
@@ -5370,6 +3606,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
*/
struct v3dv_job *csd_job =
cmd_buffer_create_csd_job(cmd_buffer,
+ 0, 0, 0,
1, 1, 1,
&job->cpu.csd_indirect.wg_uniform_offsets[0],
&job->cpu.csd_indirect.wg_size);
@@ -5392,7 +3629,7 @@ cmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer,
cmd_buffer->state.job = NULL;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset)
@@ -5405,3 +3642,10 @@ v3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
cmd_buffer_emit_pre_dispatch(cmd_buffer);
cmd_buffer_dispatch_indirect(cmd_buffer, buffer, offset);
}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
+{
+ /* Nothing to do here since we only support a single device */
+ assert(deviceMask == 0x1);
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c b/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c
index 3487d701a..14a93cea4 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_descriptor_set.c
@@ -27,42 +27,20 @@
#include "v3dv_private.h"
/*
- * Returns how much space a given descriptor type needs on a bo (GPU
- * memory).
- */
-static uint32_t
-descriptor_bo_size(VkDescriptorType type)
-{
- switch(type) {
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- return sizeof(struct v3dv_sampler_descriptor);
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- return sizeof(struct v3dv_combined_image_sampler_descriptor);
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- return sizeof(struct v3dv_sampled_image_descriptor);
- default:
- return 0;
- }
-}
-
-/*
 * For a given descriptor defined by the descriptor_set it belongs to, its
 * binding layout, and array_index, this returns the map region assigned to it
* from the descriptor pool bo.
*/
static void*
-descriptor_bo_map(struct v3dv_descriptor_set *set,
+descriptor_bo_map(struct v3dv_device *device,
+ struct v3dv_descriptor_set *set,
const struct v3dv_descriptor_set_binding_layout *binding_layout,
uint32_t array_index)
{
- assert(descriptor_bo_size(binding_layout->type) > 0);
+ assert(v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0);
return set->pool->bo->map +
set->base_offset + binding_layout->descriptor_offset +
- array_index * descriptor_bo_size(binding_layout->type);
+ array_index * v3dv_X(device, descriptor_bo_size)(binding_layout->type);
}
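
The mapping arithmetic above is worth spelling out once with numbers; descriptor_bo_size() now routes through v3dv_X() because the per-descriptor footprint is hardware-version specific. A sketch with assumed sizes:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   /* Assumed layout: set based 4096 bytes into the pool BO, binding whose
    * descriptors start 128 bytes into the set, and 32 bytes per descriptor
    * (the value descriptor_bo_size() would report for this type). */
   const uint32_t base_offset = 4096;
   const uint32_t descriptor_offset = 128;
   const uint32_t bo_size = 32;
   const uint32_t array_index = 2;

   const uint32_t map_offset =
      base_offset + descriptor_offset + array_index * bo_size;

   assert(map_offset == 4288); /* element 2 maps at pool->bo->map + 4288 */
   return 0;
}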
static bool
@@ -125,7 +103,8 @@ v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_stat
 * validation or adding extra offsets if the bo contains more than one field.
*/
static struct v3dv_cl_reloc
-v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_state,
+v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
+ struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index,
@@ -146,7 +125,7 @@ v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_s
const struct v3dv_descriptor_set_binding_layout *binding_layout =
&set->layout->binding[binding_number];
- assert(descriptor_bo_size(binding_layout->type) > 0);
+ assert(v3dv_X(device, descriptor_bo_size)(binding_layout->type) > 0);
*out_type = binding_layout->type;
uint32_t array_index = map->array_index[index];
@@ -155,7 +134,7 @@ v3dv_descriptor_map_get_descriptor_bo(struct v3dv_descriptor_state *descriptor_s
struct v3dv_cl_reloc reloc = {
.bo = set->pool->bo,
.offset = set->base_offset + binding_layout->descriptor_offset +
- array_index * descriptor_bo_size(binding_layout->type),
+ array_index * v3dv_X(device, descriptor_bo_size)(binding_layout->type),
};
return reloc;
@@ -218,24 +197,23 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
struct v3dv_cl_reloc
-v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state,
+v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
+ struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index)
{
VkDescriptorType type;
struct v3dv_cl_reloc reloc =
- v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map,
+ v3dv_descriptor_map_get_descriptor_bo(device, descriptor_state, map,
pipeline_layout,
index, &type);
assert(type == VK_DESCRIPTOR_TYPE_SAMPLER ||
type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
- if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
- reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor,
- sampler_state);
- }
+ if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ reloc.offset += v3dv_X(device, combined_image_sampler_sampler_state_offset)();
return reloc;
}
@@ -262,7 +240,7 @@ v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
assert(descriptor->image_view);
- *out_vk_format = descriptor->image_view->vk_format;
+ *out_vk_format = descriptor->image_view->vk.format;
return descriptor->image_view->format;
default:
unreachable("descriptor type doesn't has a texture format");
@@ -288,23 +266,28 @@ v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_stat
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
assert(descriptor->image_view);
- return descriptor->image_view->image->mem->bo;
+ struct v3dv_image *image =
+ (struct v3dv_image *) descriptor->image_view->vk.image;
+ return image->mem->bo;
+ }
default:
unreachable("descriptor type doesn't has a texture bo");
}
}
struct v3dv_cl_reloc
-v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state,
+v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
+ struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index)
{
VkDescriptorType type;
struct v3dv_cl_reloc reloc =
- v3dv_descriptor_map_get_descriptor_bo(descriptor_state, map,
+ v3dv_descriptor_map_get_descriptor_bo(device,
+ descriptor_state, map,
pipeline_layout,
index, &type);
@@ -315,10 +298,8 @@ v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descr
type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ||
type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
- if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
- reloc.offset += offsetof(struct v3dv_combined_image_sampler_descriptor,
- texture_state);
- }
+ if (type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ reloc.offset += v3dv_X(device, combined_image_sampler_texture_state_offset)();
return reloc;
}
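
Both offsetof() calls replaced in this file (for sampler_state above and texture_state here) index into the same pool-BO slot; the v3dv_X() helpers imply a layout along these lines (field sizes are assumptions, since the real packet lengths are per-version):

/* Hypothetical layout of one combined-image-sampler slot in the
 * descriptor pool BO; the two v3dv_X() offset helpers return the
 * offsets of these members so a reloc can target either half. */
struct hypothetical_combined_image_sampler_descriptor {
   uint8_t texture_state[32]; /* TEXTURE_SHADER_STATE packet, size assumed */
   uint8_t sampler_state[24]; /* SAMPLER_STATE packet, size assumed */
};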
@@ -330,7 +311,7 @@ v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descr
* just multiple descriptor set layouts pasted together."
*/
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineLayout(VkDevice _device,
const VkPipelineLayoutCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -345,7 +326,7 @@ v3dv_CreatePipelineLayout(VkDevice _device,
layout = vk_object_zalloc(&device->vk, pAllocator, sizeof(*layout),
VK_OBJECT_TYPE_PIPELINE_LAYOUT);
if (layout == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
layout->num_sets = pCreateInfo->setLayoutCount;
@@ -380,7 +361,7 @@ v3dv_CreatePipelineLayout(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineLayout(VkDevice _device,
VkPipelineLayout _pipelineLayout,
const VkAllocationCallbacks *pAllocator)
@@ -393,7 +374,7 @@ v3dv_DestroyPipelineLayout(VkDevice _device,
vk_object_free(&device->vk, pAllocator, pipeline_layout);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateDescriptorPool(VkDevice _device,
const VkDescriptorPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -435,7 +416,7 @@ v3dv_CreateDescriptorPool(VkDevice _device,
assert(pCreateInfo->pPoolSizes[i].descriptorCount > 0);
descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
- bo_size += descriptor_bo_size(pCreateInfo->pPoolSizes[i].type) *
+ bo_size += v3dv_X(device, descriptor_bo_size)(pCreateInfo->pPoolSizes[i].type) *
pCreateInfo->pPoolSizes[i].descriptorCount;
}
@@ -452,7 +433,7 @@ v3dv_CreateDescriptorPool(VkDevice _device,
VK_OBJECT_TYPE_DESCRIPTOR_POOL);
if (!pool)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
pool->host_memory_base = (uint8_t*)pool + sizeof(struct v3dv_descriptor_pool);
@@ -482,7 +463,7 @@ v3dv_CreateDescriptorPool(VkDevice _device,
out_of_device_memory:
vk_object_free(&device->vk, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
static void
@@ -506,7 +487,7 @@ descriptor_set_destroy(struct v3dv_device *device,
vk_object_free(&device->vk, NULL, set);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyDescriptorPool(VkDevice _device,
VkDescriptorPool _pool,
const VkAllocationCallbacks *pAllocator)
@@ -531,7 +512,7 @@ v3dv_DestroyDescriptorPool(VkDevice _device,
vk_object_free(&device->vk, pAllocator, pool);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetDescriptorPool(VkDevice _device,
VkDescriptorPool descriptorPool,
VkDescriptorPoolResetFlags flags)
@@ -558,7 +539,7 @@ v3dv_ResetDescriptorPool(VkDevice _device,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateDescriptorSetLayout(VkDevice _device,
const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -602,7 +583,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
if (!set_layout)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* We just allocate all the immutable samplers at the end of the struct */
struct v3dv_sampler *samplers = (void*) &set_layout->binding[num_bindings];
@@ -614,7 +595,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
pCreateInfo->bindingCount, &bindings);
if (result != VK_SUCCESS) {
vk_object_free(&device->vk, pAllocator, set_layout);
- return vk_error(device->instance, result);
+ return vk_error(device, result);
}
memset(set_layout->binding, 0,
@@ -680,7 +661,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
set_layout->binding[binding_number].descriptor_offset = set_layout->bo_size;
set_layout->bo_size +=
- descriptor_bo_size(set_layout->binding[binding_number].type) *
+ v3dv_X(device, descriptor_bo_size)(set_layout->binding[binding_number].type) *
binding->descriptorCount;
}
@@ -694,7 +675,7 @@ v3dv_CreateDescriptorSetLayout(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyDescriptorSetLayout(VkDevice _device,
VkDescriptorSetLayout _set_layout,
const VkAllocationCallbacks *pAllocator)
@@ -716,7 +697,7 @@ out_of_pool_memory(const struct v3dv_device *device,
* by allocating a new pool, so they don't point to real issues.
*/
if (!pool->is_driver_internal)
-      return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_POOL_MEMORY);
else
return VK_ERROR_OUT_OF_POOL_MEMORY;
}
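
For context: VK_ERROR_OUT_OF_POOL_MEMORY is a recoverable condition for applications, which is why driver-internal pools are exempted from the logging above. A hypothetical caller-side recovery path (names are illustrative):

    VkResult r = vkAllocateDescriptorSets(dev, &alloc_info, &set);
    if (r == VK_ERROR_OUT_OF_POOL_MEMORY) {
       /* Create a fresh pool and retry the allocation. */
       VkDescriptorPool new_pool;
       vkCreateDescriptorPool(dev, &pool_info, NULL, &new_pool);
       alloc_info.descriptorPool = new_pool;
       r = vkAllocateDescriptorSets(dev, &alloc_info, &set);
    }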
@@ -745,7 +726,7 @@ descriptor_set_create(struct v3dv_device *device,
VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (!set)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
set->pool = pool;
@@ -818,15 +799,14 @@ descriptor_set_create(struct v3dv_device *device,
for (uint32_t i = 0; i < layout->binding[b].array_size; i++) {
uint32_t combined_offset =
layout->binding[b].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ?
- offsetof(struct v3dv_combined_image_sampler_descriptor, sampler_state) :
- 0;
+ v3dv_X(device, combined_image_sampler_sampler_state_offset)() : 0;
- void *desc_map = descriptor_bo_map(set, &layout->binding[b], i);
+ void *desc_map = descriptor_bo_map(device, set, &layout->binding[b], i);
desc_map += combined_offset;
memcpy(desc_map,
samplers[i].sampler_state,
- cl_packet_length(SAMPLER_STATE));
+ sizeof(samplers[i].sampler_state));
}
}
@@ -835,7 +815,7 @@ descriptor_set_create(struct v3dv_device *device,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateDescriptorSets(VkDevice _device,
const VkDescriptorSetAllocateInfo *pAllocateInfo,
VkDescriptorSet *pDescriptorSets)
@@ -869,7 +849,7 @@ v3dv_AllocateDescriptorSets(VkDevice _device,
return result;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_FreeDescriptorSets(VkDevice _device,
VkDescriptorPool descriptorPool,
uint32_t count,
@@ -888,7 +868,8 @@ v3dv_FreeDescriptorSets(VkDevice _device,
}
static void
-descriptor_bo_copy(struct v3dv_descriptor_set *dst_set,
+descriptor_bo_copy(struct v3dv_device *device,
+ struct v3dv_descriptor_set *dst_set,
const struct v3dv_descriptor_set_binding_layout *dst_binding_layout,
uint32_t dst_array_index,
struct v3dv_descriptor_set *src_set,
@@ -897,31 +878,55 @@ descriptor_bo_copy(struct v3dv_descriptor_set *dst_set,
{
assert(dst_binding_layout->type == src_binding_layout->type);
- void *dst_map = descriptor_bo_map(dst_set, dst_binding_layout, dst_array_index);
- void *src_map = descriptor_bo_map(src_set, src_binding_layout, src_array_index);
+ void *dst_map = descriptor_bo_map(device, dst_set, dst_binding_layout, dst_array_index);
+ void *src_map = descriptor_bo_map(device, src_set, src_binding_layout, src_array_index);
+
+ memcpy(dst_map, src_map, v3dv_X(device, descriptor_bo_size)(src_binding_layout->type));
+}
+
+static void
+write_buffer_descriptor(struct v3dv_descriptor *descriptor,
+ VkDescriptorType desc_type,
+ const VkDescriptorBufferInfo *buffer_info)
+{
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, buffer_info->buffer);
- memcpy(dst_map, src_map, descriptor_bo_size(src_binding_layout->type));
+ descriptor->type = desc_type;
+ descriptor->buffer = buffer;
+ descriptor->offset = buffer_info->offset;
+ if (buffer_info->range == VK_WHOLE_SIZE) {
+ descriptor->range = buffer->size - buffer_info->offset;
+ } else {
+      assert(buffer_info->range <= UINT32_MAX);
+ descriptor->range = buffer_info->range;
+ }
}
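
As a worked example of the VK_WHOLE_SIZE branch above: binding a 4096-byte buffer at offset 256 yields descriptor->range = 4096 - 256 = 3840, i.e. the remainder of the buffer from the bound offset.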
static void
-write_image_descriptor(VkDescriptorType desc_type,
+write_image_descriptor(struct v3dv_device *device,
+ struct v3dv_descriptor *descriptor,
+ VkDescriptorType desc_type,
struct v3dv_descriptor_set *set,
const struct v3dv_descriptor_set_binding_layout *binding_layout,
struct v3dv_image_view *iview,
struct v3dv_sampler *sampler,
uint32_t array_index)
{
- void *desc_map = descriptor_bo_map(set, binding_layout, array_index);
+ descriptor->type = desc_type;
+ descriptor->sampler = sampler;
+ descriptor->image_view = iview;
+
+ void *desc_map = descriptor_bo_map(device, set,
+ binding_layout, array_index);
if (iview) {
const uint32_t tex_state_index =
- iview->type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY ||
+ iview->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY ||
desc_type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ? 0 : 1;
memcpy(desc_map,
iview->texture_shader_state[tex_state_index],
sizeof(iview->texture_shader_state[0]));
- desc_map += offsetof(struct v3dv_combined_image_sampler_descriptor,
- sampler_state);
+ desc_map += v3dv_X(device, combined_image_sampler_sampler_state_offset)();
}
if (sampler && !binding_layout->immutable_samplers_offset) {
@@ -936,28 +941,33 @@ write_image_descriptor(VkDescriptorType desc_type,
static void
-write_buffer_view_descriptor(VkDescriptorType desc_type,
+write_buffer_view_descriptor(struct v3dv_device *device,
+ struct v3dv_descriptor *descriptor,
+ VkDescriptorType desc_type,
struct v3dv_descriptor_set *set,
const struct v3dv_descriptor_set_binding_layout *binding_layout,
struct v3dv_buffer_view *bview,
uint32_t array_index)
{
- void *desc_map = descriptor_bo_map(set, binding_layout, array_index);
-
assert(bview);
+ descriptor->type = desc_type;
+ descriptor->buffer_view = bview;
+
+ void *desc_map = descriptor_bo_map(device, set, binding_layout, array_index);
memcpy(desc_map,
bview->texture_shader_state,
sizeof(bview->texture_shader_state));
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_UpdateDescriptorSets(VkDevice _device,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites,
uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
for (uint32_t i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
V3DV_FROM_HANDLE(v3dv_descriptor_set, set, writeset->dstSet);
@@ -971,8 +981,6 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
descriptor += writeset->dstArrayElement;
for (uint32_t j = 0; j < writeset->descriptorCount; ++j) {
- descriptor->type = writeset->descriptorType;
-
switch(writeset->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -980,16 +988,8 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
const VkDescriptorBufferInfo *buffer_info = writeset->pBufferInfo + j;
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, buffer_info->buffer);
-
- descriptor->buffer = buffer;
- descriptor->offset = buffer_info->offset;
- if (buffer_info->range == VK_WHOLE_SIZE) {
- descriptor->range = buffer->size - buffer_info->offset;
- } else {
- assert(descriptor->range <= UINT32_MAX);
- descriptor->range = buffer_info->range;
- }
+ write_buffer_descriptor(descriptor, writeset->descriptorType,
+ buffer_info);
break;
}
case VK_DESCRIPTOR_TYPE_SAMPLER: {
@@ -999,10 +999,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
*/
const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler);
-
- descriptor->sampler = sampler;
-
- write_image_descriptor(writeset->descriptorType,
+ write_image_descriptor(device, descriptor, writeset->descriptorType,
set, binding_layout, NULL, sampler,
writeset->dstArrayElement + j);
@@ -1013,10 +1010,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView);
-
- descriptor->image_view = iview;
-
- write_image_descriptor(writeset->descriptorType,
+ write_image_descriptor(device, descriptor, writeset->descriptorType,
set, binding_layout, iview, NULL,
writeset->dstArrayElement + j);
@@ -1026,11 +1020,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
const VkDescriptorImageInfo *image_info = writeset->pImageInfo + j;
V3DV_FROM_HANDLE(v3dv_image_view, iview, image_info->imageView);
V3DV_FROM_HANDLE(v3dv_sampler, sampler, image_info->sampler);
-
- descriptor->image_view = iview;
- descriptor->sampler = sampler;
-
- write_image_descriptor(writeset->descriptorType,
+ write_image_descriptor(device, descriptor, writeset->descriptorType,
set, binding_layout, iview, sampler,
writeset->dstArrayElement + j);
@@ -1040,12 +1030,7 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
V3DV_FROM_HANDLE(v3dv_buffer_view, buffer_view,
writeset->pTexelBufferView[j]);
-
- assert(buffer_view);
-
- descriptor->buffer_view = buffer_view;
-
- write_buffer_view_descriptor(writeset->descriptorType,
+ write_buffer_view_descriptor(device, descriptor, writeset->descriptorType,
set, binding_layout, buffer_view,
writeset->dstArrayElement + j);
break;
@@ -1086,8 +1071,9 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
dst_descriptor++;
src_descriptor++;
- if (descriptor_bo_size(src_binding_layout->type) > 0) {
- descriptor_bo_copy(dst_set, dst_binding_layout,
+ if (v3dv_X(device, descriptor_bo_size)(src_binding_layout->type) > 0) {
+ descriptor_bo_copy(device,
+ dst_set, dst_binding_layout,
j + copyset->dstArrayElement,
src_set, src_binding_layout,
j + copyset->srcArrayElement);
@@ -1096,3 +1082,197 @@ v3dv_UpdateDescriptorSets(VkDevice _device,
}
}
}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetDescriptorSetLayoutSupport(
+ VkDevice _device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ VkDescriptorSetLayoutSupport *pSupport)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ VkResult result = vk_create_sorted_bindings(
+ pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ if (result != VK_SUCCESS) {
+ pSupport->supported = false;
+ return;
+ }
+
+ bool supported = true;
+
+ uint32_t desc_host_size = sizeof(struct v3dv_descriptor);
+ uint32_t host_size = sizeof(struct v3dv_descriptor_set);
+ uint32_t bo_size = 0;
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ const VkDescriptorSetLayoutBinding *binding = bindings + i;
+
+ if ((UINT32_MAX - host_size) / desc_host_size < binding->descriptorCount) {
+ supported = false;
+ break;
+ }
+
+ uint32_t desc_bo_size = v3dv_X(device, descriptor_bo_size)(binding->descriptorType);
+ if (desc_bo_size > 0 &&
+ (UINT32_MAX - bo_size) / desc_bo_size < binding->descriptorCount) {
+ supported = false;
+ break;
+ }
+
+ host_size += binding->descriptorCount * desc_host_size;
+ bo_size += binding->descriptorCount * desc_bo_size;
+ }
+
+ free(bindings);
+
+ pSupport->supported = supported;
+}
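
The two guards above are overflow-safe forms of the check base + count * elem_size <= UINT32_MAX: dividing first keeps every intermediate value inside 32 bits. An equivalent helper, written out for illustration:

    /* Illustrative: true iff base + count * elem fits in uint32_t. */
    static bool
    fits_u32(uint32_t base, uint32_t elem, uint32_t count)
    {
       return elem == 0 || (UINT32_MAX - base) / elem >= count;
    }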
+
+VkResult
+v3dv_CreateDescriptorUpdateTemplate(
+ VkDevice _device,
+ const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ struct v3dv_descriptor_update_template *template;
+
+ size_t size = sizeof(*template) +
+ pCreateInfo->descriptorUpdateEntryCount * sizeof(template->entries[0]);
+ template = vk_object_alloc(&device->vk, pAllocator, size,
+ VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);
+ if (template == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ template->bind_point = pCreateInfo->pipelineBindPoint;
+
+ assert(pCreateInfo->templateType ==
+ VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET);
+ template->set = pCreateInfo->set;
+
+ template->entry_count = pCreateInfo->descriptorUpdateEntryCount;
+ for (uint32_t i = 0; i < template->entry_count; i++) {
+ const VkDescriptorUpdateTemplateEntry *pEntry =
+ &pCreateInfo->pDescriptorUpdateEntries[i];
+
+ template->entries[i] = (struct v3dv_descriptor_template_entry) {
+ .type = pEntry->descriptorType,
+ .binding = pEntry->dstBinding,
+ .array_element = pEntry->dstArrayElement,
+ .array_count = pEntry->descriptorCount,
+ .offset = pEntry->offset,
+ .stride = pEntry->stride,
+ };
+ }
+
+ *pDescriptorUpdateTemplate =
+ v3dv_descriptor_update_template_to_handle(template);
+
+ return VK_SUCCESS;
+}
+
+void
+v3dv_DestroyDescriptorUpdateTemplate(
+ VkDevice _device,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const VkAllocationCallbacks *pAllocator)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_descriptor_update_template, template,
+ descriptorUpdateTemplate);
+
+ if (!template)
+ return;
+
+ vk_object_free(&device->vk, pAllocator, template);
+}
+
+void
+v3dv_UpdateDescriptorSetWithTemplate(
+ VkDevice _device,
+ VkDescriptorSet descriptorSet,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_descriptor_set, set, descriptorSet);
+ V3DV_FROM_HANDLE(v3dv_descriptor_update_template, template,
+ descriptorUpdateTemplate);
+
+ for (int i = 0; i < template->entry_count; i++) {
+ const struct v3dv_descriptor_template_entry *entry =
+ &template->entries[i];
+
+ const struct v3dv_descriptor_set_binding_layout *binding_layout =
+ set->layout->binding + entry->binding;
+
+ struct v3dv_descriptor *descriptor =
+ set->descriptors +
+ binding_layout->descriptor_index +
+ entry->array_element;
+
+ switch (entry->type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkDescriptorBufferInfo *info =
+ pData + entry->offset + j * entry->stride;
+ write_buffer_descriptor(descriptor + j, entry->type, info);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkDescriptorImageInfo *info =
+ pData + entry->offset + j * entry->stride;
+ V3DV_FROM_HANDLE(v3dv_image_view, iview, info->imageView);
+ V3DV_FROM_HANDLE(v3dv_sampler, sampler, info->sampler);
+ write_image_descriptor(device, descriptor + j, entry->type,
+ set, binding_layout, iview, sampler,
+ entry->array_element + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkBufferView *_bview =
+ pData + entry->offset + j * entry->stride;
+ V3DV_FROM_HANDLE(v3dv_buffer_view, bview, *_bview);
+ write_buffer_view_descriptor(device, descriptor + j, entry->type,
+ set, binding_layout, bview,
+ entry->array_element + j);
+ }
+ break;
+
+ default:
+ unreachable("Unsupported descriptor type");
+ }
+ }
+}
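
For reference, the template path above is driven from the core Vulkan 1.1 API roughly as follows (hypothetical app-side sketch; dev, set, set_layout and buf are assumed to exist):

    /* Describe one uniform-buffer binding, then push raw
     * VkDescriptorBufferInfo data through the template in one call.
     */
    VkDescriptorUpdateTemplateEntry entry = {
       .dstBinding      = 0,
       .dstArrayElement = 0,
       .descriptorCount = 1,
       .descriptorType  = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
       .offset          = 0,
       .stride          = sizeof(VkDescriptorBufferInfo),
    };
    VkDescriptorUpdateTemplateCreateInfo info = {
       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
       .descriptorUpdateEntryCount = 1,
       .pDescriptorUpdateEntries   = &entry,
       .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
       .descriptorSetLayout = set_layout,
    };
    VkDescriptorUpdateTemplate tmpl;
    vkCreateDescriptorUpdateTemplate(dev, &info, NULL, &tmpl);

    VkDescriptorBufferInfo data = { buf, 0, VK_WHOLE_SIZE };
    vkUpdateDescriptorSetWithTemplate(dev, set, tmpl, &data);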
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_CreateSamplerYcbcrConversion(
+ VkDevice _device,
+ const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkSamplerYcbcrConversion *pYcbcrConversion)
+{
+ unreachable("Ycbcr sampler conversion is not supported");
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_DestroySamplerYcbcrConversion(
+ VkDevice _device,
+ VkSamplerYcbcrConversion YcbcrConversion,
+ const VkAllocationCallbacks *pAllocator)
+{
+ unreachable("Ycbcr sampler conversion is not supported");
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_device.c b/lib/mesa/src/broadcom/vulkan/v3dv_device.c
index 496f93e28..de085bf09 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_device.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_device.c
@@ -30,12 +30,17 @@
#include <unistd.h>
#include <xf86drm.h>
+#ifdef MAJOR_IN_MKDEV
+#include <sys/mkdev.h>
+#endif
+#ifdef MAJOR_IN_SYSMACROS
+#include <sys/sysmacros.h>
+#endif
+
#include "v3dv_private.h"
#include "common/v3d_debug.h"
-#include "broadcom/cle/v3dx_pack.h"
-
#include "compiler/v3d_compiler.h"
#include "drm-uapi/v3d_drm.h"
@@ -61,34 +66,96 @@
#include "drm-uapi/i915_drm.h"
#endif
-static void *
-default_alloc_func(void *pUserData, size_t size, size_t align,
- VkSystemAllocationScope allocationScope)
-{
- return malloc(size);
-}
+#define V3DV_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
-static void *
-default_realloc_func(void *pUserData, void *pOriginal, size_t size,
- size_t align, VkSystemAllocationScope allocationScope)
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
- return realloc(pOriginal, size);
+ *pApiVersion = V3DV_API_VERSION;
+ return VK_SUCCESS;
}
-static void
-default_free_func(void *pUserData, void *pMemory)
-{
- free(pMemory);
-}
+#if defined(VK_USE_PLATFORM_WIN32_KHR) || \
+ defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
+ defined(VK_USE_PLATFORM_XCB_KHR) || \
+ defined(VK_USE_PLATFORM_XLIB_KHR) || \
+ defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#define V3DV_USE_WSI_PLATFORM
+#endif
-static const VkAllocationCallbacks default_alloc = {
- .pUserData = NULL,
- .pfnAllocation = default_alloc_func,
- .pfnReallocation = default_realloc_func,
- .pfnFree = default_free_func,
+static const struct vk_instance_extension_table instance_extensions = {
+ .KHR_device_group_creation = true,
+#ifdef VK_USE_PLATFORM_DISPLAY_KHR
+ .KHR_display = true,
+ .KHR_get_display_properties2 = true,
+#endif
+ .KHR_external_fence_capabilities = true,
+ .KHR_external_memory_capabilities = true,
+ .KHR_external_semaphore_capabilities = true,
+ .KHR_get_physical_device_properties2 = true,
+#ifdef V3DV_USE_WSI_PLATFORM
+ .KHR_get_surface_capabilities2 = true,
+ .KHR_surface = true,
+ .KHR_surface_protected_capabilities = true,
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ .KHR_wayland_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ .KHR_xcb_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+ .KHR_xlib_surface = true,
+#endif
+ .EXT_debug_report = true,
};
-VkResult
+static void
+get_device_extensions(const struct v3dv_physical_device *device,
+ struct vk_device_extension_table *ext)
+{
+ *ext = (struct vk_device_extension_table) {
+ .KHR_bind_memory2 = true,
+ .KHR_copy_commands2 = true,
+ .KHR_dedicated_allocation = true,
+ .KHR_device_group = true,
+ .KHR_descriptor_update_template = true,
+ .KHR_external_fence = true,
+ .KHR_external_fence_fd = true,
+ .KHR_external_memory = true,
+ .KHR_external_memory_fd = true,
+ .KHR_external_semaphore = true,
+ .KHR_external_semaphore_fd = true,
+ .KHR_get_memory_requirements2 = true,
+ .KHR_image_format_list = true,
+ .KHR_relaxed_block_layout = true,
+ .KHR_maintenance1 = true,
+ .KHR_maintenance2 = true,
+ .KHR_maintenance3 = true,
+ .KHR_multiview = true,
+ .KHR_shader_non_semantic_info = true,
+ .KHR_sampler_mirror_clamp_to_edge = true,
+ .KHR_storage_buffer_storage_class = true,
+ .KHR_uniform_buffer_standard_layout = true,
+#ifdef V3DV_USE_WSI_PLATFORM
+ .KHR_swapchain = true,
+ .KHR_incremental_present = true,
+#endif
+ .KHR_variable_pointers = true,
+ .EXT_color_write_enable = true,
+ .EXT_custom_border_color = true,
+ .EXT_external_memory_dma_buf = true,
+ .EXT_index_type_uint8 = true,
+ .EXT_physical_device_drm = true,
+ .EXT_pipeline_creation_cache_control = true,
+ .EXT_pipeline_creation_feedback = true,
+ .EXT_private_data = true,
+ .EXT_provoking_vertex = true,
+ .EXT_vertex_attribute_divisor = true,
+ };
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName,
uint32_t *pPropertyCount,
VkExtensionProperties *pProperties)
@@ -98,10 +165,10 @@ v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName,
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
return vk_enumerate_instance_extension_properties(
- &v3dv_instance_extensions_supported, pPropertyCount, pProperties);
+ &instance_extensions, pPropertyCount, pProperties);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkInstance *pInstance)
@@ -112,25 +179,27 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
if (pAllocator == NULL)
- pAllocator = &default_alloc;
+ pAllocator = vk_default_allocator();
- instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ instance = vk_alloc(pAllocator, sizeof(*instance), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!instance)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
struct vk_instance_dispatch_table dispatch_table;
vk_instance_dispatch_table_from_entrypoints(
&dispatch_table, &v3dv_instance_entrypoints, true);
+ vk_instance_dispatch_table_from_entrypoints(
+ &dispatch_table, &wsi_instance_entrypoints, false);
result = vk_instance_init(&instance->vk,
- &v3dv_instance_extensions_supported,
+ &instance_extensions,
&dispatch_table,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
vk_free(pAllocator, instance);
- return vk_error(instance, result);
+ return vk_error(NULL, result);
}
v3d_process_debug_variable();
@@ -208,7 +277,7 @@ physical_device_finish(struct v3dv_physical_device *device)
mtx_destroy(&device->mutex);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyInstance(VkInstance _instance,
const VkAllocationCallbacks *pAllocator)
{
@@ -550,14 +619,14 @@ init_uuids(struct v3dv_physical_device *device)
const struct build_id_note *note =
build_id_find_nhdr_for_addr(init_uuids);
if (!note) {
- return vk_errorf((struct v3dv_instance*) device->vk.instance,
+ return vk_errorf(device->vk.instance,
VK_ERROR_INITIALIZATION_FAILED,
"Failed to find build-id");
}
unsigned build_id_len = build_id_length(note);
if (build_id_len < 20) {
- return vk_errorf((struct v3dv_instance*) device->vk.instance,
+ return vk_errorf(device->vk.instance,
VK_ERROR_INITIALIZATION_FAILED,
"build-id too short. It needs to be a SHA");
}
@@ -627,6 +696,8 @@ physical_device_init(struct v3dv_physical_device *device,
struct vk_physical_device_dispatch_table dispatch_table;
vk_physical_device_dispatch_table_from_entrypoints
(&dispatch_table, &v3dv_physical_device_entrypoints, true);
+ vk_physical_device_dispatch_table_from_entrypoints(
+ &dispatch_table, &wsi_physical_device_entrypoints, false);
result = vk_physical_device_init(&device->vk, &instance->vk, NULL,
&dispatch_table);
@@ -648,17 +719,48 @@ physical_device_init(struct v3dv_physical_device *device,
* we postpone that until a swapchain is created.
*/
+ const char *primary_path;
+#if !using_v3d_simulator
+ if (drm_primary_device)
+ primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];
+ else
+ primary_path = NULL;
+#else
+ primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY];
+#endif
+
+ struct stat primary_stat = {0}, render_stat = {0};
+
+ device->has_primary = primary_path;
+ if (device->has_primary) {
+ if (stat(primary_path, &primary_stat) != 0) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
+ "failed to stat DRM primary node %s",
+ primary_path);
+ goto fail;
+ }
+
+ device->primary_devid = primary_stat.st_rdev;
+ }
+
+ if (fstat(render_fd, &render_stat) != 0) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
+ "failed to stat DRM render node %s",
+ path);
+ goto fail;
+ }
+ device->has_render = true;
+ device->render_devid = render_stat.st_rdev;
+
if (instance->vk.enabled_extensions.KHR_display) {
#if !using_v3d_simulator
/* Open the primary node on the vc4 display device */
assert(drm_primary_device);
- const char *primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#else
/* There is only one device with primary and render nodes.
* Open its primary node.
*/
- const char *primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY];
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#endif
}
@@ -722,8 +824,7 @@ physical_device_init(struct v3dv_physical_device *device,
goto fail;
}
- v3dv_physical_device_get_supported_extensions(device,
- &device->vk.supported_extensions);
+ get_device_extensions(device, &device->vk.supported_extensions);
pthread_mutex_init(&device->mutex, NULL);
@@ -835,7 +936,7 @@ instance_ensure_physical_device(struct v3dv_instance *instance)
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumeratePhysicalDevices(VkInstance _instance,
uint32_t *pPhysicalDeviceCount,
VkPhysicalDevice *pPhysicalDevices)
@@ -858,7 +959,37 @@ v3dv_EnumeratePhysicalDevices(VkInstance _instance,
return vk_outarray_status(&out);
}
-void
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_EnumeratePhysicalDeviceGroups(
+ VkInstance _instance,
+ uint32_t *pPhysicalDeviceGroupCount,
+ VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
+{
+ V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
+ VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
+ pPhysicalDeviceGroupCount);
+
+ VkResult result = instance_ensure_physical_device(instance);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(instance->physicalDeviceCount == 1);
+
+ vk_outarray_append(&out, p) {
+ p->physicalDeviceCount = 1;
+ memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
+ p->physicalDevices[0] =
+ v3dv_physical_device_to_handle(&instance->physicalDevice);
+ p->subsetAllocation = false;
+
+ vk_foreach_struct(ext, p->pNext)
+ v3dv_debug_ignored_stype(ext->sType);
+ }
+
+ return vk_outarray_status(&out);
+}
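
Since this driver exposes exactly one physical device, the group enumeration above always reports a single group of size one. A minimal caller-side sketch of the core 1.1 entry point:

    uint32_t n = 0;
    vkEnumeratePhysicalDeviceGroups(inst, &n, NULL);    /* n == 1 */
    VkPhysicalDeviceGroupProperties props = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GROUP_PROPERTIES,
    };
    vkEnumeratePhysicalDeviceGroups(inst, &n, &props);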
+
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures *pFeatures)
{
@@ -869,7 +1000,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
.fullDrawIndexUint32 = false, /* Only available since V3D 4.4.9.1 */
.imageCubeArray = true,
.independentBlend = true,
- .geometryShader = false,
+ .geometryShader = true,
.tessellationShader = false,
.sampleRateShading = true,
.dualSrcBlend = false,
@@ -886,7 +1017,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
.multiViewport = false,
.samplerAnisotropy = true,
.textureCompressionETC2 = true,
- .textureCompressionASTC_LDR = false,
+ .textureCompressionASTC_LDR = true,
/* Note that textureCompressionBC requires that the driver support all
* the BC formats. V3D 4.2 only support the BC1-3, so we can't claim
* that we support it.
@@ -896,7 +1027,7 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
.pipelineStatisticsQuery = false,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
- .shaderTessellationAndGeometryPointSize = false,
+ .shaderTessellationAndGeometryPointSize = true,
.shaderImageGatherExtended = false,
.shaderStorageImageExtendedFormats = true,
.shaderStorageImageMultisample = false,
@@ -927,14 +1058,45 @@ v3dv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice,
};
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2 *pFeatures)
{
v3dv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
+ VkPhysicalDeviceVulkan11Features vk11 = {
+ .storageBuffer16BitAccess = false,
+ .uniformAndStorageBuffer16BitAccess = false,
+ .storagePushConstant16 = false,
+ .storageInputOutput16 = false,
+ .multiview = true,
+ .multiviewGeometryShader = false,
+ .multiviewTessellationShader = false,
+ .variablePointersStorageBuffer = true,
+ /* FIXME: this needs support for non-constant index on UBO/SSBO */
+ .variablePointers = false,
+ .protectedMemory = false,
+ .samplerYcbcrConversion = false,
+ .shaderDrawParameters = false,
+ };
+
vk_foreach_struct(ext, pFeatures->pNext) {
switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
+ VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
+ (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
+ features->customBorderColors = true;
+ features->customBorderColorWithoutFormat = false;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
+ VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features =
+ (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext;
+ features->uniformBufferStandardLayout = true;
+ break;
+ }
+
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
VkPhysicalDevicePrivateDataFeaturesEXT *features =
(VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
@@ -942,6 +1104,87 @@ v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+ VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+ (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+ features->indexTypeUint8 = true;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
+ VkPhysicalDeviceColorWriteEnableFeaturesEXT *features = (void *) ext;
+ features->colorWriteEnable = true;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
+ VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features = (void *) ext;
+ features->pipelineCreationCacheControl = true;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
+ VkPhysicalDeviceProvokingVertexFeaturesEXT *features = (void *) ext;
+ features->provokingVertexLast = true;
+ /* FIXME: update when supporting EXT_transform_feedback */
+ features->transformFeedbackPreservesProvokingVertex = false;
+ break;
+ }
+
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
+ (void *) ext;
+ features->vertexAttributeInstanceRateDivisor = true;
+ features->vertexAttributeInstanceRateZeroDivisor = false;
+ break;
+ }
+
+ /* Vulkan 1.1 */
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
+ VkPhysicalDeviceVulkan11Features *features =
+ (VkPhysicalDeviceVulkan11Features *)ext;
+ memcpy(features, &vk11, sizeof(VkPhysicalDeviceVulkan11Features));
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
+ VkPhysicalDevice16BitStorageFeatures *features = (void *) ext;
+ features->storageBuffer16BitAccess = vk11.storageBuffer16BitAccess;
+ features->uniformAndStorageBuffer16BitAccess =
+ vk11.uniformAndStorageBuffer16BitAccess;
+ features->storagePushConstant16 = vk11.storagePushConstant16;
+ features->storageInputOutput16 = vk11.storageInputOutput16;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
+ VkPhysicalDeviceMultiviewFeatures *features = (void *) ext;
+ features->multiview = vk11.multiview;
+ features->multiviewGeometryShader = vk11.multiviewGeometryShader;
+ features->multiviewTessellationShader = vk11.multiviewTessellationShader;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
+ VkPhysicalDeviceProtectedMemoryFeatures *features = (void *) ext;
+ features->protectedMemory = vk11.protectedMemory;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
+ VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = (void *) ext;
+ features->samplerYcbcrConversion = vk11.samplerYcbcrConversion;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
+ VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *) ext;
+ features->shaderDrawParameters = vk11.shaderDrawParameters;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
+ VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext;
+ features->variablePointersStorageBuffer =
+ vk11.variablePointersStorageBuffer;
+ features->variablePointers = vk11.variablePointers;
+ break;
+ }
+
default:
v3dv_debug_ignored_stype(ext->sType);
break;
@@ -949,6 +1192,20 @@ v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
}
}
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetDeviceGroupPeerMemoryFeatures(VkDevice device,
+ uint32_t heapIndex,
+ uint32_t localDeviceIndex,
+ uint32_t remoteDeviceIndex,
+ VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
+{
+ assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
+ *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
+ VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
+ VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
+ VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
+}
+
uint32_t
v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev)
{
@@ -987,11 +1244,16 @@ v3dv_physical_device_device_id(struct v3dv_physical_device *dev)
return devid;
#else
- return dev->devinfo.ver;
+ switch (dev->devinfo.ver) {
+ case 42:
+ return 0xBE485FD3; /* Broadcom deviceID for 2711 */
+ default:
+ unreachable("Unsupported V3D version");
+ }
#endif
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties *pProperties)
{
@@ -1009,7 +1271,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
const uint32_t v3d_coord_shift = 6;
- const uint32_t v3d_point_line_granularity = 2.0f / (1 << v3d_coord_shift);
+ const float v3d_point_line_granularity = 2.0f / (1 << v3d_coord_shift);
const uint32_t max_fb_size = 4096;
const VkSampleCountFlags supported_sample_counts =
@@ -1028,8 +1290,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.maxImageDimensionCube = 4096,
.maxImageArrayLayers = 2048,
.maxTexelBufferElements = (1ul << 28),
- .maxUniformBufferRange = (1ul << 27),
- .maxStorageBufferRange = (1ul << 27),
+ .maxUniformBufferRange = V3D_MAX_BUFFER_RANGE,
+ .maxStorageBufferRange = V3D_MAX_BUFFER_RANGE,
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = mem_size / page_size,
.maxSamplerAllocationCount = 64 * 1024,
@@ -1075,11 +1337,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry limits */
- .maxGeometryShaderInvocations = 0,
- .maxGeometryInputComponents = 0,
- .maxGeometryOutputComponents = 0,
- .maxGeometryOutputVertices = 0,
- .maxGeometryTotalOutputComponents = 0,
+ .maxGeometryShaderInvocations = 32,
+ .maxGeometryInputComponents = 64,
+ .maxGeometryOutputComponents = 64,
+ .maxGeometryOutputVertices = 256,
+ .maxGeometryTotalOutputComponents = 1024,
/* Fragment limits */
.maxFragmentInputComponents = max_varying_components,
@@ -1108,7 +1370,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
2.0 * max_fb_size - 1 },
.viewportSubPixelBits = 0,
.minMemoryMapAlignment = page_size,
- .minTexelBufferOffsetAlignment = VC5_UIFBLOCK_SIZE,
+ .minTexelBufferOffsetAlignment = V3D_UIFBLOCK_SIZE,
.minUniformBufferOffsetAlignment = 32,
.minStorageBufferOffsetAlignment = 32,
.minTexelOffset = -8,
@@ -1151,7 +1413,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
};
*pProperties = (VkPhysicalDeviceProperties) {
- .apiVersion = v3dv_physical_device_api_version(pdevice),
+ .apiVersion = V3DV_API_VERSION,
.driverVersion = vk_get_driver_version(),
.vendorID = v3dv_physical_device_vendor_id(pdevice),
.deviceID = v3dv_physical_device_device_id(pdevice),
@@ -1166,7 +1428,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2 *pProperties)
{
@@ -1176,6 +1438,26 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
vk_foreach_struct(ext, pProperties->pNext) {
switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
+ VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
+ (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
+ props->maxCustomBorderColorSamplers = V3D_MAX_TEXTURE_SAMPLERS;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
+ VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
+ (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
+ props->provokingVertexModePerPipeline = true;
+ /* FIXME: update when supporting EXT_transform_feedback */
+ props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
+ (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
+ props->maxVertexAttribDivisor = 0xffff;
+ break;
+ }
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
VkPhysicalDeviceIDProperties *id_props =
(VkPhysicalDeviceIDProperties *)ext;
@@ -1185,11 +1467,78 @@ v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
id_props->deviceLUIDValid = false;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
+ VkPhysicalDeviceDrmPropertiesEXT *props =
+ (VkPhysicalDeviceDrmPropertiesEXT *)ext;
+ props->hasPrimary = pdevice->has_primary;
+ if (props->hasPrimary) {
+ props->primaryMajor = (int64_t) major(pdevice->primary_devid);
+ props->primaryMinor = (int64_t) minor(pdevice->primary_devid);
+ }
+ props->hasRender = pdevice->has_render;
+ if (props->hasRender) {
+ props->renderMajor = (int64_t) major(pdevice->render_devid);
+ props->renderMinor = (int64_t) minor(pdevice->render_devid);
+ }
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
+ VkPhysicalDeviceMaintenance3Properties *props =
+ (VkPhysicalDeviceMaintenance3Properties *)ext;
+ /* We don't really have special restrictions for the maximum
+ * descriptors per set, other than maybe not exceeding the limits
+ * of addressable memory in a single allocation on either the host
+ * or the GPU. This will be a much larger limit than any of the
+ * per-stage limits already available in Vulkan though, so in practice,
+ * it is not expected to limit anything beyond what is already
+ * constrained through per-stage limits.
+ */
+ uint32_t max_host_descriptors =
+ (UINT32_MAX - sizeof(struct v3dv_descriptor_set)) /
+ sizeof(struct v3dv_descriptor);
+ uint32_t max_gpu_descriptors =
+ (UINT32_MAX / v3dv_X(pdevice, max_descriptor_bo_size)());
+ props->maxPerSetDescriptors =
+ MIN2(max_host_descriptors, max_gpu_descriptors);
+
+ /* Minimum required by the spec */
+ props->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
+ VkPhysicalDeviceMultiviewProperties *props =
+ (VkPhysicalDeviceMultiviewProperties *)ext;
+ props->maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT;
+ props->maxMultiviewInstanceIndex = UINT32_MAX - 1;
+ break;
+ }
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT:
/* Do nothing, not even logging. This is a non-PCI device, so we will
* never provide this extension.
*/
break;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
+ VkPhysicalDevicePointClippingProperties *props =
+ (VkPhysicalDevicePointClippingProperties *)ext;
+ props->pointClippingBehavior =
+ VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+ VkPhysicalDeviceProtectedMemoryProperties *props =
+ (VkPhysicalDeviceProtectedMemoryProperties *)ext;
+ props->protectedNoFault = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+ VkPhysicalDeviceSubgroupProperties *props =
+ (VkPhysicalDeviceSubgroupProperties *)ext;
+ props->subgroupSize = V3D_CHANNELS;
+ props->supportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
+ props->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT;
+ props->quadOperationsInAllStages = false;
+ break;
+ }
default:
v3dv_debug_ignored_stype(ext->sType);
break;
@@ -1208,7 +1557,7 @@ v3dv_queue_family_properties = {
.minImageTransferGranularity = { 1, 1, 1 },
};
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice,
uint32_t *pCount,
VkQueueFamilyProperties *pQueueFamilyProperties)
@@ -1220,7 +1569,7 @@ v3dv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
uint32_t *pQueueFamilyPropertyCount,
VkQueueFamilyProperties2 *pQueueFamilyProperties)
@@ -1236,7 +1585,7 @@ v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
@@ -1244,7 +1593,7 @@ v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
*pMemoryProperties = device->memory;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
@@ -1260,7 +1609,7 @@ v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
}
}
-PFN_vkVoidFunction
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
v3dv_GetInstanceProcAddr(VkInstance _instance,
const char *pName)
{
@@ -1303,7 +1652,7 @@ vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
VkLayerProperties *pProperties)
{
@@ -1315,7 +1664,7 @@ v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,
uint32_t *pPropertyCount,
VkLayerProperties *pProperties)
@@ -1327,16 +1676,19 @@ v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,
return VK_SUCCESS;
}
- return vk_error((struct v3dv_instance*) physical_device->vk.instance,
- VK_ERROR_LAYER_NOT_PRESENT);
+ return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT);
}
static VkResult
-queue_init(struct v3dv_device *device, struct v3dv_queue *queue)
+queue_init(struct v3dv_device *device, struct v3dv_queue *queue,
+ const VkDeviceQueueCreateInfo *create_info,
+ uint32_t index_in_family)
{
- vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+ VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info,
+ index_in_family);
+ if (result != VK_SUCCESS)
+ return result;
queue->device = device;
- queue->flags = 0;
queue->noop_job = NULL;
list_inithead(&queue->submit_wait_list);
pthread_mutex_init(&queue->mutex, NULL);
@@ -1346,7 +1698,7 @@ queue_init(struct v3dv_device *device, struct v3dv_queue *queue)
static void
queue_finish(struct v3dv_queue *queue)
{
- vk_object_base_finish(&queue->base);
+ vk_queue_finish(&queue->vk);
assert(list_is_empty(&queue->submit_wait_list));
if (queue->noop_job)
v3dv_job_destroy(queue->noop_job);
@@ -1371,7 +1723,7 @@ destroy_device_meta(struct v3dv_device *device)
v3dv_meta_texel_buffer_copy_finish(device);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -1384,19 +1736,6 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
- /* Check enabled features */
- if (pCreateInfo->pEnabledFeatures) {
- VkPhysicalDeviceFeatures supported_features;
- v3dv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
- VkBool32 *supported_feature = (VkBool32 *)&supported_features;
- VkBool32 *enabled_feature = (VkBool32 *)pCreateInfo->pEnabledFeatures;
- unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
- for (uint32_t i = 0; i < num_features; i++) {
- if (enabled_feature[i] && !supported_feature[i])
- return vk_error(instance, VK_ERROR_FEATURE_NOT_PRESENT);
- }
- }
-
   /* Check requested queues (we only expose one queue) */
assert(pCreateInfo->queueCreateInfoCount == 1);
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
@@ -1415,11 +1754,13 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
struct vk_device_dispatch_table dispatch_table;
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&v3dv_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &wsi_device_entrypoints, false);
result = vk_device_init(&device->vk, &physical_device->vk,
&dispatch_table, pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
vk_free(&device->vk.alloc, device);
- return vk_error(instance, result);
+ return vk_error(NULL, result);
}
device->instance = instance;
@@ -1432,20 +1773,31 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
pthread_mutex_init(&device->mutex, NULL);
- result = queue_init(device, &device->queue);
+ result = queue_init(device, &device->queue,
+ pCreateInfo->pQueueCreateInfos, 0);
if (result != VK_SUCCESS)
goto fail;
device->devinfo = physical_device->devinfo;
- if (pCreateInfo->pEnabledFeatures) {
+ /* Vulkan 1.1 and VK_KHR_get_physical_device_properties2 added
+ * VkPhysicalDeviceFeatures2 which can be used in the pNext chain of
+ * vkDeviceCreateInfo, in which case it should be used instead of
+ * pEnabledFeatures.
+ */
+ const VkPhysicalDeviceFeatures2 *features2 =
+ vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_FEATURES_2);
+ if (features2) {
+ memcpy(&device->features, &features2->features,
+ sizeof(device->features));
+ } else if (pCreateInfo->pEnabledFeatures) {
memcpy(&device->features, pCreateInfo->pEnabledFeatures,
sizeof(device->features));
-
- if (device->features.robustBufferAccess)
- perf_debug("Device created with Robust Buffer Access enabled.\n");
}
+ if (device->features.robustBufferAccess)
+ perf_debug("Device created with Robust Buffer Access enabled.\n");
+
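The new pNext handling corresponds to the app-side pattern of chaining VkPhysicalDeviceFeatures2 into VkDeviceCreateInfo, in which case the spec requires pEnabledFeatures to be NULL (sketch; other required fields omitted):

    VkPhysicalDeviceFeatures2 feats = {
       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
       .features = { .robustBufferAccess = VK_TRUE },
    };
    VkDeviceCreateInfo dci = {
       .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
       .pNext = &feats,
       .pEnabledFeatures = NULL, /* must be NULL when features2 is chained */
    };
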
int ret = drmSyncobjCreate(physical_device->render_fd,
DRM_SYNCOBJ_CREATE_SIGNALED,
&device->last_job_sync);
@@ -1454,9 +1806,12 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
goto fail;
}
+#ifdef DEBUG
+ v3dv_X(device, device_check_prepacked_sizes)();
+#endif
init_device_meta(device);
v3dv_bo_cache_init(device);
- v3dv_pipeline_cache_init(&device->default_pipeline_cache, device,
+ v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
device->instance->default_pipeline_cache_enabled);
device->default_attribute_float =
v3dv_pipeline_create_default_attribute_values(device, NULL);
@@ -1472,7 +1827,7 @@ fail:
return result;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyDevice(VkDevice _device,
const VkAllocationCallbacks *pAllocator)
{
@@ -1496,24 +1851,10 @@ v3dv_DestroyDevice(VkDevice _device,
v3dv_bo_cache_destroy(device);
vk_device_finish(&device->vk);
- vk_free2(&default_alloc, pAllocator, device);
+ vk_free2(&device->vk.alloc, pAllocator, device);
}
-void
-v3dv_GetDeviceQueue(VkDevice _device,
- uint32_t queueFamilyIndex,
- uint32_t queueIndex,
- VkQueue *pQueue)
-{
- V3DV_FROM_HANDLE(v3dv_device, device, _device);
-
- assert(queueIndex == 0);
- assert(queueFamilyIndex == 0);
-
- *pQueue = v3dv_queue_to_handle(&device->queue);
-}
-
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_DeviceWaitIdle(VkDevice _device)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
@@ -1526,8 +1867,7 @@ device_alloc(struct v3dv_device *device,
VkDeviceSize size)
{
/* Our kernel interface is 32-bit */
- if (size > UINT32_MAX)
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ assert(size <= UINT32_MAX);
mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false);
if (!mem->bo)
@@ -1546,7 +1886,9 @@ device_free_wsi_dumb(int32_t display_fd, int32_t dumb_handle)
struct drm_mode_destroy_dumb destroy_dumb = {
.handle = dumb_handle,
};
- v3dv_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);
+ if (v3dv_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb)) {
+ fprintf(stderr, "destroy dumb object %d: %s\n", dumb_handle, strerror(errno));
+ }
}
static void
@@ -1724,7 +2066,7 @@ fail_create:
#endif
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateMemory(VkDevice _device,
const VkMemoryAllocateInfo *pAllocateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -1759,6 +2101,22 @@ v3dv_AllocateMemory(VkDevice _device,
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
fd_info = (void *)ext;
break;
+ case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
+ /* We don't support VK_KHR_buffer_device_address or multiple
+ * devices per device group, so we can ignore this.
+ */
+ break;
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR:
+ /* We don't have particular optimizations associated with memory
+ * allocations that won't be suballocated to multiple resources.
+ */
+ break;
+ case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR:
+ /* The mask of handle types specified here must be supported
+ * according to VkExternalImageFormatProperties, so it must be
+ * fd or dmabuf, which don't have special requirements for us.
+ */
+ break;
default:
v3dv_debug_ignored_stype(ext->sType);
break;
@@ -1766,32 +2124,40 @@ v3dv_AllocateMemory(VkDevice _device,
}
VkResult result = VK_SUCCESS;
- if (wsi_info) {
- result = device_alloc_for_wsi(device, pAllocator, mem,
- pAllocateInfo->allocationSize);
- } else if (fd_info && fd_info->handleType) {
- assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
- fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
- result = device_import_bo(device, pAllocator,
- fd_info->fd, pAllocateInfo->allocationSize,
- &mem->bo);
- mem->has_bo_ownership = false;
- if (result == VK_SUCCESS)
- close(fd_info->fd);
+
+ /* We always allocate device memory in multiples of a page, so round up
+ * requested size to that.
+ */
+ VkDeviceSize alloc_size = ALIGN(pAllocateInfo->allocationSize, 4096);
+
+ if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE)) {
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
} else {
- result = device_alloc(device, mem, pAllocateInfo->allocationSize);
+ if (wsi_info) {
+ result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size);
+ } else if (fd_info && fd_info->handleType) {
+ assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+ result = device_import_bo(device, pAllocator,
+ fd_info->fd, alloc_size, &mem->bo);
+ mem->has_bo_ownership = false;
+ if (result == VK_SUCCESS)
+ close(fd_info->fd);
+ } else {
+ result = device_alloc(device, mem, alloc_size);
+ }
}
if (result != VK_SUCCESS) {
vk_object_free(&device->vk, pAllocator, mem);
- return vk_error(device->instance, result);
+ return vk_error(device, result);
}
*pMem = v3dv_device_memory_to_handle(mem);
return result;
}
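
Assuming the usual power-of-two form of Mesa's ALIGN macro, the page rounding above maps e.g. a 1-byte request to 4096 and a 4097-byte request to 8192:

    /* Illustrative power-of-two round-up, as used for page sizing. */
    #define ALIGN(v, a) (((v) + (a) - 1) & ~((uint64_t)(a) - 1))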
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_FreeMemory(VkDevice _device,
VkDeviceMemory _mem,
const VkAllocationCallbacks *pAllocator)
@@ -1810,7 +2176,7 @@ v3dv_FreeMemory(VkDevice _device,
vk_object_free(&device->vk, pAllocator, mem);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MapMemory(VkDevice _device,
VkDeviceMemory _memory,
VkDeviceSize offset,
@@ -1835,13 +2201,13 @@ v3dv_MapMemory(VkDevice _device,
*/
VkResult result = device_map(device, mem);
if (result != VK_SUCCESS)
- return vk_error(device->instance, result);
+ return vk_error(device, result);
*ppData = ((uint8_t *) mem->bo->map) + offset;
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_UnmapMemory(VkDevice _device,
VkDeviceMemory _memory)
{
@@ -1854,7 +2220,7 @@ v3dv_UnmapMemory(VkDevice _device,
device_unmap(device, mem);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_FlushMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
@@ -1862,7 +2228,7 @@ v3dv_FlushMappedMemoryRanges(VkDevice _device,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_InvalidateMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
@@ -1870,28 +2236,40 @@ v3dv_InvalidateMappedMemoryRanges(VkDevice _device,
return VK_SUCCESS;
}
-void
-v3dv_GetImageMemoryRequirements(VkDevice _device,
- VkImage _image,
- VkMemoryRequirements *pMemoryRequirements)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetImageMemoryRequirements2(VkDevice device,
+ const VkImageMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
{
- V3DV_FROM_HANDLE(v3dv_image, image, _image);
+ V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image);
- assert(image->size > 0);
+ pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
+ .memoryTypeBits = 0x1,
+ .alignment = image->alignment,
+ .size = image->size
+ };
- pMemoryRequirements->size = image->size;
- pMemoryRequirements->alignment = image->alignment;
- pMemoryRequirements->memoryTypeBits = 0x1;
+ vk_foreach_struct(ext, pMemoryRequirements->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *req =
+ (VkMemoryDedicatedRequirements *) ext;
+ req->requiresDedicatedAllocation = image->vk.external_handle_types != 0;
+ req->prefersDedicatedAllocation = image->vk.external_handle_types != 0;
+ break;
+ }
+ default:
+ v3dv_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
}
-VkResult
-v3dv_BindImageMemory(VkDevice _device,
- VkImage _image,
- VkDeviceMemory _memory,
- VkDeviceSize memoryOffset)
+static void
+bind_image_memory(const VkBindImageMemoryInfo *info)
{
- V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
- V3DV_FROM_HANDLE(v3dv_image, image, _image);
+ V3DV_FROM_HANDLE(v3dv_image, image, info->image);
+ V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
/* Valid usage:
*
@@ -1899,36 +2277,75 @@ v3dv_BindImageMemory(VkDevice _device,
* the VkMemoryRequirements structure returned from a call to
* vkGetImageMemoryRequirements with image"
*/
- assert(memoryOffset % image->alignment == 0);
- assert(memoryOffset < mem->bo->size);
+ assert(info->memoryOffset % image->alignment == 0);
+ assert(info->memoryOffset < mem->bo->size);
image->mem = mem;
- image->mem_offset = memoryOffset;
+ image->mem_offset = info->memoryOffset;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_BindImageMemory2(VkDevice _device,
+ uint32_t bindInfoCount,
+ const VkBindImageMemoryInfo *pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; i++) {
+ const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
+ vk_find_struct_const(pBindInfos[i].pNext,
+ BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);
+ if (swapchain_info && swapchain_info->swapchain) {
+ struct v3dv_image *swapchain_image =
+ v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain,
+ swapchain_info->imageIndex);
+ VkBindImageMemoryInfo swapchain_bind = {
+ .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+ .image = pBindInfos[i].image,
+ .memory = v3dv_device_memory_to_handle(swapchain_image->mem),
+ .memoryOffset = swapchain_image->mem_offset,
+ };
+ bind_image_memory(&swapchain_bind);
+ } else {
+ bind_image_memory(&pBindInfos[i]);
+ }
+ }
return VK_SUCCESS;
}
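For swapchain binds the loop substitutes the memory that the WSI code already allocated for the swapchain image; plain binds go straight through. A minimal caller sketch, assuming mem and offset satisfy the requirements queried above:

   VkBindImageMemoryInfo bind = {
      .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
      .image = image,
      .memory = mem,
      .memoryOffset = offset,   /* multiple of the reported alignment */
   };
   vkBindImageMemory2(device, 1, &bind);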
-void
-v3dv_GetBufferMemoryRequirements(VkDevice _device,
- VkBuffer _buffer,
- VkMemoryRequirements* pMemoryRequirements)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetBufferMemoryRequirements2(VkDevice device,
+ const VkBufferMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
{
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
+
+ pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
+ .memoryTypeBits = 0x1,
+ .alignment = buffer->alignment,
+ .size = align64(buffer->size, buffer->alignment),
+ };
- pMemoryRequirements->memoryTypeBits = 0x1;
- pMemoryRequirements->alignment = buffer->alignment;
- pMemoryRequirements->size =
- align64(buffer->size, pMemoryRequirements->alignment);
+ vk_foreach_struct(ext, pMemoryRequirements->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *req =
+ (VkMemoryDedicatedRequirements *) ext;
+ req->requiresDedicatedAllocation = false;
+ req->prefersDedicatedAllocation = false;
+ break;
+ }
+ default:
+ v3dv_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
}
-VkResult
-v3dv_BindBufferMemory(VkDevice _device,
- VkBuffer _buffer,
- VkDeviceMemory _memory,
- VkDeviceSize memoryOffset)
+static void
+bind_buffer_memory(const VkBindBufferMemoryInfo *info)
{
- V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->buffer);
+ V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
/* Valid usage:
*
@@ -1936,16 +2353,26 @@ v3dv_BindBufferMemory(VkDevice _device,
* the VkMemoryRequirements structure returned from a call to
* vkGetBufferMemoryRequirements with buffer"
*/
- assert(memoryOffset % buffer->alignment == 0);
- assert(memoryOffset < mem->bo->size);
+ assert(info->memoryOffset % buffer->alignment == 0);
+ assert(info->memoryOffset < mem->bo->size);
buffer->mem = mem;
- buffer->mem_offset = memoryOffset;
+ buffer->mem_offset = info->memoryOffset;
+}
+
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_BindBufferMemory2(VkDevice device,
+ uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfo *pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; i++)
+ bind_buffer_memory(&pBindInfos[i]);
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateBuffer(VkDevice _device,
const VkBufferCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -1963,7 +2390,7 @@ v3dv_CreateBuffer(VkDevice _device,
buffer = vk_object_zalloc(&device->vk, pAllocator, sizeof(*buffer),
VK_OBJECT_TYPE_BUFFER);
if (buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
buffer->size = pCreateInfo->size;
buffer->usage = pCreateInfo->usage;
@@ -1979,7 +2406,7 @@ v3dv_CreateBuffer(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyBuffer(VkDevice _device,
VkBuffer _buffer,
const VkAllocationCallbacks *pAllocator)
@@ -1993,67 +2420,7 @@ v3dv_DestroyBuffer(VkDevice _device,
vk_object_free(&device->vk, pAllocator, buffer);
}
-/**
- * This computes the maximum bpp used by any of the render targets in a
- * particular subpass and checks if any of those render targets are
- * multisampled. If we don't have a subpass (when we are not inside a
- * render pass), then we assume that all framebuffer attachments are used.
- */
-void
-v3dv_framebuffer_compute_internal_bpp_msaa(
- const struct v3dv_framebuffer *framebuffer,
- const struct v3dv_subpass *subpass,
- uint8_t *max_bpp,
- bool *msaa)
-{
- STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP == 0);
- *max_bpp = RENDER_TARGET_MAXIMUM_32BPP;
- *msaa = false;
-
- if (subpass) {
- for (uint32_t i = 0; i < subpass->color_count; i++) {
- uint32_t att_idx = subpass->color_attachments[i].attachment;
- if (att_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- const struct v3dv_image_view *att = framebuffer->attachments[att_idx];
- assert(att);
-
- if (att->aspects & VK_IMAGE_ASPECT_COLOR_BIT)
- *max_bpp = MAX2(*max_bpp, att->internal_bpp);
-
- if (att->image->samples > VK_SAMPLE_COUNT_1_BIT)
- *msaa = true;
- }
-
- if (!*msaa && subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
- const struct v3dv_image_view *att =
- framebuffer->attachments[subpass->ds_attachment.attachment];
- assert(att);
-
- if (att->image->samples > VK_SAMPLE_COUNT_1_BIT)
- *msaa = true;
- }
-
- return;
- }
-
- assert(framebuffer->attachment_count <= 4);
- for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
- const struct v3dv_image_view *att = framebuffer->attachments[i];
- assert(att);
-
- if (att->aspects & VK_IMAGE_ASPECT_COLOR_BIT)
- *max_bpp = MAX2(*max_bpp, att->internal_bpp);
-
- if (att->image->samples > VK_SAMPLE_COUNT_1_BIT)
- *msaa = true;
- }
-
- return;
-}
-
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateFramebuffer(VkDevice _device,
const VkFramebufferCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -2069,7 +2436,7 @@ v3dv_CreateFramebuffer(VkDevice _device,
framebuffer = vk_object_zalloc(&device->vk, pAllocator, size,
VK_OBJECT_TYPE_FRAMEBUFFER);
if (framebuffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
framebuffer->width = pCreateInfo->width;
framebuffer->height = pCreateInfo->height;
@@ -2081,7 +2448,7 @@ v3dv_CreateFramebuffer(VkDevice _device,
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
framebuffer->attachments[i] =
v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]);
- if (framebuffer->attachments[i]->aspects & VK_IMAGE_ASPECT_COLOR_BIT)
+ if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
framebuffer->color_attachment_count++;
}
@@ -2090,7 +2457,7 @@ v3dv_CreateFramebuffer(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyFramebuffer(VkDevice _device,
VkFramebuffer _fb,
const VkAllocationCallbacks *pAllocator)
@@ -2104,7 +2471,7 @@ v3dv_DestroyFramebuffer(VkDevice _device,
vk_object_free(&device->vk, pAllocator, fb);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetMemoryFdPropertiesKHR(VkDevice _device,
VkExternalMemoryHandleTypeFlagBits handleType,
int fd,
@@ -2119,11 +2486,11 @@ v3dv_GetMemoryFdPropertiesKHR(VkDevice _device,
(1 << pdevice->memory.memoryTypeCount) - 1;
return VK_SUCCESS;
default:
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetMemoryFdKHR(VkDevice _device,
const VkMemoryGetFdInfoKHR *pGetFdInfo,
int *pFd)
@@ -2140,14 +2507,14 @@ v3dv_GetMemoryFdKHR(VkDevice _device,
mem->bo->handle,
DRM_CLOEXEC, &fd);
if (ret)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
*pFd = fd;
return VK_SUCCESS;
}
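This is the export side of the dma-buf path: the BO's GEM handle is turned into a file descriptor with drmPrimeHandleToFD() and ownership of that fd moves to the caller. Caller-side sketch:

   VkMemoryGetFdInfoKHR get_fd = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
      .memory = mem,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
   };
   int fd = -1;
   vkGetMemoryFdKHR(device, &get_fd, &fd);
   /* the caller now owns fd and must close() it */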
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateEvent(VkDevice _device,
const VkEventCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -2158,7 +2525,7 @@ v3dv_CreateEvent(VkDevice _device,
vk_object_zalloc(&device->vk, pAllocator, sizeof(*event),
VK_OBJECT_TYPE_EVENT);
if (!event)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* Events are created in the unsignaled state */
event->state = false;
@@ -2167,7 +2534,7 @@ v3dv_CreateEvent(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyEvent(VkDevice _device,
VkEvent _event,
const VkAllocationCallbacks *pAllocator)
@@ -2181,14 +2548,14 @@ v3dv_DestroyEvent(VkDevice _device,
vk_object_free(&device->vk, pAllocator, event);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetEventStatus(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_event, event, _event);
return p_atomic_read(&event->state) ? VK_EVENT_SET : VK_EVENT_RESET;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_SetEvent(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_event, event, _event);
@@ -2196,7 +2563,7 @@ v3dv_SetEvent(VkDevice _device, VkEvent _event)
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetEvent(VkDevice _device, VkEvent _event)
{
V3DV_FROM_HANDLE(v3dv_event, event, _event);
@@ -2204,101 +2571,7 @@ v3dv_ResetEvent(VkDevice _device, VkEvent _event)
return VK_SUCCESS;
}
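Host-side event state is just an atomic bool, so these entry points never touch the GPU; command-buffer waits on events are handled elsewhere. Host usage sketch:

   VkEventCreateInfo ci = { .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO };
   VkEvent ev;
   vkCreateEvent(device, &ci, NULL, &ev);
   vkSetEvent(device, ev);                      /* p_atomic_set(state, 1) */
   VkResult st = vkGetEventStatus(device, ev);  /* VK_EVENT_SET */
   vkResetEvent(device, ev);
   vkDestroyEvent(device, ev, NULL);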
-static const enum V3DX(Wrap_Mode) vk_to_v3d_wrap_mode[] = {
- [VK_SAMPLER_ADDRESS_MODE_REPEAT] = V3D_WRAP_MODE_REPEAT,
- [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = V3D_WRAP_MODE_MIRROR,
- [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = V3D_WRAP_MODE_CLAMP,
- [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = V3D_WRAP_MODE_MIRROR_ONCE,
- [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = V3D_WRAP_MODE_BORDER,
-};
-
-static const enum V3DX(Compare_Function)
-vk_to_v3d_compare_func[] = {
- [VK_COMPARE_OP_NEVER] = V3D_COMPARE_FUNC_NEVER,
- [VK_COMPARE_OP_LESS] = V3D_COMPARE_FUNC_LESS,
- [VK_COMPARE_OP_EQUAL] = V3D_COMPARE_FUNC_EQUAL,
- [VK_COMPARE_OP_LESS_OR_EQUAL] = V3D_COMPARE_FUNC_LEQUAL,
- [VK_COMPARE_OP_GREATER] = V3D_COMPARE_FUNC_GREATER,
- [VK_COMPARE_OP_NOT_EQUAL] = V3D_COMPARE_FUNC_NOTEQUAL,
- [VK_COMPARE_OP_GREATER_OR_EQUAL] = V3D_COMPARE_FUNC_GEQUAL,
- [VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS,
-};
-
-static void
-pack_sampler_state(struct v3dv_sampler *sampler,
- const VkSamplerCreateInfo *pCreateInfo)
-{
- enum V3DX(Border_Color_Mode) border_color_mode;
-
- /* For now we only support the preset Vulkan border color modes. If we
- * want to implement VK_EXT_custom_border_color in the future we would have
- * to use V3D_BORDER_COLOR_FOLLOWS and fill in border_color_word_[0/1/2/3]
- * in SAMPLER_STATE.
- */
- switch (pCreateInfo->borderColor) {
- case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
- case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
- border_color_mode = V3D_BORDER_COLOR_0000;
- break;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
- case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
- border_color_mode = V3D_BORDER_COLOR_0001;
- break;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
- case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
- border_color_mode = V3D_BORDER_COLOR_1111;
- break;
- default:
- unreachable("Unknown border color");
- break;
- }
-
- /* For some texture formats, when clamping to transparent black border the
- * CTS expects alpha to be set to 1 instead of 0, but the border color mode
- * will take priority over the texture state swizzle, so the only way to
- * fix that is to apply a swizzle in the shader. Here we keep track of
- * whether we are activating that mode and we will decide if we need to
- * activate the texture swizzle lowering in the shader key at compile time
- * depending on the actual texture format.
- */
- if ((pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
- pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
- pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) &&
- border_color_mode == V3D_BORDER_COLOR_0000) {
- sampler->clamp_to_transparent_black_border = true;
- }
-
- v3dv_pack(sampler->sampler_state, SAMPLER_STATE, s) {
- if (pCreateInfo->anisotropyEnable) {
- s.anisotropy_enable = true;
- if (pCreateInfo->maxAnisotropy > 8)
- s.maximum_anisotropy = 3;
- else if (pCreateInfo->maxAnisotropy > 4)
- s.maximum_anisotropy = 2;
- else if (pCreateInfo->maxAnisotropy > 2)
- s.maximum_anisotropy = 1;
- }
-
- s.border_color_mode = border_color_mode;
-
- s.wrap_i_border = false; /* Also hardcoded on v3d */
- s.wrap_s = vk_to_v3d_wrap_mode[pCreateInfo->addressModeU];
- s.wrap_t = vk_to_v3d_wrap_mode[pCreateInfo->addressModeV];
- s.wrap_r = vk_to_v3d_wrap_mode[pCreateInfo->addressModeW];
- s.fixed_bias = pCreateInfo->mipLodBias;
- s.max_level_of_detail = MIN2(MAX2(0, pCreateInfo->maxLod), 15);
- s.min_level_of_detail = MIN2(MAX2(0, pCreateInfo->minLod), 15);
- s.srgb_disable = 0; /* Not even set by v3d */
- s.depth_compare_function =
- vk_to_v3d_compare_func[pCreateInfo->compareEnable ?
- pCreateInfo->compareOp : VK_COMPARE_OP_NEVER];
- s.mip_filter_nearest = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST;
- s.min_filter_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
- s.mag_filter_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
- }
-}
-
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateSampler(VkDevice _device,
const VkSamplerCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -2312,18 +2585,23 @@ v3dv_CreateSampler(VkDevice _device,
sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler),
VK_OBJECT_TYPE_SAMPLER);
if (!sampler)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
sampler->compare_enable = pCreateInfo->compareEnable;
sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates;
- pack_sampler_state(sampler, pCreateInfo);
+
+ const VkSamplerCustomBorderColorCreateInfoEXT *bc_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
+
+ v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info);
*pSampler = v3dv_sampler_to_handle(sampler);
return VK_SUCCESS;
}
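Note the new v3dv_X(device, pack_sampler_state) call: this import moves all packet-packing helpers behind a per-hardware-version dispatch so the same driver can be built for multiple V3D revisions. A hedged sketch of how such a dispatch is typically wired up (names illustrative, not Mesa's exact definition):

   /* Each V3D version compiles the v3dvx_* sources with V3DX(x)
    * expanding to a versioned symbol, e.g. x##_v42. */
   #define v3dv_X(device, f) \
      ((device)->devinfo.ver >= 42 ? v3dv_v42_##f : v3dv_v33_##f)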
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroySampler(VkDevice _device,
VkSampler _sampler,
const VkAllocationCallbacks *pAllocator)
@@ -2337,7 +2615,7 @@ v3dv_DestroySampler(VkDevice _device,
vk_object_free(&device->vk, pAllocator, sampler);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetDeviceMemoryCommitment(VkDevice device,
VkDeviceMemory memory,
VkDeviceSize *pCommittedMemoryInBytes)
@@ -2345,17 +2623,17 @@ v3dv_GetDeviceMemoryCommitment(VkDevice device,
*pCommittedMemoryInBytes = 0;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetImageSparseMemoryRequirements(
- VkDevice device,
- VkImage image,
- uint32_t *pSparseMemoryRequirementCount,
- VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
+ VkDevice device,
+ VkImage image,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
{
*pSparseMemoryRequirementCount = 0;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetImageSparseMemoryRequirements2(
VkDevice device,
const VkImageSparseMemoryRequirementsInfo2 *pInfo,
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_formats.c b/lib/mesa/src/broadcom/vulkan/v3dv_formats.c
index cefa1418b..6e32d341a 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_formats.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_formats.c
@@ -25,300 +25,14 @@
#include "vk_util.h"
#include "vk_format_info.h"
-#include "broadcom/cle/v3dx_pack.h"
#include "drm-uapi/drm_fourcc.h"
#include "util/format/u_format.h"
#include "vulkan/wsi/wsi_common.h"
-#define SWIZ(x,y,z,w) { \
- PIPE_SWIZZLE_##x, \
- PIPE_SWIZZLE_##y, \
- PIPE_SWIZZLE_##z, \
- PIPE_SWIZZLE_##w \
-}
-
-#define FORMAT(vk, rt, tex, swiz, return_size, supports_filtering) \
- [VK_FORMAT_##vk] = { \
- true, \
- V3D_OUTPUT_IMAGE_FORMAT_##rt, \
- TEXTURE_DATA_FORMAT_##tex, \
- swiz, \
- return_size, \
- supports_filtering, \
- }
-
-#define SWIZ_X001 SWIZ(X, 0, 0, 1)
-#define SWIZ_XY01 SWIZ(X, Y, 0, 1)
-#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1)
-#define SWIZ_XYZW SWIZ(X, Y, Z, W)
-#define SWIZ_YZWX SWIZ(Y, Z, W, X)
-#define SWIZ_YZW1 SWIZ(Y, Z, W, 1)
-#define SWIZ_ZYXW SWIZ(Z, Y, X, W)
-#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1)
-#define SWIZ_XXXY SWIZ(X, X, X, Y)
-#define SWIZ_XXX1 SWIZ(X, X, X, 1)
-#define SWIZ_XXXX SWIZ(X, X, X, X)
-#define SWIZ_000X SWIZ(0, 0, 0, X)
-#define SWIZ_WXYZ SWIZ(W, X, Y, Z)
-
-/* FIXME: expand format table to describe whether the format is supported
- * for buffer surfaces (texel buffers, vertex buffers, etc).
- */
-static const struct v3dv_format format_table[] = {
- /* Color, 4 channels */
- FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, true),
- FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, true),
-
- FORMAT(R8G8B8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true),
- FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, true),
- FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true),
- FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, false),
- FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, false),
-
- FORMAT(R16G16B16A16_SFLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, true),
- FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, true),
- FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, true),
- FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, false),
- FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, false),
-
- FORMAT(R32G32B32A32_SFLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, false),
- FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, false),
- FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, false),
-
- /* Color, 3 channels */
- FORMAT(R32G32B32_SFLOAT, NO, NO, SWIZ_XYZ1, 0, false),
- FORMAT(R32G32B32_UINT, NO, NO, SWIZ_XYZ1, 0, false),
- FORMAT(R32G32B32_SINT, NO, NO, SWIZ_XYZ1, 0, false),
-
- /* Color, 2 channels */
- FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, true),
- FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, true),
- FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, false),
- FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, false),
-
- FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, true),
- FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, true),
- FORMAT(R16G16_SFLOAT, RG16F, RG16F, SWIZ_XY01, 16, true),
- FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, false),
- FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, false),
-
- FORMAT(R32G32_SFLOAT, RG32F, RG32F, SWIZ_XY01, 32, false),
- FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, false),
- FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, false),
-
- /* Color, 1 channel */
- FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, true),
- FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, true),
- FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, false),
- FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, false),
-
- FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, true),
- FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, true),
- FORMAT(R16_SFLOAT, R16F, R16F, SWIZ_X001, 16, true),
- FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, false),
- FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, false),
-
- FORMAT(R32_SFLOAT, R32F, R32F, SWIZ_X001, 32, false),
- FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, false),
- FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, false),
-
- /* Color, packed */
- FORMAT(B4G4R4A4_UNORM_PACK16, ABGR4444, RGBA4, SWIZ_ZYXW, 16, true), /* Swap RB */
- FORMAT(R5G6B5_UNORM_PACK16, BGR565, RGB565, SWIZ_XYZ1, 16, true),
- FORMAT(R5G5B5A1_UNORM_PACK16, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, true),
- FORMAT(A1R5G5B5_UNORM_PACK16, RGBA5551, A1_RGB5, SWIZ_ZYXW, 16, true), /* Swap RB */
- FORMAT(A8B8G8R8_UNORM_PACK32, RGBA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 UNORM */
- FORMAT(A8B8G8R8_SNORM_PACK32, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true), /* RGBA8 SNORM */
- FORMAT(A8B8G8R8_UINT_PACK32, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, true), /* RGBA8 UINT */
- FORMAT(A8B8G8R8_SINT_PACK32, RGBA8I, RGBA8I, SWIZ_XYZW, 16, true), /* RGBA8 SINT */
- FORMAT(A8B8G8R8_SRGB_PACK32, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 sRGB */
- FORMAT(A2B10G10R10_UNORM_PACK32,RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, true),
- FORMAT(A2B10G10R10_UINT_PACK32, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, true),
- FORMAT(E5B9G9R9_UFLOAT_PACK32, NO, RGB9_E5, SWIZ_XYZ1, 16, true),
- FORMAT(B10G11R11_UFLOAT_PACK32, R11F_G11F_B10F,R11F_G11F_B10F, SWIZ_XYZ1, 16, true),
-
- /* Depth */
- FORMAT(D16_UNORM, D16, DEPTH_COMP16, SWIZ_X001, 32, false),
- FORMAT(D32_SFLOAT, D32F, DEPTH_COMP32F, SWIZ_X001, 32, false),
- FORMAT(X8_D24_UNORM_PACK32, D24S8, DEPTH24_X8, SWIZ_X001, 32, false),
-
- /* Depth + Stencil */
- FORMAT(D24_UNORM_S8_UINT, D24S8, DEPTH24_X8, SWIZ_X001, 32, false),
-
- /* Compressed: ETC2 / EAC */
- FORMAT(ETC2_R8G8B8_UNORM_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true),
- FORMAT(ETC2_R8G8B8_SRGB_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true),
- FORMAT(ETC2_R8G8B8A1_UNORM_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true),
- FORMAT(ETC2_R8G8B8A1_SRGB_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true),
- FORMAT(ETC2_R8G8B8A8_UNORM_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true),
- FORMAT(ETC2_R8G8B8A8_SRGB_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true),
- FORMAT(EAC_R11_UNORM_BLOCK, NO, R11_EAC, SWIZ_X001, 16, true),
- FORMAT(EAC_R11_SNORM_BLOCK, NO, SIGNED_R11_EAC, SWIZ_X001, 16, true),
- FORMAT(EAC_R11G11_UNORM_BLOCK, NO, RG11_EAC, SWIZ_XY01, 16, true),
- FORMAT(EAC_R11G11_SNORM_BLOCK, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, true),
-
- /* Compressed: BC1-3 */
- FORMAT(BC1_RGB_UNORM_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true),
- FORMAT(BC1_RGB_SRGB_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true),
- FORMAT(BC1_RGBA_UNORM_BLOCK, NO, BC1, SWIZ_XYZW, 16, true),
- FORMAT(BC1_RGBA_SRGB_BLOCK, NO, BC1, SWIZ_XYZW, 16, true),
- FORMAT(BC2_UNORM_BLOCK, NO, BC2, SWIZ_XYZW, 16, true),
- FORMAT(BC2_SRGB_BLOCK, NO, BC2, SWIZ_XYZW, 16, true),
- FORMAT(BC3_UNORM_BLOCK, NO, BC3, SWIZ_XYZW, 16, true),
- FORMAT(BC3_SRGB_BLOCK, NO, BC3, SWIZ_XYZW, 16, true),
-};
-
-const struct v3dv_format *
-v3dv_get_format(VkFormat format)
-{
- if (format < ARRAY_SIZE(format_table) && format_table[format].supported)
- return &format_table[format];
- else
- return NULL;
-}
-
-void
-v3dv_get_internal_type_bpp_for_output_format(uint32_t format,
- uint32_t *type,
- uint32_t *bpp)
-{
- switch (format) {
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA8:
- case V3D_OUTPUT_IMAGE_FORMAT_RGB8:
- case V3D_OUTPUT_IMAGE_FORMAT_RG8:
- case V3D_OUTPUT_IMAGE_FORMAT_R8:
- case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444:
- case V3D_OUTPUT_IMAGE_FORMAT_BGR565:
- case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555:
- *type = V3D_INTERNAL_TYPE_8;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I:
- case V3D_OUTPUT_IMAGE_FORMAT_RG8I:
- case V3D_OUTPUT_IMAGE_FORMAT_R8I:
- *type = V3D_INTERNAL_TYPE_8I;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI:
- case V3D_OUTPUT_IMAGE_FORMAT_RG8UI:
- case V3D_OUTPUT_IMAGE_FORMAT_R8UI:
- *type = V3D_INTERNAL_TYPE_8UI;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8:
- case V3D_OUTPUT_IMAGE_FORMAT_SRGB:
- case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2:
- case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F:
- /* Note that sRGB RTs are stored in the tile buffer at 16F,
- * and the conversion to sRGB happens at tilebuffer load/store.
- */
- *type = V3D_INTERNAL_TYPE_16F;
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RG16F:
- case V3D_OUTPUT_IMAGE_FORMAT_R16F:
- *type = V3D_INTERNAL_TYPE_16F;
- /* Use 64bpp to make sure the TLB doesn't throw away the alpha
- * channel before alpha test happens.
- */
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I:
- *type = V3D_INTERNAL_TYPE_16I;
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RG16I:
- case V3D_OUTPUT_IMAGE_FORMAT_R16I:
- *type = V3D_INTERNAL_TYPE_16I;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI:
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI:
- *type = V3D_INTERNAL_TYPE_16UI;
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RG16UI:
- case V3D_OUTPUT_IMAGE_FORMAT_R16UI:
- *type = V3D_INTERNAL_TYPE_16UI;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I:
- *type = V3D_INTERNAL_TYPE_32I;
- *bpp = V3D_INTERNAL_BPP_128;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RG32I:
- *type = V3D_INTERNAL_TYPE_32I;
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_R32I:
- *type = V3D_INTERNAL_TYPE_32I;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI:
- *type = V3D_INTERNAL_TYPE_32UI;
- *bpp = V3D_INTERNAL_BPP_128;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RG32UI:
- *type = V3D_INTERNAL_TYPE_32UI;
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_R32UI:
- *type = V3D_INTERNAL_TYPE_32UI;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F:
- *type = V3D_INTERNAL_TYPE_32F;
- *bpp = V3D_INTERNAL_BPP_128;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_RG32F:
- *type = V3D_INTERNAL_TYPE_32F;
- *bpp = V3D_INTERNAL_BPP_64;
- break;
-
- case V3D_OUTPUT_IMAGE_FORMAT_R32F:
- *type = V3D_INTERNAL_TYPE_32F;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
-
- default:
- /* Provide some default values, as we'll be called at RB
- * creation time, even if an RB with this format isn't supported.
- */
- *type = V3D_INTERNAL_TYPE_8;
- *bpp = V3D_INTERNAL_BPP_32;
- break;
- }
-}
-
-bool
-v3dv_format_supports_tlb_resolve(const struct v3dv_format *format)
-{
- uint32_t type, bpp;
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type, &type, &bpp);
- return type == V3D_INTERNAL_TYPE_8 || type == V3D_INTERNAL_TYPE_16F;
-}
-
const uint8_t *
-v3dv_get_format_swizzle(VkFormat f)
+v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f)
{
- const struct v3dv_format *vf = v3dv_get_format(f);
+ const struct v3dv_format *vf = v3dv_X(device, get_format)(f);
static const uint8_t fallback[] = {0, 1, 2, 3};
if (!vf)
@@ -331,57 +45,18 @@ uint8_t
v3dv_get_tex_return_size(const struct v3dv_format *vf,
bool compare_enable)
{
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
+ return 16;
+
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
+ return 32;
+
if (compare_enable)
return 16;
return vf->return_size;
}
-bool
-v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
- uint32_t tex_format)
-{
- assert(devinfo->ver >= 42);
-
- switch (tex_format) {
- case TEXTURE_DATA_FORMAT_R8:
- case TEXTURE_DATA_FORMAT_R8_SNORM:
- case TEXTURE_DATA_FORMAT_RG8:
- case TEXTURE_DATA_FORMAT_RG8_SNORM:
- case TEXTURE_DATA_FORMAT_RGBA8:
- case TEXTURE_DATA_FORMAT_RGBA8_SNORM:
- case TEXTURE_DATA_FORMAT_RGB565:
- case TEXTURE_DATA_FORMAT_RGBA4:
- case TEXTURE_DATA_FORMAT_RGB5_A1:
- case TEXTURE_DATA_FORMAT_RGB10_A2:
- case TEXTURE_DATA_FORMAT_R16:
- case TEXTURE_DATA_FORMAT_R16_SNORM:
- case TEXTURE_DATA_FORMAT_RG16:
- case TEXTURE_DATA_FORMAT_RG16_SNORM:
- case TEXTURE_DATA_FORMAT_RGBA16:
- case TEXTURE_DATA_FORMAT_RGBA16_SNORM:
- case TEXTURE_DATA_FORMAT_R16F:
- case TEXTURE_DATA_FORMAT_RG16F:
- case TEXTURE_DATA_FORMAT_RGBA16F:
- case TEXTURE_DATA_FORMAT_R11F_G11F_B10F:
- case TEXTURE_DATA_FORMAT_R4:
- case TEXTURE_DATA_FORMAT_RGB9_E5:
- case TEXTURE_DATA_FORMAT_R32F:
- case TEXTURE_DATA_FORMAT_RG32F:
- case TEXTURE_DATA_FORMAT_RGBA32F:
- case TEXTURE_DATA_FORMAT_RGB8_ETC2:
- case TEXTURE_DATA_FORMAT_RGB8_PUNCHTHROUGH_ALPHA1:
- case TEXTURE_DATA_FORMAT_RGBA8_ETC2_EAC:
- case TEXTURE_DATA_FORMAT_R11_EAC:
- case TEXTURE_DATA_FORMAT_SIGNED_R11_EAC:
- case TEXTURE_DATA_FORMAT_RG11_EAC:
- case TEXTURE_DATA_FORMAT_SIGNED_RG11_EAC:
- return true;
- default:
- return false;
- }
-}
-
/* Some cases of transfer operations are raw data copies that don't depend
* on the semantics of the pixel format (no pixel format conversions are
* involved). In these cases, it is safe to choose any format supported by
@@ -389,7 +64,7 @@ v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
* TFU paths with formats that are not TFU supported otherwise.
*/
const struct v3dv_format *
-v3dv_get_compatible_tfu_format(const struct v3d_device_info *devinfo,
+v3dv_get_compatible_tfu_format(struct v3dv_device *device,
uint32_t bpp,
VkFormat *out_vk_format)
{
@@ -406,32 +81,15 @@ v3dv_get_compatible_tfu_format(const struct v3d_device_info *devinfo,
if (out_vk_format)
*out_vk_format = vk_format;
- const struct v3dv_format *format = v3dv_get_format(vk_format);
- assert(v3dv_tfu_supports_tex_format(devinfo, format->tex_type));
+ const struct v3dv_format *format = v3dv_X(device, get_format)(vk_format);
+ assert(v3dv_X(device, tfu_supports_tex_format)(format->tex_type));
return format;
}
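The body elided between the hunks just maps a texel size to any TFU-capable format of that size; since these copies are raw, only the bpp matters. The mapping is roughly the following (format choices illustrative):

   switch (bpp) {
   case 16: vk_format = VK_FORMAT_R32G32B32A32_UINT; break;
   case 8:  vk_format = VK_FORMAT_R16G16B16A16_UINT; break;
   case 4:  vk_format = VK_FORMAT_R32_UINT;          break;
   case 2:  vk_format = VK_FORMAT_R16_UINT;          break;
   case 1:  vk_format = VK_FORMAT_R8_UINT;           break;
   default: unreachable("unsupported bpp");
   }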
-static bool
-format_supports_blending(const struct v3dv_format *format)
-{
- /* Hardware blending is only supported on render targets that are configured
- * 4x8-bit unorm, 2x16-bit float or 4x16-bit float.
- */
- uint32_t type, bpp;
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type, &type, &bpp);
- switch (type) {
- case V3D_INTERNAL_TYPE_8:
- return bpp == V3D_INTERNAL_BPP_32;
- case V3D_INTERNAL_TYPE_16F:
- return bpp == V3D_INTERNAL_BPP_32 || bpp == V3D_INTERNAL_BPP_64;
- default:
- return false;
- }
-}
-
static VkFormatFeatureFlags
-image_format_features(VkFormat vk_format,
+image_format_features(struct v3dv_physical_device *pdevice,
+ VkFormat vk_format,
const struct v3dv_format *v3dv_format,
VkImageTiling tiling)
{
@@ -476,7 +134,7 @@ image_format_features(VkFormat vk_format,
if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (format_supports_blending(v3dv_format))
+ if (v3dv_X(pdevice, format_supports_blending)(v3dv_format))
flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
} else if (aspects & zs_aspects) {
flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT |
@@ -556,33 +214,35 @@ buffer_format_features(VkFormat vk_format, const struct v3dv_format *v3dv_format
}
bool
-v3dv_buffer_format_supports_features(VkFormat vk_format,
+v3dv_buffer_format_supports_features(struct v3dv_device *device,
+ VkFormat vk_format,
VkFormatFeatureFlags features)
{
- const struct v3dv_format *v3dv_format = v3dv_get_format(vk_format);
+ const struct v3dv_format *v3dv_format = v3dv_X(device, get_format)(vk_format);
const VkFormatFeatureFlags supported =
buffer_format_features(vk_format, v3dv_format);
return (supported & features) == features;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice,
VkFormat format,
VkFormatProperties* pFormatProperties)
{
- const struct v3dv_format *v3dv_format = v3dv_get_format(format);
+ V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, physicalDevice);
+ const struct v3dv_format *v3dv_format = v3dv_X(pdevice, get_format)(format);
*pFormatProperties = (VkFormatProperties) {
.linearTilingFeatures =
- image_format_features(format, v3dv_format, VK_IMAGE_TILING_LINEAR),
+ image_format_features(pdevice, format, v3dv_format, VK_IMAGE_TILING_LINEAR),
.optimalTilingFeatures =
- image_format_features(format, v3dv_format, VK_IMAGE_TILING_OPTIMAL),
+ image_format_features(pdevice, format, v3dv_format, VK_IMAGE_TILING_OPTIMAL),
.bufferFeatures =
buffer_format_features(format, v3dv_format),
};
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
VkFormat format,
VkFormatProperties2 *pFormatProperties)
@@ -600,12 +260,16 @@ v3dv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
vk_outarray_append(&out, mod_props) {
mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
mod_props->drmFormatModifierPlaneCount = 1;
+ mod_props->drmFormatModifierTilingFeatures =
+ pFormatProperties->formatProperties.linearTilingFeatures;
}
}
if (pFormatProperties->formatProperties.optimalTilingFeatures) {
vk_outarray_append(&out, mod_props) {
mod_props->drmFormatModifier = DRM_FORMAT_MOD_BROADCOM_UIF;
mod_props->drmFormatModifierPlaneCount = 1;
+ mod_props->drmFormatModifierTilingFeatures =
+ pFormatProperties->formatProperties.optimalTilingFeatures;
}
}
break;
@@ -625,12 +289,24 @@ get_image_format_properties(
VkImageFormatProperties *pImageFormatProperties,
VkSamplerYcbcrConversionImageFormatProperties *pYcbcrImageFormatProperties)
{
- const struct v3dv_format *v3dv_format = v3dv_get_format(info->format);
+ const struct v3dv_format *v3dv_format = v3dv_X(physical_device, get_format)(info->format);
VkFormatFeatureFlags format_feature_flags =
- image_format_features(info->format, v3dv_format, tiling);
+ image_format_features(physical_device, info->format, v3dv_format, tiling);
if (!format_feature_flags)
goto unsupported;
+ /* This flag allows users to create uncompressed views of compressed images.
+ * The hardware does not support this natively, so the driver has to lie
+ * when programming the texture state to make the hardware sample the
+ * uncompressed view correctly, and even then there are issues when running
+ * on real hardware.
+ *
+ * See https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11336
+ * for details.
+ */
+ if (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)
+ goto unsupported;
+
if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
goto unsupported;
@@ -775,7 +451,7 @@ static const VkExternalMemoryProperties prime_fd_props = {
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPhysicalDeviceImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
@@ -801,7 +477,7 @@ v3dv_GetPhysicalDeviceImageFormatProperties(
pImageFormatProperties, NULL);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceImageFormatInfo2 *base_info,
VkImageFormatProperties2 *base_props)
@@ -875,7 +551,7 @@ done:
return result;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceSparseImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
@@ -889,7 +565,7 @@ v3dv_GetPhysicalDeviceSparseImageFormatProperties(
*pPropertyCount = 0;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceSparseImageFormatProperties2(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
@@ -899,7 +575,7 @@ v3dv_GetPhysicalDeviceSparseImageFormatProperties2(
*pPropertyCount = 0;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceExternalBufferProperties(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_image.c b/lib/mesa/src/broadcom/vulkan/v3dv_image.c
index 2935d7e8b..d03814d98 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_image.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_image.c
@@ -23,7 +23,6 @@
#include "v3dv_private.h"
-#include "broadcom/cle/v3dx_pack.h"
#include "drm-uapi/drm_fourcc.h"
#include "util/format/u_format.h"
#include "util/u_math.h"
@@ -77,9 +76,9 @@ v3d_setup_slices(struct v3dv_image *image)
{
assert(image->cpp > 0);
- uint32_t width = image->extent.width;
- uint32_t height = image->extent.height;
- uint32_t depth = image->extent.depth;
+ uint32_t width = image->vk.extent.width;
+ uint32_t height = image->vk.extent.height;
+ uint32_t depth = image->vk.extent.depth;
/* Note that power-of-two padding is based on level 1. These are not
* equivalent to just util_next_power_of_two(dimension), because at a
@@ -95,21 +94,21 @@ v3d_setup_slices(struct v3dv_image *image)
uint32_t uif_block_w = utile_w * 2;
uint32_t uif_block_h = utile_h * 2;
- uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
- uint32_t block_height = vk_format_get_blockheight(image->vk_format);
+ uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
+ uint32_t block_height = vk_format_get_blockheight(image->vk.format);
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT ||
- image->samples == VK_SAMPLE_COUNT_4_BIT);
- bool msaa = image->samples != VK_SAMPLE_COUNT_1_BIT;
+ assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT ||
+ image->vk.samples == VK_SAMPLE_COUNT_4_BIT);
+ bool msaa = image->vk.samples != VK_SAMPLE_COUNT_1_BIT;
bool uif_top = msaa;
- assert(image->array_size > 0);
+ assert(image->vk.array_layers > 0);
assert(depth > 0);
- assert(image->levels >= 1);
+ assert(image->vk.mip_levels >= 1);
uint32_t offset = 0;
- for (int32_t i = image->levels - 1; i >= 0; i--) {
+ for (int32_t i = image->vk.mip_levels - 1; i >= 0; i--) {
struct v3d_resource_slice *slice = &image->slices[i];
uint32_t level_width, level_height, level_depth;
@@ -135,21 +134,21 @@ v3d_setup_slices(struct v3dv_image *image)
level_height = DIV_ROUND_UP(level_height, block_height);
if (!image->tiled) {
- slice->tiling = VC5_TILING_RASTER;
- if (image->type == VK_IMAGE_TYPE_1D)
+ slice->tiling = V3D_TILING_RASTER;
+ if (image->vk.image_type == VK_IMAGE_TYPE_1D)
level_width = align(level_width, 64 / image->cpp);
} else {
if ((i != 0 || !uif_top) &&
(level_width <= utile_w || level_height <= utile_h)) {
- slice->tiling = VC5_TILING_LINEARTILE;
+ slice->tiling = V3D_TILING_LINEARTILE;
level_width = align(level_width, utile_w);
level_height = align(level_height, utile_h);
} else if ((i != 0 || !uif_top) && level_width <= uif_block_w) {
- slice->tiling = VC5_TILING_UBLINEAR_1_COLUMN;
+ slice->tiling = V3D_TILING_UBLINEAR_1_COLUMN;
level_width = align(level_width, uif_block_w);
level_height = align(level_height, uif_block_h);
} else if ((i != 0 || !uif_top) && level_width <= 2 * uif_block_w) {
- slice->tiling = VC5_TILING_UBLINEAR_2_COLUMN;
+ slice->tiling = V3D_TILING_UBLINEAR_2_COLUMN;
level_width = align(level_width, 2 * uif_block_w);
level_height = align(level_height, uif_block_h);
} else {
@@ -167,10 +166,10 @@ v3d_setup_slices(struct v3dv_image *image)
* perfectly misaligned.
*/
if ((level_height / uif_block_h) %
- (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE) == 0) {
- slice->tiling = VC5_TILING_UIF_XOR;
+ (V3D_PAGE_CACHE_SIZE / V3D_UIFBLOCK_ROW_SIZE) == 0) {
+ slice->tiling = V3D_TILING_UIF_XOR;
} else {
- slice->tiling = VC5_TILING_UIF_NO_XOR;
+ slice->tiling = V3D_TILING_UIF_NO_XOR;
}
}
}
@@ -178,8 +177,8 @@ v3d_setup_slices(struct v3dv_image *image)
slice->offset = offset;
slice->stride = level_width * image->cpp;
slice->padded_height = level_height;
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
slice->padded_height_of_output_image_in_uif_blocks =
slice->padded_height / (2 * v3d_utile_height(image->cpp));
}
@@ -195,7 +194,7 @@ v3d_setup_slices(struct v3dv_image *image)
if (i == 1 &&
level_width > 4 * uif_block_w &&
level_height > PAGE_CACHE_MINUS_1_5_UB_ROWS * uif_block_h) {
- slice_total_size = align(slice_total_size, VC5_UIFCFG_PAGE_SIZE);
+ slice_total_size = align(slice_total_size, V3D_UIFCFG_PAGE_SIZE);
}
offset += slice_total_size;
@@ -211,13 +210,12 @@ v3d_setup_slices(struct v3dv_image *image)
*
* We additionally align to 4k, which improves UIF XOR performance.
*/
- image->alignment =
- image->tiling == VK_IMAGE_TILING_LINEAR ? image->cpp : 4096;
+ image->alignment = image->tiled ? 4096 : image->cpp;
uint32_t align_offset =
align(image->slices[0].offset, image->alignment) - image->slices[0].offset;
if (align_offset) {
image->size += align_offset;
- for (int i = 0; i < image->levels; i++)
+ for (int i = 0; i < image->vk.mip_levels; i++)
image->slices[i].offset += align_offset;
}
@@ -225,10 +223,10 @@ v3d_setup_slices(struct v3dv_image *image)
* one full mipmap tree to the next (64b aligned). For 3D textures,
* we need to program the stride between slices of miplevel 0.
*/
- if (image->type != VK_IMAGE_TYPE_3D) {
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
image->cube_map_stride =
align(image->slices[0].offset + image->slices[0].size, 64);
- image->size += image->cube_map_stride * (image->array_size - 1);
+ image->size += image->cube_map_stride * (image->vk.array_layers - 1);
} else {
image->cube_map_stride = image->slices[0].size;
}
@@ -239,29 +237,23 @@ v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer
{
const struct v3d_resource_slice *slice = &image->slices[level];
- if (image->type == VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D)
return image->mem_offset + slice->offset + layer * slice->size;
else
return image->mem_offset + slice->offset + layer * image->cube_map_stride;
}
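v3dv_layer_offset() encodes the two layouts set up in v3d_setup_slices(): array/cube layers are whole mip trees strided by cube_map_stride, while 3D "layers" are consecutive slice-sized planes within a level. A worked example with made-up numbers:

   /* 2D array, layer 3 of level 2, mem_offset = 0,
    * slices[2].offset = 0x1800, cube_map_stride = 0x20000:
    *   offset = 0 + 0x1800 + 3 * 0x20000 = 0x61800
    * 3D image, z-slice 3 of level 2, slices[2].size = 0x4000:
    *   offset = 0 + 0x1800 + 3 * 0x4000  = 0xd800 */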
-VkResult
-v3dv_CreateImage(VkDevice _device,
- const VkImageCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImage *pImage)
+static VkResult
+create_image(struct v3dv_device *device,
+ const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkImage *pImage)
{
- V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_image *image = NULL;
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
-
- v3dv_assert(pCreateInfo->mipLevels > 0);
- v3dv_assert(pCreateInfo->arrayLayers > 0);
- v3dv_assert(pCreateInfo->samples > 0);
- v3dv_assert(pCreateInfo->extent.width > 0);
- v3dv_assert(pCreateInfo->extent.height > 0);
- v3dv_assert(pCreateInfo->extent.depth > 0);
+ image = vk_image_create(&device->vk, pCreateInfo, pAllocator, sizeof(*image));
+ if (image == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* When using the simulator the WSI common code will see that our
* driver wsi device doesn't match the display device and because of that
@@ -272,68 +264,60 @@ v3dv_CreateImage(VkDevice _device,
* As a result, on that path, swapchain images do not have any special
* requirements and are not created with the pNext structs below.
*/
+ VkImageTiling tiling = pCreateInfo->tiling;
uint64_t modifier = DRM_FORMAT_MOD_INVALID;
- if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
const VkImageDrmFormatModifierListCreateInfoEXT *mod_info =
vk_find_struct_const(pCreateInfo->pNext,
IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
- assert(mod_info);
- for (uint32_t i = 0; i < mod_info->drmFormatModifierCount; i++) {
- switch (mod_info->pDrmFormatModifiers[i]) {
- case DRM_FORMAT_MOD_LINEAR:
- if (modifier == DRM_FORMAT_MOD_INVALID)
- modifier = DRM_FORMAT_MOD_LINEAR;
- break;
- case DRM_FORMAT_MOD_BROADCOM_UIF:
- modifier = DRM_FORMAT_MOD_BROADCOM_UIF;
- break;
+ const VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
+ assert(mod_info || explicit_mod_info);
+
+ if (mod_info) {
+ for (uint32_t i = 0; i < mod_info->drmFormatModifierCount; i++) {
+ switch (mod_info->pDrmFormatModifiers[i]) {
+ case DRM_FORMAT_MOD_LINEAR:
+ if (modifier == DRM_FORMAT_MOD_INVALID)
+ modifier = DRM_FORMAT_MOD_LINEAR;
+ break;
+ case DRM_FORMAT_MOD_BROADCOM_UIF:
+ modifier = DRM_FORMAT_MOD_BROADCOM_UIF;
+ break;
+ }
}
+ } else {
+ modifier = explicit_mod_info->drmFormatModifier;
}
- } else {
- const struct wsi_image_create_info *wsi_info =
- vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
- if (wsi_info)
- modifier = DRM_FORMAT_MOD_LINEAR;
- }
-
- /* 1D and 1D_ARRAY textures are always raster-order */
- VkImageTiling tiling;
- if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D)
- tiling = VK_IMAGE_TILING_LINEAR;
- else if (modifier == DRM_FORMAT_MOD_INVALID)
- tiling = pCreateInfo->tiling;
- else if (modifier == DRM_FORMAT_MOD_BROADCOM_UIF)
- tiling = VK_IMAGE_TILING_OPTIMAL;
- else
+ assert(modifier == DRM_FORMAT_MOD_LINEAR ||
+ modifier == DRM_FORMAT_MOD_BROADCOM_UIF);
+ } else if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
+ image->vk.wsi_legacy_scanout) {
tiling = VK_IMAGE_TILING_LINEAR;
+ }
- const struct v3dv_format *format = v3dv_get_format(pCreateInfo->format);
+ const struct v3dv_format *format =
+ v3dv_X(device, get_format)(pCreateInfo->format);
v3dv_assert(format != NULL && format->supported);
- image = vk_object_zalloc(&device->vk, pAllocator, sizeof(*image),
- VK_OBJECT_TYPE_IMAGE);
- if (!image)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
assert(pCreateInfo->samples == VK_SAMPLE_COUNT_1_BIT ||
pCreateInfo->samples == VK_SAMPLE_COUNT_4_BIT);
- image->type = pCreateInfo->imageType;
- image->extent = pCreateInfo->extent;
- image->vk_format = pCreateInfo->format;
image->format = format;
- image->aspects = vk_format_aspects(image->vk_format);
- image->levels = pCreateInfo->mipLevels;
- image->array_size = pCreateInfo->arrayLayers;
- image->samples = pCreateInfo->samples;
- image->usage = pCreateInfo->usage;
- image->flags = pCreateInfo->flags;
+ image->cpp = vk_format_get_blocksize(image->vk.format);
+ image->tiled = tiling == VK_IMAGE_TILING_OPTIMAL ||
+ (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
+ modifier != DRM_FORMAT_MOD_LINEAR);
- image->drm_format_mod = modifier;
- image->tiling = tiling;
- image->tiled = tiling == VK_IMAGE_TILING_OPTIMAL;
+ image->vk.tiling = tiling;
+ image->vk.drm_format_mod = modifier;
- image->cpp = vk_format_get_blocksize(image->vk_format);
+ /* Our meta paths can create image views with compatible formats for any
+ * image, so always set this flag to keep the common Vulkan image code
+ * happy.
+ */
+ image->vk.create_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
v3d_setup_slices(image);
@@ -342,7 +326,71 @@ v3dv_CreateImage(VkDevice _device,
return VK_SUCCESS;
}
-void
+static VkResult
+create_image_from_swapchain(struct v3dv_device *device,
+ const VkImageCreateInfo *pCreateInfo,
+ const VkImageSwapchainCreateInfoKHR *swapchain_info,
+ const VkAllocationCallbacks *pAllocator,
+ VkImage *pImage)
+{
+ struct v3dv_image *swapchain_image =
+ v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain, 0);
+ assert(swapchain_image);
+
+ VkImageCreateInfo local_create_info = *pCreateInfo;
+ local_create_info.pNext = NULL;
+
+ /* Added by wsi code. */
+ local_create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+
+ /* The spec requires TILING_OPTIMAL as input, but the swapchain image may
+ * privately use a different tiling. See spec anchor
+ * #swapchain-wsi-image-create-info.
+ */
+ assert(local_create_info.tiling == VK_IMAGE_TILING_OPTIMAL);
+ local_create_info.tiling = swapchain_image->vk.tiling;
+
+ VkImageDrmFormatModifierListCreateInfoEXT local_modifier_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
+ .drmFormatModifierCount = 1,
+ .pDrmFormatModifiers = &swapchain_image->vk.drm_format_mod,
+ };
+
+ if (swapchain_image->vk.drm_format_mod != DRM_FORMAT_MOD_INVALID)
+ __vk_append_struct(&local_create_info, &local_modifier_info);
+
+ assert(swapchain_image->vk.image_type == local_create_info.imageType);
+ assert(swapchain_image->vk.format == local_create_info.format);
+ assert(swapchain_image->vk.extent.width == local_create_info.extent.width);
+ assert(swapchain_image->vk.extent.height == local_create_info.extent.height);
+ assert(swapchain_image->vk.extent.depth == local_create_info.extent.depth);
+ assert(swapchain_image->vk.array_layers == local_create_info.arrayLayers);
+ assert(swapchain_image->vk.samples == local_create_info.samples);
+ assert(swapchain_image->vk.tiling == local_create_info.tiling);
+ assert((swapchain_image->vk.usage & local_create_info.usage) ==
+ local_create_info.usage);
+
+ return create_image(device, &local_create_info, pAllocator, pImage);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_CreateImage(VkDevice _device,
+ const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkImage *pImage)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+
+ const VkImageSwapchainCreateInfoKHR *swapchain_info =
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
+ if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE)
+ return create_image_from_swapchain(device, pCreateInfo, swapchain_info,
+ pAllocator, pImage);
+
+ return create_image(device, pCreateInfo, pAllocator, pImage);
+}
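create_image_from_swapchain() rewrites the create info so the new image matches the tiling/modifier the swapchain image actually uses, which is what makes swapchain image aliasing work. App-side sketch; swapchain, surface_format, width and height are assumptions:

   VkImageSwapchainCreateInfoKHR sc_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_SWAPCHAIN_CREATE_INFO_KHR,
      .swapchain = swapchain,
   };
   VkImageCreateInfo ci = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .pNext = &sc_info,
      .imageType = VK_IMAGE_TYPE_2D,
      .format = surface_format,          /* must match the swapchain */
      .extent = { width, height, 1 },
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_OPTIMAL, /* required as input here */
      .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
   };
   VkImage image;
   vkCreateImage(device, &ci, NULL, &image);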
+
+VKAPI_ATTR void VKAPI_CALL
v3dv_GetImageSubresourceLayout(VkDevice device,
VkImage _image,
const VkImageSubresource *subresource,
@@ -358,7 +406,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device,
layout->depthPitch = image->cube_map_stride;
layout->arrayPitch = image->cube_map_stride;
- if (image->type != VK_IMAGE_TYPE_3D) {
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
layout->size = slice->size;
} else {
/* For 3D images, the size of the slice represents the size of a 2D slice
@@ -368,7 +416,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device,
* arranged in memory from last to first).
*/
if (subresource->mipLevel == 0) {
- layout->size = slice->size * image->extent.depth;
+ layout->size = slice->size * image->vk.extent.depth;
} else {
const struct v3d_resource_slice *prev_slice =
&image->slices[subresource->mipLevel - 1];
@@ -377,23 +425,7 @@ v3dv_GetImageSubresourceLayout(VkDevice device,
}
}
-VkResult
-v3dv_GetImageDrmFormatModifierPropertiesEXT(
- VkDevice device,
- VkImage _image,
- VkImageDrmFormatModifierPropertiesEXT *pProperties)
-{
- V3DV_FROM_HANDLE(v3dv_image, image, _image);
-
- assert(pProperties->sType ==
- VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT);
-
- pProperties->drmFormatModifier = image->drm_format_mod;
-
- return VK_SUCCESS;
-}
-
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyImage(VkDevice _device,
VkImage _image,
const VkAllocationCallbacks* pAllocator)
@@ -404,7 +436,7 @@ v3dv_DestroyImage(VkDevice _device,
if (image == NULL)
return;
- vk_object_free(&device->vk, pAllocator, image);
+ vk_image_destroy(&device->vk, pAllocator, &image->vk);
}
VkImageViewType
@@ -419,138 +451,10 @@ v3dv_image_type_to_view_type(VkImageType type)
}
}
-/*
- * This method translates pipe_swizzle to the swizzle values used at the
- * packet TEXTURE_SHADER_STATE
- *
- * FIXME: C&P from v3d, common place?
- */
-static uint32_t
-translate_swizzle(unsigned char pipe_swizzle)
-{
- switch (pipe_swizzle) {
- case PIPE_SWIZZLE_0:
- return 0;
- case PIPE_SWIZZLE_1:
- return 1;
- case PIPE_SWIZZLE_X:
- case PIPE_SWIZZLE_Y:
- case PIPE_SWIZZLE_Z:
- case PIPE_SWIZZLE_W:
- return 2 + pipe_swizzle;
- default:
- unreachable("unknown swizzle");
- }
-}
-
-/*
- * Packs and ensures a BO for the shader state (the latter can be temporary).
- */
-static void
-pack_texture_shader_state_helper(struct v3dv_device *device,
- struct v3dv_image_view *image_view,
- bool for_cube_map_array_storage)
-{
- assert(!for_cube_map_array_storage ||
- image_view->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY);
- const uint32_t index = for_cube_map_array_storage ? 1 : 0;
-
- assert(image_view->image);
- const struct v3dv_image *image = image_view->image;
-
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT ||
- image->samples == VK_SAMPLE_COUNT_4_BIT);
- const uint32_t msaa_scale = image->samples == VK_SAMPLE_COUNT_1_BIT ? 1 : 2;
-
- v3dv_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) {
-
- tex.level_0_is_strictly_uif =
- (image->slices[0].tiling == VC5_TILING_UIF_XOR ||
- image->slices[0].tiling == VC5_TILING_UIF_NO_XOR);
-
- tex.level_0_xor_enable = (image->slices[0].tiling == VC5_TILING_UIF_XOR);
-
- if (tex.level_0_is_strictly_uif)
- tex.level_0_ub_pad = image->slices[0].ub_pad;
-
- /* FIXME: v3d never sets uif_xor_disable, but uses it in the following
- * check, so let's set the default value.
- */
- tex.uif_xor_disable = false;
- if (tex.uif_xor_disable ||
- tex.level_0_is_strictly_uif) {
- tex.extended = true;
- }
-
- tex.base_level = image_view->base_level;
- tex.max_level = image_view->max_level;
-
- tex.swizzle_r = translate_swizzle(image_view->swizzle[0]);
- tex.swizzle_g = translate_swizzle(image_view->swizzle[1]);
- tex.swizzle_b = translate_swizzle(image_view->swizzle[2]);
- tex.swizzle_a = translate_swizzle(image_view->swizzle[3]);
-
- tex.texture_type = image_view->format->tex_type;
-
- if (image->type == VK_IMAGE_TYPE_3D) {
- tex.image_depth = image->extent.depth;
- } else {
- tex.image_depth = (image_view->last_layer - image_view->first_layer) + 1;
- }
-
- /* Empirical testing with CTS shows that when we are sampling from cube
- * arrays we want to set image depth to layers / 6, but not when doing
- * image load/store.
- */
- if (image_view->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY &&
- !for_cube_map_array_storage) {
- assert(tex.image_depth % 6 == 0);
- tex.image_depth /= 6;
- }
-
- tex.image_height = image->extent.height * msaa_scale;
- tex.image_width = image->extent.width * msaa_scale;
-
- /* On 4.x, the height of a 1D texture is redefined to be the
- * upper 14 bits of the width (which is only usable with txf).
- */
- if (image->type == VK_IMAGE_TYPE_1D) {
- tex.image_height = tex.image_width >> 14;
- }
- tex.image_width &= (1 << 14) - 1;
- tex.image_height &= (1 << 14) - 1;
-
- tex.array_stride_64_byte_aligned = image->cube_map_stride / 64;
-
- tex.srgb = vk_format_is_srgb(image_view->vk_format);
-
- /* At this point we don't have the job. That's the reason the first
- * parameter is NULL, to avoid a crash when cl_pack_emit_reloc tries to
- * add the bo to the job. This also means that we need to add manually
- * the image bo to the job using the texture.
- */
- const uint32_t base_offset =
- image->mem->bo->offset +
- v3dv_layer_offset(image, 0, image_view->first_layer);
- tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
- }
-}
-
-static void
-pack_texture_shader_state(struct v3dv_device *device,
- struct v3dv_image_view *iview)
-{
- pack_texture_shader_state_helper(device, iview, false);
- if (iview->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
- pack_texture_shader_state_helper(device, iview, true);
-}
-
static enum pipe_swizzle
-vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle comp,
- VkComponentSwizzle swz)
+vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle swz)
{
- if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
- swz = comp;
+ assert(swz != VK_COMPONENT_SWIZZLE_IDENTITY);
switch (swz) {
case VK_COMPONENT_SWIZZLE_ZERO:
@@ -570,7 +474,7 @@ vk_component_mapping_to_pipe_swizzle(VkComponentSwizzle comp,
};
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateImageView(VkDevice _device,
const VkImageViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -580,56 +484,15 @@ v3dv_CreateImageView(VkDevice _device,
V3DV_FROM_HANDLE(v3dv_image, image, pCreateInfo->image);
struct v3dv_image_view *iview;
- iview = vk_object_zalloc(&device->vk, pAllocator, sizeof(*iview),
- VK_OBJECT_TYPE_IMAGE_VIEW);
+ iview = vk_image_view_create(&device->vk, pCreateInfo, pAllocator,
+ sizeof(*iview));
if (iview == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
- assert(range->layerCount > 0);
- assert(range->baseMipLevel < image->levels);
-
-#ifdef DEBUG
- switch (image->type) {
- case VK_IMAGE_TYPE_1D:
- case VK_IMAGE_TYPE_2D:
- assert(range->baseArrayLayer + v3dv_layer_count(image, range) - 1 <=
- image->array_size);
- break;
- case VK_IMAGE_TYPE_3D:
- assert(range->baseArrayLayer + v3dv_layer_count(image, range) - 1
- <= u_minify(image->extent.depth, range->baseMipLevel));
- /* VK_KHR_maintenance1 */
- assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D ||
- ((image->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) &&
- range->levelCount == 1 && range->layerCount == 1));
- assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D_ARRAY ||
- ((image->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) &&
- range->levelCount == 1));
- break;
- default:
- unreachable("bad VkImageType");
- }
-#endif
-
- iview->image = image;
- iview->aspects = range->aspectMask;
- iview->type = pCreateInfo->viewType;
-
- iview->base_level = range->baseMipLevel;
- iview->max_level = iview->base_level + v3dv_level_count(image, range) - 1;
- iview->extent = (VkExtent3D) {
- .width = u_minify(image->extent.width , iview->base_level),
- .height = u_minify(image->extent.height, iview->base_level),
- .depth = u_minify(image->extent.depth , iview->base_level),
- };
-
- iview->first_layer = range->baseArrayLayer;
- iview->last_layer = range->baseArrayLayer +
- v3dv_layer_count(image, range) - 1;
- iview->offset =
- v3dv_layer_offset(image, iview->base_level, iview->first_layer);
+ iview->offset = v3dv_layer_offset(image, iview->vk.base_mip_level,
+ iview->vk.base_array_layer);
/* If we have D24S8 format but the view only selects the stencil aspect
* we want to re-interpret the format as RGBA8_UINT, then map our stencil
@@ -653,44 +516,40 @@ v3dv_CreateImageView(VkDevice _device,
* better to reimplement the latter using vk component
*/
image_view_swizzle[0] =
- vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_R,
- pCreateInfo->components.r);
+ vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.r);
image_view_swizzle[1] =
- vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_G,
- pCreateInfo->components.g);
+ vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.g);
image_view_swizzle[2] =
- vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_B,
- pCreateInfo->components.b);
+ vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.b);
image_view_swizzle[3] =
- vk_component_mapping_to_pipe_swizzle(VK_COMPONENT_SWIZZLE_A,
- pCreateInfo->components.a);
+ vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle.a);
}
- iview->vk_format = format;
- iview->format = v3dv_get_format(format);
+ iview->vk.format = format;
+ iview->format = v3dv_X(device, get_format)(format);
assert(iview->format && iview->format->supported);
- if (vk_format_is_depth_or_stencil(iview->vk_format)) {
- iview->internal_type = v3dv_get_internal_depth_type(iview->vk_format);
+ if (vk_format_is_depth_or_stencil(iview->vk.format)) {
+ iview->internal_type =
+ v3dv_X(device, get_internal_depth_type)(iview->vk.format);
} else {
- v3dv_get_internal_type_bpp_for_output_format(iview->format->rt_type,
- &iview->internal_type,
- &iview->internal_bpp);
+ v3dv_X(device, get_internal_type_bpp_for_output_format)
+ (iview->format->rt_type, &iview->internal_type, &iview->internal_bpp);
}
- const uint8_t *format_swizzle = v3dv_get_format_swizzle(format);
+ const uint8_t *format_swizzle = v3dv_get_format_swizzle(device, format);
util_format_compose_swizzles(format_swizzle, image_view_swizzle,
iview->swizzle);
iview->swap_rb = iview->swizzle[0] == PIPE_SWIZZLE_Z;
- pack_texture_shader_state(device, iview);
+ v3dv_X(device, pack_texture_shader_state)(device, iview);
*pView = v3dv_image_view_to_handle(iview);
return VK_SUCCESS;
}
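A standalone sketch of the stencil-aspect re-interpretation mentioned above, under the assumption stated in the comment that a stencil-only view of D24S8 is mapped to RGBA8_UINT (the helper name is hypothetical):

#include <vulkan/vulkan.h>

static VkFormat
stencil_view_format(VkFormat image_format, VkImageAspectFlags aspects)
{
   /* A stencil-only view of D24S8 is read through a color format so the
    * shader can fetch the stencil byte directly.
    */
   if (image_format == VK_FORMAT_D24_UNORM_S8_UINT &&
       aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
      return VK_FORMAT_R8G8B8A8_UINT;
   return image_format;
}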
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyImageView(VkDevice _device,
VkImageView imageView,
const VkAllocationCallbacks* pAllocator)
@@ -701,52 +560,10 @@ v3dv_DestroyImageView(VkDevice _device,
if (image_view == NULL)
return;
- vk_object_free(&device->vk, pAllocator, image_view);
-}
-
-static void
-pack_texture_shader_state_from_buffer_view(struct v3dv_device *device,
- struct v3dv_buffer_view *buffer_view)
-{
- assert(buffer_view->buffer);
- const struct v3dv_buffer *buffer = buffer_view->buffer;
-
- v3dv_pack(buffer_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
- tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
- tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
- tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
- tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
-
- tex.image_depth = 1;
-
-      /* On 4.x, the height of a 1D texture is redefined to be the upper 14
-       * bits of the width (which is only usable with txf). In other words, we
-       * provide a 28-bit size field, but split across the usual 14-bit
-       * height/width fields.
-       */
- tex.image_width = buffer_view->num_elements;
- tex.image_height = tex.image_width >> 14;
- tex.image_width &= (1 << 14) - 1;
- tex.image_height &= (1 << 14) - 1;
-
- tex.texture_type = buffer_view->format->tex_type;
- tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
-
-      /* At this point we don't have the job. That's why the first parameter
-       * is NULL: it avoids a crash when cl_pack_emit_reloc tries to add the
-       * bo to the job. It also means that we need to add the image bo to the
-       * job manually when using the texture.
-       */
- const uint32_t base_offset =
- buffer->mem->bo->offset +
- buffer->mem_offset +
- buffer_view->offset;
-
- tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
- }
+ vk_image_view_destroy(&device->vk, pAllocator, &image_view->vk);
}
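The 28-bit size split used for texel buffers (see the removed pack_texture_shader_state_from_buffer_view above and its v3dx successor) works out as in this standalone example:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* A buffer view with 100000 elements splits into 14-bit width/height
    * fields: 100000 = 6 * 16384 + 1696.
    */
   uint32_t num_elements = 100000;
   uint32_t width  = num_elements & ((1 << 14) - 1); /* 1696 */
   uint32_t height = num_elements >> 14;             /* 6 */
   printf("width=%u height=%u -> %u elements\n",
          width, height, (height << 14) | width);
   return 0;
}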
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateBufferView(VkDevice _device,
const VkBufferViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -754,14 +571,14 @@ v3dv_CreateBufferView(VkDevice _device,
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
- const struct v3dv_buffer *buffer =
+ struct v3dv_buffer *buffer =
v3dv_buffer_from_handle(pCreateInfo->buffer);
struct v3dv_buffer_view *view =
vk_object_zalloc(&device->vk, pAllocator, sizeof(*view),
VK_OBJECT_TYPE_BUFFER_VIEW);
if (!view)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
uint32_t range;
if (pCreateInfo->range == VK_WHOLE_SIZE)
@@ -777,22 +594,21 @@ v3dv_CreateBufferView(VkDevice _device,
view->size = view->offset + range;
view->num_elements = num_elements;
view->vk_format = pCreateInfo->format;
- view->format = v3dv_get_format(view->vk_format);
+ view->format = v3dv_X(device, get_format)(view->vk_format);
- v3dv_get_internal_type_bpp_for_output_format(view->format->rt_type,
- &view->internal_type,
- &view->internal_bpp);
+ v3dv_X(device, get_internal_type_bpp_for_output_format)
+ (view->format->rt_type, &view->internal_type, &view->internal_bpp);
if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT ||
buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT)
- pack_texture_shader_state_from_buffer_view(device, view);
+ v3dv_X(device, pack_texture_shader_state_from_buffer_view)(device, view);
*pView = v3dv_buffer_view_to_handle(view);
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyBufferView(VkDevice _device,
VkBufferView bufferView,
const VkAllocationCallbacks *pAllocator)
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_limits.h b/lib/mesa/src/broadcom/vulkan/v3dv_limits.h
index a5ddb66e4..aaab1ce03 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_limits.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_limits.h
@@ -44,7 +44,7 @@
#define MAX_INPUT_ATTACHMENTS 4
#define MAX_UNIFORM_BUFFERS 12
-#define MAX_STORAGE_BUFFERS 4
+#define MAX_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_UNIFORM_BUFFERS 8
#define MAX_DYNAMIC_STORAGE_BUFFERS 4
@@ -53,21 +53,22 @@
#define MAX_RENDER_TARGETS 4
+#define MAX_MULTIVIEW_VIEW_COUNT 16
+
/* These are tunable parameters in the HW design, but all the V3D
* implementations agree.
*/
-#define VC5_UIFCFG_BANKS 8
-#define VC5_UIFCFG_PAGE_SIZE 4096
-#define VC5_UIFCFG_XOR_VALUE (1 << 4)
-#define VC5_PAGE_CACHE_SIZE (VC5_UIFCFG_PAGE_SIZE * VC5_UIFCFG_BANKS)
-#define VC5_UBLOCK_SIZE 64
-#define VC5_UIFBLOCK_SIZE (4 * VC5_UBLOCK_SIZE)
-#define VC5_UIFBLOCK_ROW_SIZE (4 * VC5_UIFBLOCK_SIZE)
+#define V3D_UIFCFG_BANKS 8
+#define V3D_UIFCFG_PAGE_SIZE 4096
+#define V3D_UIFCFG_XOR_VALUE (1 << 4)
+#define V3D_PAGE_CACHE_SIZE (V3D_UIFCFG_PAGE_SIZE * V3D_UIFCFG_BANKS)
+#define V3D_UBLOCK_SIZE 64
+#define V3D_UIFBLOCK_SIZE (4 * V3D_UBLOCK_SIZE)
+#define V3D_UIFBLOCK_ROW_SIZE (4 * V3D_UIFBLOCK_SIZE)
-#define PAGE_UB_ROWS (VC5_UIFCFG_PAGE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
+#define PAGE_UB_ROWS (V3D_UIFCFG_PAGE_SIZE / V3D_UIFBLOCK_ROW_SIZE)
#define PAGE_UB_ROWS_TIMES_1_5 ((PAGE_UB_ROWS * 3) >> 1)
-#define PAGE_CACHE_UB_ROWS (VC5_PAGE_CACHE_SIZE / VC5_UIFBLOCK_ROW_SIZE)
+#define PAGE_CACHE_UB_ROWS (V3D_PAGE_CACHE_SIZE / V3D_UIFBLOCK_ROW_SIZE)
#define PAGE_CACHE_MINUS_1_5_UB_ROWS (PAGE_CACHE_UB_ROWS - PAGE_UB_ROWS_TIMES_1_5)
-
#endif /* V3DV_LIMITS_H */
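Plugging the defaults into the derived macros gives the following values (a worked example, not additional definitions):

/*
 * V3D_UIFBLOCK_SIZE            = 4 * 64       = 256 bytes
 * V3D_UIFBLOCK_ROW_SIZE        = 4 * 256      = 1024 bytes
 * V3D_PAGE_CACHE_SIZE          = 4096 * 8     = 32768 bytes
 * PAGE_UB_ROWS                 = 4096 / 1024  = 4
 * PAGE_UB_ROWS_TIMES_1_5       = (4 * 3) >> 1 = 6
 * PAGE_CACHE_UB_ROWS           = 32768 / 1024 = 32
 * PAGE_CACHE_MINUS_1_5_UB_ROWS = 32 - 6       = 26
 */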
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c b/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c
index 0a38edb21..5555c690b 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_clear.c
@@ -22,13 +22,175 @@
*/
#include "v3dv_private.h"
+#include "v3dv_meta_common.h"
-#include "broadcom/cle/v3dx_pack.h"
#include "compiler/nir/nir_builder.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"
static void
+get_hw_clear_color(struct v3dv_device *device,
+ const VkClearColorValue *color,
+ VkFormat fb_format,
+ VkFormat image_format,
+ uint32_t internal_type,
+ uint32_t internal_bpp,
+ uint32_t *hw_color)
+{
+ const uint32_t internal_size = 4 << internal_bpp;
+
+ /* If the image format doesn't match the framebuffer format, then we are
+ * trying to clear an unsupported TLB format using a compatible
+ * format for the framebuffer. In this case, we want to make sure that
+ * we pack the clear value according to the original format semantics,
+ * not the compatible format.
+ */
+ if (fb_format == image_format) {
+ v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
+ hw_color);
+ } else {
+ union util_color uc;
+ enum pipe_format pipe_image_format =
+ vk_format_to_pipe_format(image_format);
+ util_pack_color(color->float32, pipe_image_format, &uc);
+ memcpy(hw_color, uc.ui, internal_size);
+ }
+}
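The fallback branch mirrors this standalone helper, built only on util_pack_color (a sketch; the real packing happens inline above):

#include <string.h>
#include "util/u_pack_color.h"

static void
pack_fallback_clear(const float rgba[4], enum pipe_format image_pformat,
                    uint32_t *hw_color, unsigned internal_size)
{
   /* Pack the clear color with the original image format's semantics,
    * not the compatible framebuffer format's.
    */
   union util_color uc;
   util_pack_color(rgba, image_pformat, &uc);
   memcpy(hw_color, uc.ui, internal_size);
}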
+
+/* Returns true if the implementation can handle the clear, false otherwise. */
+static bool
+clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *image,
+ const VkClearValue *clear_value,
+ const VkImageSubresourceRange *range)
+{
+ const VkOffset3D origin = { 0, 0, 0 };
+ VkFormat fb_format;
+ if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
+ return false;
+
+ uint32_t internal_type, internal_bpp;
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, range->aspectMask,
+ &internal_type, &internal_bpp);
+
+ union v3dv_clear_value hw_clear_value = { 0 };
+ if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
+ image->vk.format, internal_type, internal_bpp,
+ &hw_clear_value.color[0]);
+ } else {
+ assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
+ (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
+ hw_clear_value.z = clear_value->depthStencil.depth;
+ hw_clear_value.s = clear_value->depthStencil.stencil;
+ }
+
+ uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
+ uint32_t min_level = range->baseMipLevel;
+ uint32_t max_level = range->baseMipLevel + level_count;
+
+   /* For 3D images, baseArrayLayer and layerCount must be 0 and 1, respectively.
+ * Instead, we need to consider the full depth dimension of the image, which
+ * goes from 0 up to the level's depth extent.
+ */
+ uint32_t min_layer;
+ uint32_t max_layer;
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
+ min_layer = range->baseArrayLayer;
+ max_layer = range->baseArrayLayer +
+ vk_image_subresource_layer_count(&image->vk, range);
+ } else {
+ min_layer = 0;
+ max_layer = 0;
+ }
+
+ for (uint32_t level = min_level; level < max_level; level++) {
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D)
+ max_layer = u_minify(image->vk.extent.depth, level);
+
+ uint32_t width = u_minify(image->vk.extent.width, level);
+ uint32_t height = u_minify(image->vk.extent.height, level);
+
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
+
+ if (!job)
+ return true;
+
+ v3dv_job_start_frame(job, width, height, max_layer, false,
+ 1, internal_bpp,
+ image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
+
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
+ internal_type,
+ &job->frame_tiling);
+
+ v3dv_X(job->device, job_emit_binning_flush)(job);
+
+      /* If this triggers, it is an application bug: the spec requires that
+       * any aspect to clear be present in the image.
+       */
+ assert(range->aspectMask & image->vk.aspects);
+
+ v3dv_X(job->device, meta_emit_clear_image_rcl)
+ (job, image, &framebuffer, &hw_clear_value,
+ range->aspectMask, min_layer, max_layer, level);
+
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+ }
+
+ return true;
+}
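For 3D images the per-level layer range follows u_minify; a worked example assuming an 8-deep 3D image:

/*
 * level 0: max_layer = u_minify(8, 0) = 8 -> clears depth slices 0..7
 * level 1: max_layer = u_minify(8, 1) = 4 -> clears depth slices 0..3
 * level 2: max_layer = u_minify(8, 2) = 2 -> clears depth slices 0..1
 *
 * u_minify(sz, lvl) is MAX2(sz >> lvl, 1), so every mip level of a 3D
 * image clears its full depth extent.
 */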
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
+ VkImage _image,
+ VkImageLayout imageLayout,
+ const VkClearColorValue *pColor,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ V3DV_FROM_HANDLE(v3dv_image, image, _image);
+
+ const VkClearValue clear_value = {
+ .color = *pColor,
+ };
+
+ for (uint32_t i = 0; i < rangeCount; i++) {
+ if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
+ continue;
+ unreachable("Unsupported color clear.");
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+ VkImage _image,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+ V3DV_FROM_HANDLE(v3dv_image, image, _image);
+
+ const VkClearValue clear_value = {
+ .depthStencil = *pDepthStencil,
+ };
+
+ for (uint32_t i = 0; i < rangeCount; i++) {
+ if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
+ continue;
+ unreachable("Unsupported depth/stencil clear.");
+ }
+}
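From the application side the new entry points are reached like this (a usage sketch with hypothetical handles):

#include <vulkan/vulkan.h>

static void
clear_first_two_levels_red(VkCommandBuffer cmd_buf, VkImage image)
{
   const VkClearColorValue red = { .float32 = { 1.0f, 0.0f, 0.0f, 1.0f } };
   const VkImageSubresourceRange range = {
      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
      .baseMipLevel = 0,
      .levelCount = 2,
      .baseArrayLayer = 0,
      .layerCount = VK_REMAINING_ARRAY_LAYERS,
   };
   vkCmdClearColorImage(cmd_buf, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                        &red, 1, &range);
}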
+
+static void
destroy_color_clear_pipeline(VkDevice _device,
uint64_t pipeline,
VkAllocationCallbacks *alloc)
@@ -54,12 +216,20 @@ static VkResult
create_color_clear_pipeline_layout(struct v3dv_device *device,
VkPipelineLayout *pipeline_layout)
{
+   /* FIXME: this abuses the API a bit, since not all of our clear pipelines
+    * have a geometry shader. We could create two different pipeline layouts,
+    * but this works for us for now.
+    */
+ VkPushConstantRange ranges[2] = {
+ { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
+ { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
+ };
+
VkPipelineLayoutCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
+ .pushConstantRangeCount = 2,
+ .pPushConstantRanges = ranges,
};
return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
@@ -70,12 +240,20 @@ static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device *device,
VkPipelineLayout *pipeline_layout)
{
+   /* FIXME: this abuses the API a bit, since not all of our clear pipelines
+    * have a geometry shader. We could create two different pipeline layouts,
+    * but this works for us for now.
+    */
+ VkPushConstantRange ranges[2] = {
+ { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
+ { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
+ };
+
VkPipelineLayoutCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
+ .pushConstantRangeCount = 2,
+ .pPushConstantRanges = ranges
};
return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
@@ -178,6 +356,70 @@ get_clear_rect_vs()
}
static nir_shader *
+get_clear_rect_gs(uint32_t push_constant_layer_base)
+{
+   /* FIXME: this creates a geometry shader that takes the index of a single
+    * layer to clear from push constants, so we need to emit a draw call for
+    * each layer that we want to clear. We could do better and have it take a
+    * range of layers, emitting one triangle per layer to clear; however, we
+    * would then need to be careful not to exceed the maximum number of
+    * output vertices allowed in a geometry shader.
+    */
+ const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
+ "meta clear gs");
+ nir_shader *nir = b.shader;
+ nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
+ nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
+ (1ull << VARYING_SLOT_LAYER);
+ nir->info.gs.input_primitive = GL_TRIANGLES;
+ nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
+ nir->info.gs.vertices_in = 3;
+ nir->info.gs.vertices_out = 3;
+ nir->info.gs.invocations = 1;
+ nir->info.gs.active_stream_mask = 0x1;
+
+ /* in vec4 gl_Position[3] */
+ nir_variable *gs_in_pos =
+ nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_array_type(glsl_vec4_type(), 3, 0),
+ "in_gl_Position");
+ gs_in_pos->data.location = VARYING_SLOT_POS;
+
+ /* out vec4 gl_Position */
+ nir_variable *gs_out_pos =
+ nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
+ "out_gl_Position");
+ gs_out_pos->data.location = VARYING_SLOT_POS;
+
+ /* out float gl_Layer */
+ nir_variable *gs_out_layer =
+ nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
+ "out_gl_Layer");
+ gs_out_layer->data.location = VARYING_SLOT_LAYER;
+
+ /* Emit output triangle */
+ for (uint32_t i = 0; i < 3; i++) {
+ /* gl_Position from shader input */
+ nir_deref_instr *in_pos_i =
+ nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
+ nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
+
+ /* gl_Layer from push constants */
+ nir_ssa_def *layer =
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
+ .base = push_constant_layer_base, .range = 4);
+ nir_store_var(&b, gs_out_layer, layer, 0x1);
+
+ nir_emit_vertex(&b, 0);
+ }
+
+ nir_end_primitive(&b, 0);
+
+ return nir;
+}
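For reference, the NIR built above corresponds roughly to this GLSL geometry shader (the driver never compiles GLSL here; N stands for push_constant_layer_base):

/*
 * layout(triangles) in;
 * layout(triangle_strip, max_vertices = 3) out;
 * layout(push_constant) uniform PC { layout(offset = N) int layer; };
 *
 * void main() {
 *    for (int i = 0; i < 3; i++) {
 *       gl_Position = gl_in[i].gl_Position;
 *       gl_Layer = layer;
 *       EmitVertex();
 *    }
 *    EndPrimitive();
 * }
 */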
+
+static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
{
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
@@ -224,6 +466,7 @@ create_pipeline(struct v3dv_device *device,
uint32_t subpass_idx,
uint32_t samples,
struct nir_shader *vs_nir,
+ struct nir_shader *gs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
@@ -231,32 +474,41 @@ create_pipeline(struct v3dv_device *device,
const VkPipelineLayout layout,
VkPipeline *pipeline)
{
+ VkPipelineShaderStageCreateInfo stages[3] = { 0 };
struct vk_shader_module vs_m;
+ struct vk_shader_module gs_m;
struct vk_shader_module fs_m;
+ uint32_t stage_count = 0;
v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
- if (fs_nir)
- v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
+ stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
+ stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
+ stages[stage_count].pName = "main";
+ stage_count++;
+
+ if (gs_nir) {
+ v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
+ stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
+ stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
+ stages[stage_count].pName = "main";
+ stage_count++;
+ }
- VkPipelineShaderStageCreateInfo stages[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_to_handle(&vs_m),
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = fs_nir ? vk_shader_module_to_handle(&fs_m) : VK_NULL_HANDLE,
- .pName = "main",
- },
- };
+ if (fs_nir) {
+ v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
+ stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
+ stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
+ stages[stage_count].pName = "main";
+ stage_count++;
+ }
VkGraphicsPipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = fs_nir ? 2 : 1,
+ .stageCount = stage_count,
.pStages = stages,
.pVertexInputState = vi_state,
@@ -342,11 +594,13 @@ create_color_clear_pipeline(struct v3dv_device *device,
VkFormat format,
uint32_t samples,
uint32_t components,
+ bool is_layered,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
nir_shader *vs_nir = get_clear_rect_vs();
nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
+ nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -384,7 +638,7 @@ create_color_clear_pipeline(struct v3dv_device *device,
return create_pipeline(device,
pass, subpass_idx,
samples,
- vs_nir, fs_nir,
+ vs_nir, gs_nir, fs_nir,
&vi_state,
&ds_state,
&cb_state,
@@ -398,6 +652,7 @@ create_depth_clear_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
uint32_t subpass_idx,
uint32_t samples,
+ bool is_layered,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
@@ -407,6 +662,7 @@ create_depth_clear_pipeline(struct v3dv_device *device,
nir_shader *vs_nir = get_clear_rect_vs();
nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
+ nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -441,7 +697,7 @@ create_depth_clear_pipeline(struct v3dv_device *device,
return create_pipeline(device,
pass, subpass_idx,
samples,
- vs_nir, fs_nir,
+ vs_nir, gs_nir, fs_nir,
&vi_state,
&ds_state,
&cb_state,
@@ -499,7 +755,8 @@ static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,
VkFormat format,
uint32_t samples,
- uint32_t components)
+ uint32_t components,
+ bool is_layered)
{
assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
@@ -518,6 +775,9 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx,
key |= ((uint64_t) components) << bit_offset;
bit_offset += 4;
+ key |= (is_layered ? 1ull : 0ull) << bit_offset;
+ bit_offset += 1;
+
assert(bit_offset <= 64);
return key;
}
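The key is assembled by appending fixed-width fields to a 64-bit integer; a minimal standalone sketch of the pattern (field widths here are illustrative, not the driver's exact layout):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static uint64_t
pack_key(uint32_t rt_idx, uint32_t format, uint32_t samples,
         uint32_t components, bool is_layered)
{
   uint64_t key = 0;
   uint32_t bit_offset = 0;

   key |= (uint64_t)rt_idx << bit_offset;     bit_offset += 2;
   key |= (uint64_t)format << bit_offset;     bit_offset += 32;
   key |= (uint64_t)samples << bit_offset;    bit_offset += 4;
   key |= (uint64_t)components << bit_offset; bit_offset += 4;
   key |= (is_layered ? 1ull : 0ull) << bit_offset;
   bit_offset += 1;

   assert(bit_offset <= 64);
   return key;
}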
@@ -525,7 +785,8 @@ get_color_clear_pipeline_cache_key(uint32_t rt_idx,
static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
VkFormat format,
- uint32_t samples)
+ uint32_t samples,
+ bool is_layered)
{
uint64_t key = 0;
uint32_t bit_offset = 0;
@@ -544,6 +805,9 @@ get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
key |= ((uint64_t) has_stencil) << bit_offset;
   bit_offset++;
+ key |= (is_layered ? 1ull : 0ull) << bit_offset;
+ bit_offset += 1;
+
assert(bit_offset <= 64);
return key;
}
@@ -557,6 +821,7 @@ get_color_clear_pipeline(struct v3dv_device *device,
VkFormat format,
uint32_t samples,
uint32_t components,
+ bool is_layered,
struct v3dv_meta_color_clear_pipeline **pipeline)
{
assert(vk_format_is_color(format));
@@ -580,8 +845,8 @@ get_color_clear_pipeline(struct v3dv_device *device,
uint64_t key;
if (can_cache_pipeline) {
- key =
- get_color_clear_pipeline_cache_key(rt_idx, format, samples, components);
+ key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
+ components, is_layered);
mtx_lock(&device->meta.mtx);
struct hash_entry *entry =
_mesa_hash_table_search(device->meta.color_clear.cache, &key);
@@ -621,6 +886,7 @@ get_color_clear_pipeline(struct v3dv_device *device,
format,
samples,
components,
+ is_layered,
device->meta.color_clear.p_layout,
&(*pipeline)->pipeline);
if (result != VK_SUCCESS)
@@ -660,6 +926,7 @@ get_depth_clear_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
uint32_t subpass_idx,
uint32_t attachment_idx,
+ bool is_layered,
struct v3dv_meta_depth_clear_pipeline **pipeline)
{
assert(subpass_idx < pass->subpass_count);
@@ -673,7 +940,7 @@ get_depth_clear_pipeline(struct v3dv_device *device,
assert(vk_format_is_depth_or_stencil(format));
const uint64_t key =
- get_depth_clear_pipeline_cache_key(aspects, format, samples);
+ get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
mtx_lock(&device->meta.mtx);
struct hash_entry *entry =
_mesa_hash_table_search(device->meta.depth_clear.cache, &key);
@@ -696,6 +963,7 @@ get_depth_clear_pipeline(struct v3dv_device *device,
pass,
subpass_idx,
samples,
+ is_layered,
device->meta.depth_clear.p_layout,
&(*pipeline)->pipeline);
if (result != VK_SUCCESS)
@@ -722,272 +990,15 @@ fail:
return result;
}
-static VkFormat
-get_color_format_for_depth_stencil_format(VkFormat format)
-{
- /* For single depth/stencil aspect formats, we just choose a compatible
- * 1 channel format, but for combined depth/stencil we want an RGBA format
- * so we can specify the channels we want to write.
- */
- switch (format) {
- case VK_FORMAT_D16_UNORM:
- return VK_FORMAT_R16_UINT;
- case VK_FORMAT_D32_SFLOAT:
- return VK_FORMAT_R32_SFLOAT;
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- return VK_FORMAT_R8G8B8A8_UINT;
- default:
- unreachable("Unsupported depth/stencil format");
- };
-}
-
-/**
- * Emits a scissored quad in the clear color, however, unlike the subpass
- * versions, this creates its own framebuffer setup with a single color
- * attachment, and therefore spanws new jobs, making it much slower than the
- * subpass version.
- *
- * This path is only used when we have clears on layers other than the
- * base layer in a framebuffer attachment, since we don't currently
- * support any form of layered rendering that would allow us to implement
- * this in the subpass version.
- *
- * Notice this can also handle depth/stencil formats by rendering to the
- * depth/stencil target using a compatible color format.
- */
-static void
-emit_color_clear_rect(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t attachment_idx,
- VkFormat rt_format,
- uint32_t rt_samples,
- uint32_t rt_components,
- VkClearColorValue clear_color,
- const VkClearRect *rect)
-{
- assert(cmd_buffer->state.pass);
- struct v3dv_device *device = cmd_buffer->device;
- struct v3dv_render_pass *pass = cmd_buffer->state.pass;
-
- assert(attachment_idx != VK_ATTACHMENT_UNUSED &&
- attachment_idx < pass->attachment_count);
-
- struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
- VkResult result =
- get_color_clear_pipeline(device,
- NULL, 0, /* Not using current subpass */
- 0, attachment_idx,
- rt_format, rt_samples, rt_components,
- &pipeline);
- if (result != VK_SUCCESS) {
- if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
- v3dv_flag_oom(cmd_buffer, NULL);
- return;
- }
- assert(pipeline && pipeline->pipeline && pipeline->pass);
-
-   /* Since we are not emitting the draw call in the current subpass, we
-    * should be caching the clear pipeline, and we don't have to take care
-    * of destroying it below.
-    */
- assert(pipeline->cached);
-
- /* Store command buffer state for the current subpass before we interrupt
- * it to emit the color clear pass and then finish the job for the
- * interrupted subpass.
- */
- v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
- v3dv_cmd_buffer_finish_job(cmd_buffer);
-
- struct v3dv_framebuffer *subpass_fb =
- v3dv_framebuffer_from_handle(cmd_buffer->state.meta.framebuffer);
- VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
- VkDevice device_handle = v3dv_device_to_handle(cmd_buffer->device);
-
- /* If we are clearing a depth/stencil attachment as a color attachment
- * then we need to configure the framebuffer to the compatible color
- * format.
- */
- const struct v3dv_image_view *att_iview =
- subpass_fb->attachments[attachment_idx];
- const bool is_depth_or_stencil =
- vk_format_is_depth_or_stencil(att_iview->vk_format);
-
- /* Emit the pass for each attachment layer, which creates a framebuffer
- * for each selected layer of the attachment and then renders a scissored
- * quad in the clear color.
- */
- uint32_t dirty_dynamic_state = 0;
- for (uint32_t i = 0; i < rect->layerCount; i++) {
- VkImageViewCreateInfo fb_layer_view_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = v3dv_image_to_handle((struct v3dv_image *)att_iview->image),
- .viewType =
- v3dv_image_type_to_view_type(att_iview->image->type),
- .format = is_depth_or_stencil ? rt_format : att_iview->vk_format,
- .subresourceRange = {
- .aspectMask = is_depth_or_stencil ? VK_IMAGE_ASPECT_COLOR_BIT :
- att_iview->aspects,
- .baseMipLevel = att_iview->base_level,
- .levelCount = att_iview->max_level - att_iview->base_level + 1,
- .baseArrayLayer = att_iview->first_layer + rect->baseArrayLayer + i,
- .layerCount = 1,
- },
- };
- VkImageView fb_attachment;
- result = v3dv_CreateImageView(v3dv_device_to_handle(device),
- &fb_layer_view_info,
- &device->vk.alloc, &fb_attachment);
- if (result != VK_SUCCESS)
- goto fail;
-
- v3dv_cmd_buffer_add_private_obj(
- cmd_buffer, (uintptr_t)fb_attachment,
- (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
-
- VkFramebufferCreateInfo fb_info = {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .renderPass = v3dv_render_pass_to_handle(pass),
- .attachmentCount = 1,
- .pAttachments = &fb_attachment,
- .width = subpass_fb->width,
- .height = subpass_fb->height,
- .layers = 1,
- };
-
- VkFramebuffer fb;
- result = v3dv_CreateFramebuffer(device_handle, &fb_info,
- &cmd_buffer->device->vk.alloc, &fb);
- if (result != VK_SUCCESS)
- goto fail;
-
- v3dv_cmd_buffer_add_private_obj(
- cmd_buffer, (uintptr_t)fb,
- (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
-
- VkRenderPassBeginInfo rp_info = {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = pipeline->pass,
- .framebuffer = fb,
- .renderArea = {
- .offset = { rect->rect.offset.x, rect->rect.offset.y },
- .extent = { rect->rect.extent.width, rect->rect.extent.height } },
- .clearValueCount = 0,
- };
-
- v3dv_CmdBeginRenderPass(cmd_buffer_handle, &rp_info,
- VK_SUBPASS_CONTENTS_INLINE);
-
- struct v3dv_job *job = cmd_buffer->state.job;
- if (!job)
- goto fail;
- job->is_subpass_continue = true;
-
- v3dv_CmdPushConstants(cmd_buffer_handle,
- device->meta.color_clear.p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
- &clear_color);
-
- v3dv_CmdBindPipeline(cmd_buffer_handle,
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline->pipeline);
-
- const VkViewport viewport = {
- .x = rect->rect.offset.x,
- .y = rect->rect.offset.y,
- .width = rect->rect.extent.width,
- .height = rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- };
- v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
- v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rect->rect);
-
- v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
-
- v3dv_CmdEndRenderPass(cmd_buffer_handle);
- }
-
- /* The clear pipeline sets viewport and scissor state, so we need
- * to restore it
- */
- dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
-
-fail:
- v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
-}
-
-static void
-emit_ds_clear_rect(struct v3dv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- uint32_t attachment_idx,
- VkClearDepthStencilValue clear_ds,
- const VkClearRect *rect)
-{
- assert(cmd_buffer->state.pass);
- assert(attachment_idx != VK_ATTACHMENT_UNUSED);
- assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
-
- VkFormat format =
- cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
- assert ((aspects & ~vk_format_aspects(format)) == 0);
-
- uint32_t samples =
- cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
-
- enum pipe_format pformat = vk_format_to_pipe_format(format);
- VkClearColorValue clear_color;
- uint32_t clear_zs =
- util_pack_z_stencil(pformat, clear_ds.depth, clear_ds.stencil);
-
- /* We implement depth/stencil clears by turning them into color clears
- * with a compatible color format.
- */
- VkFormat color_format = get_color_format_for_depth_stencil_format(format);
-
- uint32_t comps;
- if (color_format == VK_FORMAT_R8G8B8A8_UINT) {
- /* We are clearing a D24 format so we need to select the channels that we
- * are being asked to clear to avoid clearing aspects that should be
- * preserved. Also, the hardware uses the MSB channels to store the D24
- * component, so we need to shift the components in the clear value to
- * match that.
- */
- comps = 0;
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- comps |= VK_COLOR_COMPONENT_R_BIT;
- clear_color.uint32[0] = clear_zs >> 24;
- }
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- comps |= VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT;
- clear_color.uint32[1] = (clear_zs >> 0) & 0xff;
- clear_color.uint32[2] = (clear_zs >> 8) & 0xff;
- clear_color.uint32[3] = (clear_zs >> 16) & 0xff;
- }
- } else {
- /* For anything else we use a single component format */
- comps = VK_COLOR_COMPONENT_R_BIT;
- clear_color.uint32[0] = clear_zs;
- }
-
- emit_color_clear_rect(cmd_buffer, attachment_idx,
- color_format, samples, comps,
- clear_color, rect);
-}
-
-/* Emits a scissored quad in the clear color.
- *
- * This path only works for clears to the base layer in the framebuffer, since
- * we don't currently support any form of layered rendering.
- */
+/* Emits a scissored quad in the clear color */
static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_render_pass *pass,
struct v3dv_subpass *subpass,
uint32_t rt_idx,
const VkClearColorValue *clear_color,
+ bool is_layered,
+ bool all_rects_same_layers,
uint32_t rect_count,
const VkClearRect *rects)
{
@@ -1016,6 +1027,7 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
format,
samples,
components,
+ is_layered,
&pipeline);
if (result != VK_SUCCESS) {
if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
@@ -1040,7 +1052,6 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
for (uint32_t i = 0; i < rect_count; i++) {
- assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
const VkViewport viewport = {
.x = rects[i].rect.offset.x,
.y = rects[i].rect.offset.y,
@@ -1051,7 +1062,20 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
};
v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
- v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
+
+ if (is_layered) {
+ for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
+ layer_offset++) {
+ uint32_t layer = rects[i].baseArrayLayer + layer_offset;
+ v3dv_CmdPushConstants(cmd_buffer_handle,
+ cmd_buffer->device->meta.depth_clear.p_layout,
+ VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
+ v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
+ }
+ } else {
+ assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
+ v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
+ }
}
/* Subpass pipelines can't be cached because they include a reference to the
@@ -1068,9 +1092,6 @@ emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
/* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
* and the stencil aspect by using stencil testing.
- *
- * This path only works for clears to the base layer in the framebuffer, since
- * we don't currently support any form of layered rendering.
*/
static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
@@ -1078,6 +1099,8 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_subpass *subpass,
VkImageAspectFlags aspects,
const VkClearDepthStencilValue *clear_ds,
+ bool is_layered,
+ bool all_rects_same_layers,
uint32_t rect_count,
const VkClearRect *rects)
{
@@ -1094,6 +1117,7 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
pass,
cmd_buffer->state.subpass_idx,
attachment_idx,
+ is_layered,
&pipeline);
if (result != VK_SUCCESS) {
if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
@@ -1130,7 +1154,6 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
}
for (uint32_t i = 0; i < rect_count; i++) {
- assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
const VkViewport viewport = {
.x = rects[i].rect.offset.x,
.y = rects[i].rect.offset.y,
@@ -1141,485 +1164,46 @@ emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
};
v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
- v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
- }
-
- v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
-}
-
-static void
-emit_tlb_clear_store(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t attachment_idx,
- uint32_t layer,
- uint32_t buffer)
-{
- const struct v3dv_image_view *iview =
- cmd_buffer->state.framebuffer->attachments[attachment_idx];
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
- uint32_t layer_offset = v3dv_layer_offset(image,
- iview->base_level,
- iview->first_layer + layer);
-
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = buffer;
- store.address = v3dv_cl_address(image->mem->bo, layer_offset);
- store.clear_buffer_being_stored = false;
-
- store.output_image_format = iview->format->rt_type;
- store.r_b_swap = iview->swap_rb;
- store.memory_format = slice->tiling;
-
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- store.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- store.height_in_ub_or_stride = slice->stride;
- }
-
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else
- store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
- }
-}
-
-static void
-emit_tlb_clear_stores(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_cl *cl,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t layer)
-{
- struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
-
- bool has_stores = false;
- for (uint32_t i = 0; i < attachment_count; i++) {
- uint32_t attachment_idx;
- uint32_t buffer;
- if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- attachment_idx = subpass->ds_attachment.attachment;
- buffer = v3dv_zs_buffer_from_aspect_bits(attachments[i].aspectMask);
+ if (is_layered) {
+ for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
+ layer_offset++) {
+ uint32_t layer = rects[i].baseArrayLayer + layer_offset;
+ v3dv_CmdPushConstants(cmd_buffer_handle,
+ cmd_buffer->device->meta.depth_clear.p_layout,
+ VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
+ v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
+ }
} else {
- uint32_t rt_idx = attachments[i].colorAttachment;
- attachment_idx = subpass->color_attachments[rt_idx].attachment;
- buffer = RENDER_TARGET_0 + rt_idx;
+ assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
+ v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
}
-
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- has_stores = true;
- emit_tlb_clear_store(cmd_buffer, cl, attachment_idx, layer, buffer);
- }
-
- if (!has_stores) {
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- }
-}
-
-static void
-emit_tlb_clear_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t layer)
-{
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- cl_emit(cl, END_OF_LOADS, end); /* Nothing to load */
-
- cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
- fmt.primitive_type = LIST_TRIANGLES;
}
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- emit_tlb_clear_stores(cmd_buffer, cl, attachment_count, attachments, layer);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
+ v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
}
static void
-emit_tlb_clear_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t layer)
+gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
+ bool *is_layered, bool *all_rects_same_layers)
{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
-
- struct v3dv_job *job = cmd_buffer->state.job;
- struct v3dv_cl *rcl = &job->rcl;
-
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
-
- const uint32_t tile_alloc_offset =
- 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
- cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
- config.number_of_bin_tile_lists = 1;
- config.total_frame_width_in_tiles = tiling->draw_tiles_x;
- config.total_frame_height_in_tiles = tiling->draw_tiles_y;
-
- config.supertile_width_in_tiles = tiling->supertile_width;
- config.supertile_height_in_tiles = tiling->supertile_height;
-
- config.total_frame_width_in_supertiles =
- tiling->frame_width_in_supertiles;
- config.total_frame_height_in_supertiles =
- tiling->frame_height_in_supertiles;
- }
-
- /* Emit the clear and also the workaround for GFXH-1742 */
- for (int i = 0; i < 2; i++) {
- cl_emit(rcl, TILE_COORDINATES, coords);
- cl_emit(rcl, END_OF_LOADS, end);
- cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- if (i == 0) {
- cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = true;
- clear.clear_all_render_targets = true;
- }
+ *all_rects_same_layers = true;
+
+ uint32_t min_layer = rects[0].baseArrayLayer;
+ uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
+ for (uint32_t i = 1; i < rect_count; i++) {
+ if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
+ rects[i].layerCount != rects[i - 1].layerCount) {
+ *all_rects_same_layers = false;
+ min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
+ max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
+ rects[i].layerCount - 1);
}
- cl_emit(rcl, END_OF_TILE_MARKER, end);
}
- cl_emit(rcl, FLUSH_VCD_CACHE, flush);
-
- emit_tlb_clear_per_tile_rcl(cmd_buffer, attachment_count, attachments, layer);
-
- uint32_t supertile_w_in_pixels =
- tiling->tile_width * tiling->supertile_width;
- uint32_t supertile_h_in_pixels =
- tiling->tile_height * tiling->supertile_height;
-
- const uint32_t max_render_x = framebuffer->width - 1;
- const uint32_t max_render_y = framebuffer->height - 1;
- const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
- const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
-
- for (int y = 0; y <= max_y_supertile; y++) {
- for (int x = 0; x <= max_x_supertile; x++) {
- cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
- coords.column_number_in_supertiles = x;
- coords.row_number_in_supertiles = y;
- }
- }
- }
+ *is_layered = !(min_layer == 0 && max_layer == 0);
}
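Worked examples for gather_layering_info (hypothetical rect values):

/*
 * { base 0, count 1 }, { base 0, count 1 } -> is_layered = false,
 *                                             all_rects_same_layers = true
 * { base 2, count 1 }                      -> is_layered = true,
 *                                             all_rects_same_layers = true
 * { base 0, count 2 }, { base 1, count 1 } -> is_layered = true,
 *                                             all_rects_same_layers = false
 */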
-static void
-emit_tlb_clear_job(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t base_layer,
- uint32_t layer_count)
-{
- const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
- const struct v3dv_framebuffer *framebuffer = state->framebuffer;
- const struct v3dv_subpass *subpass =
- &state->pass->subpasses[state->subpass_idx];
- struct v3dv_job *job = cmd_buffer->state.job;
- assert(job);
-
- /* Check how many color attachments we have and also if we have a
- * depth/stencil attachment.
- */
- uint32_t color_attachment_count = 0;
- VkClearAttachment color_attachments[4];
- const VkClearDepthStencilValue *ds_clear_value = NULL;
- uint8_t internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
- for (uint32_t i = 0; i < attachment_count; i++) {
- if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- assert(subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED);
- ds_clear_value = &attachments[i].clearValue.depthStencil;
- struct v3dv_render_pass_attachment *att =
- &state->pass->attachments[subpass->ds_attachment.attachment];
- internal_depth_type = v3dv_get_internal_depth_type(att->desc.format);
- } else if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- color_attachments[color_attachment_count++] = attachments[i];
- }
- }
-
- uint8_t internal_bpp;
- bool msaa;
- v3dv_framebuffer_compute_internal_bpp_msaa(framebuffer, subpass,
- &internal_bpp, &msaa);
-
- v3dv_job_start_frame(job,
- framebuffer->width,
- framebuffer->height,
- framebuffer->layers,
- color_attachment_count,
- internal_bpp, msaa);
-
- struct v3dv_cl *rcl = &job->rcl;
- v3dv_cl_ensure_space_with_branch(rcl, 200 +
- layer_count * 256 *
- cl_packet_length(SUPERTILE_COORDINATES));
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
- config.early_z_disable = true;
- config.image_width_pixels = framebuffer->width;
- config.image_height_pixels = framebuffer->height;
- config.number_of_render_targets = MAX2(color_attachment_count, 1);
- config.multisample_mode_4x = false; /* FIXME */
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- config.internal_depth_type = internal_depth_type;
- }
-
- for (uint32_t i = 0; i < color_attachment_count; i++) {
- uint32_t rt_idx = color_attachments[i].colorAttachment;
- uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- const struct v3dv_render_pass_attachment *attachment =
- &state->pass->attachments[attachment_idx];
-
- uint32_t internal_type, internal_bpp, internal_size;
- const struct v3dv_format *format =
- v3dv_get_format(attachment->desc.format);
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
- &internal_type,
- &internal_bpp);
- internal_size = 4 << internal_bpp;
-
- uint32_t clear_color[4] = { 0 };
- v3dv_get_hw_clear_color(&color_attachments[i].clearValue.color,
- internal_type,
- internal_size,
- clear_color);
-
- struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
- const struct v3dv_image *image = iview->image;
- const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
-
- uint32_t clear_pad = 0;
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- int uif_block_height = v3d_utile_height(image->cpp) * 2;
-
- uint32_t implicit_padded_height =
- align(framebuffer->height, uif_block_height) / uif_block_height;
-
- if (slice->padded_height_of_output_image_in_uif_blocks -
- implicit_padded_height >= 15) {
- clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
- clear.clear_color_low_32_bits = clear_color[0];
- clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
- clear.render_target_number = i;
- };
-
- if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
- clear.clear_color_mid_low_32_bits =
- ((clear_color[1] >> 24) | (clear_color[2] << 8));
- clear.clear_color_mid_high_24_bits =
- ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
- clear.render_target_number = i;
- };
- }
-
- if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
- clear.uif_padded_height_in_uif_blocks = clear_pad;
- clear.clear_color_high_16_bits = clear_color[3] >> 16;
- clear.render_target_number = i;
- };
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
- v3dv_render_pass_setup_render_target(cmd_buffer, 0,
- &rt.render_target_0_internal_bpp,
- &rt.render_target_0_internal_type,
- &rt.render_target_0_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 1,
- &rt.render_target_1_internal_bpp,
- &rt.render_target_1_internal_type,
- &rt.render_target_1_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 2,
- &rt.render_target_2_internal_bpp,
- &rt.render_target_2_internal_type,
- &rt.render_target_2_clamp);
- v3dv_render_pass_setup_render_target(cmd_buffer, 3,
- &rt.render_target_3_internal_bpp,
- &rt.render_target_3_internal_type,
- &rt.render_target_3_clamp);
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value = ds_clear_value ? ds_clear_value->depth : 1.0f;
- clear.stencil_clear_value = ds_clear_value ? ds_clear_value->stencil : 0;
- };
-
- cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
- init.use_auto_chained_tile_lists = true;
- init.size_of_first_block_in_chained_tile_lists =
- TILE_ALLOCATION_BLOCK_SIZE_64B;
- }
-
- for (int layer = base_layer; layer < base_layer + layer_count; layer++) {
- emit_tlb_clear_layer_rcl(cmd_buffer,
- attachment_count,
- attachments,
- layer);
- }
-
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-static void
-emit_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t attachment_count,
- const VkClearAttachment *attachments,
- uint32_t base_layer,
- uint32_t layer_count)
-{
- struct v3dv_job *job =
- v3dv_cmd_buffer_start_job(cmd_buffer, cmd_buffer->state.subpass_idx,
- V3DV_JOB_TYPE_GPU_CL);
-
- /* vkCmdClearAttachments runs inside a render pass */
- job->is_subpass_continue = true;
-
- emit_tlb_clear_job(cmd_buffer,
- attachment_count,
- attachments,
- base_layer, layer_count);
-
- v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
-}
-
-static bool
-is_subrect(const VkRect2D *r0, const VkRect2D *r1)
-{
- return r0->offset.x <= r1->offset.x &&
- r0->offset.y <= r1->offset.y &&
- r0->offset.x + r0->extent.width >= r1->offset.x + r1->extent.width &&
- r0->offset.y + r0->extent.height >= r1->offset.y + r1->extent.height;
-}
-
-static bool
-can_use_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t rect_count,
- const VkClearRect* rects)
-{
- const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
-
- const VkRect2D *render_area = &cmd_buffer->state.render_area;
-
- /* Check if we are clearing a single region covering the entire framebuffer
- * and that we are not constrained by the current render area.
- *
- * From the Vulkan 1.0 spec:
- *
- * "The vkCmdClearAttachments command is not affected by the bound
- * pipeline state."
- *
- * So we can ignore scissor and viewport state for this check.
- */
- const VkRect2D fb_rect = {
- { 0, 0 },
- { framebuffer->width, framebuffer->height }
- };
-
- return rect_count == 1 &&
- is_subrect(&rects[0].rect, &fb_rect) &&
- is_subrect(render_area, &fb_rect);
-}
-
-static void
-handle_deferred_clear_attachments(struct v3dv_cmd_buffer *cmd_buffer,
- uint32_t attachmentCount,
- const VkClearAttachment *pAttachments,
- uint32_t rectCount,
- const VkClearRect *pRects)
-{
- /* Finish the current job */
- v3dv_cmd_buffer_finish_job(cmd_buffer);
-
- /* Add a deferred clear attachments job right after that we will process
- * when we execute this secondary command buffer into a primary.
- */
- struct v3dv_job *job =
- v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
- V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
- cmd_buffer,
- cmd_buffer->state.subpass_idx);
- v3dv_return_if_oom(cmd_buffer, NULL);
-
- job->cpu.clear_attachments.rects =
- vk_alloc(&cmd_buffer->device->vk.alloc,
- sizeof(VkClearRect) * rectCount, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
- if (!job->cpu.clear_attachments.rects) {
- v3dv_flag_oom(cmd_buffer, NULL);
- return;
- }
-
- job->cpu.clear_attachments.attachment_count = attachmentCount;
- memcpy(job->cpu.clear_attachments.attachments, pAttachments,
- sizeof(VkClearAttachment) * attachmentCount);
-
- job->cpu.clear_attachments.rect_count = rectCount;
- memcpy(job->cpu.clear_attachments.rects, pRects,
- sizeof(VkClearRect) * rectCount);
-
- list_addtail(&job->list_link, &cmd_buffer->jobs);
-
- /* Resume the subpass so we can continue recording commands */
- v3dv_cmd_buffer_subpass_resume(cmd_buffer,
- cmd_buffer->state.subpass_idx);
-}
-
-static bool
-all_clear_rects_in_base_layer(uint32_t rect_count, const VkClearRect *rects)
-{
- for (uint32_t i = 0; i < rect_count; i++) {
- if (rects[i].baseArrayLayer != 0 || rects[i].layerCount != 1)
- return false;
- }
- return true;
-}
-
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
uint32_t attachmentCount,
const VkClearAttachment *pAttachments,
@@ -1631,117 +1215,31 @@ v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
/* We can only clear attachments in the current subpass */
assert(attachmentCount <= 5); /* 4 color + D/S */
- /* Clear attachments may clear multiple layers of the framebuffer, which
- * currently requires that we emit multiple jobs (one per layer) and
- * therefore requires that we have the framebuffer information available
- * to select the destination layers.
- *
- * For secondary command buffers the framebuffer state may not be available
- * until they are executed inside a primary command buffer, so in that case
- * we need to defer recording of the command until that moment.
- *
- * FIXME: once we add support for geometry shaders in the driver we could
- * avoid emitting a job per layer to implement this by always using the clear
- * rect path below with a passthrough geometry shader to select the layer to
- * clear. If we did that we would not need to special case secondary command
- * buffers here and we could ensure that any secondary command buffer in a
- * render pass only has on job with a partial CL, which would simplify things
- * quite a bit.
- */
- if (!cmd_buffer->state.framebuffer) {
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
- handle_deferred_clear_attachments(cmd_buffer,
- attachmentCount, pAttachments,
- rectCount, pRects);
- return;
- }
-
- assert(cmd_buffer->state.framebuffer);
-
struct v3dv_render_pass *pass = cmd_buffer->state.pass;
assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
struct v3dv_subpass *subpass =
&cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
- /* First we try to handle this by emitting a clear rect inside the
- * current job for this subpass. This should be optimal but this method
- * cannot handle clearing layers other than the base layer, since we don't
- * support any form of layered rendering yet.
- */
- if (all_clear_rects_in_base_layer(rectCount, pRects)) {
- for (uint32_t i = 0; i < attachmentCount; i++) {
- if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
- pAttachments[i].colorAttachment,
- &pAttachments[i].clearValue.color,
- rectCount, pRects);
- } else {
- emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
- pAttachments[i].aspectMask,
- &pAttachments[i].clearValue.depthStencil,
- rectCount, pRects);
- }
- }
- return;
- }
-
- perf_debug("Falling back to slow path for vkCmdClearAttachments due to "
- "clearing layers other than the base array layer.\n");
-
- /* If we can't handle this as a draw call inside the current job then we
- * will have to spawn jobs for the clears, which will be slow. In that case,
- * try to use the TLB to clear if possible.
- */
- if (can_use_tlb_clear(cmd_buffer, rectCount, pRects)) {
- emit_tlb_clear(cmd_buffer, attachmentCount, pAttachments,
- pRects[0].baseArrayLayer, pRects[0].layerCount);
- return;
- }
-
- /* Otherwise, fall back to drawing rects with the clear value using a
- * separate job. This is the slowest path.
+ /* Emit a clear rect inside the current job for this subpass. For layered
+ * framebuffers, we use a geometry shader to redirect clears to the
+ * appropriate layers.
*/
+ bool is_layered, all_rects_same_layers;
+ gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
for (uint32_t i = 0; i < attachmentCount; i++) {
- uint32_t attachment_idx = VK_ATTACHMENT_UNUSED;
-
- if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- uint32_t rt_idx = pAttachments[i].colorAttachment;
- attachment_idx = subpass->color_attachments[rt_idx].attachment;
- } else if (pAttachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- attachment_idx = subpass->ds_attachment.attachment;
- }
-
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT;
- const uint32_t samples =
- cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
- const VkFormat format =
- cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
- for (uint32_t j = 0; j < rectCount; j++) {
- emit_color_clear_rect(cmd_buffer,
- attachment_idx,
- format,
- samples,
- components,
- pAttachments[i].clearValue.color,
- &pRects[j]);
- }
+ emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
+ pAttachments[i].colorAttachment,
+ &pAttachments[i].clearValue.color,
+ is_layered, all_rects_same_layers,
+ rectCount, pRects);
} else {
- for (uint32_t j = 0; j < rectCount; j++) {
- emit_ds_clear_rect(cmd_buffer,
- pAttachments[i].aspectMask,
- attachment_idx,
- pAttachments[i].clearValue.depthStencil,
- &pRects[j]);
- }
+ emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
+ pAttachments[i].aspectMask,
+ &pAttachments[i].clearValue.depthStencil,
+ is_layered, all_rects_same_layers,
+ rectCount, pRects);
}
}
}
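/* gather_layering_info() is defined elsewhere in this file and not shown in
 * this hunk. A minimal sketch of its contract, inferred from how is_layered
 * and all_rects_same_layers are consumed above (name and logic assumed):
 */
static void
gather_layering_info_sketch(uint32_t rect_count, const VkClearRect *rects,
                            bool *is_layered, bool *all_rects_same_layers)
{
   /* A clear is layered if any rect touches a layer beyond base layer 0 */
   *is_layered = false;
   /* The layered path can share state when all rects cover the same range */
   *all_rects_same_layers = true;
   for (uint32_t i = 0; i < rect_count; i++) {
      if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
         *is_layered = true;
      if (i > 0 && (rects[i].baseArrayLayer != rects[0].baseArrayLayer ||
                    rects[i].layerCount != rects[0].layerCount))
         *all_rects_same_layers = false;
   }
}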
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h b/lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h
new file mode 100644
index 000000000..555b55f90
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_common.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef V3DV_META_COMMON_H
+#define V3DV_META_COMMON_H
+
+/* Disable level 0 write, just write the following mipmaps */
+#define V3D_TFU_IOA_DIMTW (1 << 0)
+#define V3D_TFU_IOA_FORMAT_SHIFT 3
+#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
+#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
+#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
+#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
+#define V3D_TFU_IOA_FORMAT_UIF_XOR 7
+
+#define V3D_TFU_ICFG_NUMMM_SHIFT 5
+#define V3D_TFU_ICFG_TTYPE_SHIFT 9
+
+#define V3D_TFU_ICFG_OPAD_SHIFT 22
+
+#define V3D_TFU_ICFG_FORMAT_SHIFT 18
+#define V3D_TFU_ICFG_FORMAT_RASTER 0
+#define V3D_TFU_ICFG_FORMAT_SAND_128 1
+#define V3D_TFU_ICFG_FORMAT_SAND_256 2
+#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
+#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
+#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
+#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
+#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
+
+/**
+ * Copy/Clear operations implemented in v3dv_meta_*.c that use the TLB hardware
+ * need to figure out TLB programming from the target image data instead of an
+ * actual Vulkan framebuffer object. For the most part, the job's frame tiling
+ * information is enough for this; however, we still need additional information
+ * such as the internal type of our single render target, so we use this
+ * auxiliary struct to pass that information around.
+ */
+struct v3dv_meta_framebuffer {
+ /* The internal type of the single render target */
+ uint32_t internal_type;
+
+ /* Supertile coverage */
+ uint32_t min_x_supertile;
+ uint32_t min_y_supertile;
+ uint32_t max_x_supertile;
+ uint32_t max_y_supertile;
+
+ /* Format info */
+ VkFormat vk_format;
+ const struct v3dv_format *format;
+ uint8_t internal_depth_type;
+};
+
+#endif
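/* Usage sketch for the struct above: full-framebuffer supertile coverage
 * derived from the frame tiling, mirroring what the per-version
 * meta_framebuffer_init hook computes (helper name and parameters are
 * illustrative, not part of this header):
 */
static inline void
v3dv_meta_framebuffer_cover_all(struct v3dv_meta_framebuffer *fb,
                                uint32_t width, uint32_t height,
                                uint32_t supertile_w_px,
                                uint32_t supertile_h_px)
{
   /* Supertile coverage always starts at 0,0 and spans the whole frame */
   fb->min_x_supertile = 0;
   fb->min_y_supertile = 0;
   fb->max_x_supertile = (width - 1) / supertile_w_px;
   fb->max_y_supertile = (height - 1) / supertile_h_px;
}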
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c b/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c
index d998d7d8a..85cd8e066 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_meta_copy.c
@@ -22,11 +22,12 @@
*/
#include "v3dv_private.h"
+#include "v3dv_meta_common.h"
#include "compiler/nir/nir_builder.h"
-#include "broadcom/cle/v3dx_pack.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"
+#include "vulkan/util/vk_common_entrypoints.h"
static uint32_t
meta_blit_key_hash(const void *key)
@@ -169,13 +170,25 @@ create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
}
assert(*p_layout == 0);
+ /* FIXME: this abuses the API a bit, since not all of our copy
+ * pipelines have a geometry shader. We could create 2 different pipeline
+ * layouts, but this works for us for now.
+ */
+#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0
+#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
+#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
+#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24
+ VkPushConstantRange ranges[2] = {
+ { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
+ { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
+ };
+
VkPipelineLayoutCreateInfo p_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges =
- &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 20 },
+ .pushConstantRangeCount = 2,
+ .pPushConstantRanges = ranges,
};
result =
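/* Sketch of how the push-constant offsets defined above would be fed at
 * record time (hypothetical helper; only vkCmdPushConstants is real API,
 * and the box layout of 4 floats is an assumption):
 */
static void
push_texel_buffer_copy_constants(VkCommandBuffer cmd,
                                 VkPipelineLayout layout,
                                 const float box[4], uint32_t stride,
                                 uint32_t offset, uint32_t gs_layer)
{
   /* 16 bytes of box data at offset 0, stride at 16, offset at 20 (FS) */
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET, 16, box);
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET, 4, &stride);
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_FRAGMENT_BIT,
                      TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET, 4, &offset);
   /* The layer index consumed by the geometry shader lives at offset 24 */
   vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_GEOMETRY_BIT,
                      TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET, 4, &gs_layer);
}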
@@ -229,640 +242,127 @@ v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
}
}
-static inline bool
-can_use_tlb(struct v3dv_image *image,
- const VkOffset3D *offset,
- VkFormat *compat_format);
-
-/**
- * Copy operations implemented in this file don't operate on a framebuffer
- * object provided by the user, however, since most use the TLB for this,
- * we still need to have some representation of the framebuffer. For the most
- * part, the job's frame tiling information is enough for this, however we
- * still need additional information such us the internal type of our single
- * render target, so we use this auxiliary struct to pass that information
- * around.
- */
-struct framebuffer_data {
- /* The internal type of the single render target */
- uint32_t internal_type;
-
- /* Supertile coverage */
- uint32_t min_x_supertile;
- uint32_t min_y_supertile;
- uint32_t max_x_supertile;
- uint32_t max_y_supertile;
-
- /* Format info */
- VkFormat vk_format;
- const struct v3dv_format *format;
- uint8_t internal_depth_type;
-};
-
-static void
-setup_framebuffer_data(struct framebuffer_data *fb,
- VkFormat vk_format,
- uint32_t internal_type,
- const struct v3dv_frame_tiling *tiling)
-{
- fb->internal_type = internal_type;
-
- /* Supertile coverage always starts at 0,0 */
- uint32_t supertile_w_in_pixels =
- tiling->tile_width * tiling->supertile_width;
- uint32_t supertile_h_in_pixels =
- tiling->tile_height * tiling->supertile_height;
-
- fb->min_x_supertile = 0;
- fb->min_y_supertile = 0;
- fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
- fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
-
- fb->vk_format = vk_format;
- fb->format = v3dv_get_format(vk_format);
-
- fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
- if (vk_format_is_depth_or_stencil(vk_format))
- fb->internal_depth_type = v3dv_get_internal_depth_type(vk_format);
-}
-
-/* This chooses a tile buffer format that is appropriate for the copy operation.
- * Typically, this is the image render target type; however, if we are copying
- * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
- * we need to load and store to/from a tile color buffer using a compatible
- * color format.
- */
-static uint32_t
-choose_tlb_format(struct framebuffer_data *framebuffer,
- VkImageAspectFlags aspect,
- bool for_store,
- bool is_copy_to_buffer,
- bool is_copy_from_buffer)
-{
- if (is_copy_to_buffer || is_copy_from_buffer) {
- switch (framebuffer->vk_format) {
- case VK_FORMAT_D16_UNORM:
- return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
- case VK_FORMAT_D32_SFLOAT:
- return V3D_OUTPUT_IMAGE_FORMAT_R32F;
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
- case VK_FORMAT_D24_UNORM_S8_UINT:
- /* When storing the stencil aspect of a combined depth/stencil image
- * to a buffer, the Vulkan spec states that the output buffer must
- * have packed stencil values, so we choose an R8UI format for our
- * store outputs. For the load input we still want RGBA8UI since the
- * source image contains 4 channels (including the 3 channels
- * containing the 24-bit depth value).
- *
- * When loading the stencil aspect of a combined depth/stencil image
- * from a buffer, we read packed 8-bit stencil values from the buffer
- * that we need to put into the LSB of the 32-bit format (the R
- * channel), so we use R8UI. For the store, if we used R8UI then we
- * would write 8-bit stencil values consecutively over depth channels,
- * so we need to use RGBA8UI. This will write each stencil value in
- * its correct position, but will overwrite depth values (channels
- * G, B, A) with undefined values. To fix this, we will have to
- * restore the depth aspect from the Z tile buffer (which we should
- * pre-load from the image before the store).
- */
- if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
- return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
- } else {
- assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
- if (is_copy_to_buffer) {
- return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
- V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
- } else {
- assert(is_copy_from_buffer);
- return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
- V3D_OUTPUT_IMAGE_FORMAT_R8UI;
- }
- }
- default: /* Color formats */
- return framebuffer->format->rt_type;
- }
- } else {
- return framebuffer->format->rt_type;
- }
-}
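/* Summary of the D24_UNORM_S8_UINT stencil-aspect selection implemented
 * above (derived directly from the branches in choose_tlb_format()):
 *
 *                        tile load    tile store
 *   copy to buffer       RGBA8UI      R8UI
 *   copy from buffer     R8UI         RGBA8UI
 */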
-
-static inline bool
-format_needs_rb_swap(VkFormat format)
-{
- const uint8_t *swizzle = v3dv_get_format_swizzle(format);
- return swizzle[0] == PIPE_SWIZZLE_Z;
-}
-
-static void
-get_internal_type_bpp_for_image_aspects(VkFormat vk_format,
- VkImageAspectFlags aspect_mask,
- uint32_t *internal_type,
- uint32_t *internal_bpp)
-{
- const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT;
-
- /* We can't store depth/stencil pixel formats to a raster format, so
- * instead we load our depth/stencil aspects to a compatible color
- * format.
- */
- /* FIXME: pre-compute this at image creation time? */
- if (aspect_mask & ds_aspects) {
- switch (vk_format) {
- case VK_FORMAT_D16_UNORM:
- *internal_type = V3D_INTERNAL_TYPE_16UI;
- *internal_bpp = V3D_INTERNAL_BPP_64;
- break;
- case VK_FORMAT_D32_SFLOAT:
- *internal_type = V3D_INTERNAL_TYPE_32F;
- *internal_bpp = V3D_INTERNAL_BPP_128;
- break;
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- /* Use RGBA8 format so we can relocate the X/S bits in the appropriate
- * place to match Vulkan expectations. See the comment on the tile
- * load command for more details.
- */
- *internal_type = V3D_INTERNAL_TYPE_8UI;
- *internal_bpp = V3D_INTERNAL_BPP_32;
- break;
- default:
- assert(!"unsupported format");
- break;
- }
- } else {
- const struct v3dv_format *format = v3dv_get_format(vk_format);
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
- internal_type,
- internal_bpp);
- }
-}
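/* Quick reference for the depth/stencil mappings implemented above:
 *   D16_UNORM              -> 16UI / BPP_64
 *   D32_SFLOAT             -> 32F  / BPP_128
 *   X8_D24 and D24S8       -> 8UI  / BPP_32
 */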
-
-struct rcl_clear_info {
- const union v3dv_clear_value *clear_value;
- struct v3dv_image *image;
- VkImageAspectFlags aspects;
- uint32_t layer;
- uint32_t level;
-};
-
-static struct v3dv_cl *
-emit_rcl_prologue(struct v3dv_job *job,
- struct framebuffer_data *fb,
- const struct rcl_clear_info *clear_info)
-{
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
-
- struct v3dv_cl *rcl = &job->rcl;
- v3dv_cl_ensure_space_with_branch(rcl, 200 +
- tiling->layers * 256 *
- cl_packet_length(SUPERTILE_COORDINATES));
- if (job->cmd_buffer->state.oom)
- return NULL;
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
- config.early_z_disable = true;
- config.image_width_pixels = tiling->width;
- config.image_height_pixels = tiling->height;
- config.number_of_render_targets = 1;
- config.multisample_mode_4x = tiling->msaa;
- config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
- config.internal_depth_type = fb->internal_depth_type;
- }
-
- if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
- uint32_t clear_pad = 0;
- if (clear_info->image) {
- const struct v3dv_image *image = clear_info->image;
- const struct v3d_resource_slice *slice =
- &image->slices[clear_info->level];
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- int uif_block_height = v3d_utile_height(image->cpp) * 2;
-
- uint32_t implicit_padded_height =
- align(tiling->height, uif_block_height) / uif_block_height;
-
- if (slice->padded_height_of_output_image_in_uif_blocks -
- implicit_padded_height >= 15) {
- clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
- }
- }
- }
-
- const uint32_t *color = &clear_info->clear_value->color[0];
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
- clear.clear_color_low_32_bits = color[0];
- clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
- clear.render_target_number = 0;
- };
-
- if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
- clear.clear_color_mid_low_32_bits =
- ((color[1] >> 24) | (color[2] << 8));
- clear.clear_color_mid_high_24_bits =
- ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
- clear.render_target_number = 0;
- };
- }
-
- if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
- clear.uif_padded_height_in_uif_blocks = clear_pad;
- clear.clear_color_high_16_bits = color[3] >> 16;
- clear.render_target_number = 0;
- };
- }
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
- rt.render_target_0_internal_bpp = tiling->internal_bpp;
- rt.render_target_0_internal_type = fb->internal_type;
- rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
- clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
- };
-
- cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
- init.use_auto_chained_tile_lists = true;
- init.size_of_first_block_in_chained_tile_lists =
- TILE_ALLOCATION_BLOCK_SIZE_64B;
- }
-
- return rcl;
-}
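/* Worked sketch of the clear-color packing emitted above: the four 32-bit
 * clear words c[0..3] are split across the PART1/2/3 packets as 32 + 24,
 * 32 + 24, and a final 16 bits. The struct and helper below are
 * illustrative only; the field grouping mirrors the cl_emit calls:
 */
struct clear_color_parts {
   uint32_t low_32;      /* PART1: c[0]                                  */
   uint32_t next_24;     /* PART1: c[1] & 0x00ffffff                     */
   uint32_t mid_low_32;  /* PART2: (c[1] >> 24) | (c[2] << 8)            */
   uint32_t mid_high_24; /* PART2: (c[2] >> 24) | ((c[3] & 0xffff) << 8) */
   uint32_t high_16;     /* PART3: c[3] >> 16                            */
};

static inline struct clear_color_parts
pack_clear_color(const uint32_t c[4])
{
   return (struct clear_color_parts) {
      .low_32      = c[0],
      .next_24     = c[1] & 0x00ffffff,
      .mid_low_32  = (c[1] >> 24) | (c[2] << 8),
      .mid_high_24 = (c[2] >> 24) | ((c[3] & 0xffff) << 8),
      .high_16     = c[3] >> 16,
   };
}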
-
-static void
-emit_frame_setup(struct v3dv_job *job,
- uint32_t layer,
- const union v3dv_clear_value *clear_value)
+static VkFormat
+get_compatible_tlb_format(VkFormat format)
{
- v3dv_return_if_oom(NULL, job);
-
- const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
-
- struct v3dv_cl *rcl = &job->rcl;
-
- const uint32_t tile_alloc_offset =
- 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
- cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
- config.number_of_bin_tile_lists = 1;
- config.total_frame_width_in_tiles = tiling->draw_tiles_x;
- config.total_frame_height_in_tiles = tiling->draw_tiles_y;
-
- config.supertile_width_in_tiles = tiling->supertile_width;
- config.supertile_height_in_tiles = tiling->supertile_height;
-
- config.total_frame_width_in_supertiles =
- tiling->frame_width_in_supertiles;
- config.total_frame_height_in_supertiles =
- tiling->frame_height_in_supertiles;
- }
-
- /* Implement the GFXH-1742 workaround. Also, if we are clearing, we have
- * to do it here.
- */
- for (int i = 0; i < 2; i++) {
- cl_emit(rcl, TILE_COORDINATES, coords);
- cl_emit(rcl, END_OF_LOADS, end);
- cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
- if (clear_value && i == 0) {
- cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
- clear.clear_z_stencil_buffer = true;
- clear.clear_all_render_targets = true;
- }
- }
- cl_emit(rcl, END_OF_TILE_MARKER, end);
- }
-
- cl_emit(rcl, FLUSH_VCD_CACHE, flush);
-}
+ switch (format) {
+ case VK_FORMAT_R8G8B8A8_SNORM:
+ return VK_FORMAT_R8G8B8A8_UINT;
-static void
-emit_supertile_coordinates(struct v3dv_job *job,
- struct framebuffer_data *framebuffer)
-{
- v3dv_return_if_oom(NULL, job);
+ case VK_FORMAT_R8G8_SNORM:
+ return VK_FORMAT_R8G8_UINT;
- struct v3dv_cl *rcl = &job->rcl;
+ case VK_FORMAT_R8_SNORM:
+ return VK_FORMAT_R8_UINT;
- const uint32_t min_y = framebuffer->min_y_supertile;
- const uint32_t max_y = framebuffer->max_y_supertile;
- const uint32_t min_x = framebuffer->min_x_supertile;
- const uint32_t max_x = framebuffer->max_x_supertile;
+ case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
+ return VK_FORMAT_A8B8G8R8_UINT_PACK32;
- for (int y = min_y; y <= max_y; y++) {
- for (int x = min_x; x <= max_x; x++) {
- cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
- coords.column_number_in_supertiles = x;
- coords.row_number_in_supertiles = y;
- }
- }
- }
-}
+ case VK_FORMAT_R16_UNORM:
+ case VK_FORMAT_R16_SNORM:
+ return VK_FORMAT_R16_UINT;
-static void
-emit_linear_load(struct v3dv_cl *cl,
- uint32_t buffer,
- struct v3dv_bo *bo,
- uint32_t offset,
- uint32_t stride,
- uint32_t format)
-{
- cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
- load.buffer_to_load = buffer;
- load.address = v3dv_cl_address(bo, offset);
- load.input_image_format = format;
- load.memory_format = VC5_TILING_RASTER;
- load.height_in_ub_or_stride = stride;
- load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
- }
-}
+ case VK_FORMAT_R16G16_UNORM:
+ case VK_FORMAT_R16G16_SNORM:
+ return VK_FORMAT_R16G16_UINT;
-static void
-emit_linear_store(struct v3dv_cl *cl,
- uint32_t buffer,
- struct v3dv_bo *bo,
- uint32_t offset,
- uint32_t stride,
- bool msaa,
- uint32_t format)
-{
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = RENDER_TARGET_0;
- store.address = v3dv_cl_address(bo, offset);
- store.clear_buffer_being_stored = false;
- store.output_image_format = format;
- store.memory_format = VC5_TILING_RASTER;
- store.height_in_ub_or_stride = stride;
- store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
- V3D_DECIMATE_MODE_SAMPLE_0;
- }
-}
+ case VK_FORMAT_R16G16B16A16_UNORM:
+ case VK_FORMAT_R16G16B16A16_SNORM:
+ return VK_FORMAT_R16G16B16A16_UINT;
-static void
-emit_image_load(struct v3dv_cl *cl,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *image,
- VkImageAspectFlags aspect,
- uint32_t layer,
- uint32_t mip_level,
- bool is_copy_to_buffer,
- bool is_copy_from_buffer)
-{
- uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
+ case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+ return VK_FORMAT_R32_SFLOAT;
- /* For image to/from buffer copies we always load to and store from RT0,
- * even for depth/stencil aspects, because the hardware can't do raster
- * stores or loads from/to the depth/stencil tile buffers.
+ /* We can't render to compressed formats using the TLB, so instead we use
+ * a compatible format with the same bpp as the compressed format. Because
+ * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
+ * case of ETC), when we implement copies with the compatible format we
+ * will have to divide offsets and dimensions of the compressed image by
+ * the compressed block size.
*/
- bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
- aspect == VK_IMAGE_ASPECT_COLOR_BIT;
-
- const struct v3d_resource_slice *slice = &image->slices[mip_level];
- cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
- load.buffer_to_load = load_to_color_tlb ?
- RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
-
- load.address = v3dv_cl_address(image->mem->bo, layer_offset);
-
- load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
- is_copy_to_buffer,
- is_copy_from_buffer);
- load.memory_format = slice->tiling;
-
- /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
- * expects the depth value in the LSB bits of each 32-bit pixel.
- * Unfortunately, the hardware seems to put the S8/X8 bits there and the
- * depth bits on the MSB. To work around that we can reverse the channel
- * order and then swap the R/B channels to get what we want.
- *
- * NOTE: reversing and swapping only gets us the behavior we want if the
- * operations happen in that exact order, which seems to be the case when
- * done on the tile buffer load operations. On the store, it seems the
- * order is not the same. The order on the store is probably reversed so
- * that reversing and swapping on both the load and the store preserves
- * the original order of the channels in memory.
- *
- * Notice that we only need to do this when copying to a buffer, where
- * depth and stencil aspects are copied as separate regions and
- * the spec expects them to be tightly packed.
- */
- bool needs_rb_swap = false;
- bool needs_chan_reverse = false;
- if (is_copy_to_buffer &&
- (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
- (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
- (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
- needs_rb_swap = true;
- needs_chan_reverse = true;
- } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
- (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
- /* This is not a raw data copy (i.e. we are clearing the image),
- * so we need to make sure we respect the format swizzle.
- */
- needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
- }
-
- load.r_b_swap = needs_rb_swap;
- load.channel_reverse = needs_chan_reverse;
+ case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+ case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
+ case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
+ case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
+ return VK_FORMAT_R32G32B32A32_UINT;
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- load.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- load.height_in_ub_or_stride = slice->stride;
- }
+ case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+ case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ return VK_FORMAT_R16G16B16A16_UINT;
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else
- load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ default:
+ return VK_FORMAT_UNDEFINED;
}
}
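/* Worked example of the block-size division described above, using a
 * 4x4-block compressed format (e.g. ETC2): a 64x64 texel region maps to a
 * 16x16 region in the 128 bpp compatible format. The helper is a sketch of
 * the round-up arithmetic used for this elsewhere in the file:
 */
static inline VkExtent2D
compat_extent_for_compressed(VkExtent2D texels,
                             uint32_t block_w, uint32_t block_h)
{
   /* Round up so partial edge blocks are still covered */
   return (VkExtent2D) {
      .width  = (texels.width  + block_w - 1) / block_w,
      .height = (texels.height + block_h - 1) / block_h,
   };
}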
-static void
-emit_image_store(struct v3dv_cl *cl,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *image,
- VkImageAspectFlags aspect,
- uint32_t layer,
- uint32_t mip_level,
- bool is_copy_to_buffer,
- bool is_copy_from_buffer,
- bool is_multisample_resolve)
+/**
+ * Checks if we can implement an image copy or clear operation using the TLB
+ * hardware.
+ */
+bool
+v3dv_meta_can_use_tlb(struct v3dv_image *image,
+ const VkOffset3D *offset,
+ VkFormat *compat_format)
{
- uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
-
- bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
- aspect == VK_IMAGE_ASPECT_COLOR_BIT;
-
- const struct v3d_resource_slice *slice = &image->slices[mip_level];
- cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = store_from_color_tlb ?
- RENDER_TARGET_0 : v3dv_zs_buffer_from_aspect_bits(aspect);
-
- store.address = v3dv_cl_address(image->mem->bo, layer_offset);
- store.clear_buffer_being_stored = false;
-
- /* See rationale in emit_image_load() */
- bool needs_rb_swap = false;
- bool needs_chan_reverse = false;
- if (is_copy_from_buffer &&
- (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
- (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
- (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
- needs_rb_swap = true;
- needs_chan_reverse = true;
- } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
- (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
- needs_rb_swap = format_needs_rb_swap(framebuffer->vk_format);
- }
-
- store.r_b_swap = needs_rb_swap;
- store.channel_reverse = needs_chan_reverse;
-
- store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
- is_copy_to_buffer,
- is_copy_from_buffer);
- store.memory_format = slice->tiling;
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
- store.height_in_ub_or_stride =
- slice->padded_height_of_output_image_in_uif_blocks;
- } else if (slice->tiling == VC5_TILING_RASTER) {
- store.height_in_ub_or_stride = slice->stride;
- }
+ if (offset->x != 0 || offset->y != 0)
+ return false;
- if (image->samples > VK_SAMPLE_COUNT_1_BIT)
- store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
- else if (is_multisample_resolve)
- store.decimate_mode = V3D_DECIMATE_MODE_4X;
- else
- store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
+ if (compat_format)
+ *compat_format = image->vk.format;
+ return true;
}
-}
-
-static void
-emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
- struct framebuffer_data *framebuffer,
- struct v3dv_buffer *buffer,
- struct v3dv_image *image,
- uint32_t layer_offset,
- const VkBufferImageCopy *region)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- /* Load image to TLB */
- assert((image->type != VK_IMAGE_TYPE_3D &&
- layer_offset < region->imageSubresource.layerCount) ||
- layer_offset < image->extent.depth);
-
- const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ?
- region->imageSubresource.baseArrayLayer + layer_offset :
- region->imageOffset.z + layer_offset;
-
- emit_image_load(cl, framebuffer, image,
- region->imageSubresource.aspectMask,
- image_layer,
- region->imageSubresource.mipLevel,
- true, false);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- /* Store TLB to buffer */
- uint32_t width, height;
- if (region->bufferRowLength == 0)
- width = region->imageExtent.width;
- else
- width = region->bufferRowLength;
-
- if (region->bufferImageHeight == 0)
- height = region->imageExtent.height;
- else
- height = region->bufferImageHeight;
- /* Handle copy from compressed format */
- width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
- height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
-
- /* If we are storing stencil from a combined depth/stencil format, the
- * Vulkan spec states that the output buffer must have packed stencil
- * values, where each stencil value is 1 byte.
+ /* If the image format is not TLB-supported, then check if we can use
+ * a compatible format instead.
*/
- uint32_t cpp =
- region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
- 1 : image->cpp;
- uint32_t buffer_stride = width * cpp;
- uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
- height * buffer_stride * layer_offset;
-
- uint32_t format = choose_tlb_format(framebuffer,
- region->imageSubresource.aspectMask,
- true, true, false);
- bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
-
- emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
- buffer_offset, buffer_stride, msaa, format);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
+ if (compat_format) {
+ *compat_format = get_compatible_tlb_format(image->vk.format);
+ if (*compat_format != VK_FORMAT_UNDEFINED)
+ return true;
}
-}
-static void
-emit_copy_layer_to_buffer(struct v3dv_job *job,
- struct v3dv_buffer *buffer,
- struct v3dv_image *image,
- struct framebuffer_data *framebuffer,
- uint32_t layer,
- const VkBufferImageCopy *region)
-{
- emit_frame_setup(job, layer, NULL);
- emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
- image, layer, region);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_copy_image_to_buffer_rcl(struct v3dv_job *job,
- struct v3dv_buffer *buffer,
- struct v3dv_image *image,
- struct framebuffer_data *framebuffer,
- const VkBufferImageCopy *region)
-{
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
- v3dv_return_if_oom(NULL, job);
-
- for (int layer = 0; layer < job->frame_tiling.layers; layer++)
- emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
- cl_emit(rcl, END_OF_RENDERING, end);
+ return false;
}
/* Implements a copy using the TLB.
@@ -879,19 +379,19 @@ static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
{
VkFormat fb_format;
- if (!can_use_tlb(image, &region->imageOffset, &fb_format))
+ if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format,
- region->imageSubresource.aspectMask,
- &internal_type, &internal_bpp);
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, region->imageSubresource.aspectMask,
+ &internal_type, &internal_bpp);
uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
@@ -903,19 +403,21 @@ copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
return true;
/* Handle copy from compressed format using a compatible format */
- const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, num_layers, false,
+ 1, internal_bpp, false);
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
+ internal_type, &job->frame_tiling);
- v3dv_job_emit_binning_flush(job);
- emit_copy_image_to_buffer_rcl(job, buffer, image, &framebuffer, region);
+ v3dv_X(job->device, job_emit_binning_flush)(job);
+ v3dv_X(job->device, meta_emit_copy_image_to_buffer_rcl)
+ (job, buffer, image, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
@@ -930,7 +432,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
VkFormat src_format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
- const VkImageBlit *region,
+ const VkImageBlit2KHR *region,
VkFilter filter,
bool dst_is_padded_image);
@@ -942,7 +444,7 @@ static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
{
bool handled = false;
@@ -991,10 +493,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
dst_format = VK_FORMAT_R8G8B8A8_UINT;
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
- assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
- image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
- if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
+ assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
+ image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
+ if (image->vk.format == VK_FORMAT_D32_SFLOAT) {
src_format = VK_FORMAT_R32_UINT;
dst_format = VK_FORMAT_R32_UINT;
} else {
@@ -1016,7 +518,7 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
- assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
+ assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
/* Copying from S8D24. We want to write 8-bit stencil values only,
* so adjust the buffer bpp for that. Since the hardware stores stencil
* in the LSB, we can just do a RGBA8UI to R8UI blit.
@@ -1070,14 +572,14 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
buf_height = region->bufferImageHeight;
/* If the image is compressed, the bpp refers to blocks, not pixels */
- uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
- uint32_t block_height = vk_format_get_blockheight(image->vk_format);
+ uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
+ uint32_t block_height = vk_format_get_blockheight(image->vk.format);
buf_width = buf_width / block_width;
buf_height = buf_height / block_height;
/* Compute layers to copy */
uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
@@ -1094,17 +596,17 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
VkResult result;
struct v3dv_device *device = cmd_buffer->device;
VkDevice _device = v3dv_device_to_handle(device);
- if (vk_format_is_compressed(image->vk_format)) {
+ if (vk_format_is_compressed(image->vk.format)) {
VkImage uiview;
VkImageCreateInfo uiview_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_3D,
.format = dst_format,
- .extent = { buf_width, buf_height, image->extent.depth },
- .mipLevels = image->levels,
- .arrayLayers = image->array_size,
- .samples = image->samples,
- .tiling = image->tiling,
+ .extent = { buf_width, buf_height, image->vk.extent.depth },
+ .mipLevels = image->vk.mip_levels,
+ .arrayLayers = image->vk.array_layers,
+ .samples = image->vk.samples,
+ .tiling = image->vk.tiling,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
@@ -1118,9 +620,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
cmd_buffer, (uintptr_t)uiview,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
- result = v3dv_BindImageMemory(_device, uiview,
- v3dv_device_memory_to_handle(image->mem),
- image->mem_offset);
+ result =
+ vk_common_BindImageMemory(_device, uiview,
+ v3dv_device_memory_to_handle(image->mem),
+ image->mem_offset);
if (result != VK_SUCCESS)
return handled;
@@ -1158,9 +661,10 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
/* Bind the buffer memory to the image */
VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
i * buf_width * buf_height * buffer_bpp;
- result = v3dv_BindImageMemory(_device, buffer_image,
- v3dv_device_memory_to_handle(buffer->mem),
- buffer_offset);
+ result =
+ vk_common_BindImageMemory(_device, buffer_image,
+ v3dv_device_memory_to_handle(buffer->mem),
+ buffer_offset);
if (result != VK_SUCCESS)
return handled;
@@ -1172,7 +676,8 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
* image, but that we need to blit to a S8D24 destination (the only
* stencil format we support).
*/
- const VkImageBlit blit_region = {
+ const VkImageBlit2KHR blit_region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
.srcSubresource = {
.aspectMask = copy_aspect,
.mipLevel = region->imageSubresource.mipLevel,
@@ -1225,309 +730,26 @@ copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
return true;
}
-static VkFormat
-get_compatible_tlb_format(VkFormat format)
-{
- switch (format) {
- case VK_FORMAT_R8G8B8A8_SNORM:
- return VK_FORMAT_R8G8B8A8_UINT;
-
- case VK_FORMAT_R8G8_SNORM:
- return VK_FORMAT_R8G8_UINT;
-
- case VK_FORMAT_R8_SNORM:
- return VK_FORMAT_R8_UINT;
-
- case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
- return VK_FORMAT_A8B8G8R8_UINT_PACK32;
-
- case VK_FORMAT_R16_UNORM:
- case VK_FORMAT_R16_SNORM:
- return VK_FORMAT_R16_UINT;
-
- case VK_FORMAT_R16G16_UNORM:
- case VK_FORMAT_R16G16_SNORM:
- return VK_FORMAT_R16G16_UINT;
-
- case VK_FORMAT_R16G16B16A16_UNORM:
- case VK_FORMAT_R16G16B16A16_SNORM:
- return VK_FORMAT_R16G16B16A16_UINT;
-
- case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
- return VK_FORMAT_R32_SFLOAT;
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2KHR *info)
- /* We can't render to compressed formats using the TLB, so instead we use
- * a compatible format with the same bpp as the compressed format. Because
- * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
- * case of ETC), when we implement copies with the compatible format we
- * will have to divide offsets and dimensions of the compressed image by
- * the compressed block size.
- */
- case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
- case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
- case VK_FORMAT_BC2_UNORM_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- case VK_FORMAT_BC3_UNORM_BLOCK:
- return VK_FORMAT_R32G32B32A32_UINT;
-
- case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- case VK_FORMAT_EAC_R11_UNORM_BLOCK:
- case VK_FORMAT_EAC_R11_SNORM_BLOCK:
- case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- return VK_FORMAT_R16G16B16A16_UINT;
-
- default:
- return VK_FORMAT_UNDEFINED;
- }
-}
-
-static inline bool
-can_use_tlb(struct v3dv_image *image,
- const VkOffset3D *offset,
- VkFormat *compat_format)
-{
- if (offset->x != 0 || offset->y != 0)
- return false;
-
- if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
- if (compat_format)
- *compat_format = image->vk_format;
- return true;
- }
-
- /* If the image format is not TLB-supported, then check if we can use
- * a compatible format instead.
- */
- if (compat_format) {
- *compat_format = get_compatible_tlb_format(image->vk_format);
- if (*compat_format != VK_FORMAT_UNDEFINED)
- return true;
- }
-
- return false;
-}
-
-void
-v3dv_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkBuffer destBuffer,
- uint32_t regionCount,
- const VkBufferImageCopy *pRegions)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_image, image, srcImage);
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, destBuffer);
+ V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
- for (uint32_t i = 0; i < regionCount; i++) {
- if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &pRegions[i]))
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
continue;
- if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &pRegions[i]))
+ if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
continue;
unreachable("Unsupported image to buffer copy.");
}
}
-static void
-emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *dst,
- struct v3dv_image *src,
- uint32_t layer_offset,
- const VkImageCopy *region)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- assert((src->type != VK_IMAGE_TYPE_3D &&
- layer_offset < region->srcSubresource.layerCount) ||
- layer_offset < src->extent.depth);
-
- const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
- region->srcSubresource.baseArrayLayer + layer_offset :
- region->srcOffset.z + layer_offset;
-
- emit_image_load(cl, framebuffer, src,
- region->srcSubresource.aspectMask,
- src_layer,
- region->srcSubresource.mipLevel,
- false, false);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- assert((dst->type != VK_IMAGE_TYPE_3D &&
- layer_offset < region->dstSubresource.layerCount) ||
- layer_offset < dst->extent.depth);
-
- const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
- region->dstSubresource.baseArrayLayer + layer_offset :
- region->dstOffset.z + layer_offset;
-
- emit_image_store(cl, framebuffer, dst,
- region->dstSubresource.aspectMask,
- dst_layer,
- region->dstSubresource.mipLevel,
- false, false, false);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-emit_copy_image_layer(struct v3dv_job *job,
- struct v3dv_image *dst,
- struct v3dv_image *src,
- struct framebuffer_data *framebuffer,
- uint32_t layer,
- const VkImageCopy *region)
-{
- emit_frame_setup(job, layer, NULL);
- emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_copy_image_rcl(struct v3dv_job *job,
- struct v3dv_image *dst,
- struct v3dv_image *src,
- struct framebuffer_data *framebuffer,
- const VkImageCopy *region)
-{
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
- v3dv_return_if_oom(NULL, job);
-
- for (int layer = 0; layer < job->frame_tiling.layers; layer++)
- emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-/* Disable level 0 write, just write following mipmaps */
-#define V3D_TFU_IOA_DIMTW (1 << 0)
-#define V3D_TFU_IOA_FORMAT_SHIFT 3
-#define V3D_TFU_IOA_FORMAT_LINEARTILE 3
-#define V3D_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
-#define V3D_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
-#define V3D_TFU_IOA_FORMAT_UIF_NO_XOR 6
-#define V3D_TFU_IOA_FORMAT_UIF_XOR 7
-
-#define V3D_TFU_ICFG_NUMMM_SHIFT 5
-#define V3D_TFU_ICFG_TTYPE_SHIFT 9
-
-#define V3D_TFU_ICFG_OPAD_SHIFT 22
-
-#define V3D_TFU_ICFG_FORMAT_SHIFT 18
-#define V3D_TFU_ICFG_FORMAT_RASTER 0
-#define V3D_TFU_ICFG_FORMAT_SAND_128 1
-#define V3D_TFU_ICFG_FORMAT_SAND_256 2
-#define V3D_TFU_ICFG_FORMAT_LINEARTILE 11
-#define V3D_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
-#define V3D_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
-#define V3D_TFU_ICFG_FORMAT_UIF_NO_XOR 14
-#define V3D_TFU_ICFG_FORMAT_UIF_XOR 15
-
-static void
-emit_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_image *dst,
- uint32_t dst_mip_level,
- uint32_t dst_layer,
- struct v3dv_image *src,
- uint32_t src_mip_level,
- uint32_t src_layer,
- uint32_t width,
- uint32_t height,
- const struct v3dv_format *format)
-{
- const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
- const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
-
- assert(dst->mem && dst->mem->bo);
- const struct v3dv_bo *dst_bo = dst->mem->bo;
-
- assert(src->mem && src->mem->bo);
- const struct v3dv_bo *src_bo = src->mem->bo;
-
- struct drm_v3d_submit_tfu tfu = {
- .ios = (height << 16) | width,
- .bo_handles = {
- dst_bo->handle,
- src_bo->handle != dst_bo->handle ? src_bo->handle : 0
- },
- };
-
- const uint32_t src_offset =
- src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
- tfu.iia |= src_offset;
-
- uint32_t icfg;
- if (src_slice->tiling == VC5_TILING_RASTER) {
- icfg = V3D_TFU_ICFG_FORMAT_RASTER;
- } else {
- icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
- (src_slice->tiling - VC5_TILING_LINEARTILE);
- }
- tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
-
- const uint32_t dst_offset =
- dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
- tfu.ioa |= dst_offset;
-
- tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
- (dst_slice->tiling - VC5_TILING_LINEARTILE)) <<
- V3D_TFU_IOA_FORMAT_SHIFT;
- tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
-
- switch (src_slice->tiling) {
- case VC5_TILING_UIF_NO_XOR:
- case VC5_TILING_UIF_XOR:
- tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
- break;
- case VC5_TILING_RASTER:
- tfu.iis |= src_slice->stride / src->cpp;
- break;
- default:
- break;
- }
-
- /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
- * OPAD field for the destination (how many extra UIF blocks beyond
- * those necessary to cover the height).
- */
- if (dst_slice->tiling == VC5_TILING_UIF_NO_XOR ||
- dst_slice->tiling == VC5_TILING_UIF_XOR) {
- uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
- uint32_t implicit_padded_height = align(height, uif_block_h);
- uint32_t icfg =
- (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
- tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
- }
-
- v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
-}
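/* Worked example of the OPAD computation above, assuming cpp = 4 (so
 * v3d_utile_height() returns 4 and a UIF block is 8 rows tall): a 100-row
 * level padded to 120 rows has align(100, 8) = 104 rows of implicit
 * padding, leaving (120 - 104) / 8 = 2 extra UIF blocks encoded in OPAD.
 * Sketch of the arithmetic:
 */
static inline uint32_t
tfu_opad_blocks(uint32_t height, uint32_t padded_height,
                uint32_t uif_block_h)
{
   uint32_t implicit =
      ((height + uif_block_h - 1) / uif_block_h) * uif_block_h;
   return (padded_height - implicit) / uif_block_h;
}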
-
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
@@ -1536,17 +758,17 @@ static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
- const VkImageCopy *region)
+ const VkImageCopy2KHR *region)
{
/* Destination can't be raster format */
- if (dst->tiling == VK_IMAGE_TILING_LINEAR)
+ if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
/* We can only do full copies, so if the format is D24S8, both aspects need
* to be copied. We only need to check the dst format because the spec
* states that depth/stencil formats must match exactly.
*/
- if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT;
if (region->dstSubresource.aspectMask != ds_aspects)
@@ -1562,8 +784,8 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
* checking against the region dimensions, which are in units of the source
* image format.
*/
- if (vk_format_is_compressed(dst->vk_format) !=
- vk_format_is_compressed(src->vk_format)) {
+ if (vk_format_is_compressed(dst->vk.format) !=
+ vk_format_is_compressed(src->vk.format)) {
return false;
}
@@ -1576,8 +798,8 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
return false;
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
- uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
- uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
+ uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
+ uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
if (region->extent.width != dst_width || region->extent.height != dst_height)
return false;
@@ -1587,15 +809,15 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
* members represent the texel dimensions of the source image and not
* the destination."
*/
- const uint32_t block_w = vk_format_get_blockwidth(src->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(src->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(src->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(src->vk.format);
uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
/* Account for sample count */
- assert(dst->samples == src->samples);
- if (dst->samples > VK_SAMPLE_COUNT_1_BIT) {
- assert(dst->samples == VK_SAMPLE_COUNT_4_BIT);
+ assert(dst->vk.samples == src->vk.samples);
+ if (dst->vk.samples > VK_SAMPLE_COUNT_1_BIT) {
+ assert(dst->vk.samples == VK_SAMPLE_COUNT_4_BIT);
width *= 2;
height *= 2;
}
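/* Note on the MSAA scaling above: 4x multisampled surfaces store a 2x2
 * sample grid per pixel, so a WxH pixel region covers 2Wx2H samples in
 * memory; e.g. a 16x16 MSAA-4x copy programs the TFU for 32x32.
 */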
@@ -1614,24 +836,24 @@ copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
*/
assert(dst->cpp == src->cpp);
const struct v3dv_format *format =
- v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
+ v3dv_get_compatible_tfu_format(cmd_buffer->device,
dst->cpp, NULL);
/* Emit a TFU job for each layer to blit */
- const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ?
+ const uint32_t layer_count = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.layerCount :
region->extent.depth;
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
- const uint32_t base_src_layer = src->type != VK_IMAGE_TYPE_3D ?
+ const uint32_t base_src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.baseArrayLayer : region->srcOffset.z;
- const uint32_t base_dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
+ const uint32_t base_dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.baseArrayLayer : region->dstOffset.z;
for (uint32_t i = 0; i < layer_count; i++) {
- emit_tfu_job(cmd_buffer,
- dst, dst_mip_level, base_dst_layer + i,
- src, src_mip_level, base_src_layer + i,
- width, height, format);
+ v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
+ (cmd_buffer, dst, dst_mip_level, base_dst_layer + i,
+ src, src_mip_level, base_src_layer + i,
+ width, height, format);
}
return true;
@@ -1645,11 +867,11 @@ static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
- const VkImageCopy *region)
+ const VkImageCopy2KHR *region)
{
VkFormat fb_format;
- if (!can_use_tlb(src, &region->srcOffset, &fb_format) ||
- !can_use_tlb(dst, &region->dstOffset, &fb_format)) {
+ if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, &fb_format) ||
+ !v3dv_meta_can_use_tlb(dst, &region->dstOffset, &fb_format)) {
return false;
}
@@ -1662,9 +884,9 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
assert(region->dstSubresource.aspectMask ==
region->srcSubresource.aspectMask);
uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format,
- region->dstSubresource.aspectMask,
- &internal_type, &internal_bpp);
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, region->dstSubresource.aspectMask,
+ &internal_type, &internal_bpp);
/* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
*
@@ -1672,12 +894,12 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
* srcSubresource (for non-3D) must match the number of slices of the
* extent (for 3D) or layers of the dstSubresource (for non-3D)."
*/
- assert((src->type != VK_IMAGE_TYPE_3D ?
+ assert((src->vk.image_type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.layerCount : region->extent.depth) ==
- (dst->type != VK_IMAGE_TYPE_3D ?
+ (dst->vk.image_type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.layerCount : region->extent.depth));
uint32_t num_layers;
- if (dst->type != VK_IMAGE_TYPE_3D)
+ if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->dstSubresource.layerCount;
else
num_layers = region->extent.depth;
@@ -1689,20 +911,20 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
return true;
/* Handle copy to compressed image using compatible format */
- const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp,
- src->samples > VK_SAMPLE_COUNT_1_BIT);
+ v3dv_job_start_frame(job, width, height, num_layers, false, 1, internal_bpp,
+ src->vk.samples > VK_SAMPLE_COUNT_1_BIT);
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
+ internal_type, &job->frame_tiling);
- v3dv_job_emit_binning_flush(job);
- emit_copy_image_rcl(job, dst, src, &framebuffer, region);
+ v3dv_X(job->device, job_emit_binning_flush)(job);
+ v3dv_X(job->device, meta_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
@@ -1734,18 +956,18 @@ create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
VkImageCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .imageType = src->type,
+ .imageType = src->vk.image_type,
.format = format,
.extent = {
- .width = src->extent.width * width_scale,
- .height = src->extent.height * height_scale,
- .depth = src->extent.depth,
+ .width = src->vk.extent.width * width_scale,
+ .height = src->vk.extent.height * height_scale,
+ .depth = src->vk.extent.depth,
},
- .mipLevels = src->levels,
- .arrayLayers = src->array_size,
- .samples = src->samples,
- .tiling = src->tiling,
- .usage = src->usage,
+ .mipLevels = src->vk.mip_levels,
+ .arrayLayers = src->vk.array_layers,
+ .samples = src->vk.samples,
+ .tiling = src->vk.tiling,
+ .usage = src->vk.usage,
};
VkImage _image;
@@ -1770,12 +992,12 @@ static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
- const VkImageCopy *region)
+ const VkImageCopy2KHR *region)
{
- const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
- const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
- const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
- const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
+ const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
+ const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
+ const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
+ const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
const float block_scale_w = (float)src_block_w / (float)dst_block_w;
const float block_scale_h = (float)src_block_h / (float)dst_block_h;
@@ -1789,7 +1011,7 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
float src_scale_h = 1.0f;
float dst_scale_w = block_scale_w;
float dst_scale_h = block_scale_h;
- if (vk_format_is_compressed(src->vk_format)) {
+ if (vk_format_is_compressed(src->vk.format)) {
/* If we are copying from a compressed format we should be aware that we
* are going to texture from the source image, and the texture setup
* knows the actual size of the image, so we need to choose a format
@@ -1813,18 +1035,13 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
*/
assert(src->cpp == dst->cpp);
- uint32_t divisor_w, divisor_h;
format = VK_FORMAT_R32G32_UINT;
switch (src->cpp) {
case 16:
format = VK_FORMAT_R32G32B32A32_UINT;
- divisor_w = 4;
- divisor_h = 4;
break;
case 8:
format = VK_FORMAT_R16G16B16A16_UINT;
- divisor_w = 4;
- divisor_h = 4;
break;
default:
unreachable("Unsupported compressed format");
@@ -1833,10 +1050,10 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
/* Create image views of the src/dst images that we can interpret in
* terms of the canonical format.
*/
- src_scale_w /= divisor_w;
- src_scale_h /= divisor_h;
- dst_scale_w /= divisor_w;
- dst_scale_h /= divisor_h;
+ src_scale_w /= src_block_w;
+ src_scale_h /= src_block_h;
+ dst_scale_w /= src_block_w;
+ dst_scale_h /= src_block_h;
src = create_image_alias(cmd_buffer, src,
src_scale_w, src_scale_h, format);
@@ -1845,11 +1062,11 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
dst_scale_w, dst_scale_h, format);
} else {
format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
- src->vk_format : get_compatible_tlb_format(src->vk_format);
+ src->vk.format : get_compatible_tlb_format(src->vk.format);
if (format == VK_FORMAT_UNDEFINED)
return false;
- const struct v3dv_format *f = v3dv_get_format(format);
+ const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
return false;
}
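
A worked instance of the scale arithmetic in the compressed branch above. The concrete formats are an assumption; any 4x4, 16-bytes-per-block pairing behaves the same:

#include <assert.h>

/* ETC2_R8G8B8A8 source (4x4 blocks, 16 bytes/block) copied to a
 * size-compatible 16-byte uncompressed destination (1x1 blocks). */
static void alias_scale_example(void)
{
   const float src_block_w = 4.0f, dst_block_w = 1.0f;
   float src_scale_w = 1.0f;
   float dst_scale_w = src_block_w / dst_block_w;  /* block_scale_w == 4 */

   src_scale_w /= src_block_w;   /* 0.25: one RGBA32UI texel per block  */
   dst_scale_w /= src_block_w;   /* 1.0:  destination keeps its extent  */

   assert(64.0f * src_scale_w == 16.0f); /* 64-wide level, 16-wide alias */
   assert(dst_scale_w == 1.0f);
}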
@@ -1895,7 +1112,8 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
dst_start.z + region->extent.depth,
};
- const VkImageBlit blit_region = {
+ const VkImageBlit2KHR blit_region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
.srcSubresource = region->srcSubresource,
.srcOffsets = { src_start, src_end },
.dstSubresource = region->dstSubresource,
@@ -1912,466 +1130,42 @@ copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
return handled;
}
-void
-v3dv_CmdCopyImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkImageCopy *pRegions)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageInfo2KHR *info)
+

{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
- V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
+ V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
+ V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
- assert(src->samples == dst->samples);
+ assert(src->vk.samples == dst->vk.samples);
- for (uint32_t i = 0; i < regionCount; i++) {
- if (copy_image_tfu(cmd_buffer, dst, src, &pRegions[i]))
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
- if (copy_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
+ if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
- if (copy_image_blit(cmd_buffer, dst, src, &pRegions[i]))
+ if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
unreachable("Image copy not supported");
}
}
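
The entrypoint above encodes a fixed fallback order. A stand-alone sketch of that pattern, with stub types rather than driver code:

#include <stdbool.h>

typedef struct { int dummy; } region_t;  /* stand-in for VkImageCopy2KHR */

static bool try_tfu(const region_t *r)  { (void)r; return false; }
static bool try_tlb(const region_t *r)  { (void)r; return false; }
static bool try_blit(const region_t *r) { (void)r; return true;  }

static void copy_one_region(const region_t *r)
{
   if (try_tfu(r))  return;   /* fastest: fixed-function transfer unit */
   if (try_tlb(r))  return;   /* tile-buffer load/store render job     */
   if (try_blit(r)) return;   /* most general: shader-based blit       */
   /* mirrors the unreachable() above: some path must claim the copy   */
}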
-static void
-emit_clear_image_per_tile_list(struct v3dv_job *job,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *image,
- VkImageAspectFlags aspects,
- uint32_t layer,
- uint32_t level)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- emit_image_store(cl, framebuffer, image, aspects, layer, level,
- false, false, false);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-emit_clear_image(struct v3dv_job *job,
- struct v3dv_image *image,
- struct framebuffer_data *framebuffer,
- VkImageAspectFlags aspects,
- uint32_t layer,
- uint32_t level)
-{
- emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_clear_image_rcl(struct v3dv_job *job,
- struct v3dv_image *image,
- struct framebuffer_data *framebuffer,
- const union v3dv_clear_value *clear_value,
- VkImageAspectFlags aspects,
- uint32_t layer,
- uint32_t level)
-{
- const struct rcl_clear_info clear_info = {
- .clear_value = clear_value,
- .image = image,
- .aspects = aspects,
- .layer = layer,
- .level = level,
- };
-
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
- v3dv_return_if_oom(NULL, job);
-
- emit_frame_setup(job, 0, clear_value);
- emit_clear_image(job, image, framebuffer, aspects, layer, level);
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-static void
-get_hw_clear_color(const VkClearColorValue *color,
- VkFormat fb_format,
- VkFormat image_format,
- uint32_t internal_type,
- uint32_t internal_bpp,
- uint32_t *hw_color)
-{
- const uint32_t internal_size = 4 << internal_bpp;
-
- /* If the image format doesn't match the framebuffer format, then we are
- * trying to clear an unsupported tlb format using a compatible
- * format for the framebuffer. In this case, we want to make sure that
- * we pack the clear value according to the original format semantics,
- * not the compatible format.
- */
- if (fb_format == image_format) {
- v3dv_get_hw_clear_color(color, internal_type, internal_size, hw_color);
- } else {
- union util_color uc;
- enum pipe_format pipe_image_format =
- vk_format_to_pipe_format(image_format);
- util_pack_color(color->float32, pipe_image_format, &uc);
- memcpy(hw_color, uc.ui, internal_size);
- }
-}
-
-/* Returns true if the implementation is able to handle the case, false
- * otherwise.
- */
-static bool
-clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_image *image,
- const VkClearValue *clear_value,
- const VkImageSubresourceRange *range)
-{
- const VkOffset3D origin = { 0, 0, 0 };
- VkFormat fb_format;
- if (!can_use_tlb(image, &origin, &fb_format))
- return false;
-
- uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format, range->aspectMask,
- &internal_type, &internal_bpp);
-
- union v3dv_clear_value hw_clear_value = { 0 };
- if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- get_hw_clear_color(&clear_value->color, fb_format, image->vk_format,
- internal_type, internal_bpp, &hw_clear_value.color[0]);
- } else {
- assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
- (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
- hw_clear_value.z = clear_value->depthStencil.depth;
- hw_clear_value.s = clear_value->depthStencil.stencil;
- }
-
- uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
- image->levels - range->baseMipLevel :
- range->levelCount;
- uint32_t min_level = range->baseMipLevel;
- uint32_t max_level = range->baseMipLevel + level_count;
-
- /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
- * Instead, we need to consider the full depth dimension of the image, which
- * goes from 0 up to the level's depth extent.
- */
- uint32_t min_layer;
- uint32_t max_layer;
- if (image->type != VK_IMAGE_TYPE_3D) {
- uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
- image->array_size - range->baseArrayLayer :
- range->layerCount;
- min_layer = range->baseArrayLayer;
- max_layer = range->baseArrayLayer + layer_count;
- } else {
- min_layer = 0;
- max_layer = 0;
- }
-
- for (uint32_t level = min_level; level < max_level; level++) {
- if (image->type == VK_IMAGE_TYPE_3D)
- max_layer = u_minify(image->extent.depth, level);
- for (uint32_t layer = min_layer; layer < max_layer; layer++) {
- uint32_t width = u_minify(image->extent.width, level);
- uint32_t height = u_minify(image->extent.height, level);
-
- struct v3dv_job *job =
- v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
-
- if (!job)
- return true;
-
- /* We start a new job for each layer so the frame "depth" is 1 */
- v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,
- image->samples > VK_SAMPLE_COUNT_1_BIT);
-
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
-
- v3dv_job_emit_binning_flush(job);
-
- /* If this triggers it is an application bug: the spec requires
- * that any aspects to clear are present in the image.
- */
- assert(range->aspectMask & image->aspects);
-
- emit_clear_image_rcl(job, image, &framebuffer, &hw_clear_value,
- range->aspectMask, layer, level);
-
- v3dv_cmd_buffer_finish_job(cmd_buffer);
- }
- }
-
- return true;
-}
-
-void
-v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
- VkImage _image,
- VkImageLayout imageLayout,
- const VkClearColorValue *pColor,
- uint32_t rangeCount,
- const VkImageSubresourceRange *pRanges)
-{
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_image, image, _image);
-
- const VkClearValue clear_value = {
- .color = *pColor,
- };
-
- for (uint32_t i = 0; i < rangeCount; i++) {
- if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
- continue;
- unreachable("Unsupported color clear.");
- }
-}
-
-void
-v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
- VkImage _image,
- VkImageLayout imageLayout,
- const VkClearDepthStencilValue *pDepthStencil,
- uint32_t rangeCount,
- const VkImageSubresourceRange *pRanges)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_image, image, _image);
-
- const VkClearValue clear_value = {
- .depthStencil = *pDepthStencil,
- };
-
- for (uint32_t i = 0; i < rangeCount; i++) {
- if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
- continue;
- unreachable("Unsupported depth/stencil clear.");
- }
-}
-
-static void
-emit_copy_buffer_per_tile_list(struct v3dv_job *job,
- struct v3dv_bo *dst,
- struct v3dv_bo *src,
- uint32_t dst_offset,
- uint32_t src_offset,
- uint32_t stride,
- uint32_t format)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- emit_linear_store(cl, RENDER_TARGET_0,
- dst, dst_offset, stride, false, format);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-emit_copy_buffer(struct v3dv_job *job,
- struct v3dv_bo *dst,
- struct v3dv_bo *src,
- uint32_t dst_offset,
- uint32_t src_offset,
- struct framebuffer_data *framebuffer,
- uint32_t format,
- uint32_t item_size)
-{
- const uint32_t stride = job->frame_tiling.width * item_size;
- emit_copy_buffer_per_tile_list(job, dst, src,
- dst_offset, src_offset,
- stride, format);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_copy_buffer_rcl(struct v3dv_job *job,
- struct v3dv_bo *dst,
- struct v3dv_bo *src,
- uint32_t dst_offset,
- uint32_t src_offset,
- struct framebuffer_data *framebuffer,
- uint32_t format,
- uint32_t item_size)
-{
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
- v3dv_return_if_oom(NULL, job);
-
- emit_frame_setup(job, 0, NULL);
-
- emit_copy_buffer(job, dst, src, dst_offset, src_offset,
- framebuffer, format, item_size);
-
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-/* Figure out a TLB size configuration for a number of pixels to process.
- * Beware that we can't "render" more than 4096x4096 pixels in a single job,
- * if the pixel count is larger than this, the caller might need to split
- * the job and call this function multiple times.
- */
-static void
-framebuffer_size_for_pixel_count(uint32_t num_pixels,
- uint32_t *width,
- uint32_t *height)
-{
- assert(num_pixels > 0);
-
- const uint32_t max_dim_pixels = 4096;
- const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;
-
- uint32_t w, h;
- if (num_pixels > max_pixels) {
- w = max_dim_pixels;
- h = max_dim_pixels;
- } else {
- w = num_pixels;
- h = 1;
- while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
- w >>= 1;
- h <<= 1;
- }
- }
- assert(w <= max_dim_pixels && h <= max_dim_pixels);
- assert(w * h <= num_pixels);
- assert(w > 0 && h > 0);
-
- *width = w;
- *height = h;
-}
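-
- /* A worked run of the sizing loop above, for a copy of 1,000,000 items:
-  *
-  *    #include <assert.h>
-  *    #include <stdint.h>
-  *
-  *    static void sizing_example(void)
-  *    {
-  *       uint32_t w = 1000000, h = 1;
-  *       while (w > 4096 || ((w % 2) == 0 && w > 2 * h)) {
-  *          w >>= 1;
-  *          h <<= 1;
-  *       }
-  *       assert(w == 1953 && h == 512); // covers 999,936 of 1,000,000
-  *       // the caller loops: a follow-up 8x8 job handles the last 64
-  *    }
-  */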
-
-static struct v3dv_job *
-copy_buffer(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_bo *dst,
- uint32_t dst_offset,
- struct v3dv_bo *src,
- uint32_t src_offset,
- const VkBufferCopy *region)
-{
- const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
- const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
-
- /* Select appropriate pixel format for the copy operation based on the
- * size to copy and the alignment of the source and destination offsets.
- */
- src_offset += region->srcOffset;
- dst_offset += region->dstOffset;
- uint32_t item_size = 4;
- while (item_size > 1 &&
- (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
- item_size /= 2;
- }
-
- while (item_size > 1 && region->size % item_size != 0)
- item_size /= 2;
-
- assert(region->size % item_size == 0);
- uint32_t num_items = region->size / item_size;
- assert(num_items > 0);
-
- uint32_t format;
- VkFormat vk_format;
- switch (item_size) {
- case 4:
- format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
- vk_format = VK_FORMAT_R8G8B8A8_UINT;
- break;
- case 2:
- format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
- vk_format = VK_FORMAT_R8G8_UINT;
- break;
- default:
- format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
- vk_format = VK_FORMAT_R8_UINT;
- break;
- }
-
- struct v3dv_job *job = NULL;
- while (num_items > 0) {
- job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
- if (!job)
- return NULL;
-
- uint32_t width, height;
- framebuffer_size_for_pixel_count(num_items, &width, &height);
-
- v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
-
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, vk_format, internal_type,
- &job->frame_tiling);
-
- v3dv_job_emit_binning_flush(job);
-
- emit_copy_buffer_rcl(job, dst, src, dst_offset, src_offset,
- &framebuffer, format, item_size);
-
- v3dv_cmd_buffer_finish_job(cmd_buffer);
-
- const uint32_t items_copied = width * height;
- const uint32_t bytes_copied = items_copied * item_size;
- num_items -= items_copied;
- src_offset += bytes_copied;
- dst_offset += bytes_copied;
- }
-
- return job;
-}
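
A worked instance of the item-size selection deleted above: srcOffset 6, dstOffset 0, size 10 bytes. Alignment caps the item size at 2, so the copy renders RG8UI "pixels":

#include <assert.h>
#include <stdint.h>

static uint32_t pick_item_size(uint32_t src_off, uint32_t dst_off,
                               uint32_t size)
{
   uint32_t item_size = 4;
   while (item_size > 1 &&
          (src_off % item_size != 0 || dst_off % item_size != 0))
      item_size /= 2;
   while (item_size > 1 && size % item_size != 0)
      item_size /= 2;
   return item_size;   /* 4 -> RGBA8UI, 2 -> RG8UI, 1 -> R8UI */
}

static void item_size_example(void)
{
   assert(pick_item_size(6, 0, 10) == 2);
}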
-
-void
-v3dv_CmdCopyBuffer(VkCommandBuffer commandBuffer,
- VkBuffer srcBuffer,
- VkBuffer dstBuffer,
- uint32_t regionCount,
- const VkBufferCopy *pRegions)
-{
- V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, srcBuffer);
- V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);
-
- for (uint32_t i = 0; i < regionCount; i++) {
- copy_buffer(cmd_buffer,
- dst_buffer->mem->bo, dst_buffer->mem_offset,
- src_buffer->mem->bo, src_buffer->mem_offset,
- &pRegions[i]);
+ V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
+ V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
+
+ for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
+ v3dv_X(cmd_buffer->device, meta_copy_buffer)
+ (cmd_buffer,
+ dst_buffer->mem->bo, dst_buffer->mem_offset,
+ src_buffer->mem->bo, src_buffer->mem_offset,
+ &pCopyBufferInfo->pRegions[i]);
}
}
@@ -2385,7 +1179,7 @@ destroy_update_buffer_cb(VkDevice _device,
v3dv_bo_free(device, bo);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
@@ -2412,16 +1206,17 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
v3dv_bo_unmap(cmd_buffer->device, src_bo);
- VkBufferCopy region = {
+ VkBufferCopy2KHR region = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
.srcOffset = 0,
.dstOffset = dstOffset,
.size = dataSize,
};
struct v3dv_job *copy_job =
- copy_buffer(cmd_buffer,
- dst_buffer->mem->bo, dst_buffer->mem_offset,
- src_bo, 0,
- &region);
+ v3dv_X(cmd_buffer->device, meta_copy_buffer)
+ (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
+ src_bo, 0, &region);
+
if (!copy_job)
return;
@@ -2429,118 +1224,7 @@ v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}
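
In outline, the staging scheme above (the step descriptions are paraphrase, not real API names):

static void update_buffer_outline(void)
{
   /* 1. allocate and map a transient BO, memcpy the dataSize bytes in  */
   /* 2. record a regular GPU buffer copy from the BO into dstBuffer,
    *    using a VkBufferCopy2KHR with srcOffset 0 and size dataSize    */
   /* 3. attach the BO to the command buffer with a destroy callback so
    *    it is freed with the command buffer, after the copy executed   */
}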
-static void
-emit_fill_buffer_per_tile_list(struct v3dv_job *job,
- struct v3dv_bo *bo,
- uint32_t offset,
- uint32_t stride)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
- V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-emit_fill_buffer(struct v3dv_job *job,
- struct v3dv_bo *bo,
- uint32_t offset,
- struct framebuffer_data *framebuffer)
-{
- const uint32_t stride = job->frame_tiling.width * 4;
- emit_fill_buffer_per_tile_list(job, bo, offset, stride);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_fill_buffer_rcl(struct v3dv_job *job,
- struct v3dv_bo *bo,
- uint32_t offset,
- struct framebuffer_data *framebuffer,
- uint32_t data)
-{
- const union v3dv_clear_value clear_value = {
- .color = { data, 0, 0, 0 },
- };
-
- const struct rcl_clear_info clear_info = {
- .clear_value = &clear_value,
- .image = NULL,
- .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
- .layer = 0,
- .level = 0,
- };
-
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
- v3dv_return_if_oom(NULL, job);
-
- emit_frame_setup(job, 0, &clear_value);
- emit_fill_buffer(job, bo, offset, framebuffer);
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
-static void
-fill_buffer(struct v3dv_cmd_buffer *cmd_buffer,
- struct v3dv_bo *bo,
- uint32_t offset,
- uint32_t size,
- uint32_t data)
-{
- assert(size > 0 && size % 4 == 0);
- assert(offset + size <= bo->size);
-
- const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
- const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
- uint32_t num_items = size / 4;
-
- while (num_items > 0) {
- struct v3dv_job *job =
- v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
- if (!job)
- return;
-
- uint32_t width, height;
- framebuffer_size_for_pixel_count(num_items, &width, &height);
-
- v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);
-
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
- internal_type, &job->frame_tiling);
-
- v3dv_job_emit_binning_flush(job);
-
- emit_fill_buffer_rcl(job, bo, offset, &framebuffer, data);
-
- v3dv_cmd_buffer_finish_job(cmd_buffer);
-
- const uint32_t items_copied = width * height;
- const uint32_t bytes_copied = items_copied * 4;
- num_items -= items_copied;
- offset += bytes_copied;
- }
-}
-
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
@@ -2562,7 +1246,8 @@ v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
size -= size % 4;
}
- fill_buffer(cmd_buffer, bo, dstOffset, size, data);
+ v3dv_X(cmd_buffer->device, meta_fill_buffer)
+ (cmd_buffer, bo, dstOffset, size, data);
}
/**
@@ -2573,12 +1258,12 @@ static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
{
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
/* Destination can't be raster format */
- if (image->tiling == VK_IMAGE_TILING_LINEAR)
+ if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
/* We can't copy D24S8 because buffer to image copies only copy one aspect
@@ -2588,8 +1273,8 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
* is not a straight copy, we would have to swizzle the channels, which the
* TFU can't do.
*/
- if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
+ if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
return false;
}
@@ -2610,12 +1295,12 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
else
height = region->bufferImageHeight;
- if (width != image->extent.width || height != image->extent.height)
+ if (width != image->vk.extent.width || height != image->vk.extent.height)
return false;
/* Handle region semantics for compressed images */
- const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
width = DIV_ROUND_UP(width, block_w);
height = DIV_ROUND_UP(height, block_h);
@@ -2625,14 +1310,14 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
* texel size instead, which expands the list of formats we can handle here.
*/
const struct v3dv_format *format =
- v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
+ v3dv_get_compatible_tfu_format(cmd_buffer->device,
image->cpp, NULL);
const uint32_t mip_level = region->imageSubresource.mipLevel;
const struct v3d_resource_slice *slice = &image->slices[mip_level];
uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
@@ -2647,7 +1332,11 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
/* Emit a TFU job per layer to copy */
const uint32_t buffer_stride = width * image->cpp;
for (int i = 0; i < num_layers; i++) {
- uint32_t layer = region->imageSubresource.baseArrayLayer + i;
+ uint32_t layer;
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
+ layer = region->imageSubresource.baseArrayLayer + i;
+ else
+ layer = region->imageOffset.z + i;
struct drm_v3d_submit_tfu tfu = {
.ios = (height << 16) | width,
@@ -2671,7 +1360,7 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
tfu.ioa |= dst_offset;
tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
- (slice->tiling - VC5_TILING_LINEARTILE)) <<
+ (slice->tiling - V3D_TILING_LINEARTILE)) <<
V3D_TFU_IOA_FORMAT_SHIFT;
tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
@@ -2679,8 +1368,8 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
* OPAD field for the destination (how many extra UIF blocks beyond
* those necessary to cover the height).
*/
- if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
- slice->tiling == VC5_TILING_UIF_XOR) {
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
uint32_t implicit_padded_height = align(height, uif_block_h);
uint32_t icfg =
@@ -2694,140 +1383,6 @@ copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
return true;
}
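
The layer selection added above differs for 3D images. A minimal restatement, with field names assumed from the region struct:

#include <stdbool.h>
#include <stdint.h>

static uint32_t tfu_copy_layer(bool is_3d, uint32_t base_array_layer,
                               uint32_t image_offset_z, uint32_t i)
{
   /* 2D arrays index from baseArrayLayer; 3D copies index slices from
    * imageOffset.z, since 3D subresources have no array layers. */
   return (is_3d ? image_offset_z : base_array_layer) + i;
}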
-static void
-emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *image,
- struct v3dv_buffer *buffer,
- uint32_t layer,
- const VkBufferImageCopy *region)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
- assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
- layer < image->extent.depth);
-
- /* Load TLB from buffer */
- uint32_t width, height;
- if (region->bufferRowLength == 0)
- width = region->imageExtent.width;
- else
- width = region->bufferRowLength;
-
- if (region->bufferImageHeight == 0)
- height = region->imageExtent.height;
- else
- height = region->bufferImageHeight;
-
- /* Handle copy to compressed format using a compatible format */
- width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
- height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));
-
- uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
- 1 : image->cpp;
- uint32_t buffer_stride = width * cpp;
- uint32_t buffer_offset =
- buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
-
- uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
- false, false, true);
-
- emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
- buffer_offset, buffer_stride, format);
-
- /* Because we can't do raster loads/stores of Z/S formats we need to
- * use a color tile buffer with a compatible RGBA color format instead.
- * However, when we are uploading a single aspect to a combined
- * depth/stencil image we have the problem that our tile buffer stores don't
- * allow us to mask out the other aspect, so we always write all four RGBA
- * channels to the image and we end up overwriting that other aspect with
- * undefined values. To work around that, we first load the aspect we are
- * not copying from the image memory into a proper Z/S tile buffer. Then we
- * do our store from the color buffer for the aspect we are copying, and
- * after that, we do another store from the Z/S tile buffer to restore the
- * other aspect to its original value.
- */
- if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
- if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
- emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
- false, false);
- } else {
- assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
- emit_image_load(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
- false, false);
- }
- }
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- /* Store TLB to image */
- emit_image_store(cl, framebuffer, image, imgrsc->aspectMask,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
- false, true, false);
-
- if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
- if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
- emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_STENCIL_BIT,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
- false, false, false);
- } else {
- assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
- emit_image_store(cl, framebuffer, image, VK_IMAGE_ASPECT_DEPTH_BIT,
- imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
- false, false, false);
- }
- }
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
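-
- /* The D24S8 single-aspect workaround described in the comment above,
-  * as a pseudocode sequence (illustrative only):
-  *
-  *   load  Z/S tile buffer   <- untouched aspect of the image (preserve)
-  *   load  color tile buffer <- staging buffer                (payload)
-  *   end-of-loads
-  *   store image <- color tile buffer, all RGBA channels (clobbers both)
-  *   store image <- Z/S tile buffer (restores the other aspect)
-  */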
-
-static void
-emit_copy_buffer_to_layer(struct v3dv_job *job,
- struct v3dv_image *image,
- struct v3dv_buffer *buffer,
- struct framebuffer_data *framebuffer,
- uint32_t layer,
- const VkBufferImageCopy *region)
-{
- emit_frame_setup(job, layer, NULL);
- emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
- layer, region);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_copy_buffer_to_image_rcl(struct v3dv_job *job,
- struct v3dv_image *image,
- struct v3dv_buffer *buffer,
- struct framebuffer_data *framebuffer,
- const VkBufferImageCopy *region)
-{
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
- v3dv_return_if_oom(NULL, job);
-
- for (int layer = 0; layer < job->frame_tiling.layers; layer++)
- emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
/**
* Returns true if the implementation supports the requested operation (even if
* it failed to process it, for example, due to an out-of-memory error).
@@ -2836,19 +1391,19 @@ static bool
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
{
VkFormat fb_format;
- if (!can_use_tlb(image, &region->imageOffset, &fb_format))
+ if (!v3dv_meta_can_use_tlb(image, &region->imageOffset, &fb_format))
return false;
uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format,
- region->imageSubresource.aspectMask,
- &internal_type, &internal_bpp);
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, region->imageSubresource.aspectMask,
+ &internal_type, &internal_bpp);
uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
@@ -2860,19 +1415,21 @@ copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
return true;
/* Handle copy to compressed format using a compatible format */
- const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(image->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(image->vk.format);
const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);
- v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);
+ v3dv_job_start_frame(job, width, height, num_layers, false,
+ 1, internal_bpp, false);
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
+ internal_type, &job->frame_tiling);
- v3dv_job_emit_binning_flush(job);
- emit_copy_buffer_to_image_rcl(job, image, buffer, &framebuffer, region);
+ v3dv_X(job->device, job_emit_binning_flush)(job);
+ v3dv_X(job->device, meta_emit_copy_buffer_to_image_rcl)
+ (job, image, buffer, &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
@@ -2883,7 +1440,7 @@ static bool
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
{
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
return true;
@@ -2980,6 +1537,7 @@ static void
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
+ bool is_layered,
uint8_t *key)
{
memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
@@ -2992,6 +1550,12 @@ get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
*p = cmask;
p++;
+ /* Note that we are using a single byte for this, so we could pack
+ * more data into this 32-bit slot in the future.
+ */
+ *p = is_layered ? 1 : 0;
+ p++;
+
memcpy(p, cswizzle, sizeof(VkComponentMapping));
p += sizeof(VkComponentMapping) / sizeof(uint32_t);
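
Read as 32-bit words, the packing above lays out as sketched below; the remainder of the key is filled later in this function, so only the prefix is shown:

#include <stdint.h>
#include <string.h>

static void pack_key_prefix(uint32_t format, uint32_t cmask, int is_layered,
                            const uint32_t cswizzle[4], uint8_t key[32])
{
   uint32_t *p = (uint32_t *)key;
   memset(key, 0, 32);
   *p++ = format;                /* word 0: VkFormat                    */
   *p++ = cmask;                 /* word 1: color write mask            */
   *p++ = is_layered ? 1u : 0u;  /* word 2: layered flag (one byte used,
                                    room to pack more data later)       */
   memcpy(p, cswizzle, 4 * sizeof(uint32_t)); /* words 3-6: swizzle     */
}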
@@ -3011,6 +1575,7 @@ static bool
create_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
struct nir_shader *vs_nir,
+ struct nir_shader *gs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
@@ -3036,6 +1601,71 @@ get_texel_buffer_copy_vs()
return b.shader;
}
+static nir_shader *
+get_texel_buffer_copy_gs()
+{
+ /* FIXME: this creates a geometry shader that takes the index of a single
+ * layer to clear from push constants, so we need to emit a draw call for
+ * each layer that we want to clear. We could actually do better and have it
+ * take a range of layers however, if we were to do this, we would need to
+ * be careful not to exceed the maximum number of output vertices allowed in
+ * a geometry shader.
+ */
+ const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
+ "meta texel buffer copy gs");
+ nir_shader *nir = b.shader;
+ nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
+ nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
+ (1ull << VARYING_SLOT_LAYER);
+ nir->info.gs.input_primitive = GL_TRIANGLES;
+ nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
+ nir->info.gs.vertices_in = 3;
+ nir->info.gs.vertices_out = 3;
+ nir->info.gs.invocations = 1;
+ nir->info.gs.active_stream_mask = 0x1;
+
+ /* in vec4 gl_Position[3] */
+ nir_variable *gs_in_pos =
+ nir_variable_create(b.shader, nir_var_shader_in,
+ glsl_array_type(glsl_vec4_type(), 3, 0),
+ "in_gl_Position");
+ gs_in_pos->data.location = VARYING_SLOT_POS;
+
+ /* out vec4 gl_Position */
+ nir_variable *gs_out_pos =
+ nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
+ "out_gl_Position");
+ gs_out_pos->data.location = VARYING_SLOT_POS;
+
+ /* out float gl_Layer */
+ nir_variable *gs_out_layer =
+ nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
+ "out_gl_Layer");
+ gs_out_layer->data.location = VARYING_SLOT_LAYER;
+
+ /* Emit output triangle */
+ for (uint32_t i = 0; i < 3; i++) {
+ /* gl_Position from shader input */
+ nir_deref_instr *in_pos_i =
+ nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
+ nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
+
+ /* gl_Layer from push constants */
+ nir_ssa_def *layer =
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
+ .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
+ .range = 4);
+ nir_store_var(&b, gs_out_layer, layer, 0x1);
+
+ nir_emit_vertex(&b, 0);
+ }
+
+ nir_end_primitive(&b, 0);
+
+ return nir;
+}
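+
+/* Hand-translated GLSL equivalent of the NIR above, for reading
+ * convenience; the driver never compiles this text, and the push
+ * constant offset is an assumption matching the literal used at the
+ * vkCmdPushConstants call site:
+ *
+ *   layout(triangles) in;
+ *   layout(triangle_strip, max_vertices = 3) out;
+ *   layout(push_constant) uniform pc { layout(offset = 24) uint layer; };
+ *
+ *   void main() {
+ *      for (int i = 0; i < 3; i++) {
+ *         gl_Position = gl_in[i].gl_Position;
+ *         gl_Layer = int(layer);
+ *         EmitVertex();
+ *      }
+ *      EndPrimitive();
+ *   }
+ */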
+
static nir_ssa_def *
load_frag_coord(nir_builder *b)
{
@@ -3101,15 +1731,21 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
* texel buffer.
*/
nir_ssa_def *box =
- nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+ nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
+ .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
+ .range = 16);
/* Load the buffer stride (this comes in texel units) */
nir_ssa_def *stride =
- nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
+ .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
+ .range = 4);
/* Load the buffer offset (this comes in texel units) */
nir_ssa_def *offset =
- nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 20, .range = 4);
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
+ .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
+ .range = 4);
nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
@@ -3165,6 +1801,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device,
VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
+ bool is_layered,
VkRenderPass _pass,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
@@ -3175,6 +1812,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device,
nir_shader *vs_nir = get_texel_buffer_copy_vs();
nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
+ nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -3210,7 +1848,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device,
return create_pipeline(device,
pass,
- vs_nir, fs_nir,
+ vs_nir, gs_nir, fs_nir,
&vi_state,
&ds_state,
&cb_state,
@@ -3226,12 +1864,14 @@ get_copy_texel_buffer_pipeline(
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
VkImageType image_type,
+ bool is_layered,
struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
{
bool ok = true;
uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
- get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, key);
+ get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
+ key);
mtx_lock(&device->meta.mtx);
struct hash_entry *entry =
@@ -3257,7 +1897,8 @@ get_copy_texel_buffer_pipeline(
goto fail;
ok =
- create_texel_buffer_copy_pipeline(device, format, cmask, cswizzle,
+ create_texel_buffer_copy_pipeline(device,
+ format, cmask, cswizzle, is_layered,
(*pipeline)->pass,
device->meta.texel_buffer_copy.p_layout,
&(*pipeline)->pipeline);
@@ -3297,7 +1938,7 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
uint32_t region_count,
- const VkBufferImageCopy *regions)
+ const VkBufferImageCopy2KHR *regions)
{
VkResult result;
bool handled = false;
@@ -3320,7 +1961,7 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
return handled;
/* FIXME: we only handle uncompressed images for now. */
- if (vk_format_is_compressed(image->vk_format))
+ if (vk_format_is_compressed(image->vk.format))
return handled;
const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
@@ -3336,7 +1977,8 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
*/
if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
if (v3dv_buffer_format_supports_features(
- src_format, VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
+ cmd_buffer->device, src_format,
+ VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
} else {
return handled;
@@ -3348,11 +1990,29 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
*/
handled = true;
+
+ /* Compute the number of layers to copy.
+ *
+ * If we are batching (region_count > 1) all our regions have the same
+ * image subresource so we can take this from the first region. For 3D
+ * images we require the same depth extent.
+ */
+ const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
+ uint32_t num_layers;
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
+ num_layers = resource->layerCount;
+ } else {
+ assert(region_count == 1);
+ num_layers = regions[0].imageExtent.depth;
+ }
+ assert(num_layers > 0);
+
/* Get the texel buffer copy pipeline */
struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
dst_format, cmask, cswizzle,
- image->type, &pipeline);
+ image->vk.image_type, num_layers > 1,
+ &pipeline);
if (!ok)
return handled;
assert(pipeline && pipeline->pipeline && pipeline->pass);
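
The layer-count rule above in isolation (stub parameters); num_layers > 1 is what selects the geometry-shader pipeline variant:

#include <stdbool.h>
#include <stdint.h>

static uint32_t copy_num_layers(bool is_3d, uint32_t layer_count,
                                uint32_t extent_depth)
{
   /* 2D arrays copy layerCount layers; 3D images copy depth slices
    * and, per the assert above, are never batched across regions. */
   return is_3d ? extent_depth : layer_count;
}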
@@ -3422,78 +2082,58 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
0, 1, &set,
0, NULL);
- /* Compute the number of layers to copy.
+ /* Set up the framebuffer.
*
- * If we are batching (region_count > 1) all our regions have the same
- * image subresource so we can take this from the first region.
+ * For 3D images, this creates a layered framebuffer with a number of
+ * layers matching the depth extent of the 3D image.
*/
- const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
- uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D) {
- num_layers = resource->layerCount;
- } else {
- assert(region_count == 1);
- num_layers = regions[0].imageExtent.depth;
- }
- assert(num_layers > 0);
-
- /* Sanity check: we can only batch multiple regions together if they have
- * the same framebuffer (so the same layer).
- */
- assert(num_layers == 1 || region_count == 1);
-
- /* For each layer */
- for (uint32_t l = 0; l < num_layers; l++) {
- /* Setup framebuffer for this layer.
- *
- * FIXME: once we support geometry shaders, we should be able to have
- * one layered framebuffer and emit just one draw call for
- * all layers using layered rendering. At that point, we should
- * also be able to batch multi-layered regions as well.
- */
- VkImageViewCreateInfo image_view_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = v3dv_image_to_handle(image),
- .viewType = v3dv_image_type_to_view_type(image->type),
- .format = dst_format,
- .subresourceRange = {
- .aspectMask = aspect,
- .baseMipLevel = resource->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = resource->baseArrayLayer + l,
- .layerCount = 1
- },
- };
- VkImageView image_view;
- result = v3dv_CreateImageView(_device, &image_view_info,
- &cmd_buffer->device->vk.alloc, &image_view);
- if (result != VK_SUCCESS)
- goto fail;
+ uint32_t fb_width = u_minify(image->vk.extent.width, resource->mipLevel);
+ uint32_t fb_height = u_minify(image->vk.extent.height, resource->mipLevel);
+ VkImageViewCreateInfo image_view_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = v3dv_image_to_handle(image),
+ .viewType = v3dv_image_type_to_view_type(image->vk.image_type),
+ .format = dst_format,
+ .subresourceRange = {
+ .aspectMask = aspect,
+ .baseMipLevel = resource->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = resource->baseArrayLayer,
+ .layerCount = num_layers,
+ },
+ };
+ VkImageView image_view;
+ result = v3dv_CreateImageView(_device, &image_view_info,
+ &cmd_buffer->device->vk.alloc, &image_view);
+ if (result != VK_SUCCESS)
+ goto fail;
- v3dv_cmd_buffer_add_private_obj(
- cmd_buffer, (uintptr_t)image_view,
- (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
+ v3dv_cmd_buffer_add_private_obj(
+ cmd_buffer, (uintptr_t)image_view,
+ (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
- VkFramebufferCreateInfo fb_info = {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .renderPass = pipeline->pass,
- .attachmentCount = 1,
- .pAttachments = &image_view,
- .width = u_minify(image->extent.width, resource->mipLevel),
- .height = u_minify(image->extent.height, resource->mipLevel),
- .layers = 1,
- };
+ VkFramebufferCreateInfo fb_info = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = pipeline->pass,
+ .attachmentCount = 1,
+ .pAttachments = &image_view,
+ .width = fb_width,
+ .height = fb_height,
+ .layers = num_layers,
+ };
- VkFramebuffer fb;
- result = v3dv_CreateFramebuffer(_device, &fb_info,
- &cmd_buffer->device->vk.alloc, &fb);
- if (result != VK_SUCCESS)
- goto fail;
+ VkFramebuffer fb;
+ result = v3dv_CreateFramebuffer(_device, &fb_info,
+ &cmd_buffer->device->vk.alloc, &fb);
+ if (result != VK_SUCCESS)
+ goto fail;
- v3dv_cmd_buffer_add_private_obj(
- cmd_buffer, (uintptr_t)fb,
- (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
+ v3dv_cmd_buffer_add_private_obj(
+ cmd_buffer, (uintptr_t)fb,
+ (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
+ /* For each layer */
+ for (uint32_t l = 0; l < num_layers; l++) {
/* Start render pass for this layer.
*
* If we only have one region to copy, then we might be able to
@@ -3513,15 +2153,15 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
v3dv_render_pass_from_handle(pipeline->pass);
can_skip_tlb_load =
cmask == full_cmask &&
- v3dv_subpass_area_is_tile_aligned(&render_area,
+ v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
v3dv_framebuffer_from_handle(fb),
pipeline_pass, 0);
}
} else {
render_area.offset.x = 0;
render_area.offset.y = 0;
- render_area.extent.width = fb_info.width;
- render_area.extent.height = fb_info.height;
+ render_area.extent.width = fb_width;
+ render_area.extent.height = fb_height;
}
VkRenderPassBeginInfo rp_info = {
@@ -3538,10 +2178,21 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
goto fail;
+ /* If we are using a layered copy we need to specify the layer for the
+ * Geometry Shader.
+ */
+ if (num_layers > 1) {
+ uint32_t layer = resource->baseArrayLayer + l;
+ v3dv_CmdPushConstants(_cmd_buffer,
+ cmd_buffer->device->meta.texel_buffer_copy.p_layout,
+ VK_SHADER_STAGE_GEOMETRY_BIT,
+ 24, 4, &layer);
+ }
+
/* For each region */
dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
for (uint32_t r = 0; r < region_count; r++) {
- const VkBufferImageCopy *region = &regions[r];
+ const VkBufferImageCopy2KHR *region = &regions[r];
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
@@ -3612,7 +2263,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
uint32_t region_count,
- const VkBufferImageCopy *regions)
+ const VkBufferImageCopy2KHR *regions)
{
/* Since we can't sample linear images we need to upload the linear
* buffer to a tiled image that we can use as a blit source, which
@@ -3636,7 +2287,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = src_format,
- .extent = { image->extent.width, image->extent.height, 1 },
+ .extent = { image->vk.extent.width, image->vk.extent.height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
@@ -3653,7 +2304,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
return handled;
VkMemoryRequirements reqs;
- v3dv_GetImageMemoryRequirements(_device, dummy_image, &reqs);
+ vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
VkDeviceMemory mem;
@@ -3676,7 +2327,7 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
* image subresource so we can take this from the first region.
*/
uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = regions[0].imageSubresource.layerCount;
else
num_layers = regions[0].imageExtent.depth;
@@ -3687,14 +2338,14 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
*/
assert(num_layers == 1 || region_count == 1);
- const uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
- const uint32_t block_height = vk_format_get_blockheight(image->vk_format);
+ const uint32_t block_width = vk_format_get_blockwidth(image->vk.format);
+ const uint32_t block_height = vk_format_get_blockheight(image->vk.format);
/* Copy regions by uploading each region to a temporary tiled image using
* the memory we have just allocated as storage.
*/
for (uint32_t r = 0; r < region_count; r++) {
- const VkBufferImageCopy *region = &regions[r];
+ const VkBufferImageCopy2KHR *region = &regions[r];
/* Obtain the 2D buffer region spec */
uint32_t buf_width, buf_height;
@@ -3741,14 +2392,15 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
cmd_buffer, (uintptr_t)buffer_image,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
- result = v3dv_BindImageMemory(_device, buffer_image, mem, 0);
+ result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
if (result != VK_SUCCESS)
return handled;
/* Upload buffer contents for the selected layer */
const VkDeviceSize buf_offset_bytes =
region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
- const VkBufferImageCopy buffer_image_copy = {
+ const VkBufferImageCopy2KHR buffer_image_copy = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR,
.bufferOffset = buf_offset_bytes,
.bufferRowLength = region->bufferRowLength / block_width,
.bufferImageHeight = region->bufferImageHeight / block_height,
@@ -3782,7 +2434,8 @@ copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
* image, but that we need to blit to a S8D24 destination (the only
* stencil format we support).
*/
- const VkImageBlit blit_region = {
+ const VkImageBlit2KHR blit_region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
.srcSubresource = {
.aspectMask = aspect,
.mipLevel = 0,
@@ -3840,7 +2493,7 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
uint32_t region_count,
- const VkBufferImageCopy *regions,
+ const VkBufferImageCopy2KHR *regions,
bool use_texel_buffer)
{
/* We can only call this with region_count > 1 if we can batch the regions
@@ -3890,9 +2543,9 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
dst_format = src_format;
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
- assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
- image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
+ assert(image->vk.format == VK_FORMAT_D32_SFLOAT ||
+ image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32);
src_format = VK_FORMAT_R8G8B8A8_UINT;
dst_format = src_format;
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
@@ -3901,8 +2554,8 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
* in the buffer is stored in the 24-LSB, but V3D wants it in the
* 24-MSB.
*/
- if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
- image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
+ if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ image->vk.format == VK_FORMAT_X8_D24_UNORM_PACK32) {
cmask = VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT;
@@ -3920,7 +2573,7 @@ copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
* blit to an RGBA8UI destination masking out writes to components
* GBA (which map to the D24 component of a S8D24 image).
*/
- assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
+ assert(image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT);
buf_bpp = 1;
src_format = VK_FORMAT_R8_UINT;
dst_format = VK_FORMAT_R8G8B8A8_UINT;
@@ -3970,16 +2623,16 @@ static bool
copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *image,
struct v3dv_buffer *buffer,
- const VkBufferImageCopy *region)
+ const VkBufferImageCopy2KHR *region)
{
/* FIXME */
- if (vk_format_is_depth_or_stencil(image->vk_format))
+ if (vk_format_is_depth_or_stencil(image->vk.format))
return false;
- if (vk_format_is_compressed(image->vk_format))
+ if (vk_format_is_compressed(image->vk.format))
return false;
- if (image->tiling == VK_IMAGE_TILING_LINEAR)
+ if (image->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
uint32_t buffer_width, buffer_height;
@@ -3997,7 +2650,7 @@ copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t buffer_layer_stride = buffer_stride * buffer_height;
uint32_t num_layers;
- if (image->type != VK_IMAGE_TYPE_3D)
+ if (image->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->imageSubresource.layerCount;
else
num_layers = region->imageExtent.depth;
@@ -4028,50 +2681,55 @@ copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
return true;
}
-void
-v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
- VkBuffer srcBuffer,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkBufferImageCopy *pRegions)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2KHR *info)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_buffer, buffer, srcBuffer);
- V3DV_FROM_HANDLE(v3dv_image, image, dstImage);
+ V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
+ V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
- assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT);
uint32_t r = 0;
- while (r < regionCount) {
+ while (r < info->regionCount) {
/* The TFU and TLB paths can only copy one region at a time and the region
* needs to start at the origin. We try these first for the common case
* where we are copying full images, since they should be the fastest.
*/
uint32_t batch_size = 1;
- if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &pRegions[r]))
+ if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
goto handled;
- if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &pRegions[r]))
+ if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
goto handled;
/* Otherwise, we are copying subrects, so we fallback to copying
* via shader and texel buffers and we try to batch the regions
- * if possible. We can only batch copies if they target the same
- * image subresource (so they have the same framebuffer spec).
+ * if possible. We can only batch copies if they have the same
+ * framebuffer spec, which is mostly determined by the image
+ * subresource of the region.
*/
- const VkImageSubresourceLayers *rsc = &pRegions[r].imageSubresource;
- if (image->type != VK_IMAGE_TYPE_3D) {
- for (uint32_t s = r + 1; s < regionCount; s++) {
- const VkImageSubresourceLayers *rsc_s = &pRegions[s].imageSubresource;
- if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
+ const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
+ for (uint32_t s = r + 1; s < info->regionCount; s++) {
+ const VkImageSubresourceLayers *rsc_s =
+ &info->pRegions[s].imageSubresource;
+
+ if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
+ break;
+
+ /* For 3D images we also need to check the depth extent */
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
+ info->pRegions[s].imageExtent.depth !=
+ info->pRegions[r].imageExtent.depth) {
break;
- batch_size++;
}
+
+ batch_size++;
}
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
- batch_size, &pRegions[r], true)) {
+ batch_size, &info->pRegions[r], true)) {
goto handled;
}
@@ -4081,13 +2739,14 @@ v3dv_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
* slow it might not be worth it and we should instead put more effort
* in handling more cases with the other paths.
*/
- if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer, &pRegions[r])) {
+ if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
+ &info->pRegions[r])) {
batch_size = 1;
goto handled;
}
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
- batch_size, &pRegions[r], false)) {
+ batch_size, &info->pRegions[r], false)) {
goto handled;
}
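
A stand-alone restatement of the batching test above, using a stub subresource struct:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

typedef struct {
   uint32_t aspect_mask, mip_level, base_array_layer, layer_count;
} subres_t;   /* stand-in for VkImageSubresourceLayers */

static bool can_batch(const subres_t *a, const subres_t *b,
                      bool is_3d, uint32_t depth_a, uint32_t depth_b)
{
   if (memcmp(a, b, sizeof(*a)) != 0)
      return false;               /* different framebuffer spec        */
   if (is_3d && depth_a != depth_b)
      return false;               /* layered draws need equal depth    */
   return true;
}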
@@ -4114,17 +2773,17 @@ static bool
blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
- const VkImageBlit *region)
+ const VkImageBlit2KHR *region)
{
- assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
- assert(src->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
/* Format must match */
- if (src->vk_format != dst->vk_format)
+ if (src->vk.format != dst->vk.format)
return false;
/* Destination can't be raster format */
- if (dst->tiling == VK_IMAGE_TILING_LINEAR)
+ if (dst->vk.tiling == VK_IMAGE_TILING_LINEAR)
return false;
/* Source region must start at (0,0) */
@@ -4136,8 +2795,8 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
return false;
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
- const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
- const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
+ const uint32_t dst_width = u_minify(dst->vk.extent.width, dst_mip_level);
+ const uint32_t dst_height = u_minify(dst->vk.extent.height, dst_mip_level);
if (region->dstOffsets[1].x < dst_width - 1 ||
region->dstOffsets[1].y < dst_height - 1) {
return false;
@@ -4152,7 +2811,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
/* If the format is D24S8 both aspects need to be copied, since the TFU
* can't be programmed to copy only one aspect of the image.
*/
- if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT;
if (region->dstSubresource.aspectMask != ds_aspects)
@@ -4165,7 +2824,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
* compatible based on its texel size.
*/
const struct v3dv_format *format =
- v3dv_get_compatible_tfu_format(&cmd_buffer->device->devinfo,
+ v3dv_get_compatible_tfu_format(cmd_buffer->device,
dst->cpp, NULL);
/* Emit a TFU job for each layer to blit */
@@ -4175,7 +2834,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t min_dst_layer;
uint32_t max_dst_layer;
bool dst_mirror_z = false;
- if (dst->type == VK_IMAGE_TYPE_3D) {
+ if (dst->vk.image_type == VK_IMAGE_TYPE_3D) {
compute_blit_3d_layers(region->dstOffsets,
&min_dst_layer, &max_dst_layer,
&dst_mirror_z);
@@ -4187,7 +2846,7 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t min_src_layer;
uint32_t max_src_layer;
bool src_mirror_z = false;
- if (src->type == VK_IMAGE_TYPE_3D) {
+ if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
compute_blit_3d_layers(region->srcOffsets,
&min_src_layer, &max_src_layer,
&src_mirror_z);
@@ -4212,10 +2871,10 @@ blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i;
const uint32_t src_layer =
src_mirror_z ? max_src_layer - i - 1: min_src_layer + i;
- emit_tfu_job(cmd_buffer,
- dst, dst_mip_level, dst_layer,
- src, src_mip_level, src_layer,
- dst_width, dst_height, format);
+ v3dv_X(cmd_buffer->device, meta_emit_tfu_job)
+ (cmd_buffer, dst, dst_mip_level, dst_layer,
+ src, src_mip_level, src_layer,
+ dst_width, dst_height, format);
}
return true;
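
A worked instance of the mirrored layer mapping used in the loop above: with min/max layers 0/4 and a mirrored Z range, i = 0..3 maps to destination layers 3, 2, 1, 0.

#include <stdbool.h>
#include <stdint.h>

static uint32_t blit_layer(bool mirror, uint32_t min_layer,
                           uint32_t max_layer, uint32_t i)
{
   return mirror ? max_layer - i - 1 : min_layer + i;
}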
@@ -4657,6 +3316,7 @@ get_color_blit_fs(struct v3dv_device *device,
if (dst_bit_size >= src_bit_size)
continue;
+ assert(dst_bit_size > 0);
if (util_format_is_pure_uint(dst_pformat)) {
nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
c[i] = nir_umin(&b, c[i], max);
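+
+ /* For example, a blit from R32_UINT to R16_UINT reaches this clamp
+  * with dst_bit_size = 16. Restated on the CPU (valid while
+  * dst_bit_size < 32, which the narrowing check above guarantees):
+  *
+  *    static uint32_t clamp_narrowing_uint(uint32_t v, unsigned dst_bits)
+  *    {
+  *       const uint32_t max = (1u << dst_bits) - 1; // 65535 for 16 bits
+  *       return v < max ? v : max;                  // mirrors nir_umin
+  *    }
+  */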
@@ -4679,6 +3339,7 @@ static bool
create_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
struct nir_shader *vs_nir,
+ struct nir_shader *gs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
@@ -4688,12 +3349,15 @@ create_pipeline(struct v3dv_device *device,
VkPipeline *pipeline)
{
struct vk_shader_module vs_m;
+ struct vk_shader_module gs_m;
struct vk_shader_module fs_m;
+ uint32_t num_stages = gs_nir ? 3 : 2;
+
v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
- VkPipelineShaderStageCreateInfo stages[2] = {
+ VkPipelineShaderStageCreateInfo stages[3] = {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
@@ -4706,12 +3370,23 @@ create_pipeline(struct v3dv_device *device,
.module = vk_shader_module_to_handle(&fs_m),
.pName = "main",
},
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
+ .module = VK_NULL_HANDLE,
+ .pName = "main",
+ },
};
+ if (gs_nir) {
+ v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
+ stages[2].module = vk_shader_module_to_handle(&gs_m);
+ }
+
VkGraphicsPipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
+ .stageCount = num_stages,
.pStages = stages,
.pVertexInputState = vi_state,
@@ -4863,7 +3538,7 @@ create_blit_pipeline(struct v3dv_device *device,
return create_pipeline(device,
pass,
- vs_nir, fs_nir,
+ vs_nir, NULL, fs_nir,
&vi_state,
&ds_state,
&cb_state,
@@ -5096,7 +3771,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
VkFormat src_format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
- const VkImageBlit *_region,
+ const VkImageBlit2KHR *_region,
VkFilter filter,
bool dst_is_padded_image)
{
@@ -5107,14 +3782,14 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
   /* We don't support rendering to linear depth/stencil; the caller should
    * have rewritten this to a compatible color blit.
    */
- assert(dst->tiling != VK_IMAGE_TILING_LINEAR ||
+ assert(dst->vk.tiling != VK_IMAGE_TILING_LINEAR ||
!vk_format_is_depth_or_stencil(dst_format));
/* Can't sample from linear images */
- if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D)
+ if (src->vk.tiling == VK_IMAGE_TILING_LINEAR && src->vk.image_type != VK_IMAGE_TYPE_1D)
return false;
- VkImageBlit region = *_region;
+ VkImageBlit2KHR region = *_region;
/* Rewrite combined D/S blits to compatible color blits */
if (vk_format_is_depth_or_stencil(dst_format)) {
assert(src_format == dst_format);
@@ -5169,23 +3844,23 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
* need to apply those same semantics here when we compute the size of the
* destination image level.
*/
- const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
- const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
- const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
- const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
+ const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk.format);
+ const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk.format);
+ const uint32_t src_block_w = vk_format_get_blockwidth(src->vk.format);
+ const uint32_t src_block_h = vk_format_get_blockheight(src->vk.format);
const uint32_t dst_level_w =
- u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w),
+ u_minify(DIV_ROUND_UP(dst->vk.extent.width * src_block_w, dst_block_w),
region.dstSubresource.mipLevel);
const uint32_t dst_level_h =
- u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h),
+ u_minify(DIV_ROUND_UP(dst->vk.extent.height * src_block_h, dst_block_h),
region.dstSubresource.mipLevel);
const uint32_t src_level_w =
- u_minify(src->extent.width, region.srcSubresource.mipLevel);
+ u_minify(src->vk.extent.width, region.srcSubresource.mipLevel);
const uint32_t src_level_h =
- u_minify(src->extent.height, region.srcSubresource.mipLevel);
+ u_minify(src->vk.extent.height, region.srcSubresource.mipLevel);
const uint32_t src_level_d =
- u_minify(src->extent.depth, region.srcSubresource.mipLevel);
+ u_minify(src->vk.extent.depth, region.srcSubresource.mipLevel);
uint32_t dst_x, dst_y, dst_w, dst_h;
bool dst_mirror_x, dst_mirror_y;
@@ -5204,7 +3879,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t min_dst_layer;
uint32_t max_dst_layer;
bool dst_mirror_z = false;
- if (dst->type != VK_IMAGE_TYPE_3D) {
+ if (dst->vk.image_type != VK_IMAGE_TYPE_3D) {
min_dst_layer = region.dstSubresource.baseArrayLayer;
max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
} else {
@@ -5216,7 +3891,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t min_src_layer;
uint32_t max_src_layer;
bool src_mirror_z = false;
- if (src->type != VK_IMAGE_TYPE_3D) {
+ if (src->vk.image_type != VK_IMAGE_TYPE_3D) {
min_src_layer = region.srcSubresource.baseArrayLayer;
max_src_layer = min_src_layer + region.srcSubresource.layerCount;
} else {
@@ -5238,7 +3913,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
(float)(src_y + src_h),
};
- if (src->samples == VK_SAMPLE_COUNT_1_BIT) {
+ if (src->vk.samples == VK_SAMPLE_COUNT_1_BIT) {
coords[0] /= (float)src_level_w;
coords[1] /= (float)src_level_h;
coords[2] /= (float)src_level_w;
@@ -5270,8 +3945,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
/* Get the blit pipeline */
struct v3dv_meta_blit_pipeline *pipeline = NULL;
bool ok = get_blit_pipeline(cmd_buffer->device,
- dst_format, src_format, cmask, src->type,
- dst->samples, src->samples,
+ dst_format, src_format, cmask, src->vk.image_type,
+ dst->vk.samples, src->vk.samples,
&pipeline);
if (!ok)
return handled;
@@ -5341,7 +4016,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
VkImageViewCreateInfo dst_image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(dst),
- .viewType = v3dv_image_type_to_view_type(dst->type),
+ .viewType = v3dv_image_type_to_view_type(dst->vk.image_type),
.format = dst_format,
.subresourceRange = {
.aspectMask = aspects,
@@ -5399,7 +4074,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
VkImageViewCreateInfo src_image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(src),
- .viewType = v3dv_image_type_to_view_type(src->type),
+ .viewType = v3dv_image_type_to_view_type(src->vk.image_type),
.format = src_format,
.components = *cswizzle,
.subresourceRange = {
@@ -5407,7 +4082,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
.baseMipLevel = region.srcSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer =
- src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
+ src->vk.image_type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
.layerCount = 1
},
};
@@ -5457,8 +4132,8 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
v3dv_render_pass_from_handle(pipeline->pass);
can_skip_tlb_load =
cmask == full_cmask &&
- v3dv_subpass_area_is_tile_aligned(&render_area, framebuffer,
- pipeline_pass, 0);
+ v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
+ framebuffer, pipeline_pass, 0);
}
/* Record blit */
@@ -5481,7 +4156,7 @@ blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
* based on the ratio of the depth of the source and the destination
* images, picking the coordinate in the middle of each step.
*/
- if (src->type == VK_IMAGE_TYPE_3D) {
+ if (src->vk.image_type == VK_IMAGE_TYPE_3D) {
tex_coords[4] =
!mirror_z ?
(min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
@@ -5505,150 +4180,58 @@ fail:
return handled;
}
-void
-v3dv_CmdBlitImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkImageBlit* pRegions,
- VkFilter filter)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
+ const VkBlitImageInfo2KHR *pBlitImageInfo)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
- V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
+ V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
+ V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);
/* This command can only happen outside a render pass */
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
/* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
- assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&
- src->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT &&
+ src->vk.samples == VK_SAMPLE_COUNT_1_BIT);
/* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
- assert(!vk_format_is_compressed(dst->vk_format));
+ assert(!vk_format_is_compressed(dst->vk.format));
- for (uint32_t i = 0; i < regionCount; i++) {
- if (blit_tfu(cmd_buffer, dst, src, &pRegions[i]))
+ for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
+ if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
continue;
if (blit_shader(cmd_buffer,
- dst, dst->vk_format,
- src, src->vk_format,
+ dst, dst->vk.format,
+ src, src->vk.format,
0, NULL,
- &pRegions[i], filter, true)) {
+ &pBlitImageInfo->pRegions[i],
+ pBlitImageInfo->filter, true)) {
continue;
}
unreachable("Unsupported blit operation");
}
}
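For context, this entry point implements the VK_KHR_copy_commands2 form of the blit command. A hedged application-side usage sketch (core Vulkan types only; the image handles and extents are placeholders):

VkImageBlit2KHR region = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
   .srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 },
   .srcOffsets = { { 0, 0, 0 }, { 256, 256, 1 } },
   .dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 },
   .dstOffsets = { { 0, 0, 0 }, { 128, 128, 1 } },
};
VkBlitImageInfo2KHR blit_info = {
   .sType = VK_STRUCTURE_TYPE_BLIT_IMAGE_INFO_2_KHR,
   .srcImage = src_image,   /* placeholder handle */
   .srcImageLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   .dstImage = dst_image,   /* placeholder handle */
   .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   .regionCount = 1,
   .pRegions = &region,
   .filter = VK_FILTER_LINEAR,
};
vkCmdBlitImage2KHR(cmd_buf, &blit_info);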
-static void
-emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
- struct framebuffer_data *framebuffer,
- struct v3dv_image *dst,
- struct v3dv_image *src,
- uint32_t layer_offset,
- const VkImageResolve *region)
-{
- struct v3dv_cl *cl = &job->indirect;
- v3dv_cl_ensure_space(cl, 200, 1);
- v3dv_return_if_oom(NULL, job);
-
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
-
- cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-
- assert((src->type != VK_IMAGE_TYPE_3D &&
- layer_offset < region->srcSubresource.layerCount) ||
- layer_offset < src->extent.depth);
-
- const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
- region->srcSubresource.baseArrayLayer + layer_offset :
- region->srcOffset.z + layer_offset;
-
- emit_image_load(cl, framebuffer, src,
- region->srcSubresource.aspectMask,
- src_layer,
- region->srcSubresource.mipLevel,
- false, false);
-
- cl_emit(cl, END_OF_LOADS, end);
-
- cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- assert((dst->type != VK_IMAGE_TYPE_3D &&
- layer_offset < region->dstSubresource.layerCount) ||
- layer_offset < dst->extent.depth);
-
- const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
- region->dstSubresource.baseArrayLayer + layer_offset :
- region->dstOffset.z + layer_offset;
-
- emit_image_store(cl, framebuffer, dst,
- region->dstSubresource.aspectMask,
- dst_layer,
- region->dstSubresource.mipLevel,
- false, false, true);
-
- cl_emit(cl, END_OF_TILE_MARKER, end);
-
- cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(cl);
- }
-}
-
-static void
-emit_resolve_image_layer(struct v3dv_job *job,
- struct v3dv_image *dst,
- struct v3dv_image *src,
- struct framebuffer_data *framebuffer,
- uint32_t layer,
- const VkImageResolve *region)
-{
- emit_frame_setup(job, layer, NULL);
- emit_resolve_image_layer_per_tile_list(job, framebuffer,
- dst, src, layer, region);
- emit_supertile_coordinates(job, framebuffer);
-}
-
-static void
-emit_resolve_image_rcl(struct v3dv_job *job,
- struct v3dv_image *dst,
- struct v3dv_image *src,
- struct framebuffer_data *framebuffer,
- const VkImageResolve *region)
-{
- struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
- v3dv_return_if_oom(NULL, job);
-
- for (int layer = 0; layer < job->frame_tiling.layers; layer++)
- emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
- const VkImageResolve *region)
+ const VkImageResolve2KHR *region)
{
- if (!can_use_tlb(src, &region->srcOffset, NULL) ||
- !can_use_tlb(dst, &region->dstOffset, NULL)) {
+ if (!v3dv_meta_can_use_tlb(src, &region->srcOffset, NULL) ||
+ !v3dv_meta_can_use_tlb(dst, &region->dstOffset, NULL)) {
return false;
}
- if (!v3dv_format_supports_tlb_resolve(src->format))
+ if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
return false;
- const VkFormat fb_format = src->vk_format;
+ const VkFormat fb_format = src->vk.format;
uint32_t num_layers;
- if (dst->type != VK_IMAGE_TYPE_3D)
+ if (dst->vk.image_type != VK_IMAGE_TYPE_3D)
num_layers = region->dstSubresource.layerCount;
else
num_layers = region->extent.depth;
@@ -5659,24 +4242,26 @@ resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
return true;
- const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
- const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
+ const uint32_t block_w = vk_format_get_blockwidth(dst->vk.format);
+ const uint32_t block_h = vk_format_get_blockheight(dst->vk.format);
const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);
uint32_t internal_type, internal_bpp;
- get_internal_type_bpp_for_image_aspects(fb_format,
- region->srcSubresource.aspectMask,
- &internal_type, &internal_bpp);
+ v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
+ (fb_format, region->srcSubresource.aspectMask,
+ &internal_type, &internal_bpp);
- v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true);
+ v3dv_job_start_frame(job, width, height, num_layers, false,
+ 1, internal_bpp, true);
- struct framebuffer_data framebuffer;
- setup_framebuffer_data(&framebuffer, fb_format, internal_type,
- &job->frame_tiling);
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
+ internal_type, &job->frame_tiling);
- v3dv_job_emit_binning_flush(job);
- emit_resolve_image_rcl(job, dst, src, &framebuffer, region);
+ v3dv_X(job->device, job_emit_binning_flush)(job);
+ v3dv_X(job->device, meta_emit_resolve_image_rcl)(job, dst, src,
+ &framebuffer, region);
v3dv_cmd_buffer_finish_job(cmd_buffer);
return true;
@@ -5686,9 +4271,10 @@ static bool
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_image *dst,
struct v3dv_image *src,
- const VkImageResolve *region)
+ const VkImageResolve2KHR *region)
{
- const VkImageBlit blit_region = {
+ const VkImageBlit2KHR blit_region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
.srcSubresource = region->srcSubresource,
.srcOffsets = {
region->srcOffset,
@@ -5707,36 +4293,32 @@ resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
},
};
return blit_shader(cmd_buffer,
- dst, dst->vk_format,
- src, src->vk_format,
+ dst, dst->vk.format,
+ src, src->vk.format,
0, NULL,
&blit_region, VK_FILTER_NEAREST, true);
}
-void
-v3dv_CmdResolveImage(VkCommandBuffer commandBuffer,
- VkImage srcImage,
- VkImageLayout srcImageLayout,
- VkImage dstImage,
- VkImageLayout dstImageLayout,
- uint32_t regionCount,
- const VkImageResolve *pRegions)
+VKAPI_ATTR void VKAPI_CALL
+v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2KHR *info)
{
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
- V3DV_FROM_HANDLE(v3dv_image, src, srcImage);
- V3DV_FROM_HANDLE(v3dv_image, dst, dstImage);
+ V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
+ V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);
/* This command can only happen outside a render pass */
assert(cmd_buffer->state.pass == NULL);
assert(cmd_buffer->state.job == NULL);
- assert(src->samples == VK_SAMPLE_COUNT_4_BIT);
- assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
+ assert(src->vk.samples == VK_SAMPLE_COUNT_4_BIT);
+ assert(dst->vk.samples == VK_SAMPLE_COUNT_1_BIT);
- for (uint32_t i = 0; i < regionCount; i++) {
- if (resolve_image_tlb(cmd_buffer, dst, src, &pRegions[i]))
+ for (uint32_t i = 0; i < info->regionCount; i++) {
+ if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
- if (resolve_image_blit(cmd_buffer, dst, src, &pRegions[i]))
+ if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
continue;
      unreachable("Unsupported multisample resolve operation");
}
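Likewise, the resolve entry point is driven by the VK_KHR_copy_commands2 structures. A hedged usage sketch from the application side (handles and extents are placeholders; per the asserts above, the source must be 4x multisampled and the destination single-sampled):

VkImageResolve2KHR region = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
   .srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 },
   .dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 },
   .extent = { 256, 256, 1 },
};
VkResolveImageInfo2KHR resolve_info = {
   .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
   .srcImage = msaa_image,        /* placeholder, VK_SAMPLE_COUNT_4_BIT */
   .srcImageLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   .dstImage = resolved_image,    /* placeholder, single-sampled */
   .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   .regionCount = 1,
   .pRegions = &region,
};
vkCmdResolveImage2KHR(cmd_buf, &resolve_info);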
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pass.c b/lib/mesa/src/broadcom/vulkan/v3dv_pass.c
index 0f03dfe67..1b03c0d79 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_pass.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_pass.c
@@ -22,7 +22,6 @@
*/
#include "v3dv_private.h"
-#include "vk_format_info.h"
static uint32_t
num_subpass_attachments(const VkSubpassDescription *desc)
@@ -34,18 +33,26 @@ num_subpass_attachments(const VkSubpassDescription *desc)
}
static void
-set_use_tlb_resolve(struct v3dv_render_pass_attachment *att)
+set_use_tlb_resolve(struct v3dv_device *device,
+ struct v3dv_render_pass_attachment *att)
{
- const struct v3dv_format *format = v3dv_get_format(att->desc.format);
- att->use_tlb_resolve = v3dv_format_supports_tlb_resolve(format);
+ const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
+ att->use_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
}
static void
-pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass)
+pass_find_subpass_range_for_attachments(struct v3dv_device *device,
+ struct v3dv_render_pass *pass)
{
for (uint32_t i = 0; i < pass->attachment_count; i++) {
pass->attachments[i].first_subpass = pass->subpass_count - 1;
pass->attachments[i].last_subpass = 0;
+ if (pass->multiview_enabled) {
+ for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
+ pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
+ pass->attachments[i].views[j].last_subpass = 0;
+ }
+ }
}
for (uint32_t i = 0; i < pass->subpass_count; i++) {
@@ -56,14 +63,26 @@ pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass)
if (attachment_idx == VK_ATTACHMENT_UNUSED)
continue;
- if (i < pass->attachments[attachment_idx].first_subpass)
- pass->attachments[attachment_idx].first_subpass = i;
- if (i > pass->attachments[attachment_idx].last_subpass)
- pass->attachments[attachment_idx].last_subpass = i;
+ struct v3dv_render_pass_attachment *att =
+ &pass->attachments[attachment_idx];
+
+ if (i < att->first_subpass)
+ att->first_subpass = i;
+ if (i > att->last_subpass)
+ att->last_subpass = i;
+
+ uint32_t view_mask = subpass->view_mask;
+ while (view_mask) {
+ uint32_t view_index = u_bit_scan(&view_mask);
+ if (i < att->views[view_index].first_subpass)
+ att->views[view_index].first_subpass = i;
+ if (i > att->views[view_index].last_subpass)
+ att->views[view_index].last_subpass = i;
+ }
if (subpass->resolve_attachments &&
subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
- set_use_tlb_resolve(&pass->attachments[attachment_idx]);
+ set_use_tlb_resolve(device, att);
}
}
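The view-mask walk above relies on u_bit_scan, which returns the index of the lowest set bit and clears it. A self-contained sketch of the same iteration pattern (local_bit_scan is a stand-in for Mesa's u_bit_scan from util/bitscan.h):

#include <stdint.h>

static int
local_bit_scan(uint32_t *mask)
{
   int i = __builtin_ctz(*mask); /* index of the lowest set bit */
   *mask &= *mask - 1;           /* clear that bit */
   return i;
}

/* A view_mask of 0x5 visits view indices 0 and 2, in that order. */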
@@ -100,7 +119,7 @@ pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass)
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass(VkDevice _device,
const VkRenderPassCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -111,6 +130,10 @@ v3dv_CreateRenderPass(VkDevice _device,
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
+ const VkRenderPassMultiviewCreateInfo *multiview_info =
+ vk_find_struct_const(pCreateInfo->pNext, RENDER_PASS_MULTIVIEW_CREATE_INFO);
+ bool multiview_enabled = multiview_info && multiview_info->subpassCount > 0;
+
size_t size = sizeof(*pass);
size_t subpasses_offset = size;
size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
@@ -120,8 +143,9 @@ v3dv_CreateRenderPass(VkDevice _device,
pass = vk_object_zalloc(&device->vk, pAllocator, size,
VK_OBJECT_TYPE_RENDER_PASS);
if (pass == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ pass->multiview_enabled = multiview_enabled;
pass->attachment_count = pCreateInfo->attachmentCount;
pass->attachments = (void *) pass + attachments_offset;
pass->subpass_count = pCreateInfo->subpassCount;
@@ -144,7 +168,7 @@ v3dv_CreateRenderPass(VkDevice _device,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass->subpass_attachments == NULL) {
vk_object_free(&device->vk, pAllocator, pass);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
} else {
pass->subpass_attachments = NULL;
@@ -157,6 +181,8 @@ v3dv_CreateRenderPass(VkDevice _device,
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
+ if (multiview_enabled)
+ subpass->view_mask = multiview_info->pViewMasks[i];
if (desc->inputAttachmentCount > 0) {
subpass->input_attachments = p;
@@ -175,16 +201,10 @@ v3dv_CreateRenderPass(VkDevice _device,
p += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
- const uint32_t attachment_idx =
- desc->pColorAttachments[j].attachment;
subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
- .attachment = attachment_idx,
+ .attachment = desc->pColorAttachments[j].attachment,
.layout = desc->pColorAttachments[j].layout,
};
- if (attachment_idx != VK_ATTACHMENT_UNUSED) {
- VkFormat format = pass->attachments[attachment_idx].desc.format;
- subpass->has_srgb_rt |= vk_format_is_srgb(format);
- }
}
}
@@ -230,7 +250,7 @@ v3dv_CreateRenderPass(VkDevice _device,
}
}
- pass_find_subpass_range_for_attachments(pass);
+ pass_find_subpass_range_for_attachments(device, pass);
/* FIXME: handle subpass dependencies */
@@ -239,7 +259,7 @@ v3dv_CreateRenderPass(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyRenderPass(VkDevice _device,
VkRenderPass _pass,
const VkAllocationCallbacks *pAllocator)
@@ -255,7 +275,8 @@ v3dv_DestroyRenderPass(VkDevice _device,
}
static void
-subpass_get_granularity(struct v3dv_render_pass *pass,
+subpass_get_granularity(struct v3dv_device *device,
+ struct v3dv_render_pass *pass,
uint32_t subpass_idx,
VkExtent2D *granularity)
{
@@ -283,11 +304,11 @@ subpass_get_granularity(struct v3dv_render_pass *pass,
continue;
const VkAttachmentDescription *desc =
&pass->attachments[attachment_idx].desc;
- const struct v3dv_format *format = v3dv_get_format(desc->format);
+ const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
uint32_t internal_type, internal_bpp;
- v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
- &internal_type,
- &internal_bpp);
+ v3dv_X(device, get_internal_type_bpp_for_output_format)
+ (format->rt_type, &internal_type, &internal_bpp);
+
max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
}
@@ -306,12 +327,13 @@ subpass_get_granularity(struct v3dv_render_pass *pass,
};
}
-void
-v3dv_GetRenderAreaGranularity(VkDevice device,
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetRenderAreaGranularity(VkDevice _device,
VkRenderPass renderPass,
VkExtent2D *pGranularity)
{
V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
*pGranularity = (VkExtent2D) {
.width = 64,
@@ -320,7 +342,7 @@ v3dv_GetRenderAreaGranularity(VkDevice device,
for (uint32_t i = 0; i < pass->subpass_count; i++) {
VkExtent2D sg;
- subpass_get_granularity(pass, i, &sg);
+ subpass_get_granularity(device, pass, i, &sg);
pGranularity->width = MIN2(pGranularity->width, sg.width);
pGranularity->height = MIN2(pGranularity->height, sg.height);
}
@@ -348,7 +370,8 @@ v3dv_GetRenderAreaGranularity(VkDevice device,
* In that case, we can't flag the area as being aligned.
*/
bool
-v3dv_subpass_area_is_tile_aligned(const VkRect2D *area,
+v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
+ const VkRect2D *area,
struct v3dv_framebuffer *fb,
struct v3dv_render_pass *pass,
uint32_t subpass_idx)
@@ -356,7 +379,7 @@ v3dv_subpass_area_is_tile_aligned(const VkRect2D *area,
assert(subpass_idx < pass->subpass_count);
VkExtent2D granularity;
- subpass_get_granularity(pass, subpass_idx, &granularity);
+ subpass_get_granularity(device, pass, subpass_idx, &granularity);
return area->offset.x % granularity.width == 0 &&
area->offset.y % granularity.height == 0 &&
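The return expression continues past this hunk. A hedged sketch of the full test's plausible shape, with local stand-in parameters (the real function also honors framebuffer edge padding, which this sketch folds into the fb_w/fb_h clamp):

#include <stdbool.h>

static bool
area_is_tile_aligned(int x, int y, int w, int h,
                     int tile_w, int tile_h, int fb_w, int fb_h)
{
   /* Aligned start, and each edge either ends on a tile boundary or
    * reaches the framebuffer edge, where the hardware clamps anyway. */
   return x % tile_w == 0 && y % tile_h == 0 &&
          (w % tile_w == 0 || x + w >= fb_w) &&
          (h % tile_h == 0 || y + h >= fb_h);
}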
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c
index 35cf35592..daa6c7550 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline.c
@@ -34,10 +34,13 @@
#include "nir/nir_serialize.h"
#include "util/u_atomic.h"
+#include "util/u_prim.h"
+#include "util/os_time.h"
#include "vulkan/util/vk_format.h"
-#include "broadcom/cle/v3dx_pack.h"
+static VkResult
+compute_vpm_config(struct v3dv_pipeline *pipeline);
void
v3dv_print_v3d_key(struct v3d_key *key,
@@ -120,11 +123,15 @@ pipeline_free_stages(struct v3dv_device *device,
*/
destroy_pipeline_stage(device, pipeline->vs, pAllocator);
destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
+ destroy_pipeline_stage(device, pipeline->gs, pAllocator);
+ destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);
destroy_pipeline_stage(device, pipeline->fs, pAllocator);
destroy_pipeline_stage(device, pipeline->cs, pAllocator);
pipeline->vs = NULL;
pipeline->vs_bin = NULL;
+ pipeline->gs = NULL;
+ pipeline->gs_bin = NULL;
pipeline->fs = NULL;
pipeline->cs = NULL;
}
@@ -157,7 +164,7 @@ v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,
vk_object_free(&device->vk, pAllocator, pipeline);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipeline(VkDevice _device,
VkPipeline _pipeline,
const VkAllocationCallbacks *pAllocator)
@@ -172,20 +179,27 @@ v3dv_DestroyPipeline(VkDevice _device,
}
static const struct spirv_to_nir_options default_spirv_options = {
- .caps = { false },
+ .caps = {
+ .device_group = true,
+ .multiview = true,
+ .subgroup_basic = true,
+ .variable_pointers = true,
+ },
.ubo_addr_format = nir_address_format_32bit_index_offset,
.ssbo_addr_format = nir_address_format_32bit_index_offset,
.phys_ssbo_addr_format = nir_address_format_64bit_global,
.push_const_addr_format = nir_address_format_logical,
.shared_addr_format = nir_address_format_32bit_offset,
- .frag_coord_is_sysval = false,
};
const nir_shader_compiler_options v3dv_nir_options = {
- .lower_add_sat = true,
+ .lower_uadd_sat = true,
+ .lower_iadd_sat = true,
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
@@ -228,11 +242,16 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_wpos_pntc = true,
.lower_rotate = true,
.lower_to_scalar = true,
+ .lower_device_index_to_zero = true,
.has_fsub = true,
.has_isub = true,
.vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
* needs to be supported */
.lower_interpolate_at = true,
+ .max_unroll_iterations = 16,
+ .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
+ .divergence_analysis_options =
+ nir_divergence_multiple_workgroup_per_compute_subgroup
};
const nir_shader_compiler_options *
@@ -250,9 +269,7 @@ v3dv_pipeline_get_nir_options(void)
})
static void
-nir_optimize(nir_shader *nir,
- struct v3dv_pipeline_stage *stage,
- bool allow_copies)
+nir_optimize(nir_shader *nir, bool allow_copies)
{
bool progress;
@@ -276,7 +293,7 @@ nir_optimize(nir_shader *nir,
OPT(nir_lower_alu_to_scalar, NULL, NULL);
OPT(nir_copy_prop);
- OPT(nir_lower_phis_to_scalar);
+ OPT(nir_lower_phis_to_scalar, false);
OPT(nir_copy_prop);
OPT(nir_opt_dce);
@@ -313,9 +330,29 @@ nir_optimize(nir_shader *nir,
}
static void
-preprocess_nir(nir_shader *nir,
- struct v3dv_pipeline_stage *stage)
+preprocess_nir(nir_shader *nir)
{
+ /* We have to lower away local variable initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+ NIR_PASS_V(nir, nir_lower_returns);
+ NIR_PASS_V(nir, nir_inline_functions);
+ NIR_PASS_V(nir, nir_opt_deref);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+ if (func->is_entrypoint)
+ func->name = ralloc_strdup(func, "main");
+ else
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
/* Make sure we lower variable initializers on output variables so that
* nir_remove_dead_variables below sees the corresponding stores
*/
@@ -353,7 +390,7 @@ preprocess_nir(nir_shader *nir,
nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
NULL);
- NIR_PASS_V(nir, nir_propagate_invariant);
+ NIR_PASS_V(nir, nir_propagate_invariant, false);
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir), true, false);
@@ -369,15 +406,14 @@ preprocess_nir(nir_shader *nir,
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);
- nir_optimize(nir, stage, true);
+ nir_optimize(nir, true);
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
/* Lower a bunch of stuff */
NIR_PASS_V(nir, nir_lower_var_copies);
- NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in |
- nir_var_shader_out, UINT32_MAX);
+ NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
NIR_PASS_V(nir, nir_lower_indirect_derefs,
nir_var_function_temp, 2);
@@ -389,49 +425,7 @@ preprocess_nir(nir_shader *nir,
NIR_PASS_V(nir, nir_lower_frexp);
/* Get rid of split copies */
- nir_optimize(nir, stage, false);
-}
-
-/* FIXME: This is basically the same code at anv, tu and radv. Move to common
- * place?
- */
-static struct nir_spirv_specialization*
-vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,
- uint32_t *out_num_spec_entries)
-{
- if (spec_info == NULL || spec_info->mapEntryCount == 0)
- return NULL;
-
- uint32_t num_spec_entries = spec_info->mapEntryCount;
- struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
-
- for (uint32_t i = 0; i < num_spec_entries; i++) {
- VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
- const void *data = spec_info->pData + entry.offset;
- assert(data + entry.size <= spec_info->pData + spec_info->dataSize);
-
- spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- switch (entry.size) {
- case 8:
- spec_entries[i].value.u64 = *(const uint64_t *)data;
- break;
- case 4:
- spec_entries[i].value.u32 = *(const uint32_t *)data;
- break;
- case 2:
- spec_entries[i].value.u16 = *(const uint16_t *)data;
- break;
- case 1:
- spec_entries[i].value.u8 = *(const uint8_t *)data;
- break;
- default:
- assert(!"Invalid spec constant size");
- break;
- }
- }
-
- *out_num_spec_entries = num_spec_entries;
- return spec_entries;
+ nir_optimize(nir, false);
}
static nir_shader *
@@ -445,7 +439,7 @@ shader_module_compile_to_nir(struct v3dv_device *device,
uint32_t *spirv = (uint32_t *) stage->module->data;
assert(stage->module->size % 4 == 0);
- if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV))
v3dv_print_spirv(stage->module->data, stage->module->size, stderr);
uint32_t num_spec_entries = 0;
@@ -472,37 +466,23 @@ shader_module_compile_to_nir(struct v3dv_device *device,
}
assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));
- if (V3D_DEBUG & (V3D_DEBUG_NIR |
- v3d_debug_flag_for_shader_stage(stage->stage))) {
+ const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
+ .frag_coord = true,
+ .point_coord = true,
+ };
+ NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
+
+ if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
+ v3d_debug_flag_for_shader_stage(
+ broadcom_shader_stage_to_gl(stage->stage))))) {
fprintf(stderr, "Initial form: %s prog %d NIR:\n",
- gl_shader_stage_name(stage->stage),
+ broadcom_shader_stage_name(stage->stage),
stage->program_id);
nir_print_shader(nir, stderr);
fprintf(stderr, "\n");
}
- /* We have to lower away local variable initializers right before we
- * inline functions. That way they get properly initialized at the top
- * of the function and not at the top of its caller.
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
- NIR_PASS_V(nir, nir_lower_returns);
- NIR_PASS_V(nir, nir_inline_functions);
- NIR_PASS_V(nir, nir_opt_deref);
-
- /* Pick off the single entrypoint that we want */
- foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
- if (func->is_entrypoint)
- func->name = ralloc_strdup(func, "main");
- else
- exec_node_remove(&func->node);
- }
- assert(exec_list_length(&nir->functions) == 1);
-
- /* Vulkan uses the separate-shader linking model */
- nir->info.separate_shader = true;
-
- preprocess_nir(nir, stage);
+ preprocess_nir(nir);
return nir;
}
@@ -567,11 +547,46 @@ lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
instr->intrinsic = nir_intrinsic_load_uniform;
}
+static struct v3dv_descriptor_map*
+pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,
+ VkDescriptorType desc_type,
+ gl_shader_stage gl_stage,
+ bool is_sampler)
+{
+ enum broadcom_shader_stage broadcom_stage =
+ gl_shader_stage_to_broadcom(gl_stage);
+
+ assert(pipeline->shared_data &&
+ pipeline->shared_data->maps[broadcom_stage]);
+
+   switch (desc_type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ return &pipeline->shared_data->maps[broadcom_stage]->texture_map;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ return is_sampler ?
+ &pipeline->shared_data->maps[broadcom_stage]->sampler_map :
+ &pipeline->shared_data->maps[broadcom_stage]->texture_map;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;
+ default:
+ unreachable("Descriptor type unknown or not having a descriptor map");
+ }
+}
+
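A usage note: every lowering path below funnels descriptor lookups through this helper so that each shader stage gets its own per-stage maps. A hedged call-site sketch using the signature declared above:

/* Hypothetical call site: pick the per-stage texture map for a storage
 * image accessed from a fragment shader. */
struct v3dv_descriptor_map *map =
   pipeline_get_descriptor_map(pipeline, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                               MESA_SHADER_FRAGMENT, false /* is_sampler */);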
/* Gathers info from the intrinsic (set and binding) and then lowers it so it
* could be used by the v3d_compiler */
static void
lower_vulkan_resource_index(nir_builder *b,
nir_intrinsic_instr *instr,
+ nir_shader *shader,
struct v3dv_pipeline *pipeline,
const struct v3dv_pipeline_layout *layout)
{
@@ -585,13 +600,13 @@ lower_vulkan_resource_index(nir_builder *b,
struct v3dv_descriptor_set_binding_layout *binding_layout =
&set_layout->binding[binding];
unsigned index = 0;
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
- switch (nir_intrinsic_desc_type(instr)) {
+ switch (desc_type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
struct v3dv_descriptor_map *descriptor_map =
- nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ?
- &pipeline->shared_data->ubo_map : &pipeline->shared_data->ssbo_map;
+ pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);
if (!const_val)
unreachable("non-constant vulkan_resource_index array index");
@@ -601,7 +616,7 @@ lower_vulkan_resource_index(nir_builder *b,
binding_layout->array_size,
32 /* return_size: doesn't really apply for this case */);
- if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+ if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
/* skip index 0 which is used for push constants */
index++;
}
@@ -614,13 +629,11 @@ lower_vulkan_resource_index(nir_builder *b,
}
/* Since we use the deref pass, both vulkan_resource_index and
- * vulkan_load_descriptor returns a vec2. But for the index the backend
- * expect just one scalar (like with get_ssbo_size), so lets return here
- * just it. Then on load_descriptor we would recreate the vec2, keeping the
- * second component (unused right now) to zero.
+ * vulkan_load_descriptor return a vec2 providing an index and
+ * offset. Our backend compiler only cares about the index part.
*/
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
- nir_imm_int(b, index));
+ nir_imm_ivec2(b, index, 0));
nir_instr_remove(&instr->instr);
}
@@ -629,6 +642,7 @@ lower_vulkan_resource_index(nir_builder *b,
*/
static uint8_t
lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
+ nir_shader *shader,
struct v3dv_pipeline *pipeline,
const struct v3dv_pipeline_layout *layout)
{
@@ -704,11 +718,17 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
deref->var->data.index + base_index :
base_index;
- uint8_t return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
+ uint8_t return_size;
+ if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_16BIT))
+ return_size = 16;
+ else if (unlikely(V3D_DEBUG & V3D_DEBUG_TMU_32BIT))
+ return_size = 32;
+ else
+ return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
- struct v3dv_descriptor_map *map = is_sampler ?
- &pipeline->shared_data->sampler_map :
- &pipeline->shared_data->texture_map;
+ struct v3dv_descriptor_map *map =
+ pipeline_get_descriptor_map(pipeline, binding_layout->type,
+ shader->info.stage, is_sampler);
int desc_index =
descriptor_map_add(map,
deref->var->data.descriptor_set,
@@ -727,6 +747,7 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
static bool
lower_sampler(nir_builder *b, nir_tex_instr *instr,
+ nir_shader *shader,
struct v3dv_pipeline *pipeline,
const struct v3dv_pipeline_layout *layout)
{
@@ -736,13 +757,14 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr,
nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
if (texture_idx >= 0)
- return_size = lower_tex_src_to_offset(b, instr, texture_idx, pipeline, layout);
+ return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,
+ pipeline, layout);
int sampler_idx =
nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
if (sampler_idx >= 0)
- lower_tex_src_to_offset(b, instr, sampler_idx, pipeline, layout);
+ lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);
if (texture_idx < 0 && sampler_idx < 0)
return false;
@@ -762,6 +784,7 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr,
static void
lower_image_deref(nir_builder *b,
nir_intrinsic_instr *instr,
+ nir_shader *shader,
struct v3dv_pipeline *pipeline,
const struct v3dv_pipeline_layout *layout)
{
@@ -811,8 +834,12 @@ lower_image_deref(nir_builder *b,
assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
+ struct v3dv_descriptor_map *map =
+ pipeline_get_descriptor_map(pipeline, binding_layout->type,
+ shader->info.stage, false);
+
int desc_index =
- descriptor_map_add(&pipeline->shared_data->texture_map,
+ descriptor_map_add(map,
deref->var->data.descriptor_set,
deref->var->data.binding,
array_index,
@@ -832,6 +859,7 @@ lower_image_deref(nir_builder *b,
static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
+ nir_shader *shader,
struct v3dv_pipeline *pipeline,
const struct v3dv_pipeline_layout *layout)
{
@@ -850,16 +878,14 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
return true;
case nir_intrinsic_vulkan_resource_index:
- lower_vulkan_resource_index(b, instr, pipeline, layout);
+ lower_vulkan_resource_index(b, instr, shader, pipeline, layout);
return true;
case nir_intrinsic_load_vulkan_descriptor: {
- /* We are not using it, as loading the descriptor happens as part of the
- * load/store instruction, so the simpler is just doing a no-op. We just
- * lower the desc back to a vec2, as it is what load_ssbo/ubo expects.
+ /* Loading the descriptor happens as part of load/store instructions,
+ * so for us this is a no-op.
*/
- nir_ssa_def *desc = nir_vec2(b, instr->src[0].ssa, nir_imm_int(b, 0));
- nir_ssa_def_rewrite_uses(&instr->dest.ssa, desc);
+ nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
nir_instr_remove(&instr->instr);
return true;
}
@@ -878,7 +904,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size:
case nir_intrinsic_image_deref_samples:
- lower_image_deref(b, instr, pipeline, layout);
+ lower_image_deref(b, instr, shader, pipeline, layout);
return true;
default:
@@ -888,6 +914,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
static bool
lower_impl(nir_function_impl *impl,
+ nir_shader *shader,
struct v3dv_pipeline *pipeline,
const struct v3dv_pipeline_layout *layout)
{
@@ -901,11 +928,12 @@ lower_impl(nir_function_impl *impl,
switch (instr->type) {
case nir_instr_type_tex:
progress |=
- lower_sampler(&b, nir_instr_as_tex(instr), pipeline, layout);
+ lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);
break;
case nir_instr_type_intrinsic:
progress |=
- lower_intrinsic(&b, nir_instr_as_intrinsic(instr), pipeline, layout);
+ lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,
+ pipeline, layout);
break;
default:
break;
@@ -925,7 +953,7 @@ lower_pipeline_layout_info(nir_shader *shader,
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= lower_impl(function->impl, pipeline, layout);
+ progress |= lower_impl(function->impl, shader, pipeline, layout);
}
return progress;
@@ -950,6 +978,18 @@ lower_fs_io(nir_shader *nir)
}
static void
+lower_gs_io(struct nir_shader *nir)
+{
+ NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
+
+ nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
+ MESA_SHADER_GEOMETRY);
+
+ nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
+ MESA_SHADER_GEOMETRY);
+}
+
+static void
lower_vs_io(struct nir_shader *nir)
{
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
@@ -982,13 +1022,16 @@ pipeline_populate_v3d_key(struct v3d_key *key,
uint32_t ucp_enables,
bool robust_buffer_access)
{
+ assert(p_stage->pipeline->shared_data &&
+ p_stage->pipeline->shared_data->maps[p_stage->stage]);
+
   /* The following are default values used at pipeline creation; we use
    * 32 bit as the default return size there.
    */
struct v3dv_descriptor_map *sampler_map =
- &p_stage->pipeline->shared_data->sampler_map;
+ &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;
struct v3dv_descriptor_map *texture_map =
- &p_stage->pipeline->shared_data->texture_map;
+ &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;
key->num_tex_used = texture_map->num_desc;
assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);
@@ -1010,12 +1053,23 @@ pipeline_populate_v3d_key(struct v3d_key *key,
key->sampler[sampler_idx].return_size == 32 ? 4 : 2;
}
-
-
- /* default value. Would be override on the vs/gs populate methods when GS
- * gets supported
- */
- key->is_last_geometry_stage = true;
+ switch (p_stage->stage) {
+ case BROADCOM_SHADER_VERTEX:
+ case BROADCOM_SHADER_VERTEX_BIN:
+ key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;
+ break;
+ case BROADCOM_SHADER_GEOMETRY:
+ case BROADCOM_SHADER_GEOMETRY_BIN:
+ /* FIXME: while we don't implement tessellation shaders */
+ key->is_last_geometry_stage = true;
+ break;
+ case BROADCOM_SHADER_FRAGMENT:
+ case BROADCOM_SHADER_COMPUTE:
+ key->is_last_geometry_stage = false;
+ break;
+ default:
+ unreachable("unsupported shader stage");
+ }
/* Vulkan doesn't have fixed function state for user clip planes. Instead,
* shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler
@@ -1073,8 +1127,11 @@ static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct v3dv_pipeline_stage *p_stage,
+ bool has_geometry_shader,
uint32_t ucp_enables)
{
+ assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);
+
memset(key, 0, sizeof(*key));
const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
@@ -1087,9 +1144,11 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
key->is_points = (topology == PIPE_PRIM_POINTS);
key->is_lines = (topology >= PIPE_PRIM_LINES &&
topology <= PIPE_PRIM_LINE_STRIP);
+ key->has_gs = has_geometry_shader;
const VkPipelineColorBlendStateCreateInfo *cb_info =
- pCreateInfo->pColorBlendState;
+ !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
+ pCreateInfo->pColorBlendState : NULL;
key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
vk_to_pipe_logicop[cb_info->logicOp] :
@@ -1139,7 +1198,8 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
*/
if (key->logicop_func != PIPE_LOGICOP_COPY) {
key->color_fmt[i].format = fb_pipe_format;
- key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format);
+ key->color_fmt[i].swizzle =
+ v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);
}
const struct util_format_description *desc =
@@ -1173,43 +1233,140 @@ pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
}
static void
-pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
+setup_stage_outputs_from_next_stage_inputs(
+ uint8_t next_stage_num_inputs,
+ struct v3d_varying_slot *next_stage_input_slots,
+ uint8_t *num_used_outputs,
+ struct v3d_varying_slot *used_output_slots,
+ uint32_t size_of_used_output_slots)
+{
+ *num_used_outputs = next_stage_num_inputs;
+ memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);
+}
+
+static void
+pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct v3dv_pipeline_stage *p_stage)
{
+ assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||
+ p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);
+
memset(key, 0, sizeof(*key));
const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
- /* Vulkan specifies a point size per vertex, so true for if the prim are
- * points, like on ES2)
- */
- const VkPipelineInputAssemblyStateCreateInfo *ia_info =
- pCreateInfo->pInputAssemblyState;
- uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
+ struct v3dv_pipeline *pipeline = p_stage->pipeline;
- /* FIXME: not enough to being PRIM_POINTS, on gallium the full check is
- * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
- key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
+ key->per_vertex_point_size =
+ p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);
+
+ key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
- key->is_coord = p_stage->stage == BROADCOM_SHADER_VERTEX_BIN;
+ assert(key->base.is_last_geometry_stage);
if (key->is_coord) {
- /* The only output varying on coord shaders are for transform
+ /* Output varyings in the last binning shader are only used for transform
* feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
*/
key->num_used_outputs = 0;
} else {
- struct v3dv_pipeline *pipeline = p_stage->pipeline;
struct v3dv_shader_variant *fs_variant =
pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
- key->num_used_outputs = fs_variant->prog_data.fs->num_inputs;
-
STATIC_ASSERT(sizeof(key->used_outputs) ==
sizeof(fs_variant->prog_data.fs->input_slots));
- memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots,
- sizeof(key->used_outputs));
+
+ setup_stage_outputs_from_next_stage_inputs(
+ fs_variant->prog_data.fs->num_inputs,
+ fs_variant->prog_data.fs->input_slots,
+ &key->num_used_outputs,
+ key->used_outputs,
+ sizeof(key->used_outputs));
+ }
+}
+
+static void
+pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct v3dv_pipeline_stage *p_stage)
+{
+ assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||
+ p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);
+
+ memset(key, 0, sizeof(*key));
+
+ const bool rba = p_stage->pipeline->device->features.robustBufferAccess;
+ pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);
+
+ struct v3dv_pipeline *pipeline = p_stage->pipeline;
+
+   /* Vulkan specifies a point size per vertex, so this is true when the
+    * primitives are points (as in ES2).
+    */
+ const VkPipelineInputAssemblyStateCreateInfo *ia_info =
+ pCreateInfo->pInputAssemblyState;
+ uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];
+
+ /* FIXME: PRIM_POINTS is not enough, in gallium the full check is
+ * PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */
+ key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);
+
+ key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);
+
+   if (key->is_coord) { /* Binning VS */
+ if (key->base.is_last_geometry_stage) {
+ /* Output varyings in the last binning shader are only used for
+ * transform feedback. Set to 0 as VK_EXT_transform_feedback is not
+ * supported.
+ */
+ key->num_used_outputs = 0;
+ } else {
+ /* Linking against GS binning program */
+ assert(pipeline->gs);
+ struct v3dv_shader_variant *gs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
+
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(gs_bin_variant->prog_data.gs->input_slots));
+
+ setup_stage_outputs_from_next_stage_inputs(
+ gs_bin_variant->prog_data.gs->num_inputs,
+ gs_bin_variant->prog_data.gs->input_slots,
+ &key->num_used_outputs,
+ key->used_outputs,
+ sizeof(key->used_outputs));
+ }
+ } else { /* Render VS */
+ if (pipeline->gs) {
+ /* Linking against GS render program */
+ struct v3dv_shader_variant *gs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
+
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(gs_variant->prog_data.gs->input_slots));
+
+ setup_stage_outputs_from_next_stage_inputs(
+ gs_variant->prog_data.gs->num_inputs,
+ gs_variant->prog_data.gs->input_slots,
+ &key->num_used_outputs,
+ key->used_outputs,
+ sizeof(key->used_outputs));
+ } else {
+ /* Linking against FS program */
+ struct v3dv_shader_variant *fs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
+
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(fs_variant->prog_data.fs->input_slots));
+
+ setup_stage_outputs_from_next_stage_inputs(
+ fs_variant->prog_data.fs->num_inputs,
+ fs_variant->prog_data.fs->input_slots,
+ &key->num_used_outputs,
+ key->used_outputs,
+ sizeof(key->used_outputs));
+ }
}
const VkPipelineVertexInputStateCreateInfo *vi_info =
@@ -1223,16 +1380,16 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
}
}
-/*
- * Creates the pipeline_stage for the coordinate shader. Initially a clone of
- * the vs pipeline_stage, with is_coord to true
+/**
+ * Creates the initial form of the pipeline stage for a binning shader by
+ * cloning the render shader and flagging it as a coordinate shader.
*
* Returns NULL if it was not able to allocate the object, so it should be
* handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.
*/
-static struct v3dv_pipeline_stage*
-pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
- const VkAllocationCallbacks *pAllocator)
+static struct v3dv_pipeline_stage *
+pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,
+ const VkAllocationCallbacks *pAllocator)
{
struct v3dv_device *device = src->pipeline->device;
@@ -1243,13 +1400,25 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
if (p_stage == NULL)
return NULL;
+ assert(src->stage == BROADCOM_SHADER_VERTEX ||
+ src->stage == BROADCOM_SHADER_GEOMETRY);
+
+ enum broadcom_shader_stage bin_stage =
+ src->stage == BROADCOM_SHADER_VERTEX ?
+ BROADCOM_SHADER_VERTEX_BIN :
+ BROADCOM_SHADER_GEOMETRY_BIN;
+
p_stage->pipeline = src->pipeline;
- assert(src->stage == BROADCOM_SHADER_VERTEX);
- p_stage->stage = BROADCOM_SHADER_VERTEX_BIN;
+ p_stage->stage = bin_stage;
p_stage->entrypoint = src->entrypoint;
p_stage->module = src->module;
- p_stage->nir = src->nir ? nir_shader_clone(NULL, src->nir) : NULL;
+   /* For binning shaders we will clone the NIR code from the corresponding
+    * render shader later, when we call pipeline_compile_xxx_shader. This way
+    * we only have to run the relevant NIR lowerings once, on the render
+    * shaders.
+    */
+ p_stage->nir = NULL;
p_stage->spec_info = src->spec_info;
+ p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
return p_stage;
@@ -1314,14 +1483,18 @@ pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
- /* We need to include both on the sha1 key as one could affect the other
- * during linking (like if vertex output are constants, then the
- * fragment shader would load_const intead of load_input). An
- * alternative would be to use the serialized nir, but that seems like
- * an overkill
+   /* We need to include all shader stages in the sha1 key as linking may
+    * modify the shader code in any stage. An alternative would be to use the
+    * serialized NIR, but that seems like overkill.
+    */
_mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,
sizeof(pipeline->vs->shader_sha1));
+
+ if (pipeline->gs) {
+ _mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,
+ sizeof(pipeline->gs->shader_sha1));
+ }
+
_mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,
sizeof(pipeline->fs->shader_sha1));
@@ -1397,7 +1570,7 @@ pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
*/
struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
- broadcom_shader_stage stage,
+ enum broadcom_shader_stage stage,
struct v3d_prog_data *prog_data,
uint32_t prog_data_size,
uint32_t assembly_offset,
@@ -1441,22 +1614,25 @@ v3dv_shader_variant_create(struct v3dv_device *device,
* VK_ERROR_UNKNOWN, even if we know that the problem was a compiler
* error.
*/
-static struct v3dv_shader_variant*
+static struct v3dv_shader_variant *
pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
struct v3d_key *key,
size_t key_size,
const VkAllocationCallbacks *pAllocator,
VkResult *out_vk_result)
{
+ int64_t stage_start = os_time_get_nano();
+
struct v3dv_pipeline *pipeline = p_stage->pipeline;
struct v3dv_physical_device *physical_device =
&pipeline->device->instance->physicalDevice;
const struct v3d_compiler *compiler = physical_device->compiler;
- if (V3D_DEBUG & (V3D_DEBUG_NIR |
- v3d_debug_flag_for_shader_stage(p_stage->stage))) {
+ if (unlikely(V3D_DEBUG & (V3D_DEBUG_NIR |
+ v3d_debug_flag_for_shader_stage
+ (broadcom_shader_stage_to_gl(p_stage->stage))))) {
fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
- gl_shader_stage_name(p_stage->stage),
+ broadcom_shader_stage_name(p_stage->stage),
p_stage->program_id);
nir_print_shader(p_stage->nir, stderr);
fprintf(stderr, "\n");
@@ -1495,6 +1671,8 @@ pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,
* we finish it, so let's not worry about freeing the nir here.
*/
+ p_stage->feedback.duration += os_time_get_nano() - stage_start;
+
return variant;
}
@@ -1525,7 +1703,7 @@ st_nir_opts(nir_shader *nir)
if (nir->options->lower_to_scalar) {
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
- NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+ NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
}
NIR_PASS_V(nir, nir_lower_alu);
@@ -1594,6 +1772,11 @@ pipeline_lower_nir(struct v3dv_pipeline *pipeline,
struct v3dv_pipeline_stage *p_stage,
struct v3dv_pipeline_layout *layout)
{
+ int64_t stage_start = os_time_get_nano();
+
+ assert(pipeline->shared_data &&
+ pipeline->shared_data->maps[p_stage->stage]);
+
nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));
   /* We add this because we need a valid sampler for nir_lower_tex to do
    * texture type queries. We add two of them: one for the case we need a
    * 16bit return size and another for the case we need a 32bit return size.
    */
UNUSED unsigned index =
- descriptor_map_add(&pipeline->shared_data->sampler_map,
+ descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
-1, -1, -1, 0, 16);
assert(index == V3DV_NO_SAMPLER_16BIT_IDX);
index =
- descriptor_map_add(&pipeline->shared_data->sampler_map,
+ descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,
-2, -2, -2, 0, 32);
assert(index == V3DV_NO_SAMPLER_32BIT_IDX);
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);
+
+ p_stage->feedback.duration += os_time_get_nano() - stage_start;
}
/**
@@ -1638,11 +1823,13 @@ get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)
return 0;
}
-static nir_shader*
+static nir_shader *
pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
struct v3dv_pipeline *pipeline,
struct v3dv_pipeline_cache *cache)
{
+ int64_t stage_start = os_time_get_nano();
+
nir_shader *nir = NULL;
nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
@@ -1651,6 +1838,14 @@ pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
if (nir) {
assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));
+
+ /* A NIR cache hit doesn't avoid the large majority of pipeline stage
+ * creation work, so the cache hit is not recorded in the pipeline
+ * feedback flags.
+ */
+
+ p_stage->feedback.duration += os_time_get_nano() - stage_start;
+
return nir;
}
@@ -1670,6 +1865,9 @@ pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,
p_stage->shader_sha1);
}
+
+ p_stage->feedback.duration += os_time_get_nano() - stage_start;
+
return nir;
}
@@ -1706,13 +1904,6 @@ pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- struct v3dv_pipeline_stage *p_stage = pipeline->vs;
-
- /* Right now we only support pipelines with both vertex and fragment
- * shader.
- */
- assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
-
assert(pipeline->vs_bin != NULL);
if (pipeline->vs_bin->nir == NULL) {
assert(pipeline->vs->nir);
@@ -1728,8 +1919,7 @@ pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
if (vk_result != VK_SUCCESS)
return vk_result;
- p_stage = pipeline->vs_bin;
- pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage);
+ pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =
pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),
pAllocator, &vk_result);
@@ -1738,6 +1928,36 @@ pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,
}
static VkResult
+pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,
+ const VkAllocationCallbacks *pAllocator,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ assert(pipeline->gs);
+
+ assert(pipeline->gs_bin != NULL);
+ if (pipeline->gs_bin->nir == NULL) {
+ assert(pipeline->gs->nir);
+ pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);
+ }
+
+ VkResult vk_result;
+ struct v3d_gs_key key;
+ pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =
+ pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),
+ pAllocator, &vk_result);
+ if (vk_result != VK_SUCCESS)
+ return vk_result;
+
+ pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =
+ pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),
+ pAllocator, &vk_result);
+
+ return vk_result;
+}
+
+static VkResult
pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
@@ -1749,6 +1969,7 @@ pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,
struct v3d_fs_key key;
pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,
+ pipeline->gs != NULL,
get_ucp_enable_mask(pipeline->vs));
VkResult vk_result;
@@ -1768,19 +1989,20 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
key->robust_buffer_access =
pipeline->device->features.robustBufferAccess;
+ const bool raster_enabled =
+ !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
+
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
pCreateInfo->pInputAssemblyState;
key->topology = vk_to_pipe_prim_type[ia_info->topology];
const VkPipelineColorBlendStateCreateInfo *cb_info =
- pCreateInfo->pColorBlendState;
+ raster_enabled ? pCreateInfo->pColorBlendState : NULL;
+
key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?
vk_to_pipe_logicop[cb_info->logicOp] :
PIPE_LOGICOP_COPY;
- const bool raster_enabled =
- !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
-
/* Multisample rasterization state must be ignored if rasterization
* is disabled.
*/
@@ -1817,7 +2039,8 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
*/
if (key->logicop_func != PIPE_LOGICOP_COPY) {
key->color_fmt[i].format = fb_pipe_format;
- key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format);
+ key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,
+ fb_format);
}
const struct util_format_description *desc =
@@ -1839,6 +2062,8 @@ pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,
key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);
}
+ assert(pipeline->subpass);
+ key->has_multiview = pipeline->subpass->view_mask != 0;
}
static void
@@ -1858,25 +2083,285 @@ pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],
- struct v3dv_device *device)
+ struct v3dv_pipeline *pipeline,
+ bool is_graphics_pipeline)
{
- size_t size = sizeof(struct v3dv_pipeline_shared_data);
/* We create new_entry using the device alloc. Right now shared_data is
* ref'd and unref'd by both the pipeline and the pipeline cache, so we can't
* ensure that the cache or pipeline alloc will be available on the last
* unref.
*/
struct v3dv_pipeline_shared_data *new_entry =
- vk_zalloc2(&device->vk.alloc, NULL, size, 8,
+ vk_zalloc2(&pipeline->device->vk.alloc, NULL,
+ sizeof(struct v3dv_pipeline_shared_data), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (new_entry == NULL)
return NULL;
+ for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+ /* We don't need specific descriptor maps for binning stages; we use the
+ * map of the corresponding render stage.
+ */
+ if (broadcom_shader_stage_is_binning(stage))
+ continue;
+
+ if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||
+ (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {
+ continue;
+ }
+
+ if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs) {
+ /* We always inject a custom GS if we have multiview */
+ if (!pipeline->subpass->view_mask)
+ continue;
+ }
+
+ struct v3dv_descriptor_maps *new_maps =
+ vk_zalloc2(&pipeline->device->vk.alloc, NULL,
+ sizeof(struct v3dv_descriptor_maps), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (new_maps == NULL)
+ goto fail;
+
+ new_entry->maps[stage] = new_maps;
+ }
+
+ new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =
+ new_entry->maps[BROADCOM_SHADER_VERTEX];
+
+ new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =
+ new_entry->maps[BROADCOM_SHADER_GEOMETRY];
+
new_entry->ref_cnt = 1;
memcpy(new_entry->sha1_key, sha1_key, 20);
return new_entry;
+
+fail:
+ if (new_entry != NULL) {
+ for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+ if (new_entry->maps[stage] != NULL)
+ vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);
+ }
+ }
+
+ vk_free(&pipeline->device->vk.alloc, new_entry);
+
+ return NULL;
+}
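Note the aliasing set up just above: binning stages share the descriptor maps of their render stage instead of owning a copy. A condensed view of the invariant that the destructor (later in this diff) relies on when freeing:

    /* Invariant established by v3dv_pipeline_shared_data_new_empty():
     *   maps[BROADCOM_SHADER_VERTEX_BIN]   == maps[BROADCOM_SHADER_VERTEX]
     *   maps[BROADCOM_SHADER_GEOMETRY_BIN] == maps[BROADCOM_SHADER_GEOMETRY]
     * Only the non-binning entry may be vk_free()'d, which is why
     * v3dv_pipeline_shared_data_destroy() skips binning stages.
     */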
+
+static void
+write_creation_feedback(struct v3dv_pipeline *pipeline,
+ const void *next,
+ const VkPipelineCreationFeedbackEXT *pipeline_feedback,
+ uint32_t stage_count,
+ const VkPipelineShaderStageCreateInfo *stages)
+{
+ const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
+ vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+
+ if (create_feedback) {
+ typed_memcpy(create_feedback->pPipelineCreationFeedback,
+ pipeline_feedback,
+ 1);
+
+ assert(stage_count == create_feedback->pipelineStageCreationFeedbackCount);
+
+ for (uint32_t i = 0; i < stage_count; i++) {
+ gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage);
+ switch (s) {
+ case MESA_SHADER_VERTEX:
+ create_feedback->pPipelineStageCreationFeedbacks[i] =
+ pipeline->vs->feedback;
+
+ create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
+ pipeline->vs_bin->feedback.duration;
+ break;
+
+ case MESA_SHADER_GEOMETRY:
+ create_feedback->pPipelineStageCreationFeedbacks[i] =
+ pipeline->gs->feedback;
+
+ create_feedback->pPipelineStageCreationFeedbacks[i].duration +=
+ pipeline->gs_bin->feedback.duration;
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ create_feedback->pPipelineStageCreationFeedbacks[i] =
+ pipeline->fs->feedback;
+ break;
+
+ case MESA_SHADER_COMPUTE:
+ create_feedback->pPipelineStageCreationFeedbacks[i] =
+ pipeline->cs->feedback;
+ break;
+
+ default:
+ unreachable("not supported shader stage");
+ }
+ }
+ }
+}
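For context, this is the application-side shape of the VK_EXT_pipeline_creation_feedback contract that write_creation_feedback() fills in; a hedged sketch with illustrative names:

    VkPipelineCreationFeedbackEXT whole = { 0 };
    VkPipelineCreationFeedbackEXT per_stage[2] = { 0 };  /* one per pStages entry */

    VkPipelineCreationFeedbackCreateInfoEXT feedback = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT,
       .pPipelineCreationFeedback = &whole,
       .pipelineStageCreationFeedbackCount = 2,          /* must equal stageCount */
       .pPipelineStageCreationFeedbacks = per_stage,
    };

    VkGraphicsPipelineCreateInfo info = {
       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
       .pNext = &feedback,
       /* stages, fixed-function state, layout, renderPass, ... */
    };

    /* After a successful vkCreateGraphicsPipelines() call: */
    if (whole.flags & VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT)
       printf("pipeline creation took %llu ns\n",
              (unsigned long long)whole.duration);

Note that the VS (and GS) feedback entries also accumulate their binning variant's duration, since the binning compiles have no stage entries of their own.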
+
+static uint32_t
+multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
+{
+ switch (pipeline->topology) {
+ case PIPE_PRIM_POINTS:
+ return GL_POINTS;
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ return GL_LINES;
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return GL_TRIANGLES;
+ default:
+ /* Since we don't allow GS with multiview, we can only see non-adjacency
+ * primitives.
+ */
+ unreachable("Unexpected pipeline primitive type");
+ }
+}
+
+static uint32_t
+multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline)
+{
+ switch (pipeline->topology) {
+ case PIPE_PRIM_POINTS:
+ return GL_POINTS;
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ return GL_LINE_STRIP;
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return GL_TRIANGLE_STRIP;
+ default:
+ /* Since we don't allow GS with multiview, we can only see non-adjacency
+ * primitives.
+ */
+ unreachable("Unexpected pipeline primitive type");
+ }
+}
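Worked example of the two mappings above: a pipeline created with PIPE_PRIM_TRIANGLE_FAN gets a GS that consumes GL_TRIANGLES (three vertices per assembled primitive) and emits GL_TRIANGLE_STRIP, which for three emitted vertices is a single triangle again:

    /* topology = PIPE_PRIM_TRIANGLE_FAN
     *   input  primitive: GL_TRIANGLES       (vertices_in  == 3)
     *   output primitive: GL_TRIANGLE_STRIP  (vertices_out == 3)
     */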
+
+static bool
+pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
+ struct v3dv_pipeline_cache *cache,
+ const VkAllocationCallbacks *pAllocator)
+{
+ /* Create the passthrough GS from the VS output interface */
+ pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
+ nir_shader *vs_nir = pipeline->vs->nir;
+
+ const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
+ "multiview broadcast gs");
+ nir_shader *nir = b.shader;
+ nir->info.inputs_read = vs_nir->info.outputs_written;
+ nir->info.outputs_written = vs_nir->info.outputs_written |
+ (1ull << VARYING_SLOT_LAYER);
+
+ uint32_t vertex_count = u_vertices_per_prim(pipeline->topology);
+ nir->info.gs.input_primitive =
+ multiview_gs_input_primitive_from_pipeline(pipeline);
+ nir->info.gs.output_primitive =
+ multiview_gs_output_primitive_from_pipeline(pipeline);
+ nir->info.gs.vertices_in = vertex_count;
+ nir->info.gs.vertices_out = nir->info.gs.vertices_in;
+ nir->info.gs.invocations = 1;
+ nir->info.gs.active_stream_mask = 0x1;
+
+ /* Make a list of GS input/output variables from the VS outputs */
+ nir_variable *in_vars[100];
+ nir_variable *out_vars[100];
+ uint32_t var_count = 0;
+ nir_foreach_shader_out_variable(out_vs_var, vs_nir) {
+ char name[8];
+ snprintf(name, ARRAY_SIZE(name), "in_%d", var_count);
+
+ in_vars[var_count] =
+ nir_variable_create(nir, nir_var_shader_in,
+ glsl_array_type(out_vs_var->type, vertex_count, 0),
+ name);
+ in_vars[var_count]->data.location = out_vs_var->data.location;
+ in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac;
+ in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
+
+ snprintf(name, ARRAY_SIZE(name), "out_%d", var_count);
+ out_vars[var_count] =
+ nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name);
+ out_vars[var_count]->data.location = out_vs_var->data.location;
+ out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation;
+
+ var_count++;
+ }
+
+ /* Add the gl_Layer output variable */
+ nir_variable *out_layer =
+ nir_variable_create(nir, nir_var_shader_out, glsl_int_type(),
+ "out_Layer");
+ out_layer->data.location = VARYING_SLOT_LAYER;
+
+ /* Get the view index value that we will write to gl_Layer */
+ nir_ssa_def *layer =
+ nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
+
+ /* Emit all output vertices */
+ for (uint32_t vi = 0; vi < vertex_count; vi++) {
+ /* Emit all output varyings */
+ for (uint32_t i = 0; i < var_count; i++) {
+ nir_deref_instr *in_value =
+ nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi);
+ nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value);
+ }
+
+ /* Emit gl_Layer write */
+ nir_store_var(&b, out_layer, layer, 0x1);
+
+ nir_emit_vertex(&b, 0);
+ }
+ nir_end_primitive(&b, 0);
+
+ /* Make sure we run our pre-process NIR passes so we produce NIR compatible
+ * with what we expect from SPIR-V modules.
+ */
+ preprocess_nir(nir);
+
+ /* Attach the geometry shader to the pipeline */
+ struct v3dv_device *device = pipeline->device;
+ struct v3dv_physical_device *physical_device =
+ &device->instance->physicalDevice;
+
+ struct v3dv_pipeline_stage *p_stage =
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (p_stage == NULL) {
+ ralloc_free(nir);
+ return false;
+ }
+
+ p_stage->pipeline = pipeline;
+ p_stage->stage = BROADCOM_SHADER_GEOMETRY;
+ p_stage->entrypoint = "main";
+ p_stage->module = 0;
+ p_stage->nir = nir;
+ pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);
+ p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
+
+ pipeline->has_gs = true;
+ pipeline->gs = p_stage;
+ pipeline->active_stages |= MESA_SHADER_GEOMETRY;
+
+ pipeline->gs_bin =
+ pipeline_stage_create_binning(pipeline->gs, pAllocator);
+ if (pipeline->gs_bin == NULL)
+ return false;
+
+ return true;
}
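A subpass gets a non-zero view_mask from VkRenderPassMultiviewCreateInfo (core in Vulkan 1.1, VK_KHR_multiview before that); a minimal application-side sketch of what triggers the GS injection above:

    const uint32_t view_mask = 0x3;   /* broadcast each draw to views 0 and 1 */

    VkRenderPassMultiviewCreateInfo multiview = {
       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
       .subpassCount = 1,
       .pViewMasks = &view_mask,
    };

    VkRenderPassCreateInfo rp_info = {
       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
       .pNext = &multiview,
       /* attachments, subpasses, dependencies, ... */
    };

The injected shader then forwards every input vertex unchanged and writes the view index to gl_Layer, steering each broadcast copy of the primitive to its layer.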
/*
@@ -1895,6 +2380,11 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator)
{
+ VkPipelineCreationFeedbackEXT pipeline_feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+ };
+ int64_t pipeline_start = os_time_get_nano();
+
struct v3dv_device *device = pipeline->device;
struct v3dv_physical_device *physical_device =
&device->instance->physicalDevice;
@@ -1945,14 +2435,24 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
case MESA_SHADER_VERTEX:
pipeline->vs = p_stage;
pipeline->vs_bin =
- pipeline_stage_create_vs_bin(pipeline->vs, pAllocator);
+ pipeline_stage_create_binning(pipeline->vs, pAllocator);
if (pipeline->vs_bin == NULL)
return VK_ERROR_OUT_OF_HOST_MEMORY;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ pipeline->has_gs = true;
+ pipeline->gs = p_stage;
+ pipeline->gs_bin =
+ pipeline_stage_create_binning(pipeline->gs, pAllocator);
+ if (pipeline->gs_bin == NULL)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
break;
+
case MESA_SHADER_FRAGMENT:
pipeline->fs = p_stage;
break;
+
default:
unreachable("not supported shader stage");
}
@@ -1984,39 +2484,85 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
pipeline->active_stages |= MESA_SHADER_FRAGMENT;
}
- /* Now we will try to get the variants from the pipeline cache */
+ /* If multiview is enabled, we inject a custom passthrough geometry shader
+ * to broadcast draw calls to the appropriate views.
+ */
+ assert(!pipeline->subpass->view_mask || (!pipeline->has_gs && !pipeline->gs));
+ if (pipeline->subpass->view_mask) {
+ if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ /* First we try to get the variants from the pipeline cache */
struct v3dv_pipeline_key pipeline_key;
pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
unsigned char pipeline_sha1[20];
pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);
+ bool cache_hit = false;
+
pipeline->shared_data =
- v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);
+ v3dv_pipeline_cache_search_for_pipeline(cache,
+ pipeline_sha1,
+ &cache_hit);
if (pipeline->shared_data != NULL) {
+ /* A correct pipeline must have at least a VS and FS */
assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
+ assert(!pipeline->gs ||
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
+ assert(!pipeline->gs ||
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
+
+ if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
+ pipeline_feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
goto success;
}
- pipeline->shared_data =
- v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline->device);
- /* If not, we try to get the nir shaders (from the SPIR-V shader, or from
- * the pipeline cache again) and compile.
+ if (pCreateInfo->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
+ return VK_PIPELINE_COMPILE_REQUIRED_EXT;
+
+ /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
+ * shader or the pipeline cache) and compile.
*/
+ pipeline->shared_data =
+ v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);
+
+ pipeline->vs->feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+ if (pipeline->gs)
+ pipeline->gs->feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+ pipeline->fs->feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+
if (!pipeline->vs->nir)
pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);
+ if (pipeline->gs && !pipeline->gs->nir)
+ pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);
if (!pipeline->fs->nir)
pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);
/* Linking + pipeline lowerings */
- link_shaders(pipeline->vs->nir, pipeline->fs->nir);
+ if (pipeline->gs) {
+ link_shaders(pipeline->gs->nir, pipeline->fs->nir);
+ link_shaders(pipeline->vs->nir, pipeline->gs->nir);
+ } else {
+ link_shaders(pipeline->vs->nir, pipeline->fs->nir);
+ }
pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);
lower_fs_io(pipeline->fs->nir);
+ if (pipeline->gs) {
+ pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);
+ lower_gs_io(pipeline->gs->nir);
+ }
+
pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);
lower_vs_io(pipeline->vs->nir);
@@ -2029,6 +2575,16 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
if (vk_result != VK_SUCCESS)
return vk_result;
+ assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&
+ !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);
+
+ if (pipeline->gs) {
+ vk_result =
+ pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);
+ if (vk_result != VK_SUCCESS)
+ return vk_result;
+ }
+
assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&
!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
@@ -2041,29 +2597,52 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
- /* As we got the variants in pipeline->shared_data, after compiling we
- * don't need the pipeline_stages
+ success:
+
+ pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
+ write_creation_feedback(pipeline,
+ pCreateInfo->pNext,
+ &pipeline_feedback,
+ pCreateInfo->stageCount,
+ pCreateInfo->pStages);
+
+ /* Since we have the variants in the pipeline shared data, we can now free
+ * the pipeline stages.
*/
pipeline_free_stages(device, pipeline, pAllocator);
- success:
pipeline_check_spill_size(pipeline);
- /* FIXME: values below are default when non-GS is available. Would need to
- * provide real values if GS gets supported
- */
+ return compute_vpm_config(pipeline);
+}
+
+static VkResult
+compute_vpm_config(struct v3dv_pipeline *pipeline)
+{
struct v3dv_shader_variant *vs_variant =
pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
struct v3dv_shader_variant *vs_bin_variant =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
-
- pipeline->vpm_cfg_bin.As = 1;
- pipeline->vpm_cfg_bin.Ve = 0;
- pipeline->vpm_cfg_bin.Vc = vs_bin_variant->prog_data.vs->vcm_cache_size;
-
- pipeline->vpm_cfg.As = 1;
- pipeline->vpm_cfg.Ve = 0;
- pipeline->vpm_cfg.Vc = vs_variant->prog_data.vs->vcm_cache_size;
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
+ struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
+ struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;
+
+ struct v3d_gs_prog_data *gs = NULL;
+ struct v3d_gs_prog_data *gs_bin = NULL;
+ if (pipeline->has_gs) {
+ struct v3dv_shader_variant *gs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
+ struct v3dv_shader_variant *gs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
+ gs = gs_variant->prog_data.gs;
+ gs_bin = gs_bin_variant->prog_data.gs;
+ }
+
+ if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
+ vs_bin, vs, gs_bin, gs,
+ &pipeline->vpm_cfg_bin,
+ &pipeline->vpm_cfg)) {
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ }
return VK_SUCCESS;
}
@@ -2088,6 +2667,8 @@ v3dv_dynamic_state_mask(VkDynamicState state)
return V3DV_DYNAMIC_DEPTH_BIAS;
case VK_DYNAMIC_STATE_LINE_WIDTH:
return V3DV_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
+ return V3DV_DYNAMIC_COLOR_WRITE_ENABLE;
/* Depth bounds testing is not available in V3D 4.2, so here we are just
* ignoring this dynamic state. We are already asserting at pipeline creation
@@ -2108,7 +2689,8 @@ pipeline_init_dynamic_state(
const VkPipelineViewportStateCreateInfo *pViewportState,
const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
- const VkPipelineRasterizationStateCreateInfo *pRasterizationState)
+ const VkPipelineRasterizationStateCreateInfo *pRasterizationState,
+ const VkPipelineColorWriteCreateInfoEXT *pColorWriteState)
{
pipeline->dynamic_state = default_dynamic_state;
struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;
@@ -2184,310 +2766,13 @@ pipeline_init_dynamic_state(
dynamic->line_width = pRasterizationState->lineWidth;
}
- pipeline->dynamic_state.mask = dynamic_states;
-}
-
-static uint8_t
-blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
-{
- switch (factor) {
- case VK_BLEND_FACTOR_ZERO:
- case VK_BLEND_FACTOR_ONE:
- case VK_BLEND_FACTOR_SRC_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- case VK_BLEND_FACTOR_DST_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- case VK_BLEND_FACTOR_SRC_ALPHA:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- return factor;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- *needs_constants = true;
- return factor;
- case VK_BLEND_FACTOR_DST_ALPHA:
- return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
- V3D_BLEND_FACTOR_DST_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
- V3D_BLEND_FACTOR_INV_DST_ALPHA;
- case VK_BLEND_FACTOR_SRC1_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
- case VK_BLEND_FACTOR_SRC1_ALPHA:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
- assert(!"Invalid blend factor: dual source blending not supported.");
- default:
- assert(!"Unknown blend factor.");
+ if (pColorWriteState && !(dynamic_states & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) {
+ dynamic->color_write_enable = 0;
+ for (uint32_t i = 0; i < pColorWriteState->attachmentCount; i++)
+ dynamic->color_write_enable |=
+ pColorWriteState->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
}
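Worked example of the nibble packing above, where each attachment owns four channel bits:

    /* attachmentCount = 2, pColorWriteEnables = { VK_TRUE, VK_FALSE }:
     *   i = 0: enabled  -> color_write_enable |= 0xf << 0   == 0x0000000f
     *   i = 1: disabled -> nothing OR'd in
     * Result: all four channels of render target 0 enabled, RT 1 fully off.
     */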
- /* Should be handled by the switch, added to avoid an "end of non-void
- * function" error
- */
- unreachable("Unknown blend factor.");
-}
-
-static void
-pack_blend(struct v3dv_pipeline *pipeline,
- const VkPipelineColorBlendStateCreateInfo *cb_info)
-{
- /* By default, we are not enabling blending and all color channel writes are
- * enabled. Color write enables are independent of whether blending is
- * enabled or not.
- *
- * Vulkan specifies color write masks so that bits set correspond to
- * enabled channels. Our hardware does it the other way around.
- */
- pipeline->blend.enables = 0;
- pipeline->blend.color_write_masks = 0; /* All channels enabled */
-
- if (!cb_info)
- return;
-
- assert(pipeline->subpass);
- if (pipeline->subpass->color_count == 0)
- return;
-
- assert(pipeline->subpass->color_count == cb_info->attachmentCount);
-
- pipeline->blend.needs_color_constants = false;
- uint32_t color_write_masks = 0;
- for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
- const VkPipelineColorBlendAttachmentState *b_state =
- &cb_info->pAttachments[i];
-
- uint32_t attachment_idx =
- pipeline->subpass->color_attachments[i].attachment;
- if (attachment_idx == VK_ATTACHMENT_UNUSED)
- continue;
-
- color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
-
- if (!b_state->blendEnable)
- continue;
-
- VkAttachmentDescription *desc =
- &pipeline->pass->attachments[attachment_idx].desc;
- const struct v3dv_format *format = v3dv_get_format(desc->format);
- bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
-
- uint8_t rt_mask = 1 << i;
- pipeline->blend.enables |= rt_mask;
-
- v3dv_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
- config.render_target_mask = rt_mask;
-
- config.color_blend_mode = b_state->colorBlendOp;
- config.color_blend_dst_factor =
- blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
- &pipeline->blend.needs_color_constants);
- config.color_blend_src_factor =
- blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
- &pipeline->blend.needs_color_constants);
-
- config.alpha_blend_mode = b_state->alphaBlendOp;
- config.alpha_blend_dst_factor =
- blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
- &pipeline->blend.needs_color_constants);
- config.alpha_blend_src_factor =
- blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
- &pipeline->blend.needs_color_constants);
- }
- }
-
- pipeline->blend.color_write_masks = color_write_masks;
-}
-
-/* This requires that pack_blend() had been called before so we can set
- * the overall blend enable bit in the CFG_BITS packet.
- */
-static void
-pack_cfg_bits(struct v3dv_pipeline *pipeline,
- const VkPipelineDepthStencilStateCreateInfo *ds_info,
- const VkPipelineRasterizationStateCreateInfo *rs_info,
- const VkPipelineMultisampleStateCreateInfo *ms_info)
-{
- assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
-
- pipeline->msaa =
- ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
-
- v3dv_pack(pipeline->cfg_bits, CFG_BITS, config) {
- config.enable_forward_facing_primitive =
- rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
-
- config.enable_reverse_facing_primitive =
- rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
-
- /* Seems like the hardware is backwards regarding this setting... */
- config.clockwise_primitives =
- rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
-
- config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable: false;
-
- /* This is required to pass line rasterization tests in CTS while
- * exposing, at least, a minimum of 4-bits of subpixel precision
- * (the minimum requirement).
- */
- config.line_rasterization = 1; /* perp end caps */
-
- if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
- config.direct3d_wireframe_triangles_mode = true;
- config.direct3d_point_fill_mode =
- rs_info->polygonMode == VK_POLYGON_MODE_POINT;
- }
-
- config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
-
- /* From the Vulkan spec:
- *
- * "Provoking Vertex:
- *
- * The vertex in a primitive from which flat shaded attribute
- * values are taken. This is generally the “first” vertex in the
- * primitive, and depends on the primitive topology."
- *
- * First vertex is the Direct3D style for provoking vertex. OpenGL uses
- * the last vertex by default.
- */
- config.direct3d_provoking_vertex = true;
-
- config.blend_enable = pipeline->blend.enables != 0;
-
- /* Disable depth/stencil if we don't have a D/S attachment */
- bool has_ds_attachment =
- pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
-
- if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
- config.z_updates_enable = ds_info->depthWriteEnable;
- config.depth_test_function = ds_info->depthCompareOp;
- } else {
- config.depth_test_function = VK_COMPARE_OP_ALWAYS;
- }
-
- /* EZ state will be updated at draw time based on bound pipeline state */
- config.early_z_updates_enable = false;
- config.early_z_enable = false;
-
- config.stencil_enable =
- ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
-
- pipeline->z_updates_enable = config.z_updates_enable;
- };
-}
-
-static uint32_t
-translate_stencil_op(enum pipe_stencil_op op)
-{
- switch (op) {
- case VK_STENCIL_OP_KEEP:
- return V3D_STENCIL_OP_KEEP;
- case VK_STENCIL_OP_ZERO:
- return V3D_STENCIL_OP_ZERO;
- case VK_STENCIL_OP_REPLACE:
- return V3D_STENCIL_OP_REPLACE;
- case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
- return V3D_STENCIL_OP_INCR;
- case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
- return V3D_STENCIL_OP_DECR;
- case VK_STENCIL_OP_INVERT:
- return V3D_STENCIL_OP_INVERT;
- case VK_STENCIL_OP_INCREMENT_AND_WRAP:
- return V3D_STENCIL_OP_INCWRAP;
- case VK_STENCIL_OP_DECREMENT_AND_WRAP:
- return V3D_STENCIL_OP_DECWRAP;
- default:
- unreachable("bad stencil op");
- }
-}
-
-static void
-pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
- uint8_t *stencil_cfg,
- bool is_front,
- bool is_back,
- const VkStencilOpState *stencil_state)
-{
- /* From the Vulkan spec:
- *
- * "Reference is an integer reference value that is used in the unsigned
- * stencil comparison. The reference value used by stencil comparison
- * must be within the range [0,2^s-1] , where s is the number of bits in
- * the stencil framebuffer attachment, otherwise the reference value is
- * considered undefined."
- *
- * In our case, 's' is always 8, so we clamp to that to prevent our packing
- * functions to assert in debug mode if they see larger values.
- *
- * If we have dynamic state we need to make sure we set the corresponding
- * state bits to 0, since cl_emit_with_prepacked ORs the new value with
- * the old.
- */
- const uint8_t write_mask =
- pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
- 0 : stencil_state->writeMask & 0xff;
-
- const uint8_t compare_mask =
- pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
- 0 : stencil_state->compareMask & 0xff;
-
- const uint8_t reference =
- pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
- 0 : stencil_state->reference & 0xff;
-
- v3dv_pack(stencil_cfg, STENCIL_CFG, config) {
- config.front_config = is_front;
- config.back_config = is_back;
- config.stencil_write_mask = write_mask;
- config.stencil_test_mask = compare_mask;
- config.stencil_test_function = stencil_state->compareOp;
- config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
- config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
- config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
- config.stencil_ref_value = reference;
- }
-}
-
-static void
-pack_stencil_cfg(struct v3dv_pipeline *pipeline,
- const VkPipelineDepthStencilStateCreateInfo *ds_info)
-{
- assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
-
- if (!ds_info || !ds_info->stencilTestEnable)
- return;
-
- if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
- return;
-
- const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
- V3DV_DYNAMIC_STENCIL_WRITE_MASK |
- V3DV_DYNAMIC_STENCIL_REFERENCE;
-
-
- /* If front != back or we have dynamic stencil state we can't emit a single
- * packet for both faces.
- */
- bool needs_front_and_back = false;
- if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
- memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
- needs_front_and_back = true;
-
- /* If the front and back configurations are the same we can emit both with
- * a single packet.
- */
- pipeline->emit_stencil_cfg[0] = true;
- if (!needs_front_and_back) {
- pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
- true, true, &ds_info->front);
- } else {
- pipeline->emit_stencil_cfg[1] = true;
- pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
- true, false, &ds_info->front);
- pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
- false, true, &ds_info->back);
- }
+ pipeline->dynamic_state.mask = dynamic_states;
}
static bool
@@ -2532,25 +2817,25 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
const VkPipelineDepthStencilStateCreateInfo *ds_info)
{
if (!ds_info || !ds_info->depthTestEnable) {
- pipeline->ez_state = VC5_EZ_DISABLED;
+ pipeline->ez_state = V3D_EZ_DISABLED;
return;
}
switch (ds_info->depthCompareOp) {
case VK_COMPARE_OP_LESS:
case VK_COMPARE_OP_LESS_OR_EQUAL:
- pipeline->ez_state = VC5_EZ_LT_LE;
+ pipeline->ez_state = V3D_EZ_LT_LE;
break;
case VK_COMPARE_OP_GREATER:
case VK_COMPARE_OP_GREATER_OR_EQUAL:
- pipeline->ez_state = VC5_EZ_GT_GE;
+ pipeline->ez_state = V3D_EZ_GT_GE;
break;
case VK_COMPARE_OP_NEVER:
case VK_COMPARE_OP_EQUAL:
- pipeline->ez_state = VC5_EZ_UNDECIDED;
+ pipeline->ez_state = V3D_EZ_UNDECIDED;
break;
default:
- pipeline->ez_state = VC5_EZ_DISABLED;
+ pipeline->ez_state = V3D_EZ_DISABLED;
break;
}
@@ -2558,220 +2843,10 @@ pipeline_set_ez_state(struct v3dv_pipeline *pipeline,
if (ds_info->stencilTestEnable &&
(!stencil_op_is_no_op(&ds_info->front) ||
!stencil_op_is_no_op(&ds_info->back))) {
- pipeline->ez_state = VC5_EZ_DISABLED;
- }
-}
-
-static void
-pack_shader_state_record(struct v3dv_pipeline *pipeline)
-{
- assert(sizeof(pipeline->shader_state_record) ==
- cl_packet_length(GL_SHADER_STATE_RECORD));
-
- struct v3d_fs_prog_data *prog_data_fs =
- pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
-
- struct v3d_vs_prog_data *prog_data_vs =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
-
- struct v3d_vs_prog_data *prog_data_vs_bin =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
-
-
- /* Note: we are not packing addresses, as we need the job (see
- * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
- * point as they depend on dynamic info that can be set after creating the
- * pipeline (like the viewport). They would need to be filled in later, so we are
- * doing a partial prepacking.
- */
- v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
- shader.enable_clipping = true;
-
- shader.point_size_in_shaded_vertex_data =
- pipeline->topology == PIPE_PRIM_POINTS;
-
- /* Must be set if the shader modifies Z, discards, or modifies
- * the sample mask. For any of these cases, the fragment
- * shader needs to write the Z value (even just discards).
- */
- shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
- /* Set if the EZ test must be disabled (due to shader side
- * effects and the early_z flag not being present in the
- * shader).
- */
- shader.turn_off_early_z_test = prog_data_fs->disable_ez;
-
- shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
- prog_data_fs->uses_center_w;
-
- /* The description for gl_SampleID states that if a fragment shader reads
- * it, then we should automatically activate per-sample shading. However,
- * the Vulkan spec also states that if a framebuffer has no attachments:
- *
- * "The subpass continues to use the width, height, and layers of the
- * framebuffer to define the dimensions of the rendering area, and the
- * rasterizationSamples from each pipeline’s
- * VkPipelineMultisampleStateCreateInfo to define the number of
- * samples used in rasterization multisample rasterization."
- *
- * So in this scenario, if the pipeline doesn't enable multiple samples
- * but the fragment shader accesses gl_SampleID we would be requested
- * to do per-sample shading in single sample rasterization mode, which
- * is pointless, so just disable it in that case.
- */
- shader.enable_sample_rate_shading =
- pipeline->sample_rate_shading ||
- (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
-
- shader.any_shader_reads_hardware_written_primitive_id = false;
-
- shader.do_scoreboard_wait_on_first_thread_switch =
- prog_data_fs->lock_scoreboard_on_first_thrsw;
- shader.disable_implicit_point_line_varyings =
- !prog_data_fs->uses_implicit_point_line_varyings;
-
- shader.number_of_varyings_in_fragment_shader =
- prog_data_fs->num_inputs;
-
- shader.coordinate_shader_propagate_nans = true;
- shader.vertex_shader_propagate_nans = true;
- shader.fragment_shader_propagate_nans = true;
-
- /* Note: see previous note about addresses */
- /* shader.coordinate_shader_code_address */
- /* shader.vertex_shader_code_address */
- /* shader.fragment_shader_code_address */
-
- /* FIXME: Use combined input/output size flag in the common case (also
- * on v3d, see v3dx_draw).
- */
- shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
- prog_data_vs_bin->separate_segments;
- shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
- prog_data_vs->separate_segments;
-
- shader.coordinate_shader_input_vpm_segment_size =
- prog_data_vs_bin->separate_segments ?
- prog_data_vs_bin->vpm_input_size : 1;
- shader.vertex_shader_input_vpm_segment_size =
- prog_data_vs->separate_segments ?
- prog_data_vs->vpm_input_size : 1;
-
- shader.coordinate_shader_output_vpm_segment_size =
- prog_data_vs_bin->vpm_output_size;
- shader.vertex_shader_output_vpm_segment_size =
- prog_data_vs->vpm_output_size;
-
- /* Note: see previous note about addresses */
- /* shader.coordinate_shader_uniforms_address */
- /* shader.vertex_shader_uniforms_address */
- /* shader.fragment_shader_uniforms_address */
-
- shader.min_coord_shader_input_segments_required_in_play =
- pipeline->vpm_cfg_bin.As;
- shader.min_vertex_shader_input_segments_required_in_play =
- pipeline->vpm_cfg.As;
-
- shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
- pipeline->vpm_cfg_bin.Ve;
- shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
- pipeline->vpm_cfg.Ve;
-
- shader.coordinate_shader_4_way_threadable =
- prog_data_vs_bin->base.threads == 4;
- shader.vertex_shader_4_way_threadable =
- prog_data_vs->base.threads == 4;
- shader.fragment_shader_4_way_threadable =
- prog_data_fs->base.threads == 4;
-
- shader.coordinate_shader_start_in_final_thread_section =
- prog_data_vs_bin->base.single_seg;
- shader.vertex_shader_start_in_final_thread_section =
- prog_data_vs->base.single_seg;
- shader.fragment_shader_start_in_final_thread_section =
- prog_data_fs->base.single_seg;
-
- shader.vertex_id_read_by_coordinate_shader =
- prog_data_vs_bin->uses_vid;
- shader.base_instance_id_read_by_coordinate_shader =
- prog_data_vs_bin->uses_biid;
- shader.instance_id_read_by_coordinate_shader =
- prog_data_vs_bin->uses_iid;
- shader.vertex_id_read_by_vertex_shader =
- prog_data_vs->uses_vid;
- shader.base_instance_id_read_by_vertex_shader =
- prog_data_vs->uses_biid;
- shader.instance_id_read_by_vertex_shader =
- prog_data_vs->uses_iid;
-
- /* Note: see previous note about addresses */
- /* shader.address_of_default_attribute_values */
- }
-}
-
-static void
-pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
-{
- assert(sizeof(pipeline->vcm_cache_size) ==
- cl_packet_length(VCM_CACHE_SIZE));
-
- v3dv_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
- vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
- vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
+ pipeline->ez_state = V3D_EZ_DISABLED;
}
}
-/* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
-static uint8_t
-get_attr_type(const struct util_format_description *desc)
-{
- uint32_t r_size = desc->channel[0].size;
- uint8_t attr_type = ATTRIBUTE_FLOAT;
-
- switch (desc->channel[0].type) {
- case UTIL_FORMAT_TYPE_FLOAT:
- if (r_size == 32) {
- attr_type = ATTRIBUTE_FLOAT;
- } else {
- assert(r_size == 16);
- attr_type = ATTRIBUTE_HALF_FLOAT;
- }
- break;
-
- case UTIL_FORMAT_TYPE_SIGNED:
- case UTIL_FORMAT_TYPE_UNSIGNED:
- switch (r_size) {
- case 32:
- attr_type = ATTRIBUTE_INT;
- break;
- case 16:
- attr_type = ATTRIBUTE_SHORT;
- break;
- case 10:
- attr_type = ATTRIBUTE_INT2_10_10_10;
- break;
- case 8:
- attr_type = ATTRIBUTE_BYTE;
- break;
- default:
- fprintf(stderr,
- "format %s unsupported\n",
- desc->name);
- attr_type = ATTRIBUTE_BYTE;
- abort();
- }
- break;
-
- default:
- fprintf(stderr,
- "format %s unsupported\n",
- desc->name);
- abort();
- }
-
- return attr_type;
-}
-
static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
{
@@ -2829,36 +2904,6 @@ v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
}
static void
-pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
- uint32_t index,
- const VkVertexInputAttributeDescription *vi_desc)
-{
- const uint32_t packet_length =
- cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
-
- const struct util_format_description *desc =
- vk_format_description(vi_desc->format);
-
- uint32_t binding = vi_desc->binding;
-
- v3dv_pack(&pipeline->vertex_attrs[index * packet_length],
- GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
-
- /* vec_size == 0 means 4 */
- attr.vec_size = desc->nr_channels & 3;
- attr.signed_int_type = (desc->channel[0].type ==
- UTIL_FORMAT_TYPE_SIGNED);
- attr.normalized_int_type = desc->channel[0].normalized;
- attr.read_as_int_uint = desc->channel[0].pure_integer;
-
- attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
- 0xffff);
- attr.stride = pipeline->vb[binding].stride;
- attr.type = get_attr_type(desc);
- }
-}
-
-static void
pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *ms_info)
{
@@ -2920,24 +2965,35 @@ pipeline_init(struct v3dv_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *rs_info =
raster_enabled ? pCreateInfo->pRasterizationState : NULL;
+ const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info =
+ rs_info ? vk_find_struct_const(
+ rs_info->pNext,
+ PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) :
+ NULL;
+
const VkPipelineColorBlendStateCreateInfo *cb_info =
raster_enabled ? pCreateInfo->pColorBlendState : NULL;
const VkPipelineMultisampleStateCreateInfo *ms_info =
raster_enabled ? pCreateInfo->pMultisampleState : NULL;
+ const VkPipelineColorWriteCreateInfoEXT *cw_info =
+ cb_info ? vk_find_struct_const(cb_info->pNext,
+ PIPELINE_COLOR_WRITE_CREATE_INFO_EXT) :
+ NULL;
+
pipeline_init_dynamic_state(pipeline,
pCreateInfo->pDynamicState,
- vp_info, ds_info, cb_info, rs_info);
+ vp_info, ds_info, cb_info, rs_info, cw_info);
/* V3D 4.2 doesn't support depth bounds testing so we don't advertise that
* feature and it shouldn't be used by any pipeline.
*/
assert(!ds_info || !ds_info->depthBoundsTestEnable);
- pack_blend(pipeline, cb_info);
- pack_cfg_bits(pipeline, ds_info, rs_info, ms_info);
- pack_stencil_cfg(pipeline, ds_info);
+ v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,
+ rs_info, pv_info, ms_info);
+
pipeline_set_ez_state(pipeline, ds_info);
enable_depth_bias(pipeline, rs_info);
pipeline_set_sample_mask(pipeline, ms_info);
@@ -2955,49 +3011,14 @@ pipeline_init(struct v3dv_pipeline *pipeline,
return result;
}
- pack_shader_state_record(pipeline);
- pack_vcm_cache_size(pipeline);
-
const VkPipelineVertexInputStateCreateInfo *vi_info =
pCreateInfo->pVertexInputState;
- pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
- for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
- const VkVertexInputBindingDescription *desc =
- &vi_info->pVertexBindingDescriptions[i];
-
- pipeline->vb[desc->binding].stride = desc->stride;
- pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
- }
-
- pipeline->va_count = 0;
- struct v3d_vs_prog_data *prog_data_vs =
- pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
-
- for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
- const VkVertexInputAttributeDescription *desc =
- &vi_info->pVertexAttributeDescriptions[i];
- uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
-
- /* We use a custom driver_location_map instead of
- * nir_find_variable_with_location because if we were able to get the
- * shader variant from the cache, we would not have the nir shader
- * available.
- */
- uint32_t driver_location =
- prog_data_vs->driver_location_map[location];
-
- if (driver_location != -1) {
- assert(driver_location < MAX_VERTEX_ATTRIBS);
- pipeline->va[driver_location].offset = desc->offset;
- pipeline->va[driver_location].binding = desc->binding;
- pipeline->va[driver_location].vk_format = desc->format;
-
- pack_shader_state_attribute_record(pipeline, driver_location, desc);
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info =
+ vk_find_struct_const(vi_info->pNext,
+ PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
- pipeline->va_count++;
- }
- }
+ v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info);
if (pipeline_has_integer_vertex_attrib(pipeline)) {
pipeline->default_attribute_values =
@@ -3032,7 +3053,7 @@ graphics_pipeline_create(VkDevice _device,
VK_OBJECT_TYPE_PIPELINE);
if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
result = pipeline_init(pipeline, device, cache,
pCreateInfo,
@@ -3040,6 +3061,8 @@ graphics_pipeline_create(VkDevice _device,
if (result != VK_SUCCESS) {
v3dv_destroy_pipeline(pipeline, device, pAllocator);
+ if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
+ *pPipeline = VK_NULL_HANDLE;
return result;
}
@@ -3048,7 +3071,7 @@ graphics_pipeline_create(VkDevice _device,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateGraphicsPipelines(VkDevice _device,
VkPipelineCache pipelineCache,
uint32_t count,
@@ -3062,7 +3085,8 @@ v3dv_CreateGraphicsPipelines(VkDevice _device,
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
mtx_lock(&device->pdevice->mutex);
- for (uint32_t i = 0; i < count; i++) {
+ uint32_t i = 0;
+ for (; i < count; i++) {
VkResult local_result;
local_result = graphics_pipeline_create(_device,
@@ -3074,9 +3098,16 @@ v3dv_CreateGraphicsPipelines(VkDevice _device,
if (local_result != VK_SUCCESS) {
result = local_result;
pPipelines[i] = VK_NULL_HANDLE;
+
+ if (pCreateInfos[i].flags &
+ VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
}
}
+ for (; i < count; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
mtx_unlock(&device->pdevice->mutex);
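The FAIL_ON_PIPELINE_COMPILE_REQUIRED early-out and the VK_NULL_HANDLE back-fill above implement the VK_EXT_pipeline_creation_cache_control contract; a sketch of the application-side probing pattern this enables (names illustrative):

    /* Only accept pipelines that can be served from the cache; stop at the
     * first one that would need a compile.
     */
    for (uint32_t j = 0; j < count; j++) {
       create_infos[j].flags |=
          VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT |
          VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT;
    }

    VkResult res = vkCreateGraphicsPipelines(device, app_cache, count,
                                             create_infos, NULL, pipelines);
    if (res == VK_PIPELINE_COMPILE_REQUIRED_EXT) {
       /* Every entry from the failing index onward is VK_NULL_HANDLE, so the
        * app can schedule real compiles for those on a worker thread.
        */
    }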
@@ -3110,6 +3141,11 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
const VkComputePipelineCreateInfo *info,
const VkAllocationCallbacks *alloc)
{
+ VkPipelineCreationFeedbackEXT pipeline_feedback = {
+ .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
+ };
+ int64_t pipeline_start = os_time_get_nano();
+
struct v3dv_device *device = pipeline->device;
struct v3dv_physical_device *physical_device =
&device->instance->physicalDevice;
@@ -3129,6 +3165,7 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
p_stage->entrypoint = sinfo->pName;
p_stage->module = vk_shader_module_from_handle(sinfo->module);
p_stage->spec_info = sinfo->pSpecializationInfo;
+ p_stage->feedback = (VkPipelineCreationFeedbackEXT) { 0 };
pipeline_hash_shader(p_stage->module,
p_stage->entrypoint,
@@ -3147,16 +3184,27 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
unsigned char pipeline_sha1[20];
pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);
+ bool cache_hit = false;
pipeline->shared_data =
- v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);
+ v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1, &cache_hit);
if (pipeline->shared_data != NULL) {
assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);
+ if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
+ pipeline_feedback.flags |=
+ VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
+
goto success;
}
+ if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
+ return VK_PIPELINE_COMPILE_REQUIRED_EXT;
+
pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,
- pipeline->device);
+ pipeline,
+ false);
+
+ p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
/* If not found on cache, compile it */
p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
@@ -3183,12 +3231,21 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
v3dv_pipeline_cache_upload_pipeline(pipeline, cache);
+
+success:
+
+ pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
+ write_creation_feedback(pipeline,
+ info->pNext,
+ &pipeline_feedback,
+ 1,
+ &info->stage);
+
/* As we got the variants in pipeline->shared_data, after compiling we
* don't need the pipeline_stages
*/
pipeline_free_stages(device, pipeline, alloc);
- success:
pipeline_check_spill_size(pipeline);
return VK_SUCCESS;
@@ -3231,12 +3288,14 @@ compute_pipeline_create(VkDevice _device,
pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),
VK_OBJECT_TYPE_PIPELINE);
if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
result = compute_pipeline_init(pipeline, device, cache,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
v3dv_destroy_pipeline(pipeline, device, pAllocator);
+ if (result == VK_PIPELINE_COMPILE_REQUIRED_EXT)
+ *pPipeline = VK_NULL_HANDLE;
return result;
}
@@ -3245,7 +3304,7 @@ compute_pipeline_create(VkDevice _device,
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateComputePipelines(VkDevice _device,
VkPipelineCache pipelineCache,
uint32_t createInfoCount,
@@ -3259,7 +3318,8 @@ v3dv_CreateComputePipelines(VkDevice _device,
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
mtx_lock(&device->pdevice->mutex);
- for (uint32_t i = 0; i < createInfoCount; i++) {
+ uint32_t i = 0;
+ for (; i < createInfoCount; i++) {
VkResult local_result;
local_result = compute_pipeline_create(_device,
pipelineCache,
@@ -3270,9 +3330,16 @@ v3dv_CreateComputePipelines(VkDevice _device,
if (local_result != VK_SUCCESS) {
result = local_result;
pPipelines[i] = VK_NULL_HANDLE;
+
+ if (pCreateInfos[i].flags &
+ VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
}
}
+ for (; i < createInfoCount; i++)
+ pPipelines[i] = VK_NULL_HANDLE;
+
if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))
mtx_unlock(&device->pdevice->mutex);
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c
index 7d1d11485..c19eecc42 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_pipeline_cache.c
@@ -63,6 +63,20 @@ cache_dump_stats(struct v3dv_pipeline_cache *cache)
fprintf(stderr, " cache hit count: %d\n", cache->stats.hit);
}
+static void
+pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
+{
+ if (!cache->externally_synchronized)
+ pthread_mutex_lock(&cache->mutex);
+}
+
+static void
+pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
+{
+ if (!cache->externally_synchronized)
+ pthread_mutex_unlock(&cache->mutex);
+}
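These helpers make the cache mutex optional: a cache created with VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT (VK_EXT_pipeline_creation_cache_control) promises single-threaded use, so the lock can be skipped. An application-side sketch:

    VkPipelineCacheCreateInfo cache_info = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
       .flags = VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT,
    };
    VkPipelineCache cache;
    vkCreatePipelineCache(device, &cache_info, NULL, &cache);
    /* The application must now never use `cache` from two threads at once;
     * in exchange the driver elides pipeline_cache_lock()'s mutex.
     */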
+
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
struct v3dv_pipeline_cache *cache,
@@ -75,10 +89,10 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
return;
- pthread_mutex_lock(&cache->mutex);
+ pipeline_cache_lock(cache);
struct hash_entry *entry =
_mesa_hash_table_search(cache->nir_cache, sha1_key);
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
if (entry)
return;
@@ -91,7 +105,7 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
return;
}
- pthread_mutex_lock(&cache->mutex);
+ pipeline_cache_lock(cache);
/* Because ralloc isn't thread-safe, we have to do all this inside the
* lock. We could unlock for the big memcpy but it's probably not worth
* the hassle.
@@ -99,7 +113,7 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
if (entry) {
blob_finish(&blob);
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
return;
}
@@ -122,7 +136,7 @@ v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
}
nir_shader*
@@ -143,12 +157,12 @@ v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
const struct serialized_nir *snir = NULL;
- pthread_mutex_lock(&cache->mutex);
+ pipeline_cache_lock(cache);
struct hash_entry *entry =
_mesa_hash_table_search(cache->nir_cache, sha1_key);
if (entry)
snir = entry->data;
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
if (snir) {
struct blob_reader blob;
@@ -185,6 +199,7 @@ v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
struct v3dv_device *device,
+ VkPipelineCacheCreateFlags flags,
bool cache_enabled)
{
cache->device = device;
@@ -202,6 +217,9 @@ v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
cache->stats.miss = 0;
cache->stats.hit = 0;
cache->stats.count = 0;
+
+ cache->externally_synchronized = flags &
+ VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT;
} else {
cache->nir_cache = NULL;
cache->cache = NULL;
@@ -229,7 +247,8 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *
*/
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
- unsigned char sha1_key[20])
+ unsigned char sha1_key[20],
+ bool *cache_hit)
{
if (!cache || !cache->cache)
return NULL;
@@ -241,7 +260,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
}
- pthread_mutex_lock(&cache->mutex);
+ pipeline_cache_lock(cache);
struct hash_entry *entry =
_mesa_hash_table_search(cache->cache, sha1_key);
@@ -252,6 +271,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
assert(cache_entry);
cache->stats.hit++;
+ *cache_hit = true;
if (debug_cache) {
fprintf(stderr, "\tcache hit: %p\n", cache_entry);
if (dump_stats)
@@ -261,7 +281,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
v3dv_pipeline_shared_data_ref(cache_entry);
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
return cache_entry;
}
@@ -273,7 +293,7 @@ v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
cache_dump_stats(cache);
}
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
#ifdef ENABLE_SHADER_CACHE
struct v3dv_device *device = cache->device;
@@ -324,6 +344,14 @@ v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
if (shared_data->variants[stage] != NULL)
v3dv_shader_variant_destroy(device, shared_data->variants[stage]);
+
+ /* We don't free binning descriptor maps as we are sharing them
+ * with the render shaders.
+ */
+ if (shared_data->maps[stage] != NULL &&
+ !broadcom_shader_stage_is_binning(stage)) {
+ vk_free(&device->vk.alloc, shared_data->maps[stage]);
+ }
}
if (shared_data->assembly_bo)
@@ -335,11 +363,8 @@ v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
const unsigned char sha1_key[20],
+ struct v3dv_descriptor_maps **maps,
struct v3dv_shader_variant **variants,
- const struct v3dv_descriptor_map *ubo_map,
- const struct v3dv_descriptor_map *ssbo_map,
- const struct v3dv_descriptor_map *sampler_map,
- const struct v3dv_descriptor_map *texture_map,
const uint64_t *total_assembly,
const uint32_t total_assembly_size)
{
@@ -359,13 +384,10 @@ v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
new_entry->ref_cnt = 1;
memcpy(new_entry->sha1_key, sha1_key, 20);
- memcpy(&new_entry->ubo_map, ubo_map, sizeof(struct v3dv_descriptor_map));
- memcpy(&new_entry->ssbo_map, ssbo_map, sizeof(struct v3dv_descriptor_map));
- memcpy(&new_entry->sampler_map, sampler_map, sizeof(struct v3dv_descriptor_map));
- memcpy(&new_entry->texture_map, texture_map, sizeof(struct v3dv_descriptor_map));
-
- for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++)
+ for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+ new_entry->maps[stage] = maps[stage];
new_entry->variants[stage] = variants[stage];
+ }
struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
"pipeline shader assembly", true);
@@ -402,12 +424,12 @@ pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
return;
- pthread_mutex_lock(&cache->mutex);
+ pipeline_cache_lock(cache);
struct hash_entry *entry =
_mesa_hash_table_search(cache->cache, shared_data->sha1_key);
if (entry) {
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
return;
}
@@ -424,7 +446,7 @@ pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
cache_dump_stats(cache);
}
- pthread_mutex_unlock(&cache->mutex);
+ pipeline_cache_unlock(cache);
#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing to
@@ -490,7 +512,7 @@ shader_variant_create_from_blob(struct v3dv_device *device,
{
VkResult result;
- broadcom_shader_stage stage = blob_read_uint32(blob);
+ enum broadcom_shader_stage stage = blob_read_uint32(blob);
uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage, perhaps we can avoid prog_data_size? */
@@ -541,17 +563,32 @@ v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
{
const unsigned char *sha1_key = blob_read_bytes(blob, 20);
- const struct v3dv_descriptor_map *ubo_map =
- blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map));
- const struct v3dv_descriptor_map *ssbo_map =
- blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map));
- const struct v3dv_descriptor_map *sampler_map =
- blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map));
- const struct v3dv_descriptor_map *texture_map =
- blob_read_bytes(blob, sizeof(struct v3dv_descriptor_map));
+ struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
- if (blob->overrun)
- return NULL;
+ uint8_t descriptor_maps_count = blob_read_uint8(blob);
+ for (uint8_t count = 0; count < descriptor_maps_count; count++) {
+ uint8_t stage = blob_read_uint8(blob);
+
+ const struct v3dv_descriptor_maps *current_maps =
+ blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));
+
+ if (blob->overrun)
+ return NULL;
+
+ maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
+ sizeof(struct v3dv_descriptor_maps), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (maps[stage] == NULL)
+ return NULL;
+
+ memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
+ if (broadcom_shader_stage_is_render_with_binning(stage)) {
+ enum broadcom_shader_stage bin_stage =
+ broadcom_binning_shader_stage_for_render_stage(stage);
+ maps[bin_stage] = maps[stage];
+ }
+ }
uint8_t variant_count = blob_read_uint8(blob);
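
For reference, the blob layout this loop consumes (and that v3dv_pipeline_shared_data_write_to_blob produces later in this diff) can be summarized as follows; the field names here are descriptive, not from the source:

    /* Descriptor-maps section of the shared-data blob:
     *
     *   uint8_t descriptor_maps_count;            -- non-binning stages only
     *   descriptor_maps_count entries of:
     *     uint8_t                     stage;      -- enum broadcom_shader_stage
     *     struct v3dv_descriptor_maps maps;       -- raw copy of the struct
     *
     * Binning stages are never serialized: on load, each render stage with a
     * binning counterpart shares its maps pointer with that binning stage.
     */
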
@@ -571,8 +608,7 @@ v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
if (blob->overrun)
return NULL;
- return v3dv_pipeline_shared_data_new(cache, sha1_key, variants,
- ubo_map, ssbo_map, sampler_map, texture_map,
+ return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
total_assembly, total_assembly_size);
}
@@ -643,7 +679,7 @@ pipeline_cache_load(struct v3dv_pipeline_cache *cache,
}
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
const VkPipelineCacheCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -653,16 +689,15 @@ v3dv_CreatePipelineCache(VkDevice _device,
struct v3dv_pipeline_cache *cache;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
- assert(pCreateInfo->flags == 0);
cache = vk_object_zalloc(&device->vk, pAllocator,
sizeof(*cache),
VK_OBJECT_TYPE_PIPELINE_CACHE);
if (cache == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- v3dv_pipeline_cache_init(cache, device,
+ v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
device->instance->pipeline_cache_enabled);
if (pCreateInfo->initialDataSize > 0) {
@@ -702,7 +737,7 @@ v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
}
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
VkPipelineCache _cache,
const VkAllocationCallbacks *pAllocator)
@@ -718,7 +753,7 @@ v3dv_DestroyPipelineCache(VkDevice _device,
vk_object_free(&device->vk, pAllocator, cache);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
VkPipelineCache dstCache,
uint32_t srcCacheCount,
@@ -820,14 +855,33 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *
{
blob_write_bytes(blob, cache_entry->sha1_key, 20);
- blob_write_bytes(blob, &cache_entry->ubo_map,
- sizeof(struct v3dv_descriptor_map));
- blob_write_bytes(blob, &cache_entry->ssbo_map,
- sizeof(struct v3dv_descriptor_map));
- blob_write_bytes(blob, &cache_entry->sampler_map,
- sizeof(struct v3dv_descriptor_map));
- blob_write_bytes(blob, &cache_entry->texture_map,
- sizeof(struct v3dv_descriptor_map));
+ uint8_t descriptor_maps_count = 0;
+ for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+ if (broadcom_shader_stage_is_binning(stage))
+ continue;
+ if (cache_entry->maps[stage] == NULL)
+ continue;
+ descriptor_maps_count++;
+ }
+
+   /* Compute pipelines only have one descriptor map, while graphics
+    * pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning
+    * stages share the descriptor map of their render stage.
+    */
+ assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
+ (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
+ blob_write_uint8(blob, descriptor_maps_count);
+
+ for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
+ if (cache_entry->maps[stage] == NULL)
+ continue;
+ if (broadcom_shader_stage_is_binning(stage))
+ continue;
+
+ blob_write_uint8(blob, stage);
+ blob_write_bytes(blob, cache_entry->maps[stage],
+ sizeof(struct v3dv_descriptor_maps));
+ }
uint8_t variant_count = 0;
for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
@@ -836,10 +890,10 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *
variant_count++;
}
- /* Right now we only support compute pipeline, or graphics pipeline with
- * vertex, vertex bin, and fragment shader.
+   /* Graphics pipelines with VS+FS have 3 variants, those with VS+GS+FS
+    * have 5, and compute pipelines have only 1.
*/
- assert(variant_count == 3 ||
+ assert((variant_count == 5 || variant_count == 3) ||
(variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
blob_write_uint8(blob, variant_count);
@@ -864,7 +918,7 @@ v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
VkPipelineCache _cache,
size_t *pDataSize,
@@ -881,9 +935,9 @@ v3dv_GetPipelineCacheData(VkDevice _device,
}
struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
- VkResult result = VK_SUCCESS;
+ VkResult result = VK_INCOMPLETE;
- pthread_mutex_lock(&cache->mutex);
+ pipeline_cache_lock(cache);
struct vk_pipeline_cache_header header = {
.header_size = sizeof(struct vk_pipeline_cache_header),
@@ -898,9 +952,7 @@ v3dv_GetPipelineCacheData(VkDevice _device,
intptr_t nir_count_offset = blob_reserve_uint32(&blob);
if (nir_count_offset < 0) {
*pDataSize = 0;
- blob_finish(&blob);
- pthread_mutex_unlock(&cache->mutex);
- return VK_INCOMPLETE;
+ goto done;
}
if (cache->nir_cache) {
@@ -915,9 +967,7 @@ v3dv_GetPipelineCacheData(VkDevice _device,
if (blob.out_of_memory) {
blob.size = save_size;
- pthread_mutex_unlock(&cache->mutex);
- result = VK_INCOMPLETE;
- break;
+ goto done;
}
nir_count++;
@@ -929,9 +979,7 @@ v3dv_GetPipelineCacheData(VkDevice _device,
intptr_t count_offset = blob_reserve_uint32(&blob);
if (count_offset < 0) {
*pDataSize = 0;
- blob_finish(&blob);
- pthread_mutex_unlock(&cache->mutex);
- return VK_INCOMPLETE;
+ goto done;
}
if (cache->cache) {
@@ -942,9 +990,7 @@ v3dv_GetPipelineCacheData(VkDevice _device,
if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
         /* If it fails, reset to the previous size and bail */
blob.size = save_size;
- pthread_mutex_unlock(&cache->mutex);
- result = VK_INCOMPLETE;
- break;
+ goto done;
}
count++;
@@ -955,7 +1001,7 @@ v3dv_GetPipelineCacheData(VkDevice _device,
*pDataSize = blob.size;
- blob_finish(&blob);
+ result = VK_SUCCESS;
if (debug_cache) {
assert(count <= cache->stats.count);
@@ -965,7 +1011,10 @@ v3dv_GetPipelineCacheData(VkDevice _device,
cache, nir_count, count, (uint32_t) *pDataSize);
}
- pthread_mutex_unlock(&cache->mutex);
+ done:
+ blob_finish(&blob);
+
+ pipeline_cache_unlock(cache);
return result;
}
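
The goto-based restructuring above implements the standard Vulkan two-call idiom for vkGetPipelineCacheData. A minimal usage sketch from the application side (error handling elided):

    /* First call queries the size, second call retrieves the data. */
    size_t size = 0;
    vkGetPipelineCacheData(device, cache, &size, NULL);

    void *data = malloc(size);
    VkResult res = vkGetPipelineCacheData(device, cache, &size, data);
    if (res == VK_INCOMPLETE) {
       /* Cache contents changed between calls; only 'size' bytes are valid. */
    }
    /* 'data' can be persisted and passed back via pInitialData on the next run. */
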
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_private.h b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
index 36ecba130..d3c07c649 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_private.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
@@ -38,10 +38,15 @@
#include "vk_device.h"
#include "vk_instance.h"
+#include "vk_image.h"
+#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"
+#include "vk_command_buffer.h"
+#include "vk_queue.h"
+
#include <xf86drm.h>
#ifdef HAVE_VALGRIND
@@ -56,6 +61,8 @@
#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
+#include "common/v3d_tiling.h"
+#include "common/v3d_util.h"
#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"
@@ -69,36 +76,17 @@
#include "u_atomic.h"
#include "v3dv_entrypoints.h"
-#include "v3dv_extensions.h"
#include "v3dv_bo.h"
#include "drm-uapi/v3d_drm.h"
-/* FIXME: hooks for the packet definition functions. */
-static inline void
-pack_emit_reloc(void *cl, const void *reloc) {}
-
-#define __gen_user_data struct v3dv_cl
-#define __gen_address_type struct v3dv_cl_reloc
-#define __gen_address_offset(reloc) (((reloc)->bo ? (reloc)->bo->offset : 0) + \
- (reloc)->offset)
-#define __gen_emit_reloc cl_pack_emit_reloc
-#define __gen_unpack_address(cl, s, e) __unpack_address(cl, s, e)
-#include "v3dv_cl.h"
-
#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"
+#include "v3dv_cl.h"
-/* FIXME: pipe_box from Gallium. Needed for some v3d_tiling.c functions.
- * In the future we might want to drop that depedency, but for now it is
- * good enough.
- */
-#include "util/u_box.h"
#include "wsi_common.h"
-#include "broadcom/cle/v3dx_pack.h"
-
/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
@@ -124,6 +112,9 @@ struct v3dv_instance;
struct v3d_simulator_file;
+/* Minimum required by the Vulkan 1.1 spec */
+#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
+
struct v3dv_physical_device {
struct vk_physical_device vk;
@@ -132,6 +123,15 @@ struct v3dv_physical_device {
int32_t display_fd;
int32_t master_fd;
+   /* We need these because it is not clear how to detect
+    * valid devids in a portable way.
+    */
+ bool has_primary;
+ bool has_render;
+
+ dev_t primary_devid;
+ dev_t render_devid;
+
uint8_t driver_build_sha1[20];
uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
@@ -163,6 +163,8 @@ VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
+struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
+ uint32_t index);
void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);
@@ -173,6 +175,10 @@ void v3dv_meta_blit_finish(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
+bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
+ const VkOffset3D *offset,
+ VkFormat *compat_format);
+
struct v3dv_instance {
struct vk_instance vk;
@@ -214,10 +220,9 @@ struct v3dv_queue_submit_wait_info {
};
struct v3dv_queue {
- struct vk_object_base base;
+ struct vk_queue vk;
struct v3dv_device *device;
- VkDeviceQueueCreateFlags flags;
/* A list of active v3dv_queue_submit_wait_info */
struct list_head submit_wait_list;
@@ -229,7 +234,7 @@ struct v3dv_queue {
};
#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
-#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (2 * sizeof(uint32_t) + \
+#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
sizeof(VkComponentMapping))
struct v3dv_meta_color_clear_pipeline {
@@ -273,6 +278,7 @@ struct v3dv_pipeline_key {
} color_fmt[V3D_MAX_DRAW_BUFFERS];
uint8_t f32_color_rb;
uint32_t va_swap_rb_mask;
+ bool has_multiview;
};
struct v3dv_pipeline_cache_stats {
@@ -285,22 +291,26 @@ struct v3dv_pipeline_cache_stats {
*
* FIXME: perhaps move to common
*/
-typedef enum {
+enum broadcom_shader_stage {
BROADCOM_SHADER_VERTEX,
BROADCOM_SHADER_VERTEX_BIN,
+ BROADCOM_SHADER_GEOMETRY,
+ BROADCOM_SHADER_GEOMETRY_BIN,
BROADCOM_SHADER_FRAGMENT,
BROADCOM_SHADER_COMPUTE,
-} broadcom_shader_stage;
+};
#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
/* Assumes that coordinate shaders will be custom-handled by the caller */
-static inline broadcom_shader_stage
+static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
switch (stage) {
case MESA_SHADER_VERTEX:
return BROADCOM_SHADER_VERTEX;
+ case MESA_SHADER_GEOMETRY:
+ return BROADCOM_SHADER_GEOMETRY;
case MESA_SHADER_FRAGMENT:
return BROADCOM_SHADER_FRAGMENT;
case MESA_SHADER_COMPUTE:
@@ -311,12 +321,15 @@ gl_shader_stage_to_broadcom(gl_shader_stage stage)
}
static inline gl_shader_stage
-broadcom_shader_stage_to_gl(broadcom_shader_stage stage)
+broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
switch (stage) {
case BROADCOM_SHADER_VERTEX:
case BROADCOM_SHADER_VERTEX_BIN:
return MESA_SHADER_VERTEX;
+ case BROADCOM_SHADER_GEOMETRY:
+ case BROADCOM_SHADER_GEOMETRY_BIN:
+ return MESA_SHADER_GEOMETRY;
case BROADCOM_SHADER_FRAGMENT:
return MESA_SHADER_FRAGMENT;
case BROADCOM_SHADER_COMPUTE:
@@ -326,6 +339,56 @@ broadcom_shader_stage_to_gl(broadcom_shader_stage stage)
}
}
+static inline bool
+broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
+{
+ switch (stage) {
+ case BROADCOM_SHADER_VERTEX_BIN:
+ case BROADCOM_SHADER_GEOMETRY_BIN:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
+{
+ switch (stage) {
+ case BROADCOM_SHADER_VERTEX:
+ case BROADCOM_SHADER_GEOMETRY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline enum broadcom_shader_stage
+broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
+{
+ switch (stage) {
+ case BROADCOM_SHADER_VERTEX:
+ return BROADCOM_SHADER_VERTEX_BIN;
+ case BROADCOM_SHADER_GEOMETRY:
+ return BROADCOM_SHADER_GEOMETRY_BIN;
+ default:
+ unreachable("Invalid shader stage");
+ }
+}
+
+static inline const char *
+broadcom_shader_stage_name(enum broadcom_shader_stage stage)
+{
+   switch (stage) {
+ case BROADCOM_SHADER_VERTEX_BIN:
+ return "MESA_SHADER_VERTEX_BIN";
+ case BROADCOM_SHADER_GEOMETRY_BIN:
+ return "MESA_SHADER_GEOMETRY_BIN";
+ default:
+ return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
+ }
+}
+
struct v3dv_pipeline_cache {
struct vk_object_base base;
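
As an illustration of how these helpers compose, a loop pairing each render stage with its binning counterpart might look like this (a sketch; the loop body is hypothetical):

    for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
       if (!broadcom_shader_stage_is_render_with_binning(stage))
          continue;

       enum broadcom_shader_stage bin_stage =
          broadcom_binning_shader_stage_for_render_stage(stage);

       /* stage is VERTEX or GEOMETRY; bin_stage is the matching *_BIN
        * stage that shares state (e.g. descriptor maps) with it.
        */
    }
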
@@ -337,6 +400,9 @@ struct v3dv_pipeline_cache {
struct hash_table *cache;
struct v3dv_pipeline_cache_stats stats;
+
+ /* For VK_EXT_pipeline_creation_cache_control. */
+ bool externally_synchronized;
};
struct v3dv_device {
@@ -441,34 +507,6 @@ struct v3dv_format {
bool supports_filtering;
};
-/**
- * Tiling mode enum used for v3d_resource.c, which maps directly to the Memory
- * Format field of render target and Z/Stencil config.
- */
-enum v3d_tiling_mode {
- /* Untiled resources. Not valid as texture inputs. */
- VC5_TILING_RASTER,
-
- /* Single line of u-tiles. */
- VC5_TILING_LINEARTILE,
-
- /* Departure from standard 4-UIF block column format. */
- VC5_TILING_UBLINEAR_1_COLUMN,
-
- /* Departure from standard 4-UIF block column format. */
- VC5_TILING_UBLINEAR_2_COLUMN,
-
- /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
- * split 2x2 into utiles.
- */
- VC5_TILING_UIF_NO_XOR,
-
- /* Normal tiling format: grouped in 4x4 UIFblocks, each of which is
- * split 2x2 into utiles.
- */
- VC5_TILING_UIF_XOR,
-};
-
struct v3d_resource_slice {
uint32_t offset;
uint32_t stride;
@@ -484,56 +522,43 @@ struct v3d_resource_slice {
};
struct v3dv_image {
- struct vk_object_base base;
-
- VkImageType type;
- VkImageAspectFlags aspects;
-
- VkExtent3D extent;
- uint32_t levels;
- uint32_t array_size;
- uint32_t samples;
- VkImageUsageFlags usage;
- VkImageCreateFlags flags;
- VkImageTiling tiling;
+ struct vk_image vk;
- VkFormat vk_format;
const struct v3dv_format *format;
-
uint32_t cpp;
-
- uint64_t drm_format_mod;
bool tiled;
struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
uint64_t size; /* Total size in bytes */
uint32_t cube_map_stride;
- uint32_t alignment;
struct v3dv_device_memory *mem;
VkDeviceSize mem_offset;
+ uint32_t alignment;
};
VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
-struct v3dv_image_view {
- struct vk_object_base base;
+/* Pre-generating packets needs to consider changes in packet sizes across hw
+ * versions. Keep things simple and allocate enough space for any supported
+ * version. We ensure the size is large enough through static asserts.
+ */
+#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
+#define V3DV_SAMPLER_STATE_LENGTH 24
+#define V3DV_BLEND_CFG_LENGTH 5
+#define V3DV_CFG_BITS_LENGTH 4
+#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
+#define V3DV_VCM_CACHE_SIZE_LENGTH 2
+#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
+#define V3DV_STENCIL_CFG_LENGTH 6
- const struct v3dv_image *image;
- VkImageAspectFlags aspects;
- VkExtent3D extent;
- VkImageViewType type;
+struct v3dv_image_view {
+ struct vk_image_view vk;
- VkFormat vk_format;
const struct v3dv_format *format;
bool swap_rb;
uint32_t internal_bpp;
uint32_t internal_type;
-
- uint32_t base_level;
- uint32_t max_level;
- uint32_t first_layer;
- uint32_t last_layer;
uint32_t offset;
   /* Precomputed (composed from createinfo->components and format swizzle)
@@ -552,7 +577,7 @@ struct v3dv_image_view {
* we generate two states and select the one to use based on the descriptor
* type.
*/
- uint8_t texture_shader_state[2][cl_packet_length(TEXTURE_SHADER_STATE)];
+ uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
};
uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
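
The prepacked packet length defines above are validated in version-specific code, where cl_packet_length() is available; such a check would look roughly like this (a sketch, not the exact asserts from the source):

    /* Compiled once per hardware version, with v3dx_pack.h included;
     * STATIC_ASSERT comes from util/macros.h.
     */
    STATIC_ASSERT(V3DV_TEXTURE_SHADER_STATE_LENGTH >=
                  cl_packet_length(TEXTURE_SHADER_STATE));
    STATIC_ASSERT(V3DV_SAMPLER_STATE_LENGTH >=
                  cl_packet_length(SAMPLER_STATE));
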
@@ -571,7 +596,7 @@ struct v3dv_buffer {
struct v3dv_buffer_view {
struct vk_object_base base;
- const struct v3dv_buffer *buffer;
+ struct v3dv_buffer *buffer;
VkFormat vk_format;
const struct v3dv_format *format;
@@ -583,7 +608,7 @@ struct v3dv_buffer_view {
uint32_t num_elements;
/* Prepacked TEXTURE_SHADER_STATE. */
- uint8_t texture_shader_state[cl_packet_length(TEXTURE_SHADER_STATE)];
+ uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};
struct v3dv_subpass_attachment {
@@ -601,20 +626,33 @@ struct v3dv_subpass {
struct v3dv_subpass_attachment ds_attachment;
- bool has_srgb_rt;
-
/* If we need to emit the clear of the depth/stencil attachment using a
* a draw call instead of using the TLB (GFXH-1461).
*/
bool do_depth_clear_with_draw;
bool do_stencil_clear_with_draw;
+
+ /* Multiview */
+ uint32_t view_mask;
};
struct v3dv_render_pass_attachment {
VkAttachmentDescription desc;
+
uint32_t first_subpass;
uint32_t last_subpass;
+ /* When multiview is enabled, we no longer care about when a particular
+ * attachment is first or last used in a render pass, since not all views
+ * in the attachment will meet that criteria. Instead, we need to track
+ * each individual view (layer) in each attachment and emit our stores,
+ * loads and clears accordingly.
+ */
+ struct {
+ uint32_t first_subpass;
+ uint32_t last_subpass;
+ } views[MAX_MULTIVIEW_VIEW_COUNT];
+
   /* If this is a multisampled attachment that is going to be resolved,
* whether we can use the TLB resolve on store.
*/
@@ -624,6 +662,8 @@ struct v3dv_render_pass_attachment {
struct v3dv_render_pass {
struct vk_object_base base;
+ bool multiview_enabled;
+
uint32_t attachment_count;
struct v3dv_render_pass_attachment *attachments;
@@ -677,10 +717,12 @@ void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *f
const struct v3dv_subpass *subpass,
uint8_t *max_bpp, bool *msaa);
-bool v3dv_subpass_area_is_tile_aligned(const VkRect2D *area,
+bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
+ const VkRect2D *area,
struct v3dv_framebuffer *fb,
struct v3dv_render_pass *pass,
uint32_t subpass_idx);
+
struct v3dv_cmd_pool {
struct vk_object_base base;
@@ -711,11 +753,6 @@ struct v3dv_cmd_buffer_attachment_state {
union v3dv_clear_value clear_value;
};
-void v3dv_get_hw_clear_color(const VkClearColorValue *color,
- uint32_t internal_type,
- uint32_t internal_size,
- uint32_t *hw_color);
-
struct v3dv_viewport_state {
uint32_t count;
VkViewport viewports[MAX_VIEWPORTS];
@@ -740,7 +777,8 @@ enum v3dv_dynamic_state_bits {
V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
- V3DV_DYNAMIC_ALL = (1 << 8) - 1,
+ V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8,
+ V3DV_DYNAMIC_ALL = (1 << 9) - 1,
};
/* Flags for dirty pipeline state.
@@ -762,6 +800,8 @@ enum v3dv_cmd_dirty_bits {
V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 13,
V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 14,
V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 15,
+ V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 16,
+ V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 17,
};
struct v3dv_dynamic_state {
@@ -799,6 +839,8 @@ struct v3dv_dynamic_state {
} depth_bias;
float line_width;
+
+ uint32_t color_write_enable;
};
extern const struct v3dv_dynamic_state default_dynamic_state;
@@ -808,10 +850,10 @@ void v3dv_viewport_compute_xform(const VkViewport *viewport,
float translate[3]);
enum v3dv_ez_state {
- VC5_EZ_UNDECIDED = 0,
- VC5_EZ_GT_GE,
- VC5_EZ_LT_LE,
- VC5_EZ_DISABLED,
+ V3D_EZ_UNDECIDED = 0,
+ V3D_EZ_GT_GE,
+ V3D_EZ_LT_LE,
+ V3D_EZ_DISABLED,
};
enum v3dv_job_type {
@@ -824,7 +866,6 @@ enum v3dv_job_type {
V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
V3DV_JOB_TYPE_CPU_SET_EVENT,
V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
- V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
@@ -839,6 +880,9 @@ struct v3dv_reset_query_cpu_job_info {
struct v3dv_end_query_cpu_job_info {
struct v3dv_query_pool *pool;
uint32_t query;
+
+ /* This is one unless multiview is used */
+ uint32_t count;
};
struct v3dv_copy_query_results_cpu_job_info {
@@ -865,13 +909,6 @@ struct v3dv_event_wait_cpu_job_info {
bool sem_wait;
};
-struct v3dv_clear_attachments_cpu_job_info {
- uint32_t attachment_count;
- VkClearAttachment attachments[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */
- uint32_t rect_count;
- VkClearRect *rects;
-};
-
struct v3dv_copy_buffer_to_image_cpu_job_info {
struct v3dv_image *image;
struct v3dv_buffer *buffer;
@@ -897,6 +934,9 @@ struct v3dv_csd_indirect_cpu_job_info {
struct v3dv_timestamp_query_cpu_job_info {
struct v3dv_query_pool *pool;
uint32_t query;
+
+ /* This is one unless multiview is used */
+ uint32_t count;
};
struct v3dv_job {
@@ -924,6 +964,7 @@ struct v3dv_job {
*/
struct set *bos;
uint32_t bo_count;
+ uint64_t bo_handle_mask;
struct v3dv_bo *tile_alloc;
struct v3dv_bo *tile_state;
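
The bo_handle_mask added above acts as a cheap filter in front of the BO set: if a handle's bit is clear, the hash-set lookup can be skipped. A minimal sketch, assuming the bit is derived from the low bits of the GEM handle:

    void
    v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo)
    {
       if (!bo)
          return;

       /* One bit per (handle % 64): a clear bit proves the BO is not in
        * the set, so we only pay for _mesa_set_search() on possible hits.
        */
       const uint64_t handle_bit = 1ull << (bo->handle % 64);
       if ((job->bo_handle_mask & handle_bit) && _mesa_set_search(job->bos, bo))
          return;

       _mesa_set_add(job->bos, bo);
       job->bo_count++;
       job->bo_handle_mask |= handle_bit;
    }
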
@@ -975,7 +1016,6 @@ struct v3dv_job {
struct v3dv_copy_query_results_cpu_job_info query_copy_results;
struct v3dv_event_set_cpu_job_info event_set;
struct v3dv_event_wait_cpu_job_info event_wait;
- struct v3dv_clear_attachments_cpu_job_info clear_attachments;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
@@ -988,6 +1028,7 @@ struct v3dv_job {
struct {
struct v3dv_bo *shared_memory;
uint32_t wg_count[3];
+ uint32_t wg_base[3];
struct drm_v3d_submit_csd submit;
} csd;
};
@@ -998,20 +1039,47 @@ void v3dv_job_init(struct v3dv_job *job,
struct v3dv_cmd_buffer *cmd_buffer,
int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);
+
void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
-void v3dv_job_emit_binning_flush(struct v3dv_job *job);
+void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
+
void v3dv_job_start_frame(struct v3dv_job *job,
uint32_t width,
uint32_t height,
uint32_t layers,
+ bool allocate_tile_state_for_all_layers,
uint32_t render_target_count,
uint8_t max_internal_bpp,
bool msaa);
+
+struct v3dv_job *
+v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
+ struct v3dv_cmd_buffer *cmd_buffer);
+
struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
enum v3dv_job_type type,
struct v3dv_cmd_buffer *cmd_buffer,
uint32_t subpass_idx);
+void
+v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t slot_size,
+ uint32_t used_count,
+ uint32_t *alloc_count,
+ void **ptr);
+
+void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);
+
+/* FIXME: only used in v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to
+ * a cmd_buffer-specific header?
+ */
+struct v3dv_draw_info {
+ uint32_t vertex_count;
+ uint32_t instance_count;
+ uint32_t first_vertex;
+ uint32_t first_instance;
+};
+
struct v3dv_vertex_binding {
struct v3dv_buffer *buffer;
VkDeviceSize offset;
@@ -1043,7 +1111,10 @@ struct v3dv_cmd_buffer_state {
struct v3dv_cmd_pipeline_state compute;
struct v3dv_dynamic_state dynamic;
+
uint32_t dirty;
+ VkShaderStageFlagBits dirty_descriptor_stages;
+ VkShaderStageFlagBits dirty_push_constants_stages;
/* Current clip window. We use this to check whether we have an active
* scissor, since in that case we can't use TLB clears and need to fallback
@@ -1075,9 +1146,14 @@ struct v3dv_cmd_buffer_state {
struct {
struct v3dv_cl_reloc vs_bin;
struct v3dv_cl_reloc vs;
+ struct v3dv_cl_reloc gs_bin;
+ struct v3dv_cl_reloc gs;
struct v3dv_cl_reloc fs;
} uniforms;
+ /* Current view index for multiview rendering */
+ uint32_t view_index;
+
/* Used to flag OOM conditions during command buffer recording */
bool oom;
@@ -1126,10 +1202,13 @@ struct v3dv_cmd_buffer_state {
struct v3dv_end_query_cpu_job_info *states;
} end;
- /* This is not NULL if we have an active query, that is, we have called
- * vkCmdBeginQuery but not vkCmdEndQuery.
+ /* This BO is not NULL if we have an active query, that is, we have
+ * called vkCmdBeginQuery but not vkCmdEndQuery.
*/
- struct v3dv_bo *active_query;
+ struct {
+ struct v3dv_bo *bo;
+ uint32_t offset;
+ } active_query;
} query;
};
@@ -1160,44 +1239,24 @@ struct v3dv_descriptor {
};
};
-/* The following v3dv_xxx_descriptor structs represent descriptor info that we
- * upload to a bo, specifically a subregion of the descriptor pool bo.
- *
- * The general rule that we apply right now to decide which info goes to such
- * bo is that we upload those that are referenced by an address when emitting
- * a packet, so needed to be uploaded to an bo in any case.
- *
- * Note that these structs are mostly helpers that improve the semantics when
- * doing all that, but we could do as other mesa vulkan drivers and just
- * upload the info we know it is expected based on the context.
- *
- * Also note that the sizes are aligned, as there is an alignment requirement
- * for addresses.
- */
-struct v3dv_sampled_image_descriptor {
- uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
-};
-
-struct v3dv_sampler_descriptor {
- uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
-};
-
-struct v3dv_combined_image_sampler_descriptor {
- uint8_t texture_state[cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32)];
- uint8_t sampler_state[cl_aligned_packet_length(SAMPLER_STATE, 32)];
-};
-
struct v3dv_query {
bool maybe_available;
union {
- struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
- uint64_t value; /* Used by CPU queries (timestamp) */
+ /* Used by GPU queries (occlusion) */
+ struct {
+ struct v3dv_bo *bo;
+ uint32_t offset;
+ };
+ /* Used by CPU queries (timestamp) */
+ uint64_t value;
};
};
struct v3dv_query_pool {
struct vk_object_base base;
+ struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */
+
VkQueryType query_type;
uint32_t query_count;
struct v3dv_query *queries;
@@ -1221,7 +1280,7 @@ struct v3dv_cmd_buffer_private_obj {
};
struct v3dv_cmd_buffer {
- struct vk_object_base base;
+ struct vk_command_buffer vk;
struct v3dv_device *device;
@@ -1293,12 +1352,6 @@ void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
uint32_t dirty_dynamic_state,
bool needs_subpass_resume);
-void v3dv_render_pass_setup_render_target(struct v3dv_cmd_buffer *cmd_buffer,
- int rt,
- uint32_t *rt_bpp,
- uint32_t *rt_type,
- uint32_t *rt_clamp);
-
void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_query_pool *pool,
uint32_t first,
@@ -1338,8 +1391,8 @@ struct v3dv_semaphore {
/* A syncobject handle associated with this semaphore */
uint32_t sync;
- /* The file handle of a fence that we imported into our syncobject */
- int32_t fd;
+ /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
+ uint32_t temp_sync;
};
struct v3dv_fence {
@@ -1348,8 +1401,8 @@ struct v3dv_fence {
/* A syncobject handle associated with this fence */
uint32_t sync;
- /* The file handle of a fence that we imported into our syncobject */
- int32_t fd;
+ /* A temporary syncobject handle produced from a vkImportFenceFd. */
+ uint32_t temp_sync;
};
struct v3dv_event {
@@ -1358,11 +1411,12 @@ struct v3dv_event {
};
struct v3dv_shader_variant {
- broadcom_shader_stage stage;
+ enum broadcom_shader_stage stage;
union {
struct v3d_prog_data *base;
struct v3d_vs_prog_data *vs;
+ struct v3d_gs_prog_data *gs;
struct v3d_fs_prog_data *fs;
struct v3d_compute_prog_data *cs;
} prog_data;
@@ -1397,7 +1451,7 @@ struct v3dv_shader_variant {
struct v3dv_pipeline_stage {
struct v3dv_pipeline *pipeline;
- broadcom_shader_stage stage;
+ enum broadcom_shader_stage stage;
const struct vk_shader_module *module;
const char *entrypoint;
@@ -1410,20 +1464,8 @@ struct v3dv_pipeline_stage {
/** A name for this program, so you can track it in shader-db output. */
uint32_t program_id;
-};
-/* FIXME: although the full vpm_config is not required at this point, as we
- * don't plan to initially support GS, it is more readable and serves as a
- * placeholder, to have the struct and fill it with default values.
- */
-struct vpm_config {
- uint32_t As;
- uint32_t Vc;
- uint32_t Gs;
- uint32_t Gd;
- uint32_t Gv;
- uint32_t Ve;
- uint32_t gs_width;
+ VkPipelineCreationFeedbackEXT feedback;
};
/* We are using the descriptor pool entry for two things:
@@ -1590,9 +1632,48 @@ struct v3dv_sampler {
* configuration. If needed it will be copied to the descriptor info during
* UpdateDescriptorSets
*/
- uint8_t sampler_state[cl_packet_length(SAMPLER_STATE)];
+ uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
+};
+
+struct v3dv_descriptor_template_entry {
+ /* The type of descriptor in this entry */
+ VkDescriptorType type;
+
+ /* Binding in the descriptor set */
+ uint32_t binding;
+
+ /* Offset at which to write into the descriptor set binding */
+ uint32_t array_element;
+
+ /* Number of elements to write into the descriptor set binding */
+ uint32_t array_count;
+
+ /* Offset into the user provided data */
+ size_t offset;
+
+ /* Stride between elements into the user provided data */
+ size_t stride;
};
+struct v3dv_descriptor_update_template {
+ struct vk_object_base base;
+
+ VkPipelineBindPoint bind_point;
+
+ /* The descriptor set this template corresponds to. This value is only
+ * valid if the template was created with the templateType
+ * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
+ */
+ uint8_t set;
+
+ /* Number of entries in this template */
+ uint32_t entry_count;
+
+ /* Entries of the template */
+ struct v3dv_descriptor_template_entry entries[0];
+};
+
+
 /* We keep two special values for the sampler idx that represent exactly when a
  * sampler is not needed/provided. The main use is that even if we don't have a
  * sampler, we still need to do the output unpacking (through
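
When a template is applied, the descriptor payload for array element i of an entry is located in the user data with simple pointer arithmetic (a sketch of vkUpdateDescriptorSetWithTemplate semantics; 'tmpl' and the loop are illustrative):

    for (uint32_t e = 0; e < tmpl->entry_count; e++) {
       const struct v3dv_descriptor_template_entry *entry = &tmpl->entries[e];
       for (uint32_t i = 0; i < entry->array_count; i++) {
          const void *src = (const uint8_t *) pData +
                            entry->offset + i * entry->stride;
          /* src points at a VkDescriptorImageInfo, VkDescriptorBufferInfo
           * or VkBufferView, depending on entry->type.
           */
       }
    }
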
@@ -1633,6 +1714,13 @@ v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
*sampler_index = sampler;
}
+struct v3dv_descriptor_maps {
+ struct v3dv_descriptor_map ubo_map;
+ struct v3dv_descriptor_map ssbo_map;
+ struct v3dv_descriptor_map sampler_map;
+ struct v3dv_descriptor_map texture_map;
+};
+
/* The structure represents data shared between different objects, like the
* pipeline and the pipeline cache, so we ref count it to know when it should
* be freed.
@@ -1642,11 +1730,7 @@ struct v3dv_pipeline_shared_data {
unsigned char sha1_key[20];
- struct v3dv_descriptor_map ubo_map;
- struct v3dv_descriptor_map ssbo_map;
- struct v3dv_descriptor_map sampler_map;
- struct v3dv_descriptor_map texture_map;
-
+ struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];
struct v3dv_bo *assembly_bo;
@@ -1662,14 +1746,20 @@ struct v3dv_pipeline {
struct v3dv_render_pass *pass;
struct v3dv_subpass *subpass;
- /* Note: We can't use just a MESA_SHADER_STAGES array as we need to track
- * too the coordinate shader
+   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
+    * to track binning shaders. Note that these will be freed once the
+    * pipeline has been compiled.
*/
struct v3dv_pipeline_stage *vs;
struct v3dv_pipeline_stage *vs_bin;
+ struct v3dv_pipeline_stage *gs;
+ struct v3dv_pipeline_stage *gs_bin;
struct v3dv_pipeline_stage *fs;
struct v3dv_pipeline_stage *cs;
+ /* Flags for whether optional pipeline stages are present, for convenience */
+ bool has_gs;
+
/* Spilling memory requirements */
struct {
struct v3dv_bo *bo;
@@ -1736,7 +1826,7 @@ struct v3dv_pipeline {
/* Per-RT bit mask with blend enables */
uint8_t enables;
/* Per-RT prepacked blend config packets */
- uint8_t cfg[V3D_MAX_DRAW_BUFFERS][cl_packet_length(BLEND_CFG)];
+ uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
/* Flag indicating whether the blend factors in use require
* color constants.
*/
@@ -1753,12 +1843,12 @@ struct v3dv_pipeline {
/* Packets prepacked during pipeline creation
*/
- uint8_t cfg_bits[cl_packet_length(CFG_BITS)];
- uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)];
- uint8_t vcm_cache_size[cl_packet_length(VCM_CACHE_SIZE)];
- uint8_t vertex_attrs[cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD) *
+ uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
+ uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
+ uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
+ uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
MAX_VERTEX_ATTRIBS];
- uint8_t stencil_cfg[2][cl_packet_length(STENCIL_CFG)];
+ uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};
static inline VkPipelineBindPoint
@@ -1782,82 +1872,9 @@ v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
-static inline uint32_t
-v3dv_zs_buffer_from_aspect_bits(VkImageAspectFlags aspects)
-{
- const VkImageAspectFlags zs_aspects =
- VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
- const VkImageAspectFlags filtered_aspects = aspects & zs_aspects;
-
- if (filtered_aspects == zs_aspects)
- return ZSTENCIL;
- else if (filtered_aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- return Z;
- else if (filtered_aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
- return STENCIL;
- else
- return NONE;
-}
-
-static inline uint32_t
-v3dv_zs_buffer_from_vk_format(VkFormat format)
-{
- switch (format) {
- case VK_FORMAT_D16_UNORM_S8_UINT:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return ZSTENCIL;
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- return Z;
- case VK_FORMAT_S8_UINT:
- return STENCIL;
- default:
- return NONE;
- }
-}
-
-static inline uint32_t
-v3dv_zs_buffer(bool depth, bool stencil)
-{
- if (depth && stencil)
- return ZSTENCIL;
- else if (depth)
- return Z;
- else if (stencil)
- return STENCIL;
- return NONE;
-}
-
-static inline uint8_t
-v3dv_get_internal_depth_type(VkFormat format)
-{
- switch (format) {
- case VK_FORMAT_D16_UNORM:
- return V3D_INTERNAL_TYPE_DEPTH_16;
- case VK_FORMAT_D32_SFLOAT:
- return V3D_INTERNAL_TYPE_DEPTH_32F;
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- return V3D_INTERNAL_TYPE_DEPTH_24;
- default:
- unreachable("Invalid depth format");
- break;
- }
-}
-
-uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
-VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error,
- const char *file, int line,
- const char *format, ...);
-
-#define vk_error(instance, error) __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
-#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__);
-
#ifdef DEBUG
#define v3dv_debug_ignored_stype(sType) \
fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
@@ -1865,33 +1882,14 @@ VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error,
#define v3dv_debug_ignored_stype(sType)
#endif
-const struct v3dv_format *v3dv_get_format(VkFormat);
-const uint8_t *v3dv_get_format_swizzle(VkFormat f);
-void v3dv_get_internal_type_bpp_for_output_format(uint32_t format, uint32_t *type, uint32_t *bpp);
+const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
-bool v3dv_tfu_supports_tex_format(const struct v3d_device_info *devinfo,
- uint32_t tex_format);
const struct v3dv_format *
-v3dv_get_compatible_tfu_format(const struct v3d_device_info *devinfo,
+v3dv_get_compatible_tfu_format(struct v3dv_device *device,
uint32_t bpp, VkFormat *out_vk_format);
-bool v3dv_buffer_format_supports_features(VkFormat vk_format,
+bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
+ VkFormat vk_format,
VkFormatFeatureFlags features);
-bool v3dv_format_supports_tlb_resolve(const struct v3dv_format *format);
-
-uint32_t v3d_utile_width(int cpp);
-uint32_t v3d_utile_height(int cpp);
-
-void v3d_load_tiled_image(void *dst, uint32_t dst_stride,
- void *src, uint32_t src_stride,
- enum v3d_tiling_mode tiling_format,
- int cpp, uint32_t image_h,
- const struct pipe_box *box);
-
-void v3d_store_tiled_image(void *dst, uint32_t dst_stride,
- void *src, uint32_t src_stride,
- enum v3d_tiling_mode tiling_format,
- int cpp, uint32_t image_h,
- const struct pipe_box *box);
struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline,
@@ -1912,7 +1910,7 @@ v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
- broadcom_shader_stage stage,
+ enum broadcom_shader_stage stage,
struct v3d_prog_data *prog_data,
uint32_t prog_data_size,
uint32_t assembly_offset,
@@ -1958,13 +1956,15 @@ v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
uint32_t index);
struct v3dv_cl_reloc
-v3dv_descriptor_map_get_sampler_state(struct v3dv_descriptor_state *descriptor_state,
+v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
+ struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index);
struct v3dv_cl_reloc
-v3dv_descriptor_map_get_texture_shader_state(struct v3dv_descriptor_state *descriptor_state,
+v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
+ struct v3dv_descriptor_state *descriptor_state,
struct v3dv_descriptor_map *map,
struct v3dv_pipeline_layout *pipeline_layout,
uint32_t index);
@@ -1992,6 +1992,7 @@ v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
struct v3dv_device *device,
+ VkPipelineCacheCreateFlags,
bool cache_enabled);
void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
@@ -2008,7 +2009,8 @@ nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
- unsigned char sha1_key[20]);
+ unsigned char sha1_key[20],
+ bool *cache_hit);
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
@@ -2022,73 +2024,58 @@ void v3dv_shader_module_internal_init(struct v3dv_device *device,
struct vk_shader_module *module,
nir_shader *nir);
-#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \
- \
- static inline struct __v3dv_type * \
- __v3dv_type ## _from_handle(__VkType _handle) \
- { \
- return (struct __v3dv_type *) _handle; \
- } \
- \
- static inline __VkType \
- __v3dv_type ## _to_handle(struct __v3dv_type *_obj) \
- { \
- return (__VkType) _obj; \
- }
-
-#define V3DV_DEFINE_NONDISP_HANDLE_CASTS(__v3dv_type, __VkType) \
- \
- static inline struct __v3dv_type * \
- __v3dv_type ## _from_handle(__VkType _handle) \
- { \
- return (struct __v3dv_type *)(uintptr_t) _handle; \
- } \
- \
- static inline __VkType \
- __v3dv_type ## _to_handle(struct __v3dv_type *_obj) \
- { \
- return (__VkType)(uintptr_t) _obj; \
- }
-
#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
- struct __v3dv_type *__name = __v3dv_type ## _from_handle(__handle)
-
-V3DV_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, VkCommandBuffer)
-V3DV_DEFINE_HANDLE_CASTS(v3dv_device, VkDevice)
-V3DV_DEFINE_HANDLE_CASTS(v3dv_instance, VkInstance)
-V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice)
-V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue)
-
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, VkBufferView)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, VkEvent)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, VkPipeline)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, VkPipelineCache)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, VkQueryPool)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler)
-V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore)
-
-/* This is defined as a macro so that it works for both
- * VkImageSubresourceRange and VkImageSubresourceLayers
- */
-#define v3dv_layer_count(_image, _range) \
- ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
- (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
-
-#define v3dv_level_count(_image, _range) \
- ((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \
- (_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)
+ VK_FROM_HANDLE(__v3dv_type, __name, __handle)
+
+VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
+ VK_OBJECT_TYPE_COMMAND_BUFFER)
+VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
+VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
+ VK_OBJECT_TYPE_INSTANCE)
+VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
+ VK_OBJECT_TYPE_PHYSICAL_DEVICE)
+VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, base, VkCommandPool,
+ VK_OBJECT_TYPE_COMMAND_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
+ VK_OBJECT_TYPE_BUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
+ VK_OBJECT_TYPE_BUFFER_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
+ VK_OBJECT_TYPE_DEVICE_MEMORY)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
+ VK_OBJECT_TYPE_DESCRIPTOR_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
+ VkDescriptorSetLayout,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
+ VkDescriptorUpdateTemplate,
+ VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
+ VK_OBJECT_TYPE_FRAMEBUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
+ VK_OBJECT_TYPE_IMAGE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
+ VK_OBJECT_TYPE_IMAGE_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
+ VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
+ VK_OBJECT_TYPE_PIPELINE_CACHE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
+ VK_OBJECT_TYPE_PIPELINE_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
+ VK_OBJECT_TYPE_QUERY_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
+ VK_OBJECT_TYPE_RENDER_PASS)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
+ VK_OBJECT_TYPE_SAMPLER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
+ VK_OBJECT_TYPE_SEMAPHORE)
static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
@@ -2136,4 +2123,31 @@ u64_compare(const void *key1, const void *key2)
return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
+/* Helper to call hw-version-specific functions */
+#define v3dv_X(device, thing) ({ \
+ __typeof(&v3d42_##thing) v3d_X_thing; \
+ switch (device->devinfo.ver) { \
+ case 42: \
+ v3d_X_thing = &v3d42_##thing; \
+ break; \
+ default: \
+ unreachable("Unsupported hardware generation"); \
+ } \
+ v3d_X_thing; \
+})
+
+
+/* v3d_macros from common requires v3dX and V3DX definitions. Below we define
+ * v3dX for each supported version, because when we compile code that is not
+ * version-specific, all version-specific macros need to already be defined.
+ */
+#ifdef v3dX
+# include "v3dvx_private.h"
+#else
+# define v3dX(x) v3d42_##x
+# include "v3dvx_private.h"
+# undef v3dX
+#endif
+
#endif /* V3DV_PRIVATE_H */
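
A call through the v3dv_X() macro defined above resolves the per-version function at runtime from devinfo.ver; for example (the callee name is illustrative):

    /* Expands to &v3d42_job_emit_binning_flush when devinfo.ver == 42. */
    v3dv_X(job->device, job_emit_binning_flush)(job);
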
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_query.c b/lib/mesa/src/broadcom/vulkan/v3dv_query.c
index d3100498c..5e4b92fb1 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_query.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_query.c
@@ -23,7 +23,7 @@
#include "v3dv_private.h"
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateQueryPool(VkDevice _device,
const VkQueryPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -35,14 +35,11 @@ v3dv_CreateQueryPool(VkDevice _device,
pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
assert(pCreateInfo->queryCount > 0);
- /* FIXME: the hw allows us to allocate up to 16 queries in a single block
- * for occlussion queries so we should try to use that.
- */
struct v3dv_query_pool *pool =
vk_object_zalloc(&device->vk, pAllocator, sizeof(*pool),
VK_OBJECT_TYPE_QUERY_POOL);
if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
pool->query_type = pCreateInfo->queryType;
pool->query_count = pCreateInfo->queryCount;
@@ -53,26 +50,39 @@ v3dv_CreateQueryPool(VkDevice _device,
pool->queries = vk_alloc2(&device->vk.alloc, pAllocator, pool_bytes, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pool->queries == NULL) {
- result = vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- goto fail_alloc_bo_list;
+ result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto fail;
+ }
+
+ if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
+   /* The hardware allows us to set up groups of 16 queries at consecutive
+    * 4-byte addresses, requiring only that each group of 16 queries is
+    * aligned to a 1024-byte boundary.
+    */
+ const uint32_t query_groups = DIV_ROUND_UP(pool->query_count, 16);
+ const uint32_t bo_size = query_groups * 1024;
+ pool->bo = v3dv_bo_alloc(device, bo_size, "query", true);
+ if (!pool->bo) {
+ result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ goto fail;
+ }
+ if (!v3dv_bo_map(device, pool->bo, bo_size)) {
+ result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ goto fail;
+ }
}
uint32_t i;
for (i = 0; i < pool->query_count; i++) {
pool->queries[i].maybe_available = false;
switch (pool->query_type) {
- case VK_QUERY_TYPE_OCCLUSION:
- pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
- if (!pool->queries[i].bo) {
- result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- goto fail_alloc_bo;
- }
- /* For occlusion queries we only need a 4-byte counter */
- if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
- result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- goto fail_alloc_bo;
- }
+ case VK_QUERY_TYPE_OCCLUSION: {
+ const uint32_t query_group = i / 16;
+ const uint32_t query_offset = query_group * 1024 + (i % 16) * 4;
+ pool->queries[i].bo = pool->bo;
+ pool->queries[i].offset = query_offset;
break;
+ }
case VK_QUERY_TYPE_TIMESTAMP:
pool->queries[i].value = 0;
break;
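
With this layout, query i lives at byte offset (i / 16) * 1024 + (i % 16) * 4 within the pool BO; a worked check of the arithmetic:

    /* query  0 -> group 0, offset    0
     * query 15 -> group 0, offset   60  (last counter in the group)
     * query 16 -> group 1, offset 1024  (next 1024-byte-aligned group)
     * query 17 -> group 1, offset 1028
     */
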
@@ -85,18 +95,17 @@ v3dv_CreateQueryPool(VkDevice _device,
return VK_SUCCESS;
-fail_alloc_bo:
- for (uint32_t j = 0; j < i; j++)
- v3dv_bo_free(device, pool->queries[j].bo);
- vk_free2(&device->vk.alloc, pAllocator, pool->queries);
-
-fail_alloc_bo_list:
+fail:
+ if (pool->bo)
+ v3dv_bo_free(device, pool->bo);
+ if (pool->queries)
+ vk_free2(&device->vk.alloc, pAllocator, pool->queries);
vk_object_free(&device->vk, pAllocator, pool);
return result;
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyQueryPool(VkDevice _device,
VkQueryPool queryPool,
const VkAllocationCallbacks *pAllocator)
@@ -107,12 +116,12 @@ v3dv_DestroyQueryPool(VkDevice _device,
if (!pool)
return;
- if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
- for (uint32_t i = 0; i < pool->query_count; i++)
- v3dv_bo_free(device, pool->queries[i].bo);
- }
+ if (pool->bo)
+ v3dv_bo_free(device, pool->bo);
+
+ if (pool->queries)
+ vk_free2(&device->vk.alloc, pAllocator, pool->queries);
- vk_free2(&device->vk.alloc, pAllocator, pool->queries);
vk_object_free(&device->vk, pAllocator, pool);
}
@@ -128,12 +137,13 @@ write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
}
}
-static uint64_t
+static VkResult
get_occlusion_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
- bool *available)
+ bool *available,
+ uint64_t *value)
{
assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
@@ -149,25 +159,28 @@ get_occlusion_query_result(struct v3dv_device *device,
* error may occur."
*/
if (!q->maybe_available)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
if (!v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull))
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
*available = true;
} else {
*available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
}
- return (uint64_t) *((uint32_t *) q->bo->map);
+ const uint8_t *query_addr = ((uint8_t *) q->bo->map) + q->offset;
+ *value = (uint64_t) *((uint32_t *)query_addr);
+ return VK_SUCCESS;
}
-static uint64_t
+static VkResult
get_timestamp_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
- bool *available)
+ bool *available,
+ uint64_t *value)
{
assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
@@ -182,28 +195,32 @@ get_timestamp_query_result(struct v3dv_device *device,
* error may occur."
*/
if (!q->maybe_available)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
*available = true;
} else {
*available = q->maybe_available;
}
- return q->value;
+ *value = q->value;
+ return VK_SUCCESS;
}
-static uint64_t
+static VkResult
get_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
- bool *available)
+ bool *available,
+ uint64_t *value)
{
switch (pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION:
- return get_occlusion_query_result(device, pool, query, do_wait, available);
+ return get_occlusion_query_result(device, pool, query, do_wait,
+ available, value);
case VK_QUERY_TYPE_TIMESTAMP:
- return get_timestamp_query_result(device, pool, query, do_wait, available);
+ return get_timestamp_query_result(device, pool, query, do_wait,
+ available, value);
default:
unreachable("Unsupported query type");
}
@@ -229,7 +246,11 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
VkResult result = VK_SUCCESS;
for (uint32_t i = first; i < first + count; i++) {
bool available = false;
- uint64_t value = get_query_result(device, pool, i, do_wait, &available);
+ uint64_t value = 0;
+ VkResult query_result =
+ get_query_result(device, pool, i, do_wait, &available, &value);
+ if (query_result == VK_ERROR_DEVICE_LOST)
+ result = VK_ERROR_DEVICE_LOST;
/**
* From the Vulkan 1.0 spec:
@@ -251,7 +272,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
write_query_result(data, slot++, do_64bit, available ? 1u : 0u);
- if (!write_result)
+ if (!write_result && result != VK_ERROR_DEVICE_LOST)
result = VK_NOT_READY;
data += stride;
@@ -260,7 +281,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
return result;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetQueryPoolResults(VkDevice _device,
VkQueryPool queryPool,
uint32_t firstQuery,
@@ -277,7 +298,7 @@ v3dv_GetQueryPoolResults(VkDevice _device,
pData, stride, flags);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResetQueryPool(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
@@ -289,7 +310,7 @@ v3dv_CmdResetQueryPool(VkCommandBuffer commandBuffer,
v3dv_cmd_buffer_reset_queries(cmd_buffer, pool, firstQuery, queryCount);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
@@ -308,7 +329,7 @@ v3dv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
dst, dstOffset, stride, flags);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBeginQuery(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query,
@@ -320,7 +341,7 @@ v3dv_CmdBeginQuery(VkCommandBuffer commandBuffer,
v3dv_cmd_buffer_begin_query(cmd_buffer, pool, query, flags);
}
-void
+VKAPI_ATTR void VKAPI_CALL
v3dv_CmdEndQuery(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query)
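
A sketch of how an application reads these results without waiting, using the availability bit the code above handles (QUERY_COUNT is illustrative; error handling elided):

    /* Two uint32_t slots per query: the result, then its availability. */
    uint32_t results[2 * QUERY_COUNT];
    VkResult res =
       vkGetQueryPoolResults(device, pool, 0, QUERY_COUNT,
                             sizeof(results), results,
                             2 * sizeof(uint32_t),
                             VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
    /* res is VK_NOT_READY if any requested result was unavailable;
     * results[2*i+1] is nonzero exactly when results[2*i] is valid.
     */
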
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_queue.c b/lib/mesa/src/broadcom/vulkan/v3dv_queue.c
index 6ea6d1acf..1209031d5 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_queue.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_queue.c
@@ -34,19 +34,28 @@ v3dv_clif_dump(struct v3dv_device *device,
struct v3dv_job *job,
struct drm_v3d_submit_cl *submit)
{
- if (!(V3D_DEBUG & (V3D_DEBUG_CL | V3D_DEBUG_CLIF)))
+ if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
+ V3D_DEBUG_CL_NO_BIN |
+ V3D_DEBUG_CLIF))))
return;
struct clif_dump *clif = clif_dump_init(&device->devinfo,
stderr,
- V3D_DEBUG & V3D_DEBUG_CL);
+ V3D_DEBUG & (V3D_DEBUG_CL |
+ V3D_DEBUG_CL_NO_BIN),
+ V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);
set_foreach(job->bos, entry) {
struct v3dv_bo *bo = (void *)entry->key;
char *name = ralloc_asprintf(NULL, "%s_0x%x",
bo->name, bo->offset);
- v3dv_bo_map(device, bo, bo->size);
+ bool ok = v3dv_bo_map(device, bo, bo->size);
+ if (!ok) {
+ fprintf(stderr, "failed to map BO for clif_dump.\n");
+ ralloc_free(name);
+ goto free_clif;
+ }
clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
ralloc_free(name);
@@ -54,6 +63,7 @@ v3dv_clif_dump(struct v3dv_device *device,
clif_dump(clif, submit);
+ free_clif:
clif_dump_destroy(clif);
}
@@ -136,7 +146,7 @@ gpu_queue_wait_idle(struct v3dv_queue *queue)
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueWaitIdle(VkQueue _queue)
{
V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
@@ -163,19 +173,22 @@ handle_reset_query_cpu_job(struct v3dv_job *job)
    * FIXME: we could avoid blocking the main thread for this if we use a
    * submission thread.
*/
+ if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION)
+ v3dv_bo_wait(job->device, info->pool->bo, PIPE_TIMEOUT_INFINITE);
+
for (uint32_t i = info->first; i < info->first + info->count; i++) {
assert(i < info->pool->query_count);
- struct v3dv_query *query = &info->pool->queries[i];
- query->maybe_available = false;
+ struct v3dv_query *q = &info->pool->queries[i];
+ q->maybe_available = false;
switch (info->pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION: {
- v3dv_bo_wait(job->device, query->bo, PIPE_TIMEOUT_INFINITE);
- uint32_t *counter = (uint32_t *) query->bo->map;
+ const uint8_t *q_addr = ((uint8_t *) q->bo->map) + q->offset;
+ uint32_t *counter = (uint32_t *) q_addr;
*counter = 0;
break;
}
case VK_QUERY_TYPE_TIMESTAMP:
- query->value = 0;
+ q->value = 0;
break;
default:
unreachable("Unsupported query type");
@@ -189,9 +202,11 @@ static VkResult
handle_end_query_cpu_job(struct v3dv_job *job)
{
struct v3dv_end_query_cpu_job_info *info = &job->cpu.query_end;
- assert(info->query < info->pool->query_count);
- struct v3dv_query *query = &info->pool->queries[info->query];
- query->maybe_available = true;
+ for (uint32_t i = 0; i < info->count; i++) {
+ assert(info->query + i < info->pool->query_count);
+ struct v3dv_query *query = &info->pool->queries[info->query + i];
+ query->maybe_available = true;
+ }
return VK_SUCCESS;
}
@@ -208,17 +223,19 @@ handle_copy_query_results_cpu_job(struct v3dv_job *job)
/* Map the entire dst buffer for the CPU copy if needed */
assert(!bo->map || bo->map_size == bo->size);
if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
- return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
/* FIXME: if flags includes VK_QUERY_RESULT_WAIT_BIT this could trigger a
* sync wait on the CPU for the corresponding GPU jobs to finish. We might
* want to use a submission thread to avoid blocking on the main thread.
*/
+ uint8_t *offset = ((uint8_t *) bo->map) +
+ info->offset + info->dst->mem_offset;
v3dv_get_query_pool_results_cpu(job->device,
info->pool,
info->first,
info->count,
- bo->map + info->dst->mem_offset,
+ offset,
info->stride,
info->flags);
@@ -343,7 +360,7 @@ spawn_event_wait_thread(struct v3dv_job *job, pthread_t *wait_thread)
assert(wait_thread != NULL);
if (pthread_create(wait_thread, NULL, event_wait_thread_func, job))
- return vk_error(job->device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(job->device, VK_ERROR_DEVICE_LOST);
return VK_NOT_READY;
}
@@ -396,13 +413,13 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
struct v3dv_bo *dst_bo = info->image->mem->bo;
assert(!dst_bo->map || dst_bo->map_size == dst_bo->size);
if (!dst_bo->map && !v3dv_bo_map(job->device, dst_bo, dst_bo->size))
- return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
void *dst_ptr = dst_bo->map;
struct v3dv_bo *src_bo = info->buffer->mem->bo;
assert(!src_bo->map || src_bo->map_size == src_bo->size);
if (!src_bo->map && !v3dv_bo_map(job->device, src_bo, src_bo->size))
- return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
void *src_ptr = src_bo->map;
const struct v3d_resource_slice *slice =
@@ -441,10 +458,14 @@ handle_timestamp_query_cpu_job(struct v3dv_job *job)
/* Compute timestamp */
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
- assert(info->query < info->pool->query_count);
- struct v3dv_query *query = &info->pool->queries[info->query];
- query->maybe_available = true;
- query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
+
+ for (uint32_t i = 0; i < info->count; i++) {
+ assert(info->query + i < info->pool->query_count);
+ struct v3dv_query *query = &info->pool->queries[info->query + i];
+ query->maybe_available = true;
+ if (i == 0)
+ query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
+ }
return VK_SUCCESS;
}
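
The timestamp handler above converts a CLOCK_MONOTONIC timespec to nanoseconds and, for multiview, writes the value only to the first query in the range while marking all of them available. A self-contained sketch of the same timestamp computation:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);

   /* Same arithmetic as the handler: seconds scaled to ns plus the
    * sub-second remainder; the 'ull' suffix forces 64-bit math. */
   uint64_t ns = t.tv_sec * 1000000000ull + t.tv_nsec;
   printf("%llu\n", (unsigned long long)ns);
   return 0;
}
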
@@ -471,7 +492,7 @@ handle_csd_indirect_cpu_job(struct v3dv_queue *queue,
assert(info->buffer && info->buffer->mem && info->buffer->mem->bo);
struct v3dv_bo *bo = info->buffer->mem->bo;
if (!bo->map && !v3dv_bo_map(job->device, bo, bo->size))
- return vk_error(job->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(job->device, VK_ERROR_OUT_OF_HOST_MEMORY);
assert(bo->map);
const uint32_t offset = info->buffer->mem_offset + info->offset;
@@ -503,23 +524,28 @@ process_semaphores_to_signal(struct v3dv_device *device,
drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
mtx_unlock(&device->mutex);
if (fd == -1)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < count; i++) {
struct v3dv_semaphore *sem = v3dv_semaphore_from_handle(sems[i]);
- if (sem->fd >= 0)
- close(sem->fd);
- sem->fd = -1;
-
- int ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ int ret;
+ if (!sem->temp_sync)
+ ret = drmSyncobjImportSyncFile(render_fd, sem->sync, fd);
+ else
+ ret = drmSyncobjImportSyncFile(render_fd, sem->temp_sync, fd);
- sem->fd = fd;
+ if (ret) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ break;
+ }
}
- return VK_SUCCESS;
+ assert(fd >= 0);
+ close(fd);
+
+ return result;
}
static VkResult
@@ -530,10 +556,6 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
struct v3dv_fence *fence = v3dv_fence_from_handle(_fence);
- if (fence->fd >= 0)
- close(fence->fd);
- fence->fd = -1;
-
int render_fd = device->pdevice->render_fd;
int fd;
@@ -541,15 +563,18 @@ process_fence_to_signal(struct v3dv_device *device, VkFence _fence)
drmSyncobjExportSyncFile(render_fd, device->last_job_sync, &fd);
mtx_unlock(&device->mutex);
if (fd == -1)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- int ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ int ret;
+ if (!fence->temp_sync)
+ ret = drmSyncobjImportSyncFile(render_fd, fence->sync, fd);
+ else
+ ret = drmSyncobjImportSyncFile(render_fd, fence->temp_sync, fd);
- fence->fd = fd;
+ assert(fd >= 0);
+ close(fd);
- return VK_SUCCESS;
+ return ret ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS;
}
static VkResult
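
The semaphore and fence signal paths above follow the same libdrm pattern: export the fence from the device-wide last_job_sync syncobj as a sync file, import that sync file into the syncobj backing each object (preferring the temporary payload when present), then close the fd. A hedged sketch of that pattern, assuming an open DRM render fd and valid syncobj handles:

#include <unistd.h>
#include <xf86drm.h>

/* Copy the current fence from 'src' into 'dst' via a sync file.
 * Returns 0 on success; a sketch of the pattern used above. */
static int propagate_fence(int render_fd, uint32_t src, uint32_t dst)
{
   int fd = -1;
   if (drmSyncobjExportSyncFile(render_fd, src, &fd) || fd == -1)
      return -1;

   int ret = drmSyncobjImportSyncFile(render_fd, dst, fd);
   close(fd);   /* the syncobj holds its own reference now */
   return ret;
}
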
@@ -559,7 +584,7 @@ handle_cl_job(struct v3dv_queue *queue,
{
struct v3dv_device *device = queue->device;
- struct drm_v3d_submit_cl submit;
+ struct drm_v3d_submit_cl submit = { 0 };
/* Sanity check: we should only flag a bcl sync on a job that needs to be
* serialized.
@@ -636,7 +661,7 @@ handle_cl_job(struct v3dv_queue *queue,
free(bo_handles);
if (ret)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
return VK_SUCCESS;
}
@@ -659,7 +684,7 @@ handle_tfu_job(struct v3dv_queue *queue,
if (ret != 0) {
fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
}
return VK_SUCCESS;
@@ -704,7 +729,7 @@ handle_csd_job(struct v3dv_queue *queue,
free(bo_handles);
if (ret)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
return VK_SUCCESS;
}
@@ -745,91 +770,6 @@ queue_submit_job(struct v3dv_queue *queue,
}
}
-static void
-emit_noop_bin(struct v3dv_job *job)
-{
- v3dv_job_start_frame(job, 1, 1, 1, 1, V3D_INTERNAL_BPP_32, false);
- v3dv_job_emit_binning_flush(job);
-}
-
-static void
-emit_noop_render(struct v3dv_job *job)
-{
- struct v3dv_cl *rcl = &job->rcl;
- v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 *
- cl_packet_length(SUPERTILE_COORDINATES));
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
- config.early_z_disable = true;
- config.image_width_pixels = 1;
- config.image_height_pixels = 1;
- config.number_of_render_targets = 1;
- config.multisample_mode_4x = false;
- config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
- rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
- rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
- rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
- }
-
- cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
- clear.z_clear_value = 1.0f;
- clear.stencil_clear_value = 0;
- };
-
- cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
- init.use_auto_chained_tile_lists = true;
- init.size_of_first_block_in_chained_tile_lists =
- TILE_ALLOCATION_BLOCK_SIZE_64B;
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
- list.address = v3dv_cl_address(job->tile_alloc, 0);
- }
-
- cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
- config.number_of_bin_tile_lists = 1;
- config.total_frame_width_in_tiles = 1;
- config.total_frame_height_in_tiles = 1;
- config.supertile_width_in_tiles = 1;
- config.supertile_height_in_tiles = 1;
- config.total_frame_width_in_supertiles = 1;
- config.total_frame_height_in_supertiles = 1;
- }
-
- struct v3dv_cl *icl = &job->indirect;
- v3dv_cl_ensure_space(icl, 200, 1);
- struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl);
-
- cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords);
-
- cl_emit(icl, END_OF_LOADS, end);
-
- cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
-
- cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) {
- store.buffer_to_store = NONE;
- }
-
- cl_emit(icl, END_OF_TILE_MARKER, end);
-
- cl_emit(icl, RETURN_FROM_SUB_LIST, ret);
-
- cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
- branch.start = tile_list_start;
- branch.end = v3dv_cl_get_address(icl);
- }
-
- cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
- coords.column_number_in_supertiles = 0;
- coords.row_number_in_supertiles = 0;
- }
-
- cl_emit(rcl, END_OF_RENDERING, end);
-}
-
static VkResult
queue_create_noop_job(struct v3dv_queue *queue)
{
@@ -837,11 +777,10 @@ queue_create_noop_job(struct v3dv_queue *queue)
queue->noop_job = vk_zalloc(&device->vk.alloc, sizeof(struct v3dv_job), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!queue->noop_job)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
v3dv_job_init(queue->noop_job, V3DV_JOB_TYPE_GPU_CL, device, NULL, -1);
- emit_noop_bin(queue->noop_job);
- emit_noop_render(queue->noop_job);
+ v3dv_X(device, job_emit_noop)(queue->noop_job);
return VK_SUCCESS;
}
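
With this change the queue code no longer open-codes the noop binning and render lists: v3dv_X(device, job_emit_noop) dispatches to code compiled once per V3D hardware version. A toy sketch of that macro-dispatch idiom (version numbers and names are illustrative, not Mesa's actual v3dv_X definition):

#include <stdio.h>

static void v3d41_job_emit_noop(void) { puts("v4.1 noop"); }
static void v3d42_job_emit_noop(void) { puts("v4.2 noop"); }

/* The same source is compiled once per V3D_VERSION, prefixing every
 * function; a dispatch macro then selects the symbol at runtime from
 * the device's reported hardware version. */
#define v3dX_call(ver, func) \
   ((ver) >= 42 ? v3d42_##func() : v3d41_##func())

int main(void)
{
   int ver = 42;   /* stand-in for device->devinfo.ver */
   v3dX_call(ver, job_emit_noop);
   return 0;
}
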
@@ -1060,7 +999,7 @@ spawn_master_wait_thread(struct v3dv_queue *queue,
mtx_lock(&queue->mutex);
if (pthread_create(&wait_info->master_wait_thread, NULL,
master_wait_thread_func, wait_info)) {
- result = vk_error(queue->device->instance, VK_ERROR_DEVICE_LOST);
+ result = vk_error(queue, VK_ERROR_DEVICE_LOST);
goto done;
}
@@ -1071,7 +1010,7 @@ done:
return result;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueSubmit(VkQueue _queue,
uint32_t submitCount,
const VkSubmitInfo* pSubmits,
@@ -1106,7 +1045,15 @@ done:
return result;
}
-VkResult
+static void
+destroy_syncobj(uint32_t device_fd, uint32_t *sync)
+{
+ assert(sync);
+ drmSyncobjDestroy(device_fd, *sync);
+ *sync = 0;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateSemaphore(VkDevice _device,
const VkSemaphoreCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -1120,14 +1067,12 @@ v3dv_CreateSemaphore(VkDevice _device,
vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_semaphore),
VK_OBJECT_TYPE_SEMAPHORE);
if (sem == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- sem->fd = -1;
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
int ret = drmSyncobjCreate(device->pdevice->render_fd, 0, &sem->sync);
if (ret) {
vk_object_free(&device->vk, pAllocator, sem);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
*pSemaphore = v3dv_semaphore_to_handle(sem);
@@ -1135,7 +1080,158 @@ v3dv_CreateSemaphore(VkDevice _device,
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetPhysicalDeviceExternalSemaphoreProperties(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
+ VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
+{
+ switch (pExternalSemaphoreInfo->handleType) {
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+
+ /* FIXME: we can't import external semaphores until we improve the kernel
+ * submit interface to handle multiple in syncobjs, because once we have
+ * an imported semaphore in our list of semaphores to wait on, we can no
+ * longer use the workaround of waiting on the last syncobj fence produced
+ * from the device, since the imported semaphore may not (and in fact, it
+    * would typically not) have been produced from the same device.
+ *
+ * This behavior is exercised via dEQP-VK.synchronization.cross_instance.*.
+ * Particularly, this test:
+ * dEQP-VK.synchronization.cross_instance.dedicated.
+ * write_ssbo_compute_read_vertex_input.buffer_16384_binary_semaphore_fd
+ * fails consistently because of this, so it'll be a good reference to
+ * verify the implementation when the kernel bits are in place.
+ */
+ pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+
+ /* FIXME: See comment in GetPhysicalDeviceExternalFenceProperties
+ * for details on why we can't export to SYNC_FD.
+ */
+ if (pExternalSemaphoreInfo->handleType !=
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalSemaphoreProperties->externalSemaphoreFeatures |=
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT;
+ }
+ break;
+ default:
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+ pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+ break;
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_ImportSemaphoreFdKHR(
+ VkDevice _device,
+ const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
+
+ assert(pImportSemaphoreFdInfo->sType ==
+ VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR);
+
+ int fd = pImportSemaphoreFdInfo->fd;
+ int render_fd = device->pdevice->render_fd;
+
+ bool is_temporary =
+ pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT ||
+ (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT);
+
+ uint32_t new_sync;
+ switch (pImportSemaphoreFdInfo->handleType) {
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT: {
+ /* "If handleType is VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, the
+ * special value -1 for fd is treated like a valid sync file descriptor
+ * referring to an object that has already signaled. The import
+ * operation will succeed and the VkSemaphore will have a temporarily
+ * imported payload as if a valid file descriptor had been provided."
+ */
+ unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
+ if (drmSyncobjCreate(render_fd, flags, &new_sync))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ if (fd != -1) {
+ if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
+ drmSyncobjDestroy(render_fd, new_sync);
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+ }
+ break;
+ }
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT: {
+ if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ break;
+ }
+ default:
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+
+ destroy_syncobj(render_fd, &sem->temp_sync);
+ if (is_temporary) {
+ sem->temp_sync = new_sync;
+ } else {
+ destroy_syncobj(render_fd, &sem->sync);
+ sem->sync = new_sync;
+ }
+
+ /* From the Vulkan 1.0.53 spec:
+ *
+ * "Importing a semaphore payload from a file descriptor transfers
+ * ownership of the file descriptor from the application to the
+ * Vulkan implementation. The application must not perform any
+ * operations on the file descriptor after a successful import."
+ *
+ * If the import fails, we leave the file descriptor open.
+ */
+ if (fd != -1)
+ close(fd);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_GetSemaphoreFdKHR(VkDevice _device,
+ const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
+ int *pFd)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_semaphore, sem, pGetFdInfo->semaphore);
+
+ assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR);
+
+ *pFd = -1;
+ int render_fd = device->pdevice->render_fd;
+ switch (pGetFdInfo->handleType) {
+   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+      drmSyncobjExportSyncFile(render_fd, sem->sync, pFd);
+      if (*pFd == -1)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      break;
+   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+      drmSyncobjHandleToFD(render_fd, sem->sync, pFd);
+      if (*pFd == -1)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      break;
+ default:
+ unreachable("Unsupported external semaphore handle type");
+ }
+
+ return VK_SUCCESS;
+}
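
Since this driver advertises EXPORTABLE (but not IMPORTABLE) for OPAQUE_FD semaphores, an application would drive the entry point above roughly as follows. This is a hedged sketch using standard Vulkan API names only; in real code the function pointer is resolved with vkGetDeviceProcAddr after enabling VK_KHR_external_semaphore_fd:

#include <vulkan/vulkan.h>

static int export_semaphore_fd(VkDevice device, VkSemaphore sem,
                               PFN_vkGetSemaphoreFdKHR get_fd)
{
   const VkSemaphoreGetFdInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
      .semaphore = sem,
      .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
   };
   int fd = -1;
   if (get_fd(device, &info, &fd) != VK_SUCCESS)
      return -1;
   return fd;   /* caller owns the fd and must close() it */
}
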
+
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroySemaphore(VkDevice _device,
VkSemaphore semaphore,
const VkAllocationCallbacks *pAllocator)
@@ -1146,15 +1242,13 @@ v3dv_DestroySemaphore(VkDevice _device,
if (sem == NULL)
return;
- drmSyncobjDestroy(device->pdevice->render_fd, sem->sync);
-
- if (sem->fd != -1)
- close(sem->fd);
+ destroy_syncobj(device->pdevice->render_fd, &sem->sync);
+ destroy_syncobj(device->pdevice->render_fd, &sem->temp_sync);
vk_object_free(&device->vk, pAllocator, sem);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateFence(VkDevice _device,
const VkFenceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
@@ -1168,7 +1262,7 @@ v3dv_CreateFence(VkDevice _device,
vk_object_zalloc(&device->vk, pAllocator, sizeof(struct v3dv_fence),
VK_OBJECT_TYPE_FENCE);
if (fence == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
unsigned flags = 0;
if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
@@ -1176,17 +1270,136 @@ v3dv_CreateFence(VkDevice _device,
int ret = drmSyncobjCreate(device->pdevice->render_fd, flags, &fence->sync);
if (ret) {
vk_object_free(&device->vk, pAllocator, fence);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
- fence->fd = -1;
-
*pFence = v3dv_fence_to_handle(fence);
return VK_SUCCESS;
}
-void
+VKAPI_ATTR void VKAPI_CALL
+v3dv_GetPhysicalDeviceExternalFenceProperties(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
+ VkExternalFenceProperties *pExternalFenceProperties)
+
+{
+ switch (pExternalFenceInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ pExternalFenceProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->compatibleHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->externalFenceFeatures =
+ VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
+
+ /* FIXME: SYNC_FD exports the actual fence referenced by the syncobj, not
+ * the syncobj itself, and that fence is only created after we have
+ * submitted to the kernel and updated the syncobj for the fence to import
+ * the actual DRM fence created with the submission. Unfortunately, if the
+ * queue submission has a 'wait for events' we may hold any jobs after the
+ * wait in a user-space thread until the events are signaled, and in that
+ * case we don't update the out fence of the submit until the events are
+ * signaled and we can submit all the jobs involved with the vkQueueSubmit
+ * call. This means that if the applications submits with an out fence and
+    * call. This means that if the application submits with an out fence and
+    * a wait for events, trying to export the out fence to a SYNC_FD right
+ * because the actual DRM fence won't exist yet. This is not a problem
+ * with OPAQUE_FD because in this case we export the entire syncobj, not
+ * the underlying DRM fence. To fix this we need to rework our kernel
+ * interface to be more flexible and accept multiple in/out syncobjs so
+    * we can implement event waits as regular fence waits on the kernel side;
+ * until then, we can only reliably export OPAQUE_FD.
+ */
+ if (pExternalFenceInfo->handleType !=
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalFenceProperties->externalFenceFeatures |=
+ VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT;
+ }
+ break;
+ default:
+ pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+ pExternalFenceProperties->compatibleHandleTypes = 0;
+ pExternalFenceProperties->externalFenceFeatures = 0;
+ break;
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_ImportFenceFdKHR(VkDevice _device,
+ const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_fence, fence, pImportFenceFdInfo->fence);
+
+ assert(pImportFenceFdInfo->sType ==
+ VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR);
+
+ int fd = pImportFenceFdInfo->fd;
+ int render_fd = device->pdevice->render_fd;
+
+ bool is_temporary =
+ pImportFenceFdInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT ||
+ (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT);
+
+ uint32_t new_sync;
+ switch (pImportFenceFdInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
+ /* "If handleType is VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, the
+ * special value -1 for fd is treated like a valid sync file descriptor
+ * referring to an object that has already signaled. The import
+ * operation will succeed and the VkFence will have a temporarily
+ * imported payload as if a valid file descriptor had been provided."
+ */
+ unsigned flags = fd == -1 ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
+ if (drmSyncobjCreate(render_fd, flags, &new_sync))
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ if (fd != -1) {
+ if (drmSyncobjImportSyncFile(render_fd, new_sync, fd)) {
+ drmSyncobjDestroy(render_fd, new_sync);
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+ }
+ break;
+ }
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT: {
+ if (drmSyncobjFDToHandle(render_fd, fd, &new_sync))
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ break;
+ }
+ default:
+ return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+
+ destroy_syncobj(render_fd, &fence->temp_sync);
+ if (is_temporary) {
+ fence->temp_sync = new_sync;
+ } else {
+ destroy_syncobj(render_fd, &fence->sync);
+ fence->sync = new_sync;
+ }
+
+ /* From the Vulkan 1.0.53 spec:
+ *
+ * "Importing a fence payload from a file descriptor transfers
+ * ownership of the file descriptor from the application to the
+ * Vulkan implementation. The application must not perform any
+ * operations on the file descriptor after a successful import."
+ *
+ * If the import fails, we leave the file descriptor open.
+ */
+ if (fd != -1)
+ close(fd);
+
+ return VK_SUCCESS;
+}
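
The SYNC_FD import path above treats fd == -1 as an already-signaled payload by creating the replacement syncobj with DRM_SYNCOBJ_CREATE_SIGNALED. A minimal libdrm sketch of that branch in isolation:

#include <xf86drm.h>

/* Create a syncobj holding the payload of 'fd'; fd == -1 means
 * "already signaled" per the Vulkan external-handle spec language. */
static int syncobj_from_sync_file(int render_fd, int fd, uint32_t *out)
{
   unsigned flags = (fd == -1) ? DRM_SYNCOBJ_CREATE_SIGNALED : 0;
   if (drmSyncobjCreate(render_fd, flags, out))
      return -1;

   if (fd != -1 && drmSyncobjImportSyncFile(render_fd, *out, fd)) {
      drmSyncobjDestroy(render_fd, *out);
      return -1;
   }
   return 0;
}
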
+
+VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyFence(VkDevice _device,
VkFence _fence,
const VkAllocationCallbacks *pAllocator)
@@ -1197,15 +1410,13 @@ v3dv_DestroyFence(VkDevice _device,
if (fence == NULL)
return;
- drmSyncobjDestroy(device->pdevice->render_fd, fence->sync);
-
- if (fence->fd != -1)
- close(fence->fd);
+ destroy_syncobj(device->pdevice->render_fd, &fence->sync);
+ destroy_syncobj(device->pdevice->render_fd, &fence->temp_sync);
vk_object_free(&device->vk, pAllocator, fence);
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
@@ -1216,11 +1427,42 @@ v3dv_GetFenceStatus(VkDevice _device, VkFence _fence)
if (ret == -ETIME)
return VK_NOT_READY;
else if (ret)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_GetFenceFdKHR(VkDevice _device,
+ const VkFenceGetFdInfoKHR *pGetFdInfo,
+ int *pFd)
+{
+ V3DV_FROM_HANDLE(v3dv_device, device, _device);
+ V3DV_FROM_HANDLE(v3dv_fence, fence, pGetFdInfo->fence);
+
+ assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR);
+
+ *pFd = -1;
+ int render_fd = device->pdevice->render_fd;
+ switch (pGetFdInfo->handleType) {
+   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+      drmSyncobjExportSyncFile(render_fd, fence->sync, pFd);
+      if (*pFd == -1)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      break;
+   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+      drmSyncobjHandleToFD(render_fd, fence->sync, pFd);
+      if (*pFd == -1)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      break;
+ default:
+ unreachable("Unsupported external fence handle type");
+ }
+
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
@@ -1229,23 +1471,41 @@ v3dv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
sizeof(*syncobjs) * fenceCount, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!syncobjs)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ int render_fd = device->pdevice->render_fd;
+ uint32_t reset_count = 0;
for (uint32_t i = 0; i < fenceCount; i++) {
struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
- syncobjs[i] = fence->sync;
+ /* From the Vulkan spec, section 'Importing Fence Payloads':
+ *
+ * "If the import is temporary, the fence will be restored to its
+ * permanent state the next time that fence is passed to
+ * vkResetFences.
+ *
+ * Note: Restoring a fence to its prior permanent payload is a
+ * distinct operation from resetting a fence payload."
+ *
+ * To restore the previous state, we just need to destroy the temporary.
+ */
+ if (fence->temp_sync)
+ destroy_syncobj(render_fd, &fence->temp_sync);
+ else
+ syncobjs[reset_count++] = fence->sync;
}
- int ret = drmSyncobjReset(device->pdevice->render_fd, syncobjs, fenceCount);
+ int ret = 0;
+ if (reset_count > 0)
+ ret = drmSyncobjReset(render_fd, syncobjs, reset_count);
vk_free(&device->vk.alloc, syncobjs);
if (ret)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
return VK_SUCCESS;
}
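
Per the spec text quoted above, a temporary import shadows the permanent payload until the fence is reset, and the driver realizes the "restore" simply by destroying temp_sync. A hedged application-side view of that sequence (standard Vulkan API names; error handling omitted, and vkImportFenceFdKHR would be resolved via vkGetDeviceProcAddr in real code):

#include <stdint.h>
#include <vulkan/vulkan.h>

static void wait_on_imported_payload(VkDevice device, VkFence fence,
                                     int sync_fd)
{
   const VkImportFenceFdInfoKHR import = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
      .fence = fence,
      .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
      .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
      .fd = sync_fd,
   };
   vkImportFenceFdKHR(device, &import);                     /* sets temp_sync */
   vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX); /* waits on temp_sync */
   vkResetFences(device, 1, &fence);  /* drops temp_sync, restores permanent */
}
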
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_WaitForFences(VkDevice _device,
uint32_t fenceCount,
const VkFence *pFences,
@@ -1260,11 +1520,11 @@ v3dv_WaitForFences(VkDevice _device,
sizeof(*syncobjs) * fenceCount, 8,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (!syncobjs)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
for (uint32_t i = 0; i < fenceCount; i++) {
struct v3dv_fence *fence = v3dv_fence_from_handle(pFences[i]);
- syncobjs[i] = fence->sync;
+ syncobjs[i] = fence->temp_sync ? fence->temp_sync : fence->sync;
}
unsigned flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
@@ -1282,16 +1542,16 @@ v3dv_WaitForFences(VkDevice _device,
if (ret == -ETIME)
return VK_TIMEOUT;
else if (ret)
- return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
+ return vk_error(device, VK_ERROR_DEVICE_LOST);
return VK_SUCCESS;
}
-VkResult
+VKAPI_ATTR VkResult VKAPI_CALL
v3dv_QueueBindSparse(VkQueue _queue,
uint32_t bindInfoCount,
const VkBindSparseInfo *pBindInfo,
VkFence fence)
{
V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
- return vk_error(queue->device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
+ return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c b/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c
index 8dd085862..47bc3a0b1 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_uniforms.c
@@ -28,6 +28,52 @@
#include "v3dv_private.h"
#include "vk_format_info.h"
+/* The only version-specific structure that we need is
+ * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
+ * previous V3D versions and we don't expect that to change, so for now let's
+ * just hardcode the V3D version here.
+ */
+#define V3D_VERSION 41
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+
+/* Our Vulkan resource indices represent indices in descriptor maps which
+ * include all shader stages, so we need to size the arrays below
+ * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
+ */
+#define MAX_STAGES 3
+
+#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
+struct texture_bo_list {
+ struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
+};
+
+/* This tracks state BOs for both textures and samplers, so we
+ * multiply by 2.
+ */
+#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
+struct state_bo_list {
+ uint32_t count;
+ struct v3dv_bo *states[MAX_TOTAL_STATES];
+};
+
+#define MAX_TOTAL_UNIFORM_BUFFERS (1 + MAX_UNIFORM_BUFFERS * MAX_STAGES)
+#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
+struct buffer_bo_list {
+ struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
+ struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
+};
+
+static bool
+state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
+{
+ for (int i = 0; i < list->count; i++) {
+ if (list->states[i] == bo)
+ return true;
+ }
+ return false;
+}
+
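
The helper above is a plain linear-membership scan; with at most a few dozen entries it is cheaper and simpler than a hash set. A generic, self-contained sketch of the record-once pattern it supports:

#include <stdbool.h>
#include <stddef.h>

#define MAX_ENTRIES 16

struct ptr_set {
   size_t count;
   const void *items[MAX_ENTRIES];
};

/* Add 'p' unless it is already present; returns false when full. */
static bool ptr_set_add(struct ptr_set *s, const void *p)
{
   for (size_t i = 0; i < s->count; i++)
      if (s->items[i] == p)
         return true;            /* already recorded once */
   if (s->count == MAX_ENTRIES)
      return false;
   s->items[s->count++] = p;
   return true;
}
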
/*
 * This method checks whether the UBO used for push constants needs to be
 * updated.
@@ -87,43 +133,56 @@ check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline,
+ enum broadcom_shader_stage stage,
struct v3dv_cl_out **uniforms,
- uint32_t data)
+ uint32_t data,
+ struct texture_bo_list *tex_bos,
+ struct state_bo_list *state_bos)
{
uint32_t texture_idx = v3d_unit_data_get_unit(data);
- struct v3dv_job *job = cmd_buffer->state.job;
+
struct v3dv_descriptor_state *descriptor_state =
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
/* We need to ensure that the texture bo is added to the job */
struct v3dv_bo *texture_bo =
v3dv_descriptor_map_get_texture_bo(descriptor_state,
- &pipeline->shared_data->texture_map,
+ &pipeline->shared_data->maps[stage]->texture_map,
pipeline->layout, texture_idx);
assert(texture_bo);
- v3dv_job_add_bo(job, texture_bo);
+ assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
+ tex_bos->tex[texture_idx] = texture_bo;
struct v3dv_cl_reloc state_reloc =
- v3dv_descriptor_map_get_texture_shader_state(descriptor_state,
- &pipeline->shared_data->texture_map,
+ v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
+ &pipeline->shared_data->maps[stage]->texture_map,
pipeline->layout,
texture_idx);
- cl_aligned_reloc(&job->indirect, uniforms,
- state_reloc.bo,
- state_reloc.offset +
- v3d_unit_data_get_offset(data));
+ cl_aligned_u32(uniforms, state_reloc.bo->offset +
+ state_reloc.offset +
+ v3d_unit_data_get_offset(data));
+
+ /* Texture and Sampler states are typically suballocated, so they are
+ * usually the same BO: only flag them once to avoid trying to add them
+ * multiple times to the job later.
+ */
+ if (!state_bo_in_list(state_bos, state_reloc.bo)) {
+ assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
+ state_bos->states[state_bos->count++] = state_reloc.bo;
+ }
}
/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline,
+ enum broadcom_shader_stage stage,
struct v3dv_cl_out **uniforms,
- uint32_t data)
+ uint32_t data,
+ struct state_bo_list *state_bos)
{
uint32_t sampler_idx = v3d_unit_data_get_unit(data);
- struct v3dv_job *job = cmd_buffer->state.job;
struct v3dv_descriptor_state *descriptor_state =
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
@@ -131,13 +190,13 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);
struct v3dv_cl_reloc sampler_state_reloc =
- v3dv_descriptor_map_get_sampler_state(descriptor_state,
- &pipeline->shared_data->sampler_map,
+ v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
+ &pipeline->shared_data->maps[stage]->sampler_map,
pipeline->layout, sampler_idx);
const struct v3dv_sampler *sampler =
v3dv_descriptor_map_get_sampler(descriptor_state,
- &pipeline->shared_data->sampler_map,
+ &pipeline->shared_data->maps[stage]->sampler_map,
pipeline->layout, sampler_idx);
assert(sampler);
@@ -151,26 +210,36 @@ write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
&p1_unpacked);
}
- cl_aligned_reloc(&job->indirect, uniforms,
- sampler_state_reloc.bo,
- sampler_state_reloc.offset +
- p1_packed);
+ cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
+ sampler_state_reloc.offset +
+ p1_packed);
+
+ /* Texture and Sampler states are typically suballocated, so they are
+ * usually the same BO: only flag them once to avoid trying to add them
+ * multiple times to the job later.
+ */
+ if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
+ assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
+ state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
+ }
}
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline,
+ enum broadcom_shader_stage stage,
struct v3dv_cl_out **uniforms,
enum quniform_contents content,
- uint32_t data)
+ uint32_t data,
+ struct buffer_bo_list *buffer_bos)
{
- struct v3dv_job *job = cmd_buffer->state.job;
struct v3dv_descriptor_state *descriptor_state =
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
struct v3dv_descriptor_map *map =
content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
- &pipeline->shared_data->ubo_map : &pipeline->shared_data->ssbo_map;
+ &pipeline->shared_data->maps[stage]->ubo_map :
+ &pipeline->shared_data->maps[stage]->ssbo_map;
uint32_t offset =
content == QUNIFORM_UBO_ADDR ?
@@ -193,10 +262,10 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
&cmd_buffer->push_constants_resource;
assert(resource->bo);
- cl_aligned_reloc(&job->indirect, uniforms,
- resource->bo,
- resource->offset + offset + dynamic_offset);
-
+ cl_aligned_u32(uniforms, resource->bo->offset +
+ resource->offset +
+ offset + dynamic_offset);
+ buffer_bos->ubo[0] = resource->bo;
} else {
uint32_t index =
content == QUNIFORM_UBO_ADDR ?
@@ -216,10 +285,18 @@ write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
content == QUNIFORM_GET_UBO_SIZE) {
cl_aligned_u32(uniforms, descriptor->range);
} else {
- cl_aligned_reloc(&job->indirect, uniforms,
- descriptor->buffer->mem->bo,
- descriptor->buffer->mem_offset +
- descriptor->offset + offset + dynamic_offset);
+ cl_aligned_u32(uniforms, descriptor->buffer->mem->bo->offset +
+ descriptor->buffer->mem_offset +
+ descriptor->offset +
+ offset + dynamic_offset);
+
+ if (content == QUNIFORM_UBO_ADDR) {
+ assert(index + 1 < MAX_TOTAL_UNIFORM_BUFFERS);
+ buffer_bos->ubo[index + 1] = descriptor->buffer->mem->bo;
+ } else {
+ assert(index < MAX_TOTAL_STORAGE_BUFFERS);
+ buffer_bos->ssbo[index] = descriptor->buffer->mem->bo;
+ }
}
}
}
@@ -235,26 +312,26 @@ get_texture_size_from_image_view(struct v3dv_image_view *image_view,
/* We don't u_minify the values, as we are using the image_view
* extents
*/
- return image_view->extent.width;
+ return image_view->vk.extent.width;
case QUNIFORM_IMAGE_HEIGHT:
case QUNIFORM_TEXTURE_HEIGHT:
- return image_view->extent.height;
+ return image_view->vk.extent.height;
case QUNIFORM_IMAGE_DEPTH:
case QUNIFORM_TEXTURE_DEPTH:
- return image_view->extent.depth;
+ return image_view->vk.extent.depth;
case QUNIFORM_IMAGE_ARRAY_SIZE:
case QUNIFORM_TEXTURE_ARRAY_SIZE:
- if (image_view->type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
- return image_view->last_layer - image_view->first_layer + 1;
+ if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
+ return image_view->vk.layer_count;
} else {
- assert((image_view->last_layer - image_view->first_layer + 1) % 6 == 0);
- return (image_view->last_layer - image_view->first_layer + 1) / 6;
+ assert(image_view->vk.layer_count % 6 == 0);
+ return image_view->vk.layer_count / 6;
}
case QUNIFORM_TEXTURE_LEVELS:
- return image_view->max_level - image_view->base_level + 1;
+ return image_view->vk.level_count;
case QUNIFORM_TEXTURE_SAMPLES:
- assert(image_view->image);
- return image_view->image->samples;
+ assert(image_view->vk.image);
+ return image_view->vk.image->samples;
default:
unreachable("Bad texture size field");
}
@@ -279,16 +356,18 @@ get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_pipeline *pipeline,
+ enum broadcom_shader_stage stage,
enum quniform_contents contents,
uint32_t data)
{
- uint32_t texture_idx = v3d_unit_data_get_unit(data);
+ uint32_t texture_idx = data;
+
struct v3dv_descriptor_state *descriptor_state =
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
struct v3dv_descriptor *descriptor =
v3dv_descriptor_map_get_descriptor(descriptor_state,
- &pipeline->shared_data->texture_map,
+ &pipeline->shared_data->maps[stage]->texture_map,
pipeline->layout,
texture_idx, NULL);
@@ -322,6 +401,11 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
struct v3dv_job *job = cmd_buffer->state.job;
assert(job);
+ assert(job->cmd_buffer == cmd_buffer);
+
+ struct texture_bo_list tex_bos = { 0 };
+ struct state_bo_list state_bos = { 0 };
+ struct buffer_bo_list buffer_bos = { 0 };
/* The hardware always pre-fetches the next uniform (also when there
* aren't any), so we always allocate space for an extra slot. This
@@ -369,17 +453,20 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
case QUNIFORM_UBO_ADDR:
case QUNIFORM_GET_SSBO_SIZE:
case QUNIFORM_GET_UBO_SIZE:
- write_ubo_ssbo_uniforms(cmd_buffer, pipeline, &uniforms,
- uinfo->contents[i], data);
+ write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
+ uinfo->contents[i], data, &buffer_bos);
+
break;
case QUNIFORM_IMAGE_TMU_CONFIG_P0:
case QUNIFORM_TMU_CONFIG_P0:
- write_tmu_p0(cmd_buffer, pipeline, &uniforms, data);
+ write_tmu_p0(cmd_buffer, pipeline, variant->stage,
+ &uniforms, data, &tex_bos, &state_bos);
break;
case QUNIFORM_TMU_CONFIG_P1:
- write_tmu_p1(cmd_buffer, pipeline, &uniforms, data);
+ write_tmu_p1(cmd_buffer, pipeline, variant->stage,
+ &uniforms, data, &state_bos);
break;
case QUNIFORM_IMAGE_WIDTH:
@@ -395,10 +482,66 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
cl_aligned_u32(&uniforms,
get_texture_size(cmd_buffer,
pipeline,
+ variant->stage,
uinfo->contents[i],
data));
break;
+ /* We generate this from geometry shaders to cap the generated gl_Layer
+ * to be within the number of layers of the framebuffer so we prevent the
+ * binner from trying to access tile state memory out of bounds (for
+ * layers that don't exist).
+ *
+ * Unfortunately, for secondary command buffers we may not know the
+ * number of layers in the framebuffer at this stage. Since we are
+ * only using this to sanitize the shader and it should not have any
+ * impact on correct shaders that emit valid values for gl_Layer,
+ * we just work around it by using the largest number of layers we
+ * support.
+ *
+    * FIXME: we could do better than this by recording in the job that
+    * the value at this uniform offset is not correct, and patching it when
+    * we execute the secondary command buffer into a primary, since we do
+    * have the correct number of layers at that point. But again, since this
+    * is only for sanitizing the shader and it only affects the specific case
+    * of secondary command buffers without framebuffer info available, it
+    * might not be worth the trouble.
+ *
+ * With multiview the number of layers is dictated by the view mask
+ * and not by the framebuffer layers. We do set the job's frame tiling
+ * information correctly from the view mask in that case, however,
+ * secondary command buffers may not have valid frame tiling data,
+ * so when multiview is enabled, we always set the number of layers
+ * from the subpass view mask.
+ */
+ case QUNIFORM_FB_LAYERS: {
+ const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
+ const uint32_t view_mask =
+ state->pass->subpasses[state->subpass_idx].view_mask;
+
+ uint32_t num_layers;
+ if (view_mask != 0) {
+ num_layers = util_last_bit(view_mask);
+ } else if (job->frame_tiling.layers != 0) {
+ num_layers = job->frame_tiling.layers;
+ } else if (cmd_buffer->state.framebuffer) {
+ num_layers = cmd_buffer->state.framebuffer->layers;
+ } else {
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+ num_layers = 2048;
+#if DEBUG
+ fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
+ "secondary command buffer\n");
+#endif
+ }
+ cl_aligned_u32(&uniforms, num_layers);
+ break;
+ }
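
With multiview, the case above derives the layer count from the subpass view mask: the binner needs tile state for every view up to the highest bit set, which is what util_last_bit(view_mask) returns. A self-contained sketch of the same computation (last_bit mirrors Mesa's util_last_bit; GCC/Clang builtin assumed):

#include <stdint.h>
#include <stdio.h>

static unsigned last_bit(uint32_t v)
{
   return v == 0 ? 0 : 32 - __builtin_clz(v);
}

int main(void)
{
   /* Views 0 and 2 enabled: tile state is still needed for 3 layers,
    * because layer indices run up to the highest enabled view. */
   uint32_t view_mask = 0x5;
   printf("layers = %u\n", last_bit(view_mask));   /* prints 3 */
   return 0;
}
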
+
+ case QUNIFORM_VIEW_INDEX:
+ cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
+ break;
+
case QUNIFORM_NUM_WORK_GROUPS:
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
assert(job->csd.wg_count[data] > 0);
@@ -407,15 +550,20 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
break;
+ case QUNIFORM_WORK_GROUP_BASE:
+ assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
+ cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
+ break;
+
case QUNIFORM_SHARED_OFFSET:
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
assert(job->csd.shared_memory);
- cl_aligned_reloc(&job->indirect, &uniforms, job->csd.shared_memory, 0);
+ cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
break;
case QUNIFORM_SPILL_OFFSET:
assert(pipeline->spill.bo);
- cl_aligned_reloc(&job->indirect, &uniforms, pipeline->spill.bo, 0);
+ cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
break;
case QUNIFORM_SPILL_SIZE_PER_THREAD:
@@ -430,6 +578,30 @@ v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
cl_end(&job->indirect, uniforms);
+ for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
+ if (tex_bos.tex[i])
+ v3dv_job_add_bo(job, tex_bos.tex[i]);
+ }
+
+ for (int i = 0; i < state_bos.count; i++)
+ v3dv_job_add_bo(job, state_bos.states[i]);
+
+ for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
+ if (buffer_bos.ubo[i])
+ v3dv_job_add_bo(job, buffer_bos.ubo[i]);
+ }
+
+ for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
+ if (buffer_bos.ssbo[i])
+ v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
+ }
+
+ if (job->csd.shared_memory)
+ v3dv_job_add_bo(job, job->csd.shared_memory);
+
+ if (pipeline->spill.bo)
+ v3dv_job_add_bo(job, pipeline->spill.bo);
+
return uniform_stream;
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c b/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c
index 25bb4636a..154adf3a7 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_wsi.c
@@ -25,11 +25,12 @@
#include "v3dv_private.h"
#include "drm-uapi/drm_fourcc.h"
+#include "wsi_common_entrypoints.h"
#include "vk_format_info.h"
#include "vk_util.h"
#include "wsi_common.h"
-static PFN_vkVoidFunction
+static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
v3dv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, physicalDevice);
@@ -46,6 +47,31 @@ v3dv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
return vk_device_dispatch_table_get(&vk_device_trampolines, pName);
}
+static bool
+v3dv_wsi_can_present_on_device(VkPhysicalDevice _pdevice, int fd)
+{
+ V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, _pdevice);
+
+ drmDevicePtr fd_devinfo, display_devinfo;
+ int ret;
+
+ ret = drmGetDevice2(fd, 0, &fd_devinfo);
+ if (ret)
+ return false;
+
+ ret = drmGetDevice2(pdevice->display_fd, 0, &display_devinfo);
+ if (ret) {
+ drmFreeDevice(&fd_devinfo);
+ return false;
+ }
+
+ bool result = drmDevicesEqual(fd_devinfo, display_devinfo);
+
+ drmFreeDevice(&fd_devinfo);
+ drmFreeDevice(&display_devinfo);
+ return result;
+}
+
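
The callback above answers the WSI layer's question "can images rendered through this fd be presented on our display device?" by comparing the two DRM device descriptions. A trimmed sketch of the same check, assuming two already-open DRM fds:

#include <stdbool.h>
#include <xf86drm.h>

/* True if both fds refer to the same DRM device (any node type). */
static bool same_drm_device(int fd_a, int fd_b)
{
   drmDevicePtr a, b;
   if (drmGetDevice2(fd_a, 0, &a))
      return false;
   if (drmGetDevice2(fd_b, 0, &b)) {
      drmFreeDevice(&a);
      return false;
   }
   bool equal = drmDevicesEqual(a, b);
   drmFreeDevice(&a);
   drmFreeDevice(&b);
   return equal;
}
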
VkResult
v3dv_wsi_init(struct v3dv_physical_device *physical_device)
{
@@ -61,6 +87,10 @@ v3dv_wsi_init(struct v3dv_physical_device *physical_device)
return result;
physical_device->wsi_device.supports_modifiers = true;
+ physical_device->wsi_device.can_present_on_device =
+ v3dv_wsi_can_present_on_device;
+
+ physical_device->vk.wsi_device = &physical_device->wsi_device;
return VK_SUCCESS;
}
@@ -68,38 +98,11 @@ v3dv_wsi_init(struct v3dv_physical_device *physical_device)
void
v3dv_wsi_finish(struct v3dv_physical_device *physical_device)
{
+ physical_device->vk.wsi_device = NULL;
wsi_device_finish(&physical_device->wsi_device,
&physical_device->vk.instance->alloc);
}
-void v3dv_DestroySurfaceKHR(
- VkInstance _instance,
- VkSurfaceKHR _surface,
- const VkAllocationCallbacks* pAllocator)
-{
- V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
-
- if (!surface)
- return;
-
- vk_free2(&instance->vk.alloc, pAllocator, surface);
-}
-
-VkResult v3dv_GetPhysicalDeviceSurfaceSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- VkSurfaceKHR surface,
- VkBool32* pSupported)
-{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
- return wsi_common_get_surface_support(&device->wsi_device,
- queueFamilyIndex,
- surface,
- pSupported);
-}
-
static void
constraint_surface_capabilities(VkSurfaceCapabilitiesKHR *caps)
{
@@ -114,74 +117,36 @@ constraint_surface_capabilities(VkSurfaceCapabilitiesKHR *caps)
caps->supportedUsageFlags &= ~VK_IMAGE_USAGE_SAMPLED_BIT;
}
-VkResult v3dv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
VkResult result;
- result = wsi_common_get_surface_capabilities(&device->wsi_device,
- surface,
- pSurfaceCapabilities);
+ result = wsi_GetPhysicalDeviceSurfaceCapabilitiesKHR(physicalDevice,
+ surface,
+ pSurfaceCapabilities);
constraint_surface_capabilities(pSurfaceCapabilities);
return result;
}
-VkResult v3dv_GetPhysicalDeviceSurfaceCapabilities2KHR(
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_GetPhysicalDeviceSurfaceCapabilities2KHR(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
VkSurfaceCapabilities2KHR* pSurfaceCapabilities)
{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
VkResult result;
- result = wsi_common_get_surface_capabilities2(&device->wsi_device,
- pSurfaceInfo,
- pSurfaceCapabilities);
+ result = wsi_GetPhysicalDeviceSurfaceCapabilities2KHR(physicalDevice,
+ pSurfaceInfo,
+ pSurfaceCapabilities);
constraint_surface_capabilities(&pSurfaceCapabilities->surfaceCapabilities);
return result;
}
-VkResult v3dv_GetPhysicalDeviceSurfaceFormatsKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pSurfaceFormatCount,
- VkSurfaceFormatKHR* pSurfaceFormats)
-{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
- return wsi_common_get_surface_formats(&device->wsi_device, surface,
- pSurfaceFormatCount, pSurfaceFormats);
-}
-
-VkResult v3dv_GetPhysicalDeviceSurfaceFormats2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
- uint32_t* pSurfaceFormatCount,
- VkSurfaceFormat2KHR* pSurfaceFormats)
-{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
- return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo,
- pSurfaceFormatCount, pSurfaceFormats);
-}
-
-VkResult v3dv_GetPhysicalDeviceSurfacePresentModesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pPresentModeCount,
- VkPresentModeKHR* pPresentModes)
-{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
- return wsi_common_get_surface_present_modes(&device->wsi_device, surface,
- pPresentModeCount,
- pPresentModes);
-}
-
-VkResult v3dv_CreateSwapchainKHR(
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_CreateSwapchainKHR(
VkDevice _device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
@@ -190,7 +155,6 @@ VkResult v3dv_CreateSwapchainKHR(
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_instance *instance = device->instance;
struct v3dv_physical_device *pdevice = &instance->physicalDevice;
- struct wsi_device *wsi_device = &pdevice->wsi_device;
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
VkResult result =
@@ -198,64 +162,29 @@ VkResult v3dv_CreateSwapchainKHR(
if (result != VK_SUCCESS)
return result;
- const VkAllocationCallbacks *alloc;
- if (pAllocator)
- alloc = pAllocator;
- else
- alloc = &device->vk.alloc;
-
- return wsi_common_create_swapchain(wsi_device, _device,
- pCreateInfo, alloc, pSwapchain);
+ return wsi_CreateSwapchainKHR(_device, pCreateInfo, pAllocator, pSwapchain);
}
-void v3dv_DestroySwapchainKHR(
- VkDevice _device,
- VkSwapchainKHR swapchain,
- const VkAllocationCallbacks* pAllocator)
+struct v3dv_image *
+v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain, uint32_t index)
{
- V3DV_FROM_HANDLE(v3dv_device, device, _device);
- const VkAllocationCallbacks *alloc;
+   uint32_t n_images = index + 1;
+   VkImage *images = malloc(sizeof(*images) * n_images);
+   if (!images)
+      return NULL;
+   VkResult result = wsi_common_get_images(swapchain, &n_images, images);
- if (pAllocator)
- alloc = pAllocator;
- else
- alloc = &device->vk.alloc;
+ if (result != VK_SUCCESS && result != VK_INCOMPLETE) {
+ free(images);
+ return NULL;
+ }
- wsi_common_destroy_swapchain(_device, swapchain, alloc);
-}
+ V3DV_FROM_HANDLE(v3dv_image, image, images[index]);
+ free(images);
-VkResult v3dv_GetSwapchainImagesKHR(
- VkDevice device,
- VkSwapchainKHR swapchain,
- uint32_t* pSwapchainImageCount,
- VkImage* pSwapchainImages)
-{
- return wsi_common_get_images(swapchain,
- pSwapchainImageCount,
- pSwapchainImages);
+ return image;
}
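
The helper above exploits the standard two-call enumeration contract: asking for only index + 1 images returns VK_INCOMPLETE but still fills the array up to the requested count. The equivalent at the public Vulkan API level, as a hedged sketch that assumes 'index' is below the swapchain's actual image count (the helper makes the same assumption):

#include <stdlib.h>
#include <vulkan/vulkan.h>

static VkImage get_swapchain_image(VkDevice dev, VkSwapchainKHR sc,
                                   uint32_t index)
{
   uint32_t count = index + 1;
   VkImage *images = malloc(sizeof(*images) * count);
   if (!images)
      return VK_NULL_HANDLE;

   VkResult res = vkGetSwapchainImagesKHR(dev, sc, &count, images);
   /* VK_INCOMPLETE is fine here: the first 'count' entries are valid. */
   VkImage img = (res == VK_SUCCESS || res == VK_INCOMPLETE)
               ? images[index] : VK_NULL_HANDLE;
   free(images);
   return img;
}
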
-VkResult v3dv_AcquireNextImageKHR(
- VkDevice device,
- VkSwapchainKHR swapchain,
- uint64_t timeout,
- VkSemaphore semaphore,
- VkFence fence,
- uint32_t* pImageIndex)
-{
- VkAcquireNextImageInfoKHR acquire_info = {
- .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
- .swapchain = swapchain,
- .timeout = timeout,
- .semaphore = semaphore,
- .fence = fence,
- .deviceMask = 0,
- };
-
- return v3dv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
-}
-
-VkResult v3dv_AcquireNextImage2KHR(
+VKAPI_ATTR VkResult VKAPI_CALL
+v3dv_AcquireNextImage2KHR(
VkDevice _device,
const VkAcquireNextImageInfoKHR* pAcquireInfo,
uint32_t* pImageIndex)
@@ -279,52 +208,3 @@ VkResult v3dv_AcquireNextImage2KHR(
return result;
}
-
-VkResult v3dv_QueuePresentKHR(
- VkQueue _queue,
- const VkPresentInfoKHR* pPresentInfo)
-{
- V3DV_FROM_HANDLE(v3dv_queue, queue, _queue);
- struct v3dv_physical_device *pdevice =
- &queue->device->instance->physicalDevice;
-
- return wsi_common_queue_present(&pdevice->wsi_device,
- v3dv_device_to_handle(queue->device),
- _queue, 0,
- pPresentInfo);
-}
-
-VkResult v3dv_GetDeviceGroupPresentCapabilitiesKHR(
- VkDevice device,
- VkDeviceGroupPresentCapabilitiesKHR* pCapabilities)
-{
- memset(pCapabilities->presentMask, 0,
- sizeof(pCapabilities->presentMask));
- pCapabilities->presentMask[0] = 0x1;
- pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
-
- return VK_SUCCESS;
-}
-
-VkResult v3dv_GetDeviceGroupSurfacePresentModesKHR(
- VkDevice device,
- VkSurfaceKHR surface,
- VkDeviceGroupPresentModeFlagsKHR* pModes)
-{
- *pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
-
- return VK_SUCCESS;
-}
-
-VkResult v3dv_GetPhysicalDevicePresentRectanglesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pRectCount,
- VkRect2D* pRects)
-{
- V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
-
- return wsi_common_get_present_rectangles(&device->wsi_device,
- surface,
- pRectCount, pRects);
-}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
new file mode 100644
index 000000000..c2f2c7786
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -0,0 +1,2281 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+#include "util/half_float.h"
+#include "vulkan/util/vk_format.h"
+#include "util/u_pack_color.h"
+
+#include "vk_format_info.h"
+
+void
+v3dX(job_emit_binning_flush)(struct v3dv_job *job)
+{
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH));
+ v3dv_return_if_oom(NULL, job);
+
+ cl_emit(&job->bcl, FLUSH, flush);
+}
+
+void
+v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
+ const struct v3dv_frame_tiling *tiling,
+ uint32_t layers)
+{
+ /* This must go before the binning mode configuration. It is
+ * required for layered framebuffers to work.
+ */
+ cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) {
+ config.number_of_layers = layers;
+ }
+
+ cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
+ config.width_in_pixels = tiling->width;
+ config.height_in_pixels = tiling->height;
+ config.number_of_render_targets = MAX2(tiling->render_target_count, 1);
+ config.multisample_mode_4x = tiling->msaa;
+ config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+ }
+
+ /* There's definitely nothing in the VCD cache we want. */
+ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
+
+ /* "Binning mode lists must have a Start Tile Binning item (6) after
+ * any prefix state data before the binning list proper starts."
+ */
+ cl_emit(&job->bcl, START_TILE_BINNING, bin);
+}
+
+void
+v3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ assert(cmd_buffer->state.job);
+ v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl,
+ cl_packet_length(RETURN_FROM_SUB_LIST));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+ cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret);
+}
+
+void
+v3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect)
+{
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW));
+ v3dv_return_if_oom(NULL, job);
+
+ cl_emit(&job->bcl, CLIP_WINDOW, clip) {
+ clip.clip_window_left_pixel_coordinate = rect->offset.x;
+ clip.clip_window_bottom_pixel_coordinate = rect->offset.y;
+ clip.clip_window_width_in_pixels = rect->extent.width;
+ clip.clip_window_height_in_pixels = rect->extent.height;
+ }
+}
+
+static void
+cmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_cl *cl,
+ struct v3dv_image_view *iview,
+ uint32_t layer,
+ uint32_t buffer)
+{
+ const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
+ const struct v3d_resource_slice *slice =
+ &image->slices[iview->vk.base_mip_level];
+ uint32_t layer_offset =
+ v3dv_layer_offset(image, iview->vk.base_mip_level,
+ iview->vk.base_array_layer + layer);
+
+ cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
+ load.buffer_to_load = buffer;
+ load.address = v3dv_cl_address(image->mem->bo, layer_offset);
+
+ load.input_image_format = iview->format->rt_type;
+ load.r_b_swap = iview->swap_rb;
+ load.memory_format = slice->tiling;
+
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
+ load.height_in_ub_or_stride =
+ slice->padded_height_of_output_image_in_uif_blocks;
+ } else if (slice->tiling == V3D_TILING_RASTER) {
+ load.height_in_ub_or_stride = slice->stride;
+ }
+
+ if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
+ load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
+ else
+ load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ }
+}
+
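+/* Decides whether a TLB load is required for the given aspect of an
+ * attachment whose first use is in first_subpass_idx, based on the job
+ * and render area state tracked in the command buffer.
+ */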
+static bool
+check_needs_load(const struct v3dv_cmd_buffer_state *state,
+ VkImageAspectFlags aspect,
+ uint32_t first_subpass_idx,
+ VkAttachmentLoadOp load_op)
+{
+ /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are
+ * testing does not exist in the image.
+ */
+ if (!aspect)
+ return false;
+
+ /* Attachment (or view) load operations only apply on the first subpass
+ * that uses the attachment (or view); in any later subpass we always
+ * need to load.
+ */
+ if (state->job->first_subpass > first_subpass_idx)
+ return true;
+
+ /* If the job is continuing a subpass started in another job, we always
+ * need to load.
+ */
+ if (state->job->is_subpass_continue)
+ return true;
+
+ /* If the area is not aligned to tile boundaries, we always need to load */
+ if (!state->tile_aligned_render_area)
+ return true;
+
+ /* The attachment load operation must be LOAD */
+ return load_op == VK_ATTACHMENT_LOAD_OP_LOAD;
+}
+
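+/* Maps the depth/stencil aspects that need a TLB operation to the buffer
+ * selection value used by the load/store packets.
+ */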
+static inline uint32_t
+v3dv_zs_buffer(bool depth, bool stencil)
+{
+ if (depth && stencil)
+ return ZSTENCIL;
+ else if (depth)
+ return Z;
+ else if (stencil)
+ return STENCIL;
+ return NONE;
+}
+
+static void
+cmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_cl *cl,
+ uint32_t layer)
+{
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ const struct v3dv_framebuffer *framebuffer = state->framebuffer;
+ const struct v3dv_render_pass *pass = state->pass;
+ const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
+
+ assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT);
+
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ uint32_t attachment_idx = subpass->color_attachments[i].attachment;
+
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ const struct v3dv_render_pass_attachment *attachment =
+ &state->pass->attachments[attachment_idx];
+
+ /* According to the Vulkan spec:
+ *
+ * "The load operation for each sample in an attachment happens before
+ * any recorded command which accesses the sample in the first subpass
+ * where the attachment is used."
+ *
+ * If the load operation is CLEAR, we must only clear once on the first
+ * subpass that uses the attachment (and in that case we don't LOAD).
+ * After that, we always want to load so we don't lose any rendering done
+ * by a previous subpass to the same attachment. We also want to load
+ * if the current job is continuing subpass work started by a previous
+ * job, for the same reason.
+ *
+ * If the render area is not aligned to tile boundaries then we have
+ * tiles which are partially covered by it. In this case, we need to
+ * load the tiles so we can preserve the pixels that are outside the
+ * render area for any such tiles.
+ */
+ uint32_t first_subpass = !pass->multiview_enabled ?
+ attachment->first_subpass :
+ attachment->views[layer].first_subpass;
+
+ bool needs_load = check_needs_load(state,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ first_subpass,
+ attachment->desc.loadOp);
+ if (needs_load) {
+ struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
+ cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview,
+ layer, RENDER_TARGET_0 + i);
+ }
+ }
+
+ uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
+ if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
+ const struct v3dv_render_pass_attachment *ds_attachment =
+ &state->pass->attachments[ds_attachment_idx];
+
+ const VkImageAspectFlags ds_aspects =
+ vk_format_aspects(ds_attachment->desc.format);
+
+ uint32_t ds_first_subpass = !pass->multiview_enabled ?
+ ds_attachment->first_subpass :
+ ds_attachment->views[layer].first_subpass;
+
+ const bool needs_depth_load =
+ check_needs_load(state,
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_first_subpass,
+ ds_attachment->desc.loadOp);
+
+ const bool needs_stencil_load =
+ check_needs_load(state,
+ ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
+ ds_first_subpass,
+ ds_attachment->desc.stencilLoadOp);
+
+ if (needs_depth_load || needs_stencil_load) {
+ struct v3dv_image_view *iview =
+ framebuffer->attachments[ds_attachment_idx];
+ /* From the Vulkan spec:
+ *
+ * "When an image view of a depth/stencil image is used as a
+ * depth/stencil framebuffer attachment, the aspectMask is ignored
+ * and both depth and stencil image subresources are used."
+ *
+ * So we ignore the aspects from the subresource range of the image
+ * view for the depth/stencil attachment, but we still need to restrict
+ * them to the aspects compatible with the render pass and the image.
+ */
+ const uint32_t zs_buffer =
+ v3dv_zs_buffer(needs_depth_load, needs_stencil_load);
+ cmd_buffer_render_pass_emit_load(cmd_buffer, cl,
+ iview, layer, zs_buffer);
+ }
+ }
+
+ cl_emit(cl, END_OF_LOADS, end);
+}
+
+static void
+cmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_cl *cl,
+ uint32_t attachment_idx,
+ uint32_t layer,
+ uint32_t buffer,
+ bool clear,
+ bool is_multisample_resolve)
+{
+ const struct v3dv_image_view *iview =
+ cmd_buffer->state.framebuffer->attachments[attachment_idx];
+ const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
+ const struct v3d_resource_slice *slice =
+ &image->slices[iview->vk.base_mip_level];
+ uint32_t layer_offset = v3dv_layer_offset(image,
+ iview->vk.base_mip_level,
+ iview->vk.base_array_layer + layer);
+
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = buffer;
+ store.address = v3dv_cl_address(image->mem->bo, layer_offset);
+ store.clear_buffer_being_stored = clear;
+
+ store.output_image_format = iview->format->rt_type;
+ store.r_b_swap = iview->swap_rb;
+ store.memory_format = slice->tiling;
+
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
+ store.height_in_ub_or_stride =
+ slice->padded_height_of_output_image_in_uif_blocks;
+ } else if (slice->tiling == V3D_TILING_RASTER) {
+ store.height_in_ub_or_stride = slice->stride;
+ }
+
+ if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
+ store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
+ else if (is_multisample_resolve)
+ store.decimate_mode = V3D_DECIMATE_MODE_4X;
+ else
+ store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ }
+}
+
+static bool
+check_needs_clear(const struct v3dv_cmd_buffer_state *state,
+ VkImageAspectFlags aspect,
+ uint32_t first_subpass_idx,
+ VkAttachmentLoadOp load_op,
+ bool do_clear_with_draw)
+{
+ /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are
+ * testing does not exist in the image.
+ */
+ if (!aspect)
+ return false;
+
+ /* If the aspect needs to be cleared with a draw call then we won't emit
+ * the clear here.
+ */
+ if (do_clear_with_draw)
+ return false;
+
+ /* If this is resuming a subpass started with another job, then attachment
+ * load operations don't apply.
+ */
+ if (state->job->is_subpass_continue)
+ return false;
+
+ /* If the render area is not aligned to tile boundaries we can't use the
+ * TLB for a clear.
+ */
+ if (!state->tile_aligned_render_area)
+ return false;
+
+ /* If this job is running in a subpass other than the first subpass in
+ * which this attachment (or view) is used then attachment load operations
+ * don't apply.
+ */
+ if (state->job->first_subpass != first_subpass_idx)
+ return false;
+
+ /* The attachment load operation must be CLEAR */
+ return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR;
+}
+
+static bool
+check_needs_store(const struct v3dv_cmd_buffer_state *state,
+ VkImageAspectFlags aspect,
+ uint32_t last_subpass_idx,
+ VkAttachmentStoreOp store_op)
+{
+ /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are
+ * testing does not exist in the image.
+ */
+ if (!aspect)
+ return false;
+
+ /* Attachment (or view) store operations only apply on the last subpass
+ * where the attachment (or view) is used; in other subpasses we always
+ * need to store.
+ */
+ if (state->subpass_idx < last_subpass_idx)
+ return true;
+
+ /* Attachment store operations only apply on the last job we emit on the
+ * last subpass where the attachment is used; otherwise we always need to
+ * store.
+ */
+ if (!state->job->is_subpass_finish)
+ return true;
+
+ /* The attachment store operation must be STORE */
+ return store_op == VK_ATTACHMENT_STORE_OP_STORE;
+}
+
+static void
+cmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_cl *cl,
+ uint32_t layer)
+{
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ struct v3dv_render_pass *pass = state->pass;
+ const struct v3dv_subpass *subpass =
+ &pass->subpasses[state->subpass_idx];
+
+ bool has_stores = false;
+ bool use_global_zs_clear = false;
+ bool use_global_rt_clear = false;
+
+ assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT);
+
+ /* FIXME: separate stencil */
+ uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
+ if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
+ const struct v3dv_render_pass_attachment *ds_attachment =
+ &state->pass->attachments[ds_attachment_idx];
+
+ assert(state->job->first_subpass >= ds_attachment->first_subpass);
+ assert(state->subpass_idx >= ds_attachment->first_subpass);
+ assert(state->subpass_idx <= ds_attachment->last_subpass);
+
+ /* From the Vulkan spec, VkImageSubresourceRange:
+ *
+ * "When an image view of a depth/stencil image is used as a
+ * depth/stencil framebuffer attachment, the aspectMask is ignored
+ * and both depth and stencil image subresources are used."
+ *
+ * So we ignore the aspects from the subresource range of the image
+ * view for the depth/stencil attachment, but we still need to restrict
+ * them to the aspects compatible with the render pass and the image.
+ */
+ const VkImageAspectFlags aspects =
+ vk_format_aspects(ds_attachment->desc.format);
+
+ /* Only clear once on the first subpass that uses the attachment */
+ uint32_t ds_first_subpass = !state->pass->multiview_enabled ?
+ ds_attachment->first_subpass :
+ ds_attachment->views[layer].first_subpass;
+
+ bool needs_depth_clear =
+ check_needs_clear(state,
+ aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_first_subpass,
+ ds_attachment->desc.loadOp,
+ subpass->do_depth_clear_with_draw);
+
+ bool needs_stencil_clear =
+ check_needs_clear(state,
+ aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
+ ds_first_subpass,
+ ds_attachment->desc.stencilLoadOp,
+ subpass->do_stencil_clear_with_draw);
+
+ /* Skip the last store if it is not required */
+ uint32_t ds_last_subpass = !pass->multiview_enabled ?
+ ds_attachment->last_subpass :
+ ds_attachment->views[layer].last_subpass;
+
+ bool needs_depth_store =
+ check_needs_store(state,
+ aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_last_subpass,
+ ds_attachment->desc.storeOp);
+
+ bool needs_stencil_store =
+ check_needs_store(state,
+ aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
+ ds_last_subpass,
+ ds_attachment->desc.stencilStoreOp);
+
+ /* GFXH-1689: The per-buffer store command's clear buffer bit is broken
+ * for depth/stencil.
+ *
+ * There used to be some confusion regarding the Clear Tile Buffers
+ * Z/S bit also being broken, but we confirmed with Broadcom that this
+ * is not the case, it was just that some other hardware bugs (that we
+ * need to work around, such as GFXH-1461) could cause this bit to behave
+ * incorrectly.
+ *
+ * There used to be another issue where the RTs bit in the Clear Tile
+ * Buffers packet also cleared Z/S, but Broadcom confirmed this is
+ * fixed since V3D 4.1.
+ *
+ * So if we have to emit a clear of depth or stencil we don't use
+ * the per-buffer store clear bit, even if we need to store the buffers;
+ * instead we always have to use the Clear Tile Buffers Z/S bit.
+ * If we have configured the job to do early Z/S clearing, then we
+ * don't want to emit any Clear Tile Buffers command at all here.
+ *
+ * Note that GFXH-1689 is not reproduced in the simulator, where
+ * using the clear buffer bit in depth/stencil stores works fine.
+ */
+ use_global_zs_clear = !state->job->early_zs_clear &&
+ (needs_depth_clear || needs_stencil_clear);
+ if (needs_depth_store || needs_stencil_store) {
+ const uint32_t zs_buffer =
+ v3dv_zs_buffer(needs_depth_store, needs_stencil_store);
+ cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
+ ds_attachment_idx, layer,
+ zs_buffer, false, false);
+ has_stores = true;
+ }
+ }
+
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ uint32_t attachment_idx = subpass->color_attachments[i].attachment;
+
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ const struct v3dv_render_pass_attachment *attachment =
+ &state->pass->attachments[attachment_idx];
+
+ assert(state->job->first_subpass >= attachment->first_subpass);
+ assert(state->subpass_idx >= attachment->first_subpass);
+ assert(state->subpass_idx <= attachment->last_subpass);
+
+ /* Only clear once on the first subpass that uses the attachment */
+ uint32_t first_subpass = !pass->multiview_enabled ?
+ attachment->first_subpass :
+ attachment->views[layer].first_subpass;
+
+ bool needs_clear =
+ check_needs_clear(state,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ first_subpass,
+ attachment->desc.loadOp,
+ false);
+
+ /* Skip the last store if it is not required */
+ uint32_t last_subpass = !pass->multiview_enabled ?
+ attachment->last_subpass :
+ attachment->views[layer].last_subpass;
+
+ bool needs_store =
+ check_needs_store(state,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ last_subpass,
+ attachment->desc.storeOp);
+
+ /* If we need to resolve this attachment emit that store first. Notice
+ * that we must not request a tile buffer clear here in that case, since
+ * that would clear the tile buffer before we get to emit the actual
+ * color attachment store below (the clear happens after the store is
+ * completed).
+ *
+ * If the attachment doesn't support TLB resolves then we will have to
+ * fall back to doing the resolve in a shader separately after this
+ * job, so we will need to store the multisampled attachment even if
+ * that wasn't requested by the client.
+ */
+ const bool needs_resolve =
+ subpass->resolve_attachments &&
+ subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED;
+ if (needs_resolve && attachment->use_tlb_resolve) {
+ const uint32_t resolve_attachment_idx =
+ subpass->resolve_attachments[i].attachment;
+ cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
+ resolve_attachment_idx, layer,
+ RENDER_TARGET_0 + i,
+ false, true);
+ has_stores = true;
+ } else if (needs_resolve) {
+ needs_store = true;
+ }
+
+ /* Emit the color attachment store if needed */
+ if (needs_store) {
+ cmd_buffer_render_pass_emit_store(cmd_buffer, cl,
+ attachment_idx, layer,
+ RENDER_TARGET_0 + i,
+ needs_clear && !use_global_rt_clear,
+ false);
+ has_stores = true;
+ } else if (needs_clear) {
+ use_global_rt_clear = true;
+ }
+ }
+
+ /* We always need to emit at least one dummy store */
+ if (!has_stores) {
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+ }
+
+ /* If we have any depth/stencil clears we can't use the per-buffer clear
+ * bit and instead we have to emit a single clear of all tile buffers.
+ */
+ if (use_global_zs_clear || use_global_rt_clear) {
+ cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
+ clear.clear_z_stencil_buffer = use_global_zs_clear;
+ clear.clear_all_render_targets = use_global_rt_clear;
+ }
+ }
+}
+
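+/* Emits the generic tile list for a layer (loads, a branch to the tile's
+ * binned primitive list, stores) into the job's indirect CL, then points
+ * the RCL at it.
+ */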
+static void
+cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t layer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ /* Emit the generic list in our indirect state -- the rcl will just
+ * have pointers into it.
+ */
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer);
+
+ /* The binner starts out writing tiles assuming that the initial mode
+ * is triangles, so make sure that's the case.
+ */
+ cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
+ fmt.primitive_type = LIST_TRIANGLES;
+ }
+
+ /* The PTB assumes this value is 0, but the HW will not set it. */
+ cl_emit(cl, SET_INSTANCEID, set) {
+ set.instance_id = 0;
+ }
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t layer)
+{
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+
+ struct v3dv_job *job = cmd_buffer->state.job;
+ struct v3dv_cl *rcl = &job->rcl;
+
+ /* If doing multicore binning, we would need to initialize each
+ * core's tile list here.
+ */
+ const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
+ const uint32_t tile_alloc_offset =
+ 64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
+ cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+ list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
+ }
+
+ cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer);
+
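+ /* Convert the render area to an inclusive range of supertile columns
+ * and rows so we only emit coordinates for the supertiles it covers.
+ */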
+ uint32_t supertile_w_in_pixels =
+ tiling->tile_width * tiling->supertile_width;
+ uint32_t supertile_h_in_pixels =
+ tiling->tile_height * tiling->supertile_height;
+ const uint32_t min_x_supertile =
+ state->render_area.offset.x / supertile_w_in_pixels;
+ const uint32_t min_y_supertile =
+ state->render_area.offset.y / supertile_h_in_pixels;
+
+ uint32_t max_render_x = state->render_area.offset.x;
+ if (state->render_area.extent.width > 0)
+ max_render_x += state->render_area.extent.width - 1;
+ uint32_t max_render_y = state->render_area.offset.y;
+ if (state->render_area.extent.height > 0)
+ max_render_y += state->render_area.extent.height - 1;
+ const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
+ const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
+
+ for (int y = min_y_supertile; y <= max_y_supertile; y++) {
+ for (int x = min_x_supertile; x <= max_x_supertile; x++) {
+ cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
+ coords.column_number_in_supertiles = x;
+ coords.row_number_in_supertiles = y;
+ }
+ }
+ }
+}
+
+static void
+set_rcl_early_z_config(struct v3dv_job *job,
+ bool *early_z_disable,
+ uint32_t *early_z_test_and_update_direction)
+{
+ /* If this is true then we have not emitted any draw calls in this job
+ * and we don't get any benefit from early Z.
+ */
+ if (!job->decided_global_ez_enable) {
+ assert(job->draw_count == 0);
+ *early_z_disable = true;
+ return;
+ }
+
+ switch (job->first_ez_state) {
+ case V3D_EZ_UNDECIDED:
+ case V3D_EZ_LT_LE:
+ *early_z_disable = false;
+ *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE;
+ break;
+ case V3D_EZ_GT_GE:
+ *early_z_disable = false;
+ *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE;
+ break;
+ case V3D_EZ_DISABLED:
+ *early_z_disable = true;
+ break;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ const struct v3dv_framebuffer *framebuffer = state->framebuffer;
+
+ /* We can't emit the RCL until we have a framebuffer, which we may not have
+ * if we are recording a secondary command buffer. In that case, we will
+ * have to wait until vkCmdExecuteCommands is called from a primary command
+ * buffer.
+ */
+ if (!framebuffer) {
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+ return;
+ }
+
+ const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
+
+ const uint32_t fb_layers = job->frame_tiling.layers;
+
+ v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
+ MAX2(fb_layers, 1) * 256 *
+ cl_packet_length(SUPERTILE_COORDINATES));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ assert(state->subpass_idx < state->pass->subpass_count);
+ const struct v3dv_render_pass *pass = state->pass;
+ const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
+ struct v3dv_cl *rcl = &job->rcl;
+
+ /* Common config must be the first TILE_RENDERING_MODE_CFG and
+ * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
+ * updates to the previous HW state.
+ */
+ bool do_early_zs_clear = false;
+ const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
+ config.image_width_pixels = framebuffer->width;
+ config.image_height_pixels = framebuffer->height;
+ config.number_of_render_targets = MAX2(subpass->color_count, 1);
+ config.multisample_mode_4x = tiling->msaa;
+ config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+
+ if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
+ const struct v3dv_image_view *iview =
+ framebuffer->attachments[ds_attachment_idx];
+ config.internal_depth_type = iview->internal_type;
+
+ set_rcl_early_z_config(job,
+ &config.early_z_disable,
+ &config.early_z_test_and_update_direction);
+
+ /* Early-Z/S clear can be enabled if the job is clearing and not
+ * storing (or loading) depth. If a stencil aspect is also present
+ * we have the same requirements for it, however, in this case we
+ * can accept stencil loadOp DONT_CARE as well, so instead of
+ * checking that stencil is cleared we check that it is not loaded.
+ *
+ * Early-Z/S clearing is independent of Early Z/S testing, so it is
+ * possible to enable one but not the other so long as their
+ * respective requirements are met.
+ */
+ struct v3dv_render_pass_attachment *ds_attachment =
+ &pass->attachments[ds_attachment_idx];
+
+ const VkImageAspectFlags ds_aspects =
+ vk_format_aspects(ds_attachment->desc.format);
+
+ bool needs_depth_clear =
+ check_needs_clear(state,
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_attachment->first_subpass,
+ ds_attachment->desc.loadOp,
+ subpass->do_depth_clear_with_draw);
+
+ bool needs_depth_store =
+ check_needs_store(state,
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_attachment->last_subpass,
+ ds_attachment->desc.storeOp);
+
+ do_early_zs_clear = needs_depth_clear && !needs_depth_store;
+ if (do_early_zs_clear &&
+ vk_format_has_stencil(ds_attachment->desc.format)) {
+ bool needs_stencil_load =
+ check_needs_load(state,
+ ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
+ ds_attachment->first_subpass,
+ ds_attachment->desc.stencilLoadOp);
+
+ bool needs_stencil_store =
+ check_needs_store(state,
+ ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
+ ds_attachment->last_subpass,
+ ds_attachment->desc.stencilStoreOp);
+
+ do_early_zs_clear = !needs_stencil_load && !needs_stencil_store;
+ }
+
+ config.early_depth_stencil_clear = do_early_zs_clear;
+ } else {
+ config.early_z_disable = true;
+ }
+ }
+
+ /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers"
+ * commands with the Z/S bit set, so keep track of whether we enabled this
+ * in the job so we can skip these later.
+ */
+ job->early_zs_clear = do_early_zs_clear;
+
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ uint32_t attachment_idx = subpass->color_attachments[i].attachment;
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct v3dv_image_view *iview =
+ state->framebuffer->attachments[attachment_idx];
+
+ const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
+ const struct v3d_resource_slice *slice =
+ &image->slices[iview->vk.base_mip_level];
+
+ const uint32_t *clear_color =
+ &state->attachments[attachment_idx].clear_value.color[0];
+
+ uint32_t clear_pad = 0;
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
+ int uif_block_height = v3d_utile_height(image->cpp) * 2;
+
+ uint32_t implicit_padded_height =
+ align(framebuffer->height, uif_block_height) / uif_block_height;
+
+ if (slice->padded_height_of_output_image_in_uif_blocks -
+ implicit_padded_height >= 15) {
+ clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
+ }
+ }
+
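+ /* The clear color is split across up to three packets: PART1 takes bits
+ * 0..55, PART2 bits 56..111 (only needed for 64bpp and wider render
+ * targets) and PART3 the top 16 bits, which is also where any UIF
+ * padding is specified.
+ */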
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
+ clear.clear_color_low_32_bits = clear_color[0];
+ clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
+ clear.render_target_number = i;
+ };
+
+ if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
+ clear.clear_color_mid_low_32_bits =
+ ((clear_color[1] >> 24) | (clear_color[2] << 8));
+ clear.clear_color_mid_high_24_bits =
+ ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
+ clear.render_target_number = i;
+ };
+ }
+
+ if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
+ clear.uif_padded_height_in_uif_blocks = clear_pad;
+ clear.clear_color_high_16_bits = clear_color[3] >> 16;
+ clear.render_target_number = i;
+ };
+ }
+ }
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
+ v3dX(cmd_buffer_render_pass_setup_render_target)
+ (cmd_buffer, 0, &rt.render_target_0_internal_bpp,
+ &rt.render_target_0_internal_type, &rt.render_target_0_clamp);
+ v3dX(cmd_buffer_render_pass_setup_render_target)
+ (cmd_buffer, 1, &rt.render_target_1_internal_bpp,
+ &rt.render_target_1_internal_type, &rt.render_target_1_clamp);
+ v3dX(cmd_buffer_render_pass_setup_render_target)
+ (cmd_buffer, 2, &rt.render_target_2_internal_bpp,
+ &rt.render_target_2_internal_type, &rt.render_target_2_clamp);
+ v3dX(cmd_buffer_render_pass_setup_render_target)
+ (cmd_buffer, 3, &rt.render_target_3_internal_bpp,
+ &rt.render_target_3_internal_type, &rt.render_target_3_clamp);
+ }
+
+ /* Ends rendering mode config. */
+ if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
+ clear.z_clear_value =
+ state->attachments[ds_attachment_idx].clear_value.z;
+ clear.stencil_clear_value =
+ state->attachments[ds_attachment_idx].clear_value.s;
+ };
+ } else {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
+ clear.z_clear_value = 1.0f;
+ clear.stencil_clear_value = 0;
+ };
+ }
+
+ /* Always set initial block size before the first branch, which needs
+ * to match the value from binning mode config.
+ */
+ cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+ init.use_auto_chained_tile_lists = true;
+ init.size_of_first_block_in_chained_tile_lists =
+ TILE_ALLOCATION_BLOCK_SIZE_64B;
+ }
+
+ cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
+ config.number_of_bin_tile_lists = 1;
+ config.total_frame_width_in_tiles = tiling->draw_tiles_x;
+ config.total_frame_height_in_tiles = tiling->draw_tiles_y;
+
+ config.supertile_width_in_tiles = tiling->supertile_width;
+ config.supertile_height_in_tiles = tiling->supertile_height;
+
+ config.total_frame_width_in_supertiles =
+ tiling->frame_width_in_supertiles;
+ config.total_frame_height_in_supertiles =
+ tiling->frame_height_in_supertiles;
+ }
+
+ /* Start by clearing the tile buffer. */
+ cl_emit(rcl, TILE_COORDINATES, coords) {
+ coords.tile_column_number = 0;
+ coords.tile_row_number = 0;
+ }
+
+ /* Emit an initial clear of the tile buffers. This is necessary
+ * for any buffers that should be cleared (since clearing
+ * normally happens at the *end* of the generic tile list), but
+ * it's also nice to clear everything so the first tile doesn't
+ * inherit any contents from some previous frame.
+ *
+ * Also, implement the GFXH-1742 workaround. There's a race in
+ * the HW between the RCL updating the TLB's internal type/size
+ * and the spawning of the QPU instances using the TLB's current
+ * internal type/size. To make sure the QPUs get the right
+ * state, we need 1 dummy store in between internal type/size
+ * changes on V3D 3.x, and 2 dummy stores on 4.x.
+ */
+ for (int i = 0; i < 2; i++) {
+ if (i > 0)
+ cl_emit(rcl, TILE_COORDINATES, coords);
+ cl_emit(rcl, END_OF_LOADS, end);
+ cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+ if (i == 0 && cmd_buffer->state.tile_aligned_render_area) {
+ cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
+ clear.clear_z_stencil_buffer = !job->early_zs_clear;
+ clear.clear_all_render_targets = true;
+ }
+ }
+ cl_emit(rcl, END_OF_TILE_MARKER, end);
+ }
+
+ cl_emit(rcl, FLUSH_VCD_CACHE, flush);
+
+ for (int layer = 0; layer < MAX2(1, fb_layers); layer++) {
+ if (subpass->view_mask == 0 || (subpass->view_mask & (1u << layer)))
+ cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer);
+ }
+
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+void
+v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
+ /* FIXME: right now we only support one viewport, so viewports[0] works;
+ * this would need to change if we allow multiple viewports.
+ */
+ float *vptranslate = dynamic->viewport.translate[0];
+ float *vpscale = dynamic->viewport.scale[0];
+
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ const uint32_t required_cl_size =
+ cl_packet_length(CLIPPER_XY_SCALING) +
+ cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) +
+ cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) +
+ cl_packet_length(VIEWPORT_OFFSET);
+ v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
+ clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
+ clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
+ }
+
+ cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
+ clip.viewport_z_offset_zc_to_zs = vptranslate[2];
+ clip.viewport_z_scale_zc_to_zs = vpscale[2];
+ }
+ cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
+ /* Vulkan's Z NDC is [0, 1], unlike OpenGL's [-1, 1] */
+ float z1 = vptranslate[2];
+ float z2 = vptranslate[2] + vpscale[2];
+ clip.minimum_zw = MIN2(z1, z2);
+ clip.maximum_zw = MAX2(z1, z2);
+ }
+
+ cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
+ vp.viewport_centre_x_coordinate = vptranslate[0];
+ vp.viewport_centre_y_coordinate = vptranslate[1];
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
+}
+
+void
+v3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic;
+
+ const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
+ V3DV_DYNAMIC_STENCIL_WRITE_MASK |
+ V3DV_DYNAMIC_STENCIL_REFERENCE;
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ 2 * cl_packet_length(STENCIL_CFG));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ bool emitted_stencil = false;
+ for (uint32_t i = 0; i < 2; i++) {
+ if (pipeline->emit_stencil_cfg[i]) {
+ if (dynamic_state->mask & dynamic_stencil_states) {
+ cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
+ pipeline->stencil_cfg[i], config) {
+ if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) {
+ config.stencil_test_mask =
+ i == 0 ? dynamic_state->stencil_compare_mask.front :
+ dynamic_state->stencil_compare_mask.back;
+ }
+ if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) {
+ config.stencil_write_mask =
+ i == 0 ? dynamic_state->stencil_write_mask.front :
+ dynamic_state->stencil_write_mask.back;
+ }
+ if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) {
+ config.stencil_ref_value =
+ i == 0 ? dynamic_state->stencil_reference.front :
+ dynamic_state->stencil_reference.back;
+ }
+ }
+ } else {
+ cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]);
+ }
+
+ emitted_stencil = true;
+ }
+ }
+
+ if (emitted_stencil) {
+ const uint32_t dynamic_stencil_dirty_flags =
+ V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
+ V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
+ V3DV_CMD_DIRTY_STENCIL_REFERENCE;
+ cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ assert(pipeline);
+
+ if (!pipeline->depth_bias.enabled)
+ return;
+
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
+ cl_emit(&job->bcl, DEPTH_OFFSET, bias) {
+ bias.depth_offset_factor = dynamic->depth_bias.slope_factor;
+ bias.depth_offset_units = dynamic->depth_bias.constant_factor;
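+ /* The offset units seem to be interpreted in 24-bit depth steps, so
+ * for 16-bit depth formats we presumably need to scale them up by
+ * 2^8 = 256 to get an equivalent bias.
+ */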
+ if (pipeline->depth_bias.is_z16)
+ bias.depth_offset_units *= 256.0f;
+ bias.limit = dynamic->depth_bias.depth_bias_clamp;
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS;
+}
+
+void
+v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, LINE_WIDTH, line) {
+ line.line_width = cmd_buffer->state.dynamic.line_width;
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH;
+}
+
+void
+v3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ assert(pipeline);
+
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, SAMPLE_STATE, state) {
+ state.coverage = 1.0f;
+ state.mask = pipeline->sample_mask;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ assert(pipeline);
+
+ const uint32_t blend_packets_size =
+ cl_packet_length(BLEND_ENABLES) +
+ cl_packet_length(BLEND_CONSTANT_COLOR) +
+ cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS;
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size);
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
+ if (pipeline->blend.enables) {
+ cl_emit(&job->bcl, BLEND_ENABLES, enables) {
+ enables.mask = pipeline->blend.enables;
+ }
+ }
+
+ for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
+ if (pipeline->blend.enables & (1 << i))
+ cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]);
+ }
+ }
+
+ if (pipeline->blend.needs_color_constants &&
+ cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) {
+ struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
+ cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
+ color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]);
+ color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]);
+ color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]);
+ color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]);
+ }
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(COLOR_WRITE_MASKS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
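+ /* The HW mask bits disable writes, so the dynamic color write enable
+ * bits are inverted before being combined with the pipeline's
+ * per-channel write masks.
+ */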
+ cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
+ mask.mask = (~dynamic->color_write_enable |
+ pipeline->blend.color_write_masks) & 0xffff;
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE;
+}
+
+static void
+emit_flat_shade_flags(struct v3dv_job *job,
+ int varying_offset,
+ uint32_t varyings,
+ enum V3DX(Varying_Flags_Action) lower,
+ enum V3DX(Varying_Flags_Action) higher)
+{
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ cl_packet_length(FLAT_SHADE_FLAGS));
+ v3dv_return_if_oom(NULL, job);
+
+ cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+ flags.varying_offset_v0 = varying_offset;
+ flags.flat_shade_flags_for_varyings_v024 = varyings;
+ flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower;
+ flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher;
+ }
+}
+
+static void
+emit_noperspective_flags(struct v3dv_job *job,
+ int varying_offset,
+ uint32_t varyings,
+ enum V3DX(Varying_Flags_Action) lower,
+ enum V3DX(Varying_Flags_Action) higher)
+{
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ cl_packet_length(NON_PERSPECTIVE_FLAGS));
+ v3dv_return_if_oom(NULL, job);
+
+ cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
+ flags.varying_offset_v0 = varying_offset;
+ flags.non_perspective_flags_for_varyings_v024 = varyings;
+ flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower;
+ flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher;
+ }
+}
+
+static void
+emit_centroid_flags(struct v3dv_job *job,
+ int varying_offset,
+ uint32_t varyings,
+ enum V3DX(Varying_Flags_Action) lower,
+ enum V3DX(Varying_Flags_Action) higher)
+{
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ cl_packet_length(CENTROID_FLAGS));
+ v3dv_return_if_oom(NULL, job);
+
+ cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
+ flags.varying_offset_v0 = varying_offset;
+ flags.centroid_flags_for_varyings_v024 = varyings;
+ flags.action_for_centroid_flags_of_lower_numbered_varyings = lower;
+ flags.action_for_centroid_flags_of_higher_numbered_varyings = higher;
+ }
+}
+
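+/* Emits a flags packet for each group of 24 varyings with any flag set.
+ * The first packet emitted zeroes the flags of all other groups, so groups
+ * that are fully zero don't need a packet of their own. Returns false if
+ * no group had any flags set.
+ */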
+static bool
+emit_varying_flags(struct v3dv_job *job,
+ uint32_t num_flags,
+ const uint32_t *flags,
+ void (*flag_emit_callback)(struct v3dv_job *job,
+ int varying_offset,
+ uint32_t flags,
+ enum V3DX(Varying_Flags_Action) lower,
+ enum V3DX(Varying_Flags_Action) higher))
+{
+ bool emitted_any = false;
+ for (int i = 0; i < num_flags; i++) {
+ if (!flags[i])
+ continue;
+
+ if (emitted_any) {
+ flag_emit_callback(job, i, flags[i],
+ V3D_VARYING_FLAGS_ACTION_UNCHANGED,
+ V3D_VARYING_FLAGS_ACTION_UNCHANGED);
+ } else if (i == 0) {
+ flag_emit_callback(job, i, flags[i],
+ V3D_VARYING_FLAGS_ACTION_UNCHANGED,
+ V3D_VARYING_FLAGS_ACTION_ZEROED);
+ } else {
+ flag_emit_callback(job, i, flags[i],
+ V3D_VARYING_FLAGS_ACTION_ZEROED,
+ V3D_VARYING_FLAGS_ACTION_ZEROED);
+ }
+
+ emitted_any = true;
+ }
+
+ return emitted_any;
+}
+
+void
+v3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+
+ struct v3d_fs_prog_data *prog_data_fs =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
+
+ const uint32_t num_flags =
+ ARRAY_SIZE(prog_data_fs->flat_shade_flags);
+ const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags;
+ const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags;
+ const uint32_t *centroid_flags = prog_data_fs->centroid_flags;
+
+ if (!emit_varying_flags(job, num_flags, flat_shade_flags,
+ emit_flat_shade_flags)) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
+ }
+
+ if (!emit_varying_flags(job, num_flags, noperspective_flags,
+ emit_noperspective_flags)) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
+ }
+
+ if (!emit_varying_flags(job, num_flags, centroid_flags,
+ emit_centroid_flags)) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
+ }
+}
+
+static void
+job_update_ez_state(struct v3dv_job *job,
+ struct v3dv_pipeline *pipeline,
+ struct v3dv_cmd_buffer *cmd_buffer)
+{
+ /* If first_ez_state is V3D_EZ_DISABLED it means that we have already
+ * determined that we should disable EZ completely for all draw calls in
+ * this job. This will cause us to disable EZ for the entire job in the
+ * Tile Rendering Mode RCL packet and when we do that we need to make sure
+ * we never emit a draw call in the job with EZ enabled in the CFG_BITS
+ * packet, so ez_state must also be V3D_EZ_DISABLED.
+ */
+ if (job->first_ez_state == V3D_EZ_DISABLED) {
+ assert(job->ez_state == V3D_EZ_DISABLED);
+ return;
+ }
+
+ /* This is part of the pre draw call handling, so we should be inside a
+ * render pass.
+ */
+ assert(cmd_buffer->state.pass);
+
+ /* If this is the first time we update EZ state for this job we first check
+ * if there is anything that requires disabling it completely for the entire
+ * job (based on state that is not related to the current draw call and
+ * pipeline state).
+ */
+ if (!job->decided_global_ez_enable) {
+ job->decided_global_ez_enable = true;
+
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ assert(state->subpass_idx < state->pass->subpass_count);
+ struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx];
+ if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) {
+ job->first_ez_state = V3D_EZ_DISABLED;
+ job->ez_state = V3D_EZ_DISABLED;
+ return;
+ }
+
+ /* GFXH-1918: the early-z buffer may load incorrect depth values
+ * if the frame has odd width or height.
+ *
+ * So we need to disable EZ in this case.
+ */
+ const struct v3dv_render_pass_attachment *ds_attachment =
+ &state->pass->attachments[subpass->ds_attachment.attachment];
+
+ const VkImageAspectFlags ds_aspects =
+ vk_format_aspects(ds_attachment->desc.format);
+
+ bool needs_depth_load =
+ check_needs_load(state,
+ ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
+ ds_attachment->first_subpass,
+ ds_attachment->desc.loadOp);
+
+ if (needs_depth_load) {
+ struct v3dv_framebuffer *fb = state->framebuffer;
+
+ if (!fb) {
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
+ perf_debug("Loading depth aspect in a secondary command buffer "
+ "without framebuffer info disables early-z tests.\n");
+ job->first_ez_state = V3D_EZ_DISABLED;
+ job->ez_state = V3D_EZ_DISABLED;
+ return;
+ }
+
+ if (((fb->width % 2) != 0 || (fb->height % 2) != 0)) {
+ perf_debug("Loading depth aspect for framebuffer with odd width "
+ "or height disables early-Z tests.\n");
+ job->first_ez_state = V3D_EZ_DISABLED;
+ job->ez_state = V3D_EZ_DISABLED;
+ return;
+ }
+ }
+ }
+
+ /* Otherwise, we can decide to selectively enable or disable EZ for draw
+ * calls using the CFG_BITS packet based on the bound pipeline state.
+ */
+
+ /* If the FS writes Z, then it may update against the chosen EZ direction */
+ struct v3dv_shader_variant *fs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
+ if (fs_variant->prog_data.fs->writes_z) {
+ job->ez_state = V3D_EZ_DISABLED;
+ return;
+ }
+
+ switch (pipeline->ez_state) {
+ case V3D_EZ_UNDECIDED:
+ /* If the pipeline didn't pick a direction but didn't disable, then go
+ * along with the current EZ state. This allows EZ optimization for Z
+ * func == EQUAL or NEVER.
+ */
+ break;
+
+ case V3D_EZ_LT_LE:
+ case V3D_EZ_GT_GE:
+ /* If the pipeline picked a direction, then it needs to match the current
+ * direction if we've decided on one.
+ */
+ if (job->ez_state == V3D_EZ_UNDECIDED)
+ job->ez_state = pipeline->ez_state;
+ else if (job->ez_state != pipeline->ez_state)
+ job->ez_state = V3D_EZ_DISABLED;
+ break;
+
+ case V3D_EZ_DISABLED:
+ /* If the pipeline disables EZ because of a bad Z func or stencil
+ * operation, then we can't do any more EZ in this frame.
+ */
+ job->ez_state = V3D_EZ_DISABLED;
+ break;
+ }
+
+ if (job->first_ez_state == V3D_EZ_UNDECIDED &&
+ job->ez_state != V3D_EZ_DISABLED) {
+ job->first_ez_state = job->ez_state;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ assert(pipeline);
+
+ job_update_ez_state(job, pipeline, cmd_buffer);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) {
+ config.early_z_enable = job->ez_state != V3D_EZ_DISABLED;
+ config.early_z_updates_enable = config.early_z_enable &&
+ pipeline->z_updates_enable;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ cl_packet_length(OCCLUSION_QUERY_COUNTER));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
+ if (cmd_buffer->state.query.active_query.bo) {
+ counter.address =
+ v3dv_cl_address(cmd_buffer->state.query.active_query.bo,
+ cmd_buffer->state.query.active_query.offset);
+ }
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+}
+
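+/* Finishes the current job and resumes the current subpass in a new job so
+ * that pending barrier state can be consumed: the new job is serialized
+ * against previous jobs, and also requires BCL sync if the barrier affects
+ * binning.
+ */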
+static struct v3dv_job *
+cmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer,
+ bool is_bcl_barrier)
+{
+ assert(cmd_buffer->state.subpass_idx != -1);
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_subpass_resume(cmd_buffer,
+ cmd_buffer->state.subpass_idx);
+ if (!job)
+ return NULL;
+
+ job->serialize = true;
+ job->needs_bcl_sync = is_bcl_barrier;
+ return job;
+}
+
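+/* Copies the vkCmdEndQuery state recorded in a secondary command buffer to
+ * the primary so the queries are processed when the primary's current job
+ * finishes.
+ */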
+static void
+cmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary,
+ struct v3dv_cmd_buffer *secondary)
+{
+ struct v3dv_cmd_buffer_state *p_state = &primary->state;
+ struct v3dv_cmd_buffer_state *s_state = &secondary->state;
+
+ const uint32_t total_state_count =
+ p_state->query.end.used_count + s_state->query.end.used_count;
+ v3dv_cmd_buffer_ensure_array_state(primary,
+ sizeof(struct v3dv_end_query_cpu_job_info),
+ total_state_count,
+ &p_state->query.end.alloc_count,
+ (void **) &p_state->query.end.states);
+ v3dv_return_if_oom(primary, NULL);
+
+ for (uint32_t i = 0; i < s_state->query.end.used_count; i++) {
+ const struct v3dv_end_query_cpu_job_info *s_qstate =
+ &secondary->state.query.end.states[i];
+
+ struct v3dv_end_query_cpu_job_info *p_qstate =
+ &p_state->query.end.states[p_state->query.end.used_count++];
+
+ p_qstate->pool = s_qstate->pool;
+ p_qstate->query = s_qstate->query;
+ }
+}
+
+void
+v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
+ uint32_t cmd_buffer_count,
+ const VkCommandBuffer *cmd_buffers)
+{
+ assert(primary->state.job);
+
+ /* Emit occlusion query state if needed so the draw calls inside our
+ * secondaries update the counters.
+ */
+ bool has_occlusion_query =
+ primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY;
+ if (has_occlusion_query)
+ v3dX(cmd_buffer_emit_occlusion_query)(primary);
+
+ /* FIXME: if our primary job tiling doesn't enable MSAA but any of the
+ * pipelines used by the secondaries do, we need to re-start the primary
+ * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed.
+ */
+ bool pending_barrier = false;
+ bool pending_bcl_barrier = false;
+ for (uint32_t i = 0; i < cmd_buffer_count; i++) {
+ V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]);
+
+ assert(secondary->usage_flags &
+ VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT);
+
+ list_for_each_entry(struct v3dv_job, secondary_job,
+ &secondary->jobs, list_link) {
+ if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
+ /* If the job is a CL, then we branch to it from the primary BCL.
+ * In this case the secondary's BCL is finished with a
+ * RETURN_FROM_SUB_LIST command to return to the primary BCL
+ * once we are done executing it.
+ */
+ assert(v3dv_cl_offset(&secondary_job->rcl) == 0);
+ assert(secondary_job->bcl.bo);
+
+ /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */
+ STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1);
+ assert(v3dv_cl_offset(&secondary_job->bcl) >= 1);
+ assert(*(((uint8_t *)secondary_job->bcl.next) - 1) ==
+ V3DX(RETURN_FROM_SUB_LIST_opcode));
+
+ /* If this secondary has any barriers (or we had any pending barrier
+ * to apply), then we can't just branch to it from the primary, we
+ * need to split the primary to create a new job that can consume
+ * the barriers first.
+ *
+ * FIXME: in this case, maybe just copy the secondary BCL without
+ * the RETURN_FROM_SUB_LIST into the primary job to skip the
+ * branch?
+ */
+ struct v3dv_job *primary_job = primary->state.job;
+ if (!primary_job || secondary_job->serialize || pending_barrier) {
+ const bool needs_bcl_barrier =
+ secondary_job->needs_bcl_sync || pending_bcl_barrier;
+ primary_job =
+ cmd_buffer_subpass_split_for_barrier(primary,
+ needs_bcl_barrier);
+ v3dv_return_if_oom(primary, NULL);
+
+ /* Since we have created a new primary we need to re-emit
+ * occlusion query state.
+ */
+ if (has_occlusion_query)
+ v3dX(cmd_buffer_emit_occlusion_query)(primary);
+ }
+
+ /* Make sure our primary job has all required BO references */
+ set_foreach(secondary_job->bos, entry) {
+ struct v3dv_bo *bo = (struct v3dv_bo *)entry->key;
+ v3dv_job_add_bo(primary_job, bo);
+ }
+
+ /* Emit required branch instructions. We expect each of these
+ * to end with a corresponding 'return from sub list' item.
+ */
+ list_for_each_entry(struct v3dv_bo, bcl_bo,
+ &secondary_job->bcl.bo_list, list_link) {
+ v3dv_cl_ensure_space_with_branch(&primary_job->bcl,
+ cl_packet_length(BRANCH_TO_SUB_LIST));
+ v3dv_return_if_oom(primary, NULL);
+ cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) {
+ branch.address = v3dv_cl_address(bcl_bo, 0);
+ }
+ }
+
+ primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl;
+ } else {
+ /* This is a regular job (CPU or GPU), so just finish the current
+ * primary job (if any) and then add the secondary job to the
+ * primary's job list right after it.
+ */
+ v3dv_cmd_buffer_finish_job(primary);
+ v3dv_job_clone_in_cmd_buffer(secondary_job, primary);
+ if (pending_barrier) {
+ secondary_job->serialize = true;
+ if (pending_bcl_barrier)
+ secondary_job->needs_bcl_sync = true;
+ }
+ }
+
+ pending_barrier = false;
+ pending_bcl_barrier = false;
+ }
+
+ /* If the secondary has recorded any vkCmdEndQuery commands, we need to
+ * copy this state to the primary so it is processed properly when the
+ * current primary job is finished.
+ */
+ cmd_buffer_copy_secondary_end_query_state(primary, secondary);
+
+ /* If this secondary had any pending barrier state we will need that
+ * barrier state consumed with whatever comes next in the primary.
+ */
+ assert(secondary->state.has_barrier || !secondary->state.has_bcl_barrier);
+ pending_barrier = secondary->state.has_barrier;
+ pending_bcl_barrier = secondary->state.has_bcl_barrier;
+ }
+
+ if (pending_barrier) {
+ primary->state.has_barrier = true;
+ primary->state.has_bcl_barrier |= pending_bcl_barrier;
+ }
+}
+
+static void
+emit_gs_shader_state_record(struct v3dv_job *job,
+ struct v3dv_bo *assembly_bo,
+ struct v3dv_shader_variant *gs_bin,
+ struct v3dv_cl_reloc gs_bin_uniforms,
+ struct v3dv_shader_variant *gs,
+ struct v3dv_cl_reloc gs_render_uniforms)
+{
+ cl_emit(&job->indirect, GEOMETRY_SHADER_STATE_RECORD, shader) {
+ shader.geometry_bin_mode_shader_code_address =
+ v3dv_cl_address(assembly_bo, gs_bin->assembly_offset);
+ shader.geometry_bin_mode_shader_4_way_threadable =
+ gs_bin->prog_data.gs->base.threads == 4;
+ shader.geometry_bin_mode_shader_start_in_final_thread_section =
+ gs_bin->prog_data.gs->base.single_seg;
+ shader.geometry_bin_mode_shader_propagate_nans = true;
+ shader.geometry_bin_mode_shader_uniforms_address =
+ gs_bin_uniforms;
+
+ shader.geometry_render_mode_shader_code_address =
+ v3dv_cl_address(assembly_bo, gs->assembly_offset);
+ shader.geometry_render_mode_shader_4_way_threadable =
+ gs->prog_data.gs->base.threads == 4;
+ shader.geometry_render_mode_shader_start_in_final_thread_section =
+ gs->prog_data.gs->base.single_seg;
+ shader.geometry_render_mode_shader_propagate_nans = true;
+ shader.geometry_render_mode_shader_uniforms_address =
+ gs_render_uniforms;
+ }
+}
+
+static uint8_t
+v3d_gs_output_primitive(uint32_t prim_type)
+{
+ switch (prim_type) {
+ case GL_POINTS:
+ return GEOMETRY_SHADER_POINTS;
+ case GL_LINE_STRIP:
+ return GEOMETRY_SHADER_LINE_STRIP;
+ case GL_TRIANGLE_STRIP:
+ return GEOMETRY_SHADER_TRI_STRIP;
+ default:
+ unreachable("Unsupported primitive type");
+ }
+}
+
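+/* The tessellation fields are hard-coded defaults here (tessellation is
+ * not wired up in this path); only the geometry shader fields carry real
+ * pipeline state.
+ */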
+static void
+emit_tes_gs_common_params(struct v3dv_job *job,
+ uint8_t gs_out_prim_type,
+ uint8_t gs_num_invocations)
+{
+ cl_emit(&job->indirect, TESSELLATION_GEOMETRY_COMMON_PARAMS, shader) {
+ shader.tessellation_type = TESSELLATION_TYPE_TRIANGLE;
+ shader.tessellation_point_mode = false;
+ shader.tessellation_edge_spacing = TESSELLATION_EDGE_SPACING_EVEN;
+ shader.tessellation_clockwise = true;
+ shader.tessellation_invocations = 1;
+
+ shader.geometry_shader_output_format =
+ v3d_gs_output_primitive(gs_out_prim_type);
+ shader.geometry_shader_instances = gs_num_invocations & 0x1F;
+ }
+}
+
+static uint8_t
+simd_width_to_gs_pack_mode(uint32_t width)
+{
+ switch (width) {
+ case 16:
+ return V3D_PACK_MODE_16_WAY;
+ case 8:
+ return V3D_PACK_MODE_8_WAY;
+ case 4:
+ return V3D_PACK_MODE_4_WAY;
+ case 1:
+ return V3D_PACK_MODE_1_WAY;
+ default:
+ unreachable("Invalid SIMD width");
+ };
+}
+
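+/* Emitted twice by the caller: once with the binning VPM configuration and
+ * once with the rendering one.
+ */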
+static void
+emit_tes_gs_shader_params(struct v3dv_job *job,
+ uint32_t gs_simd,
+ uint32_t gs_vpm_output_size,
+ uint32_t gs_max_vpm_input_size_per_batch)
+{
+ cl_emit(&job->indirect, TESSELLATION_GEOMETRY_SHADER_PARAMS, shader) {
+ shader.tcs_batch_flush_mode = V3D_TCS_FLUSH_MODE_FULLY_PACKED;
+ shader.per_patch_data_column_depth = 1;
+ shader.tcs_output_segment_size_in_sectors = 1;
+ shader.tcs_output_segment_pack_mode = V3D_PACK_MODE_16_WAY;
+ shader.tes_output_segment_size_in_sectors = 1;
+ shader.tes_output_segment_pack_mode = V3D_PACK_MODE_16_WAY;
+ shader.gs_output_segment_size_in_sectors = gs_vpm_output_size;
+ shader.gs_output_segment_pack_mode =
+ simd_width_to_gs_pack_mode(gs_simd);
+ shader.tbg_max_patches_per_tcs_batch = 1;
+ shader.tbg_max_extra_vertex_segs_for_patches_after_first = 0;
+ shader.tbg_min_tcs_output_segments_required_in_play = 1;
+ shader.tbg_min_per_patch_data_segments_required_in_play = 1;
+ shader.tpg_max_patches_per_tes_batch = 1;
+ shader.tpg_max_vertex_segments_per_tes_batch = 0;
+ shader.tpg_max_tcs_output_segments_per_tes_batch = 1;
+ shader.tpg_min_tes_output_segments_required_in_play = 1;
+ shader.gbg_max_tes_output_vertex_segments_per_gs_batch =
+ gs_max_vpm_input_size_per_batch;
+ shader.gbg_min_gs_output_segments_required_in_play = 1;
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ struct v3dv_pipeline *pipeline = state->gfx.pipeline;
+ assert(pipeline);
+
+ struct v3dv_shader_variant *vs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
+ struct v3d_vs_prog_data *prog_data_vs = vs_variant->prog_data.vs;
+
+ struct v3dv_shader_variant *vs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
+ struct v3d_vs_prog_data *prog_data_vs_bin = vs_bin_variant->prog_data.vs;
+
+ struct v3dv_shader_variant *fs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
+ struct v3d_fs_prog_data *prog_data_fs = fs_variant->prog_data.fs;
+
+ struct v3dv_shader_variant *gs_variant = NULL;
+ struct v3dv_shader_variant *gs_bin_variant = NULL;
+ struct v3d_gs_prog_data *prog_data_gs = NULL;
+ struct v3d_gs_prog_data *prog_data_gs_bin = NULL;
+ if (pipeline->has_gs) {
+ gs_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
+ prog_data_gs = gs_variant->prog_data.gs;
+
+ gs_bin_variant =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
+ prog_data_gs_bin = gs_bin_variant->prog_data.gs;
+ }
+
+ /* Update the cache dirty flag based on the shader progs data */
+ job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl;
+ job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl;
+ job->tmu_dirty_rcl |= prog_data_fs->base.tmu_dirty_rcl;
+ if (pipeline->has_gs) {
+ job->tmu_dirty_rcl |= prog_data_gs_bin->base.tmu_dirty_rcl;
+ job->tmu_dirty_rcl |= prog_data_gs->base.tmu_dirty_rcl;
+ }
+
+ /* See GFXH-930 workaround below */
+ uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);
+
+ uint32_t shader_state_record_length =
+ cl_packet_length(GL_SHADER_STATE_RECORD);
+ if (pipeline->has_gs) {
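+ /* We emit TESSELLATION_GEOMETRY_SHADER_PARAMS twice: once for the
+ * binning pipeline and once for the rendering pipeline (see the two
+ * emit_tes_gs_shader_params() calls below), hence the 2x factor.
+ */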
+ shader_state_record_length +=
+ cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) +
+ cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) +
+ 2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS);
+ }
+
+ uint32_t shader_rec_offset =
+ v3dv_cl_ensure_space(&job->indirect,
+ shader_state_record_length +
+ num_elements_to_emit *
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
+ 32);
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo;
+
+ if (pipeline->has_gs) {
+ emit_gs_shader_state_record(job,
+ assembly_bo,
+ gs_bin_variant,
+ cmd_buffer->state.uniforms.gs_bin,
+ gs_variant,
+ cmd_buffer->state.uniforms.gs);
+
+ emit_tes_gs_common_params(job,
+ prog_data_gs->out_prim_type,
+ prog_data_gs->num_invocations);
+
+ emit_tes_gs_shader_params(job,
+ pipeline->vpm_cfg_bin.gs_width,
+ pipeline->vpm_cfg_bin.Gd,
+ pipeline->vpm_cfg_bin.Gv);
+
+ emit_tes_gs_shader_params(job,
+ pipeline->vpm_cfg.gs_width,
+ pipeline->vpm_cfg.Gd,
+ pipeline->vpm_cfg.Gv);
+ }
+
+ struct v3dv_bo *default_attribute_values =
+ pipeline->default_attribute_values != NULL ?
+ pipeline->default_attribute_values :
+ pipeline->device->default_attribute_float;
+
+ cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
+ pipeline->shader_state_record, shader) {
+
+ /* FIXME: we are setting these values both here and during
+ * prepacking, because both cl_emit_with_prepacked and v3dvx_pack
+ * assert on minimum values for them. It would be good to have
+ * v3dvx_pack assert on the final value instead, if possible.
+ */
+ shader.min_coord_shader_input_segments_required_in_play =
+ pipeline->vpm_cfg_bin.As;
+ shader.min_vertex_shader_input_segments_required_in_play =
+ pipeline->vpm_cfg.As;
+
+ shader.coordinate_shader_code_address =
+ v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset);
+ shader.vertex_shader_code_address =
+ v3dv_cl_address(assembly_bo, vs_variant->assembly_offset);
+ shader.fragment_shader_code_address =
+ v3dv_cl_address(assembly_bo, fs_variant->assembly_offset);
+
+ shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin;
+ shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs;
+ shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs;
+
+ shader.address_of_default_attribute_values =
+ v3dv_cl_address(default_attribute_values, 0);
+
+ shader.any_shader_reads_hardware_written_primitive_id =
+ (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid;
+ shader.insert_primitive_id_as_first_varying_to_fragment_shader =
+ !pipeline->has_gs && prog_data_fs->uses_pid;
+ }
+
+ /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */
+ bool cs_loaded_any = false;
+ const bool cs_uses_builtins = prog_data_vs_bin->uses_iid ||
+ prog_data_vs_bin->uses_biid ||
+ prog_data_vs_bin->uses_vid;
+ const uint32_t packet_length =
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+
+ uint32_t emitted_va_count = 0;
+ for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) {
+ assert(i < MAX_VERTEX_ATTRIBS);
+
+ if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED)
+ continue;
+
+ const uint32_t binding = pipeline->va[i].binding;
+
+ /* We store each vertex attribute in the array using its driver location
+ * as index.
+ */
+ const uint32_t location = i;
+
+ struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];
+
+ cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
+ &pipeline->vertex_attrs[i * packet_length], attr) {
+
+ assert(c_vb->buffer->mem->bo);
+ attr.address = v3dv_cl_address(c_vb->buffer->mem->bo,
+ c_vb->buffer->mem_offset +
+ pipeline->va[i].offset +
+ c_vb->offset);
+
+ attr.number_of_values_read_by_coordinate_shader =
+ prog_data_vs_bin->vattr_sizes[location];
+ attr.number_of_values_read_by_vertex_shader =
+ prog_data_vs->vattr_sizes[location];
+
+ /* GFXH-930: At least one attribute must be enabled and read by CS
+ * and VS. If we have attributes being consumed by the VS but not
+ * the CS, then set up a dummy load of the last attribute into the
+ * CS's VPM inputs. (Since the CS is just the VS with dead code
+ * eliminated, the CS can never read an attribute that the VS
+ * doesn't.)
+ *
+ * GFXH-1602: the first attribute must be active if using builtins.
+ */
+ if (prog_data_vs_bin->vattr_sizes[location])
+ cs_loaded_any = true;
+
+ if (i == 0 && cs_uses_builtins && !cs_loaded_any) {
+ attr.number_of_values_read_by_coordinate_shader = 1;
+ cs_loaded_any = true;
+ } else if (i == pipeline->va_count - 1 && !cs_loaded_any) {
+ attr.number_of_values_read_by_coordinate_shader = 1;
+ cs_loaded_any = true;
+ }
+
+ attr.maximum_index = 0xffffff;
+ }
+
+ emitted_va_count++;
+ }
+
+ if (pipeline->va_count == 0) {
+ /* GFXH-930: At least one attribute must be enabled and read
+ * by CS and VS. If we have no attributes being consumed by
+ * the shader, set up a dummy to be loaded into the VPM.
+ */
+ cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+ /* Valid address of data whose value will be unused. */
+ attr.address = v3dv_cl_address(job->indirect.bo, 0);
+
+ attr.type = ATTRIBUTE_FLOAT;
+ attr.stride = 0;
+ attr.vec_size = 1;
+
+ attr.number_of_values_read_by_coordinate_shader = 1;
+ attr.number_of_values_read_by_vertex_shader = 1;
+ }
+ }
+
+ if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) {
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ sizeof(pipeline->vcm_cache_size));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size);
+ }
+
+ v3dv_cl_ensure_space_with_branch(&job->bcl,
+ cl_packet_length(GL_SHADER_STATE));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ if (pipeline->has_gs) {
+ cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) {
+ state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset);
+ state.number_of_attribute_arrays = num_elements_to_emit;
+ }
+ } else {
+ cl_emit(&job->bcl, GL_SHADER_STATE, state) {
+ state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset);
+ state.number_of_attribute_arrays = num_elements_to_emit;
+ }
+ }
+
+ cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER |
+ V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
+ V3DV_CMD_DIRTY_PUSH_CONSTANTS);
+ cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
+ cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS;
+}
+
+/* FIXME: C&P from v3dx_draw. Refactor to common place? */
+static uint32_t
+v3d_hw_prim_type(enum pipe_prim_type prim_type)
+{
+ switch (prim_type) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_LOOP:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return prim_type;
+
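+ /* The hardware encodes the adjacency variants consecutively starting
+ * at 8, in the same order as gallium, so a simple offset suffices.
+ */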
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ return 8 + (prim_type - PIPE_PRIM_LINES_ADJACENCY);
+
+ default:
+ unreachable("Unsupported primitive type");
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_draw_info *info)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+ struct v3dv_pipeline *pipeline = state->gfx.pipeline;
+
+ assert(pipeline);
+
+ uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
+
+ if (info->first_instance > 0) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
+ base.base_instance = info->first_instance;
+ base.base_vertex = 0;
+ }
+ }
+
+ if (info->instance_count > 1) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
+ prim.mode = hw_prim_type;
+ prim.index_of_first_vertex = info->first_vertex;
+ prim.number_of_instances = info->instance_count;
+ prim.instance_length = info->vertex_count;
+ }
+ } else {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+ cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) {
+ prim.mode = hw_prim_type;
+ prim.length = info->vertex_count;
+ prim.index_of_first_vertex = info->first_vertex;
+ }
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ /* We flag all state as dirty when we create a new job, so make sure we
+ * have a valid index buffer before attempting to emit state for it.
+ */
+ struct v3dv_buffer *ibuffer =
+ v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer);
+ if (ibuffer) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ const uint32_t offset = cmd_buffer->state.index_buffer.offset;
+ cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
+ ib.address = v3dv_cl_address(ibuffer->mem->bo,
+ ibuffer->mem_offset + offset);
+ ib.size = ibuffer->mem->bo->size;
+ }
+ }
+
+ cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER;
+}
+
+void
+v3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t indexCount,
+ uint32_t instanceCount,
+ uint32_t firstIndex,
+ int32_t vertexOffset,
+ uint32_t firstInstance)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
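+ /* The hardware index type is the log2 of the index size in bytes:
+ * 1 -> 0, 2 -> 1, 4 -> 2, which is what ffs(index_size) - 1 yields.
+ */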
+ uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
+ uint32_t index_offset = firstIndex * cmd_buffer->state.index_buffer.index_size;
+
+ if (vertexOffset != 0 || firstInstance != 0) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
+ base.base_instance = firstInstance;
+ base.base_vertex = vertexOffset;
+ }
+ }
+
+ if (instanceCount == 1) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(INDEXED_PRIM_LIST));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
+ prim.index_type = index_type;
+ prim.length = indexCount;
+ prim.index_offset = index_offset;
+ prim.mode = hw_prim_type;
+ prim.enable_primitive_restarts = pipeline->primitive_restart;
+ }
+ } else if (instanceCount > 1) {
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
+ prim.index_type = index_type;
+ prim.index_offset = index_offset;
+ prim.mode = hw_prim_type;
+ prim.enable_primitive_restarts = pipeline->primitive_restart;
+ prim.number_of_instances = instanceCount;
+ prim.instance_length = indexCount;
+ }
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
+
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) {
+ prim.mode = hw_prim_type;
+ prim.number_of_draw_indirect_array_records = drawCount;
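+ /* Vulkan requires the indirect draw stride to be a multiple of 4
+ * (when drawCount > 1), so encoding it in units of 4 bytes is
+ * lossless.
+ */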
+ prim.stride_in_multiples_of_4_bytes = stride >> 2;
+ prim.address = v3dv_cl_address(buffer->mem->bo,
+ buffer->mem_offset + offset);
+ }
+}
+
+void
+v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride)
+{
+ struct v3dv_job *job = cmd_buffer->state.job;
+ assert(job);
+
+ const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
+ uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology);
+ uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1;
+
+ v3dv_cl_ensure_space_with_branch(
+ &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST));
+ v3dv_return_if_oom(cmd_buffer, NULL);
+
+ cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
+ prim.index_type = index_type;
+ prim.mode = hw_prim_type;
+ prim.enable_primitive_restarts = pipeline->primitive_restart;
+ prim.number_of_draw_indirect_indexed_records = drawCount;
+ prim.stride_in_multiples_of_4_bytes = stride >> 2;
+ prim.address = v3dv_cl_address(buffer->mem->bo,
+ buffer->mem_offset + offset);
+ }
+}
+
+void
+v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
+ int rt,
+ uint32_t *rt_bpp,
+ uint32_t *rt_type,
+ uint32_t *rt_clamp)
+{
+ const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
+
+ assert(state->subpass_idx < state->pass->subpass_count);
+ const struct v3dv_subpass *subpass =
+ &state->pass->subpasses[state->subpass_idx];
+
+ if (rt >= subpass->color_count)
+ return;
+
+ struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt];
+ const uint32_t attachment_idx = attachment->attachment;
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ return;
+
+ const struct v3dv_framebuffer *framebuffer = state->framebuffer;
+ assert(attachment_idx < framebuffer->attachment_count);
+ struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
+ assert(iview->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT);
+
+ *rt_bpp = iview->internal_bpp;
+ *rt_type = iview->internal_type;
+ if (vk_format_is_int(iview->vk.format))
+ *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT;
+ else if (vk_format_is_srgb(iview->vk.format))
+ *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM;
+ else
+ *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c b/lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c
new file mode 100644
index 000000000..2c28ce46a
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_descriptor_set.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+/*
+ * Returns how much space a given descriptor type needs on a bo (GPU
+ * memory).
+ */
+uint32_t
+v3dX(descriptor_bo_size)(VkDescriptorType type)
+{
+ switch(type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ return cl_aligned_packet_length(SAMPLER_STATE, 32);
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ return cl_aligned_packet_length(SAMPLER_STATE, 32) +
+ cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32);
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ return cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32);
+ default:
+ return 0;
+ }
+}
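+
+/* Note the layout implied above for combined image samplers: the texture
+ * shader state is packed first (offset 0) with the sampler state right
+ * after it, which is what the combined_image_sampler_*_state_offset
+ * helpers at the end of this file return.
+ */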
+
+/* To compute the max_bo_size we want to iterate through the descriptor
+ * types. Unfortunately we can't just use the descriptor type enum values, as
+ * they are not defined consecutively (extensions can add new descriptor
+ * types), and VK_DESCRIPTOR_TYPE_MAX_ENUM is also a really big number.
+ */
+static const uint32_t supported_descriptor_types[] = {
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
+ VK_DESCRIPTOR_TYPE_SAMPLER,
+ VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
+ VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+};
+
+uint32_t
+v3dX(max_descriptor_bo_size)(void)
+{
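+ /* Cached lazily; the computation is idempotent, so a race between
+ * concurrent first calls is harmless.
+ */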
+ static uint32_t max = 0;
+
+ if (max == 0) {
+ for (uint32_t i = 0; i < ARRAY_SIZE(supported_descriptor_types); i++)
+ max = MAX2(max, v3dX(descriptor_bo_size)(supported_descriptor_types[i]));
+ }
+ assert(max != 0);
+
+ return max;
+}
+
+
+uint32_t
+v3dX(combined_image_sampler_texture_state_offset)(void)
+{
+ return 0;
+}
+
+uint32_t
+v3dX(combined_image_sampler_sampler_state_offset)(void)
+{
+ return cl_aligned_packet_length(TEXTURE_SHADER_STATE, 32);
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_device.c b/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
new file mode 100644
index 000000000..a48738aec
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_device.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+#include "vk_format_info.h"
+#include "util/u_pack_color.h"
+#include "util/half_float.h"
+
+static const enum V3DX(Wrap_Mode) vk_to_v3d_wrap_mode[] = {
+ [VK_SAMPLER_ADDRESS_MODE_REPEAT] = V3D_WRAP_MODE_REPEAT,
+ [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = V3D_WRAP_MODE_MIRROR,
+ [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = V3D_WRAP_MODE_CLAMP,
+ [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = V3D_WRAP_MODE_MIRROR_ONCE,
+ [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = V3D_WRAP_MODE_BORDER,
+};
+
+static const enum V3DX(Compare_Function)
+vk_to_v3d_compare_func[] = {
+ [VK_COMPARE_OP_NEVER] = V3D_COMPARE_FUNC_NEVER,
+ [VK_COMPARE_OP_LESS] = V3D_COMPARE_FUNC_LESS,
+ [VK_COMPARE_OP_EQUAL] = V3D_COMPARE_FUNC_EQUAL,
+ [VK_COMPARE_OP_LESS_OR_EQUAL] = V3D_COMPARE_FUNC_LEQUAL,
+ [VK_COMPARE_OP_GREATER] = V3D_COMPARE_FUNC_GREATER,
+ [VK_COMPARE_OP_NOT_EQUAL] = V3D_COMPARE_FUNC_NOTEQUAL,
+ [VK_COMPARE_OP_GREATER_OR_EQUAL] = V3D_COMPARE_FUNC_GEQUAL,
+ [VK_COMPARE_OP_ALWAYS] = V3D_COMPARE_FUNC_ALWAYS,
+};
+
+
+static union pipe_color_union
+encode_border_color(const VkSamplerCustomBorderColorCreateInfoEXT *bc_info)
+{
+ const struct util_format_description *desc =
+ vk_format_description(bc_info->format);
+
+ const struct v3dv_format *format = v3dX(get_format)(bc_info->format);
+
+ union pipe_color_union border;
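+ /* Apply the format swizzle to the custom border color: PIPE_SWIZZLE_X..W
+ * are 0..3 and select a source channel, while constant swizzles (0/1)
+ * are simply loaded as 0 here.
+ */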
+ for (int i = 0; i < 4; i++) {
+ if (format->swizzle[i] <= 3)
+ border.ui[i] = bc_info->customBorderColor.uint32[format->swizzle[i]];
+ else
+ border.ui[i] = 0;
+ }
+
+ /* handle clamping */
+ if (vk_format_has_depth(bc_info->format) &&
+ vk_format_has_stencil(bc_info->format)) {
+ border.f[0] = CLAMP(border.f[0], 0, 1);
+ border.ui[1] = CLAMP(border.ui[1], 0, 0xff);
+ } else if (vk_format_is_unorm(bc_info->format)) {
+ for (int i = 0; i < 4; i++)
+ border.f[i] = CLAMP(border.f[i], 0, 1);
+ } else if (vk_format_is_snorm(bc_info->format)) {
+ for (int i = 0; i < 4; i++)
+ border.f[i] = CLAMP(border.f[i], -1, 1);
+ } else if (vk_format_is_uint(bc_info->format) &&
+ desc->channel[0].size < 32) {
+ for (int i = 0; i < 4; i++)
+ border.ui[i] = CLAMP(border.ui[i], 0, (1 << desc->channel[i].size) - 1);
+ } else if (vk_format_is_sint(bc_info->format) &&
+ desc->channel[0].size < 32) {
+ for (int i = 0; i < 4; i++)
+ border.i[i] = CLAMP(border.i[i],
+ -(1 << (desc->channel[i].size - 1)),
+ (1 << (desc->channel[i].size - 1)) - 1);
+ }
+
+ /* convert from float to expected format */
+ if (vk_format_is_srgb(bc_info->format) ||
+ vk_format_is_compressed(bc_info->format)) {
+ for (int i = 0; i < 4; i++)
+ border.ui[i] = _mesa_float_to_half(border.f[i]);
+ } else if (vk_format_is_unorm(bc_info->format)) {
+ for (int i = 0; i < 4; i++) {
+ switch (desc->channel[i].size) {
+ case 8:
+ case 16:
+ /* the hardware expects a 16-bit unorm value for non-depth formats */
+ if (!vk_format_has_depth(bc_info->format))
+ border.ui[i] = (uint32_t) (border.f[i] * (float) 0xffff);
+ break;
+ case 24:
+ case 32:
+ /* uses full f32; no conversion needed */
+ break;
+ default:
+ border.ui[i] = _mesa_float_to_half(border.f[i]);
+ break;
+ }
+ }
+ } else if (vk_format_is_snorm(bc_info->format)) {
+ for (int i = 0; i < 4; i++) {
+ switch (desc->channel[i].size) {
+ case 8:
+ border.ui[i] = (int32_t) (border.f[i] * (float) 0x3fff);
+ break;
+ case 16:
+ border.i[i] = (int32_t) (border.f[i] * (float) 0x7fff);
+ break;
+ case 24:
+ case 32:
+ /* uses full f32; no conversion needed */
+ break;
+ default:
+ border.ui[i] = _mesa_float_to_half(border.f[i]);
+ break;
+ }
+ }
+ } else if (vk_format_is_float(bc_info->format)) {
+ for (int i = 0; i < 4; i++) {
+ switch(desc->channel[i].size) {
+ case 16:
+ border.ui[i] = _mesa_float_to_half(border.f[i]);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return border;
+}
+
+void
+v3dX(pack_sampler_state)(struct v3dv_sampler *sampler,
+ const VkSamplerCreateInfo *pCreateInfo,
+ const VkSamplerCustomBorderColorCreateInfoEXT *bc_info)
+{
+ enum V3DX(Border_Color_Mode) border_color_mode;
+
+ switch (pCreateInfo->borderColor) {
+ case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+ border_color_mode = V3D_BORDER_COLOR_0000;
+ break;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+ border_color_mode = V3D_BORDER_COLOR_0001;
+ break;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+ case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+ border_color_mode = V3D_BORDER_COLOR_1111;
+ break;
+ case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+ case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+ border_color_mode = V3D_BORDER_COLOR_FOLLOWS;
+ break;
+ default:
+ unreachable("Unknown border color");
+ break;
+ }
+
+ /* For some texture formats, when clamping to a transparent black border
+ * the CTS expects alpha to be set to 1 instead of 0, but the border color
+ * mode takes priority over the texture state swizzle, so the only way to
+ * fix that is to apply a swizzle in the shader. Here we keep track of
+ * whether we are activating that mode, and at compile time we will decide
+ * whether we need to enable the texture swizzle lowering in the shader
+ * key, depending on the actual texture format.
+ */
+ if ((pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER) &&
+ border_color_mode == V3D_BORDER_COLOR_0000) {
+ sampler->clamp_to_transparent_black_border = true;
+ }
+
+ v3dvx_pack(sampler->sampler_state, SAMPLER_STATE, s) {
+ if (pCreateInfo->anisotropyEnable) {
+ s.anisotropy_enable = true;
+ if (pCreateInfo->maxAnisotropy > 8)
+ s.maximum_anisotropy = 3;
+ else if (pCreateInfo->maxAnisotropy > 4)
+ s.maximum_anisotropy = 2;
+ else if (pCreateInfo->maxAnisotropy > 2)
+ s.maximum_anisotropy = 1;
+ }
+
+ s.border_color_mode = border_color_mode;
+
+ if (s.border_color_mode == V3D_BORDER_COLOR_FOLLOWS) {
+ union pipe_color_union border = encode_border_color(bc_info);
+
+ s.border_color_word_0 = border.ui[0];
+ s.border_color_word_1 = border.ui[1];
+ s.border_color_word_2 = border.ui[2];
+ s.border_color_word_3 = border.ui[3];
+ }
+
+ s.wrap_i_border = false; /* Also hardcoded on v3d */
+ s.wrap_s = vk_to_v3d_wrap_mode[pCreateInfo->addressModeU];
+ s.wrap_t = vk_to_v3d_wrap_mode[pCreateInfo->addressModeV];
+ s.wrap_r = vk_to_v3d_wrap_mode[pCreateInfo->addressModeW];
+ s.fixed_bias = pCreateInfo->mipLodBias;
+ s.max_level_of_detail = MIN2(MAX2(0, pCreateInfo->maxLod), 15);
+ s.min_level_of_detail = MIN2(MAX2(0, pCreateInfo->minLod), 15);
+ s.srgb_disable = 0; /* Not even set by v3d */
+ s.depth_compare_function =
+ vk_to_v3d_compare_func[pCreateInfo->compareEnable ?
+ pCreateInfo->compareOp : VK_COMPARE_OP_NEVER];
+ s.mip_filter_nearest = pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST;
+ s.min_filter_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
+ s.mag_filter_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
+ }
+}
+
+/**
+ * This computes the maximum bpp used by any of the render targets used by
+ * a particular subpass and checks if any of those render targets are
+ * multisampled. If we don't have a subpass (when we are not inside a
+ * render pass), then we assume that all framebuffer attachments are used.
+ */
+void
+v3dX(framebuffer_compute_internal_bpp_msaa)(
+ const struct v3dv_framebuffer *framebuffer,
+ const struct v3dv_subpass *subpass,
+ uint8_t *max_bpp,
+ bool *msaa)
+{
+ STATIC_ASSERT(V3D_INTERNAL_BPP_32 == 0);
+ *max_bpp = V3D_INTERNAL_BPP_32;
+ *msaa = false;
+
+ if (subpass) {
+ for (uint32_t i = 0; i < subpass->color_count; i++) {
+ uint32_t att_idx = subpass->color_attachments[i].attachment;
+ if (att_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ const struct v3dv_image_view *att = framebuffer->attachments[att_idx];
+ assert(att);
+
+ if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
+ *max_bpp = MAX2(*max_bpp, att->internal_bpp);
+
+ if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT)
+ *msaa = true;
+ }
+
+ if (!*msaa && subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+ const struct v3dv_image_view *att =
+ framebuffer->attachments[subpass->ds_attachment.attachment];
+ assert(att);
+
+ if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT)
+ *msaa = true;
+ }
+
+ return;
+ }
+
+ assert(framebuffer->attachment_count <= 4);
+ for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
+ const struct v3dv_image_view *att = framebuffer->attachments[i];
+ assert(att);
+
+ if (att->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
+ *max_bpp = MAX2(*max_bpp, att->internal_bpp);
+
+ if (att->vk.image->samples > VK_SAMPLE_COUNT_1_BIT)
+ *msaa = true;
+ }
+
+ return;
+}
+
+uint32_t
+v3dX(zs_buffer_from_aspect_bits)(VkImageAspectFlags aspects)
+{
+ const VkImageAspectFlags zs_aspects =
+ VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+ const VkImageAspectFlags filtered_aspects = aspects & zs_aspects;
+
+ if (filtered_aspects == zs_aspects)
+ return ZSTENCIL;
+ else if (filtered_aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ return Z;
+ else if (filtered_aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
+ return STENCIL;
+ else
+ return NONE;
+}
+
+void
+v3dX(get_hw_clear_color)(const VkClearColorValue *color,
+ uint32_t internal_type,
+ uint32_t internal_size,
+ uint32_t *hw_color)
+{
+ union util_color uc;
+ switch (internal_type) {
+ case V3D_INTERNAL_TYPE_8:
+ util_pack_color(color->float32, PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ memcpy(hw_color, uc.ui, internal_size);
+ break;
+ case V3D_INTERNAL_TYPE_8I:
+ case V3D_INTERNAL_TYPE_8UI:
+ hw_color[0] = ((color->uint32[0] & 0xff) |
+ (color->uint32[1] & 0xff) << 8 |
+ (color->uint32[2] & 0xff) << 16 |
+ (color->uint32[3] & 0xff) << 24);
+ break;
+ case V3D_INTERNAL_TYPE_16F:
+ util_pack_color(color->float32, PIPE_FORMAT_R16G16B16A16_FLOAT, &uc);
+ memcpy(hw_color, uc.ui, internal_size);
+ break;
+ case V3D_INTERNAL_TYPE_16I:
+ case V3D_INTERNAL_TYPE_16UI:
+ hw_color[0] = ((color->uint32[0] & 0xffff) | color->uint32[1] << 16);
+ hw_color[1] = ((color->uint32[2] & 0xffff) | color->uint32[3] << 16);
+ break;
+ case V3D_INTERNAL_TYPE_32F:
+ case V3D_INTERNAL_TYPE_32I:
+ case V3D_INTERNAL_TYPE_32UI:
+ memcpy(hw_color, color->uint32, internal_size);
+ break;
+ }
+}
+
+#ifdef DEBUG
+void
+v3dX(device_check_prepacked_sizes)(void)
+{
+ STATIC_ASSERT(V3DV_SAMPLER_STATE_LENGTH >=
+ cl_packet_length(SAMPLER_STATE));
+ STATIC_ASSERT(V3DV_TEXTURE_SHADER_STATE_LENGTH >=
+ cl_packet_length(TEXTURE_SHADER_STATE));
+ STATIC_ASSERT(V3DV_BLEND_CFG_LENGTH >=
+ cl_packet_length(BLEND_CFG));
+ STATIC_ASSERT(V3DV_CFG_BITS_LENGTH >=
+ cl_packet_length(CFG_BITS));
+ STATIC_ASSERT(V3DV_GL_SHADER_STATE_RECORD_LENGTH >=
+ cl_packet_length(GL_SHADER_STATE_RECORD));
+ STATIC_ASSERT(V3DV_VCM_CACHE_SIZE_LENGTH >=
+ cl_packet_length(VCM_CACHE_SIZE));
+ STATIC_ASSERT(V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH >=
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD));
+ STATIC_ASSERT(V3DV_STENCIL_CFG_LENGTH >=
+ cl_packet_length(STENCIL_CFG));
+}
+#endif
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c b/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c
new file mode 100644
index 000000000..4f77dd008
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_formats.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+
+#include "util/format/u_format.h"
+
+#define SWIZ(x,y,z,w) { \
+ PIPE_SWIZZLE_##x, \
+ PIPE_SWIZZLE_##y, \
+ PIPE_SWIZZLE_##z, \
+ PIPE_SWIZZLE_##w \
+}
+
+#define FORMAT(vk, rt, tex, swiz, return_size, supports_filtering) \
+ [VK_FORMAT_##vk] = { \
+ true, \
+ V3D_OUTPUT_IMAGE_FORMAT_##rt, \
+ TEXTURE_DATA_FORMAT_##tex, \
+ swiz, \
+ return_size, \
+ supports_filtering, \
+ }
+
+#define SWIZ_X001 SWIZ(X, 0, 0, 1)
+#define SWIZ_XY01 SWIZ(X, Y, 0, 1)
+#define SWIZ_XYZ1 SWIZ(X, Y, Z, 1)
+#define SWIZ_XYZW SWIZ(X, Y, Z, W)
+#define SWIZ_YZWX SWIZ(Y, Z, W, X)
+#define SWIZ_YZW1 SWIZ(Y, Z, W, 1)
+#define SWIZ_ZYXW SWIZ(Z, Y, X, W)
+#define SWIZ_ZYX1 SWIZ(Z, Y, X, 1)
+#define SWIZ_XXXY SWIZ(X, X, X, Y)
+#define SWIZ_XXX1 SWIZ(X, X, X, 1)
+#define SWIZ_XXXX SWIZ(X, X, X, X)
+#define SWIZ_000X SWIZ(0, 0, 0, X)
+#define SWIZ_WXYZ SWIZ(W, X, Y, Z)
+
+/* FIXME: expand format table to describe whether the format is supported
+ * for buffer surfaces (texel buffers, vertex buffers, etc).
+ */
+static const struct v3dv_format format_table[] = {
+ /* Color, 4 channels */
+ FORMAT(B8G8R8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_ZYXW, 16, true),
+ FORMAT(B8G8R8A8_UNORM, RGBA8, RGBA8, SWIZ_ZYXW, 16, true),
+
+ FORMAT(R8G8B8A8_SRGB, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true),
+ FORMAT(R8G8B8A8_UNORM, RGBA8, RGBA8, SWIZ_XYZW, 16, true),
+ FORMAT(R8G8B8A8_SNORM, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true),
+ FORMAT(R8G8B8A8_SINT, RGBA8I, RGBA8I, SWIZ_XYZW, 16, false),
+ FORMAT(R8G8B8A8_UINT, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, false),
+
+ FORMAT(R16G16B16A16_SFLOAT, RGBA16F, RGBA16F, SWIZ_XYZW, 16, true),
+ FORMAT(R16G16B16A16_UNORM, NO, RGBA16, SWIZ_XYZW, 32, true),
+ FORMAT(R16G16B16A16_SNORM, NO, RGBA16_SNORM, SWIZ_XYZW, 32, true),
+ FORMAT(R16G16B16A16_SINT, RGBA16I, RGBA16I, SWIZ_XYZW, 16, false),
+ FORMAT(R16G16B16A16_UINT, RGBA16UI, RGBA16UI, SWIZ_XYZW, 16, false),
+
+ FORMAT(R32G32B32A32_SFLOAT, RGBA32F, RGBA32F, SWIZ_XYZW, 32, false),
+ FORMAT(R32G32B32A32_SINT, RGBA32I, RGBA32I, SWIZ_XYZW, 32, false),
+ FORMAT(R32G32B32A32_UINT, RGBA32UI, RGBA32UI, SWIZ_XYZW, 32, false),
+
+ /* Color, 3 channels */
+ FORMAT(R32G32B32_SFLOAT, NO, NO, SWIZ_XYZ1, 0, false),
+ FORMAT(R32G32B32_UINT, NO, NO, SWIZ_XYZ1, 0, false),
+ FORMAT(R32G32B32_SINT, NO, NO, SWIZ_XYZ1, 0, false),
+
+ /* Color, 2 channels */
+ FORMAT(R8G8_UNORM, RG8, RG8, SWIZ_XY01, 16, true),
+ FORMAT(R8G8_SNORM, NO, RG8_SNORM, SWIZ_XY01, 16, true),
+ FORMAT(R8G8_SINT, RG8I, RG8I, SWIZ_XY01, 16, false),
+ FORMAT(R8G8_UINT, RG8UI, RG8UI, SWIZ_XY01, 16, false),
+
+ FORMAT(R16G16_UNORM, NO, RG16, SWIZ_XY01, 32, true),
+ FORMAT(R16G16_SNORM, NO, RG16_SNORM, SWIZ_XY01, 32, true),
+ FORMAT(R16G16_SFLOAT, RG16F, RG16F, SWIZ_XY01, 16, true),
+ FORMAT(R16G16_SINT, RG16I, RG16I, SWIZ_XY01, 16, false),
+ FORMAT(R16G16_UINT, RG16UI, RG16UI, SWIZ_XY01, 16, false),
+
+ FORMAT(R32G32_SFLOAT, RG32F, RG32F, SWIZ_XY01, 32, false),
+ FORMAT(R32G32_SINT, RG32I, RG32I, SWIZ_XY01, 32, false),
+ FORMAT(R32G32_UINT, RG32UI, RG32UI, SWIZ_XY01, 32, false),
+
+ /* Color, 1 channel */
+ FORMAT(R8_UNORM, R8, R8, SWIZ_X001, 16, true),
+ FORMAT(R8_SNORM, NO, R8_SNORM, SWIZ_X001, 16, true),
+ FORMAT(R8_SINT, R8I, R8I, SWIZ_X001, 16, false),
+ FORMAT(R8_UINT, R8UI, R8UI, SWIZ_X001, 16, false),
+
+ FORMAT(R16_UNORM, NO, R16, SWIZ_X001, 32, true),
+ FORMAT(R16_SNORM, NO, R16_SNORM, SWIZ_X001, 32, true),
+ FORMAT(R16_SFLOAT, R16F, R16F, SWIZ_X001, 16, true),
+ FORMAT(R16_SINT, R16I, R16I, SWIZ_X001, 16, false),
+ FORMAT(R16_UINT, R16UI, R16UI, SWIZ_X001, 16, false),
+
+ FORMAT(R32_SFLOAT, R32F, R32F, SWIZ_X001, 32, false),
+ FORMAT(R32_SINT, R32I, R32I, SWIZ_X001, 32, false),
+ FORMAT(R32_UINT, R32UI, R32UI, SWIZ_X001, 32, false),
+
+ /* Color, packed */
+ FORMAT(R4G4B4A4_UNORM_PACK16, ABGR4444, RGBA4, SWIZ_XYZW, 16, true),
+ FORMAT(B4G4R4A4_UNORM_PACK16, ABGR4444, RGBA4, SWIZ_ZYXW, 16, true), /* Swap RB */
+ FORMAT(R5G6B5_UNORM_PACK16, BGR565, RGB565, SWIZ_XYZ1, 16, true),
+ FORMAT(R5G5B5A1_UNORM_PACK16, ABGR1555, RGB5_A1, SWIZ_XYZW, 16, true),
+ FORMAT(A1R5G5B5_UNORM_PACK16, RGBA5551, A1_RGB5, SWIZ_ZYXW, 16, true), /* Swap RB */
+ FORMAT(A8B8G8R8_UNORM_PACK32, RGBA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 UNORM */
+ FORMAT(A8B8G8R8_SNORM_PACK32, NO, RGBA8_SNORM, SWIZ_XYZW, 16, true), /* RGBA8 SNORM */
+ FORMAT(A8B8G8R8_UINT_PACK32, RGBA8UI, RGBA8UI, SWIZ_XYZW, 16, false), /* RGBA8 UINT */
+ FORMAT(A8B8G8R8_SINT_PACK32, RGBA8I, RGBA8I, SWIZ_XYZW, 16, false), /* RGBA8 SINT */
+ FORMAT(A8B8G8R8_SRGB_PACK32, SRGB8_ALPHA8, RGBA8, SWIZ_XYZW, 16, true), /* RGBA8 sRGB */
+ FORMAT(A2B10G10R10_UNORM_PACK32,RGB10_A2, RGB10_A2, SWIZ_XYZW, 16, true),
+ FORMAT(A2B10G10R10_UINT_PACK32, RGB10_A2UI, RGB10_A2UI, SWIZ_XYZW, 16, false),
+ FORMAT(E5B9G9R9_UFLOAT_PACK32, NO, RGB9_E5, SWIZ_XYZ1, 16, true),
+ FORMAT(B10G11R11_UFLOAT_PACK32, R11F_G11F_B10F,R11F_G11F_B10F, SWIZ_XYZ1, 16, true),
+
+ /* Depth */
+ FORMAT(D16_UNORM, D16, DEPTH_COMP16, SWIZ_X001, 32, false),
+ FORMAT(D32_SFLOAT, D32F, DEPTH_COMP32F, SWIZ_X001, 32, false),
+ FORMAT(X8_D24_UNORM_PACK32, D24S8, DEPTH24_X8, SWIZ_X001, 32, false),
+
+ /* Depth + Stencil */
+ FORMAT(D24_UNORM_S8_UINT, D24S8, DEPTH24_X8, SWIZ_X001, 32, false),
+
+ /* Compressed: ETC2 / EAC */
+ FORMAT(ETC2_R8G8B8_UNORM_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true),
+ FORMAT(ETC2_R8G8B8_SRGB_BLOCK, NO, RGB8_ETC2, SWIZ_XYZ1, 16, true),
+ FORMAT(ETC2_R8G8B8A1_UNORM_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true),
+ FORMAT(ETC2_R8G8B8A1_SRGB_BLOCK, NO, RGB8_PUNCHTHROUGH_ALPHA1, SWIZ_XYZW, 16, true),
+ FORMAT(ETC2_R8G8B8A8_UNORM_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true),
+ FORMAT(ETC2_R8G8B8A8_SRGB_BLOCK, NO, RGBA8_ETC2_EAC, SWIZ_XYZW, 16, true),
+ FORMAT(EAC_R11_UNORM_BLOCK, NO, R11_EAC, SWIZ_X001, 16, true),
+ FORMAT(EAC_R11_SNORM_BLOCK, NO, SIGNED_R11_EAC, SWIZ_X001, 16, true),
+ FORMAT(EAC_R11G11_UNORM_BLOCK, NO, RG11_EAC, SWIZ_XY01, 16, true),
+ FORMAT(EAC_R11G11_SNORM_BLOCK, NO, SIGNED_RG11_EAC, SWIZ_XY01, 16, true),
+
+ /* Compressed: BC1-3 */
+ FORMAT(BC1_RGB_UNORM_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true),
+ FORMAT(BC1_RGB_SRGB_BLOCK, NO, BC1, SWIZ_XYZ1, 16, true),
+ FORMAT(BC1_RGBA_UNORM_BLOCK, NO, BC1, SWIZ_XYZW, 16, true),
+ FORMAT(BC1_RGBA_SRGB_BLOCK, NO, BC1, SWIZ_XYZW, 16, true),
+ FORMAT(BC2_UNORM_BLOCK, NO, BC2, SWIZ_XYZW, 16, true),
+ FORMAT(BC2_SRGB_BLOCK, NO, BC2, SWIZ_XYZW, 16, true),
+ FORMAT(BC3_UNORM_BLOCK, NO, BC3, SWIZ_XYZW, 16, true),
+ FORMAT(BC3_SRGB_BLOCK, NO, BC3, SWIZ_XYZW, 16, true),
+
+ /* Compressed: ASTC */
+ FORMAT(ASTC_4x4_UNORM_BLOCK, NO, ASTC_4X4, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_4x4_SRGB_BLOCK, NO, ASTC_4X4, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_5x4_UNORM_BLOCK, NO, ASTC_5X4, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_5x4_SRGB_BLOCK, NO, ASTC_5X4, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_5x5_UNORM_BLOCK, NO, ASTC_5X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_5x5_SRGB_BLOCK, NO, ASTC_5X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_6x5_UNORM_BLOCK, NO, ASTC_6X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_6x5_SRGB_BLOCK, NO, ASTC_6X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_6x6_UNORM_BLOCK, NO, ASTC_6X6, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_6x6_SRGB_BLOCK, NO, ASTC_6X6, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_8x5_UNORM_BLOCK, NO, ASTC_8X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_8x5_SRGB_BLOCK, NO, ASTC_8X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_8x6_UNORM_BLOCK, NO, ASTC_8X6, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_8x6_SRGB_BLOCK, NO, ASTC_8X6, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_8x8_UNORM_BLOCK, NO, ASTC_8X8, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_8x8_SRGB_BLOCK, NO, ASTC_8X8, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x5_UNORM_BLOCK, NO, ASTC_10X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x5_SRGB_BLOCK, NO, ASTC_10X5, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x6_UNORM_BLOCK, NO, ASTC_10X6, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x6_SRGB_BLOCK, NO, ASTC_10X6, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x8_UNORM_BLOCK, NO, ASTC_10X8, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x8_SRGB_BLOCK, NO, ASTC_10X8, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x10_UNORM_BLOCK, NO, ASTC_10X10, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_10x10_SRGB_BLOCK, NO, ASTC_10X10, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_12x10_UNORM_BLOCK, NO, ASTC_12X10, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_12x10_SRGB_BLOCK, NO, ASTC_12X10, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_12x12_UNORM_BLOCK, NO, ASTC_12X12, SWIZ_XYZW, 16, true),
+ FORMAT(ASTC_12x12_SRGB_BLOCK, NO, ASTC_12X12, SWIZ_XYZW, 16, true),
+};
+
+const struct v3dv_format *
+v3dX(get_format)(VkFormat format)
+{
+ if (format < ARRAY_SIZE(format_table) && format_table[format].supported)
+ return &format_table[format];
+ else
+ return NULL;
+}
+
+void
+v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp)
+{
+ switch (format) {
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA8:
+ case V3D_OUTPUT_IMAGE_FORMAT_RGB8:
+ case V3D_OUTPUT_IMAGE_FORMAT_RG8:
+ case V3D_OUTPUT_IMAGE_FORMAT_R8:
+ case V3D_OUTPUT_IMAGE_FORMAT_ABGR4444:
+ case V3D_OUTPUT_IMAGE_FORMAT_BGR565:
+ case V3D_OUTPUT_IMAGE_FORMAT_ABGR1555:
+ *type = V3D_INTERNAL_TYPE_8;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA8I:
+ case V3D_OUTPUT_IMAGE_FORMAT_RG8I:
+ case V3D_OUTPUT_IMAGE_FORMAT_R8I:
+ *type = V3D_INTERNAL_TYPE_8I;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI:
+ case V3D_OUTPUT_IMAGE_FORMAT_RG8UI:
+ case V3D_OUTPUT_IMAGE_FORMAT_R8UI:
+ *type = V3D_INTERNAL_TYPE_8UI;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_SRGB8_ALPHA8:
+ case V3D_OUTPUT_IMAGE_FORMAT_SRGB:
+ case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2:
+ case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F:
+ /* Note that sRGB RTs are stored in the tile buffer as 16F,
+ * and the conversion to sRGB happens on tile buffer load/store.
+ */
+ *type = V3D_INTERNAL_TYPE_16F;
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RG16F:
+ case V3D_OUTPUT_IMAGE_FORMAT_R16F:
+ *type = V3D_INTERNAL_TYPE_16F;
+ /* Use 64bpp to make sure the TLB doesn't throw away the alpha
+ * channel before alpha test happens.
+ */
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA16I:
+ *type = V3D_INTERNAL_TYPE_16I;
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RG16I:
+ case V3D_OUTPUT_IMAGE_FORMAT_R16I:
+ *type = V3D_INTERNAL_TYPE_16I;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2UI:
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA16UI:
+ *type = V3D_INTERNAL_TYPE_16UI;
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RG16UI:
+ case V3D_OUTPUT_IMAGE_FORMAT_R16UI:
+ *type = V3D_INTERNAL_TYPE_16UI;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA32I:
+ *type = V3D_INTERNAL_TYPE_32I;
+ *bpp = V3D_INTERNAL_BPP_128;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RG32I:
+ *type = V3D_INTERNAL_TYPE_32I;
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_R32I:
+ *type = V3D_INTERNAL_TYPE_32I;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA32UI:
+ *type = V3D_INTERNAL_TYPE_32UI;
+ *bpp = V3D_INTERNAL_BPP_128;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RG32UI:
+ *type = V3D_INTERNAL_TYPE_32UI;
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_R32UI:
+ *type = V3D_INTERNAL_TYPE_32UI;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RGBA32F:
+ *type = V3D_INTERNAL_TYPE_32F;
+ *bpp = V3D_INTERNAL_BPP_128;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_RG32F:
+ *type = V3D_INTERNAL_TYPE_32F;
+ *bpp = V3D_INTERNAL_BPP_64;
+ break;
+
+ case V3D_OUTPUT_IMAGE_FORMAT_R32F:
+ *type = V3D_INTERNAL_TYPE_32F;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+
+ default:
+ /* Provide some default values, as we'll be called at RB
+ * creation time, even if an RB with this format isn't supported.
+ */
+ *type = V3D_INTERNAL_TYPE_8;
+ *bpp = V3D_INTERNAL_BPP_32;
+ break;
+ }
+}
+
+bool
+v3dX(format_supports_tlb_resolve)(const struct v3dv_format *format)
+{
+ uint32_t type, bpp;
+ v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, &type, &bpp);
+ return type == V3D_INTERNAL_TYPE_8 || type == V3D_INTERNAL_TYPE_16F;
+}
+
+bool
+v3dX(format_supports_blending)(const struct v3dv_format *format)
+{
+ /* Hardware blending is only supported on render targets that are configured
+ * 4x8-bit unorm, 2x16-bit float or 4x16-bit float.
+ */
+ uint32_t type, bpp;
+ v3dX(get_internal_type_bpp_for_output_format)(format->rt_type, &type, &bpp);
+ switch (type) {
+ case V3D_INTERNAL_TYPE_8:
+ return bpp == V3D_INTERNAL_BPP_32;
+ case V3D_INTERNAL_TYPE_16F:
+ return bpp == V3D_INTERNAL_BPP_32 || bpp == V3D_INTERNAL_BPP_64;
+ default:
+ return false;
+ }
+}
+
+bool
+v3dX(tfu_supports_tex_format)(uint32_t tex_format)
+{
+ switch (tex_format) {
+ case TEXTURE_DATA_FORMAT_R8:
+ case TEXTURE_DATA_FORMAT_R8_SNORM:
+ case TEXTURE_DATA_FORMAT_RG8:
+ case TEXTURE_DATA_FORMAT_RG8_SNORM:
+ case TEXTURE_DATA_FORMAT_RGBA8:
+ case TEXTURE_DATA_FORMAT_RGBA8_SNORM:
+ case TEXTURE_DATA_FORMAT_RGB565:
+ case TEXTURE_DATA_FORMAT_RGBA4:
+ case TEXTURE_DATA_FORMAT_RGB5_A1:
+ case TEXTURE_DATA_FORMAT_RGB10_A2:
+ case TEXTURE_DATA_FORMAT_R16:
+ case TEXTURE_DATA_FORMAT_R16_SNORM:
+ case TEXTURE_DATA_FORMAT_RG16:
+ case TEXTURE_DATA_FORMAT_RG16_SNORM:
+ case TEXTURE_DATA_FORMAT_RGBA16:
+ case TEXTURE_DATA_FORMAT_RGBA16_SNORM:
+ case TEXTURE_DATA_FORMAT_R16F:
+ case TEXTURE_DATA_FORMAT_RG16F:
+ case TEXTURE_DATA_FORMAT_RGBA16F:
+ case TEXTURE_DATA_FORMAT_R11F_G11F_B10F:
+ case TEXTURE_DATA_FORMAT_R4:
+ case TEXTURE_DATA_FORMAT_RGB9_E5:
+ case TEXTURE_DATA_FORMAT_R32F:
+ case TEXTURE_DATA_FORMAT_RG32F:
+ case TEXTURE_DATA_FORMAT_RGBA32F:
+ case TEXTURE_DATA_FORMAT_RGB8_ETC2:
+ case TEXTURE_DATA_FORMAT_RGB8_PUNCHTHROUGH_ALPHA1:
+ case TEXTURE_DATA_FORMAT_RGBA8_ETC2_EAC:
+ case TEXTURE_DATA_FORMAT_R11_EAC:
+ case TEXTURE_DATA_FORMAT_SIGNED_R11_EAC:
+ case TEXTURE_DATA_FORMAT_RG11_EAC:
+ case TEXTURE_DATA_FORMAT_SIGNED_RG11_EAC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+uint8_t
+v3dX(get_internal_depth_type)(VkFormat format)
+{
+ switch (format) {
+ case VK_FORMAT_D16_UNORM:
+ return V3D_INTERNAL_TYPE_DEPTH_16;
+ case VK_FORMAT_D32_SFLOAT:
+ return V3D_INTERNAL_TYPE_DEPTH_32F;
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ return V3D_INTERNAL_TYPE_DEPTH_24;
+ default:
+ unreachable("Invalid depth format");
+ break;
+ }
+}
+
+void
+v3dX(get_internal_type_bpp_for_image_aspects)(VkFormat vk_format,
+ VkImageAspectFlags aspect_mask,
+ uint32_t *internal_type,
+ uint32_t *internal_bpp)
+{
+ const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
+ VK_IMAGE_ASPECT_STENCIL_BIT;
+
+ /* We can't store depth/stencil pixel formats to a raster format, so
+ * instead we load our depth/stencil aspects to a compatible color
+ * format.
+ */
+ /* FIXME: pre-compute this at image creation time? */
+ if (aspect_mask & ds_aspects) {
+ switch (vk_format) {
+ case VK_FORMAT_D16_UNORM:
+ *internal_type = V3D_INTERNAL_TYPE_16UI;
+ *internal_bpp = V3D_INTERNAL_BPP_64;
+ break;
+ case VK_FORMAT_D32_SFLOAT:
+ *internal_type = V3D_INTERNAL_TYPE_32F;
+ *internal_bpp = V3D_INTERNAL_BPP_128;
+ break;
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ /* Use RGBA8 format so we can relocate the X/S bits in the appropriate
+ * place to match Vulkan expectations. See the comment on the tile
+ * load command for more details.
+ */
+ *internal_type = V3D_INTERNAL_TYPE_8UI;
+ *internal_bpp = V3D_INTERNAL_BPP_32;
+ break;
+ default:
+ assert(!"unsupported format");
+ break;
+ }
+ } else {
+ const struct v3dv_format *format = v3dX(get_format)(vk_format);
+ v3dX(get_internal_type_bpp_for_output_format)(format->rt_type,
+ internal_type, internal_bpp);
+ }
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_image.c b/lib/mesa/src/broadcom/vulkan/v3dvx_image.c
new file mode 100644
index 000000000..a9aa0fb97
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_image.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+#include "vk_format_info.h"
+
+/*
+ * Translates a pipe_swizzle to the swizzle values used by the
+ * TEXTURE_SHADER_STATE packet.
+ *
+ * FIXME: C&P from v3d, move to a common place?
+ */
+static uint32_t
+translate_swizzle(unsigned char pipe_swizzle)
+{
+ switch (pipe_swizzle) {
+ case PIPE_SWIZZLE_0:
+ return 0;
+ case PIPE_SWIZZLE_1:
+ return 1;
+ case PIPE_SWIZZLE_X:
+ case PIPE_SWIZZLE_Y:
+ case PIPE_SWIZZLE_Z:
+ case PIPE_SWIZZLE_W:
+ return 2 + pipe_swizzle;
+ default:
+ unreachable("unknown swizzle");
+ }
+}
+
+/*
+ * Packs the texture shader state and ensures a BO for it (the BO may be
+ * temporary).
+ */
+static void
+pack_texture_shader_state_helper(struct v3dv_device *device,
+ struct v3dv_image_view *image_view,
+ bool for_cube_map_array_storage)
+{
+ assert(!for_cube_map_array_storage ||
+ image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY);
+ const uint32_t index = for_cube_map_array_storage ? 1 : 0;
+
+ assert(image_view->vk.image);
+ const struct v3dv_image *image = (struct v3dv_image *) image_view->vk.image;
+
+ assert(image->vk.samples == VK_SAMPLE_COUNT_1_BIT ||
+ image->vk.samples == VK_SAMPLE_COUNT_4_BIT);
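+ /* 4x MSAA images store the four samples of each pixel as a 2x2 quad,
+ * hence the 2x scale applied to the image dimensions below.
+ */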
+ const uint32_t msaa_scale = image->vk.samples == VK_SAMPLE_COUNT_1_BIT ? 1 : 2;
+
+ v3dvx_pack(image_view->texture_shader_state[index], TEXTURE_SHADER_STATE, tex) {
+
+ tex.level_0_is_strictly_uif =
+ (image->slices[0].tiling == V3D_TILING_UIF_XOR ||
+ image->slices[0].tiling == V3D_TILING_UIF_NO_XOR);
+
+ tex.level_0_xor_enable = (image->slices[0].tiling == V3D_TILING_UIF_XOR);
+
+ if (tex.level_0_is_strictly_uif)
+ tex.level_0_ub_pad = image->slices[0].ub_pad;
+
+ /* FIXME: v3d never sets uif_xor_disable, but uses it in the following
+ * check, so set the default value explicitly.
+ */
+ tex.uif_xor_disable = false;
+ if (tex.uif_xor_disable ||
+ tex.level_0_is_strictly_uif) {
+ tex.extended = true;
+ }
+
+ tex.base_level = image_view->vk.base_mip_level;
+ tex.max_level = image_view->vk.base_mip_level +
+ image_view->vk.level_count - 1;
+
+ tex.swizzle_r = translate_swizzle(image_view->swizzle[0]);
+ tex.swizzle_g = translate_swizzle(image_view->swizzle[1]);
+ tex.swizzle_b = translate_swizzle(image_view->swizzle[2]);
+ tex.swizzle_a = translate_swizzle(image_view->swizzle[3]);
+
+ tex.texture_type = image_view->format->tex_type;
+
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
+ tex.image_depth = image->vk.extent.depth;
+ } else {
+ tex.image_depth = image_view->vk.layer_count;
+ }
+
+ /* Empirical testing with CTS shows that when we are sampling from cube
+ * arrays we want to set image depth to layers / 6, but not when doing
+ * image load/store.
+ */
+ if (image_view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY &&
+ !for_cube_map_array_storage) {
+ assert(tex.image_depth % 6 == 0);
+ tex.image_depth /= 6;
+ }
+
+ tex.image_height = image->vk.extent.height * msaa_scale;
+ tex.image_width = image->vk.extent.width * msaa_scale;
+
+ /* On 4.x, the height of a 1D texture is redefined to be the
+ * upper 14 bits of the width (which is only usable with txf).
+ */
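+ /* For example, a (hypothetical) 100000-texel-wide 1D image would be
+ * programmed as image_width = 100000 & 0x3fff = 1696 and
+ * image_height = 100000 >> 14 = 6.
+ */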
+ if (image->vk.image_type == VK_IMAGE_TYPE_1D) {
+ tex.image_height = tex.image_width >> 14;
+ }
+ tex.image_width &= (1 << 14) - 1;
+ tex.image_height &= (1 << 14) - 1;
+
+ tex.array_stride_64_byte_aligned = image->cube_map_stride / 64;
+
+ tex.srgb = vk_format_is_srgb(image_view->vk.format);
+
+ /* At this point we don't have the job, which is why the first
+ * parameter is NULL: it avoids a crash when cl_pack_emit_reloc tries
+ * to add the BO to the job. It also means that we need to manually
+ * add the image BO to any job that uses this texture.
+ */
+ const uint32_t base_offset =
+ image->mem->bo->offset +
+ v3dv_layer_offset(image, 0, image_view->vk.base_array_layer);
+ tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
+ }
+}
+
+void
+v3dX(pack_texture_shader_state)(struct v3dv_device *device,
+ struct v3dv_image_view *iview)
+{
+ pack_texture_shader_state_helper(device, iview, false);
+ if (iview->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
+ pack_texture_shader_state_helper(device, iview, true);
+}
+
+void
+v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
+ struct v3dv_buffer_view *buffer_view)
+{
+ assert(buffer_view->buffer);
+ const struct v3dv_buffer *buffer = buffer_view->buffer;
+
+ v3dvx_pack(buffer_view->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
+ tex.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
+ tex.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
+ tex.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
+ tex.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
+
+ tex.image_depth = 1;
+
+ /* On 4.x, the height of a 1D texture is redefined to be the upper 14
+ * bits of the width (which is only usable with txf). In other words,
+ * we get a 28-bit size field, split across the usual 14-bit
+ * height/width fields.
+ */
+ tex.image_width = buffer_view->num_elements;
+ tex.image_height = tex.image_width >> 14;
+ tex.image_width &= (1 << 14) - 1;
+ tex.image_height &= (1 << 14) - 1;
+
+ tex.texture_type = buffer_view->format->tex_type;
+ tex.srgb = vk_format_is_srgb(buffer_view->vk_format);
+
+ /* At this point we don't have the job, which is why the first
+ * parameter is NULL: it avoids a crash when cl_pack_emit_reloc tries
+ * to add the BO to the job. It also means that we need to manually
+ * add the underlying BO to any job that uses this texture.
+ */
+ const uint32_t base_offset =
+ buffer->mem->bo->offset +
+ buffer->mem_offset +
+ buffer_view->offset;
+
+ tex.texture_base_pointer = v3dv_cl_address(NULL, base_offset);
+ }
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c b/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c
new file mode 100644
index 000000000..2f79e4e9c
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_meta_common.c
@@ -0,0 +1,1357 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "v3dv_meta_common.h"
+
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+#include "vk_format_info.h"
+
+struct rcl_clear_info {
+ const union v3dv_clear_value *clear_value;
+ struct v3dv_image *image;
+ VkImageAspectFlags aspects;
+ uint32_t level;
+};
+
+static struct v3dv_cl *
+emit_rcl_prologue(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *fb,
+ const struct rcl_clear_info *clear_info)
+{
+ const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
+
+ struct v3dv_cl *rcl = &job->rcl;
+ v3dv_cl_ensure_space_with_branch(rcl, 200 +
+ tiling->layers * 256 *
+ cl_packet_length(SUPERTILE_COORDINATES));
+ if (job->cmd_buffer->state.oom)
+ return NULL;
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
+ config.early_z_disable = true;
+ config.image_width_pixels = tiling->width;
+ config.image_height_pixels = tiling->height;
+ config.number_of_render_targets = 1;
+ config.multisample_mode_4x = tiling->msaa;
+ config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
+ config.internal_depth_type = fb->internal_depth_type;
+ }
+
+ if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
+ uint32_t clear_pad = 0;
+ if (clear_info->image) {
+ const struct v3dv_image *image = clear_info->image;
+ const struct v3d_resource_slice *slice =
+ &image->slices[clear_info->level];
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
+ int uif_block_height = v3d_utile_height(image->cpp) * 2;
+
+ uint32_t implicit_padded_height =
+ align(tiling->height, uif_block_height) / uif_block_height;
+
+ if (slice->padded_height_of_output_image_in_uif_blocks -
+ implicit_padded_height >= 15) {
+ clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
+ }
+ }
+ }
+
+ const uint32_t *color = &clear_info->clear_value->color[0];
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
+ clear.clear_color_low_32_bits = color[0];
+ clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
+ clear.render_target_number = 0;
+ };
+
+ if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
+ clear.clear_color_mid_low_32_bits =
+ ((color[1] >> 24) | (color[2] << 8));
+ clear.clear_color_mid_high_24_bits =
+ ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
+ clear.render_target_number = 0;
+ };
+ }
+
+ if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
+ clear.uif_padded_height_in_uif_blocks = clear_pad;
+ clear.clear_color_high_16_bits = color[3] >> 16;
+ clear.render_target_number = 0;
+ };
+ }
+ }
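+
+ /* For reference (derived from the packing above): the 128-bit clear
+ * color is split across the three packets as 32 + 24 bits (PART1),
+ * 32 + 24 bits (PART2) and 16 bits (PART3), covering bits 0..127 of
+ * color[0..3].
+ */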
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
+ rt.render_target_0_internal_bpp = tiling->internal_bpp;
+ rt.render_target_0_internal_type = fb->internal_type;
+ rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
+ }
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
+ clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
+ clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
+ };
+
+ cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+ init.use_auto_chained_tile_lists = true;
+ init.size_of_first_block_in_chained_tile_lists =
+ TILE_ALLOCATION_BLOCK_SIZE_64B;
+ }
+
+ return rcl;
+}
+
+static void
+emit_frame_setup(struct v3dv_job *job,
+ uint32_t min_layer,
+ const union v3dv_clear_value *clear_value)
+{
+ v3dv_return_if_oom(NULL, job);
+
+ const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
+
+ struct v3dv_cl *rcl = &job->rcl;
+
+ const uint32_t tile_alloc_offset =
+ 64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
+ cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+ list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
+ }
+
+ cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
+ config.number_of_bin_tile_lists = 1;
+ config.total_frame_width_in_tiles = tiling->draw_tiles_x;
+ config.total_frame_height_in_tiles = tiling->draw_tiles_y;
+
+ config.supertile_width_in_tiles = tiling->supertile_width;
+ config.supertile_height_in_tiles = tiling->supertile_height;
+
+ config.total_frame_width_in_supertiles =
+ tiling->frame_width_in_supertiles;
+ config.total_frame_height_in_supertiles =
+ tiling->frame_height_in_supertiles;
+ }
+
+ /* Implement the GFXH-1742 workaround. Also, if we are clearing, we have
+ * to do it here.
+ */
+ for (int i = 0; i < 2; i++) {
+ cl_emit(rcl, TILE_COORDINATES, coords);
+ cl_emit(rcl, END_OF_LOADS, end);
+ cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+ if (clear_value && i == 0) {
+ cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
+ clear.clear_z_stencil_buffer = true;
+ clear.clear_all_render_targets = true;
+ }
+ }
+ cl_emit(rcl, END_OF_TILE_MARKER, end);
+ }
+
+ cl_emit(rcl, FLUSH_VCD_CACHE, flush);
+}
+
+static void
+emit_supertile_coordinates(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *framebuffer)
+{
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl *rcl = &job->rcl;
+
+ const uint32_t min_y = framebuffer->min_y_supertile;
+ const uint32_t max_y = framebuffer->max_y_supertile;
+ const uint32_t min_x = framebuffer->min_x_supertile;
+ const uint32_t max_x = framebuffer->max_x_supertile;
+
+ for (int y = min_y; y <= max_y; y++) {
+ for (int x = min_x; x <= max_x; x++) {
+ cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
+ coords.column_number_in_supertiles = x;
+ coords.row_number_in_supertiles = y;
+ }
+ }
+ }
+}
+
+static void
+emit_linear_load(struct v3dv_cl *cl,
+ uint32_t buffer,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ uint32_t stride,
+ uint32_t format)
+{
+ cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
+ load.buffer_to_load = buffer;
+ load.address = v3dv_cl_address(bo, offset);
+ load.input_image_format = format;
+ load.memory_format = V3D_TILING_RASTER;
+ load.height_in_ub_or_stride = stride;
+ load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ }
+}
+
+static void
+emit_linear_store(struct v3dv_cl *cl,
+ uint32_t buffer,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ uint32_t stride,
+ bool msaa,
+ uint32_t format)
+{
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = RENDER_TARGET_0;
+ store.address = v3dv_cl_address(bo, offset);
+ store.clear_buffer_being_stored = false;
+ store.output_image_format = format;
+ store.memory_format = V3D_TILING_RASTER;
+ store.height_in_ub_or_stride = stride;
+ store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
+ V3D_DECIMATE_MODE_SAMPLE_0;
+ }
+}
+
+/* This chooses a tile buffer format that is appropriate for the copy
+ * operation. Typically, this is the image render target type; however, if
+ * we are copying depth/stencil to/from a buffer the hardware can't do
+ * raster loads/stores, so we need to load and store to/from a tile color
+ * buffer using a compatible color format.
+ */
+static uint32_t
+choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
+ VkImageAspectFlags aspect,
+ bool for_store,
+ bool is_copy_to_buffer,
+ bool is_copy_from_buffer)
+{
+ if (is_copy_to_buffer || is_copy_from_buffer) {
+ switch (framebuffer->vk_format) {
+ case VK_FORMAT_D16_UNORM:
+ return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
+ case VK_FORMAT_D32_SFLOAT:
+ return V3D_OUTPUT_IMAGE_FORMAT_R32F;
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ /* When storing the stencil aspect of a combined depth/stencil image
+ * to a buffer, the Vulkan spec states that the output buffer must
+ * have packed stencil values, so we choose an R8UI format for our
+ * store outputs. For the load input we still want RGBA8UI since the
+ * source image contains 4 channels (including the 3 channels
+ * containing the 24-bit depth value).
+ *
+ * When loading the stencil aspect of a combined depth/stencil image
+ * from a buffer, we read packed 8-bit stencil values from the buffer
+ * that we need to put into the LSB of the 32-bit format (the R
+ * channel), so we use R8UI. For the store, if we used R8UI then we
+ * would write 8-bit stencil values consecutively over depth channels,
+ * so we need to use RGBA8UI. This will write each stencil value in
+ * its correct position, but will overwrite depth values (channels G
+ * B,A) with undefined values. To fix this, we will have to restore
+ * the depth aspect from the Z tile buffer, which we should pre-load
+ * from the image before the store).
+ */
+ if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+ } else {
+ assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
+ if (is_copy_to_buffer) {
+ return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
+ V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+ } else {
+ assert(is_copy_from_buffer);
+ return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
+ V3D_OUTPUT_IMAGE_FORMAT_R8UI;
+ }
+ }
+ default: /* Color formats */
+ return framebuffer->format->rt_type;
+ }
+ } else {
+ return framebuffer->format->rt_type;
+ }
+}
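+
+/* Summary of the D24S8 stencil cases above, for quick reference: when
+ * copying to a buffer we load RGBA8UI (all four channels) and store R8UI
+ * (packed stencil bytes); when copying from a buffer we load R8UI and
+ * store RGBA8UI (placing stencil in its channel).
+ */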
+
+static inline bool
+format_needs_rb_swap(struct v3dv_device *device,
+ VkFormat format)
+{
+ const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
+ return swizzle[0] == PIPE_SWIZZLE_Z;
+}
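+
+/* For illustration: VK_FORMAT_B8G8R8A8_UNORM is expected to report
+ * swizzle[0] == PIPE_SWIZZLE_Z (R sourced from the B channel), so it
+ * needs the R/B swap, while VK_FORMAT_R8G8B8A8_UNORM does not.
+ */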
+
+static void
+emit_image_load(struct v3dv_device *device,
+ struct v3dv_cl *cl,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_image *image,
+ VkImageAspectFlags aspect,
+ uint32_t layer,
+ uint32_t mip_level,
+ bool is_copy_to_buffer,
+ bool is_copy_from_buffer)
+{
+ uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
+
+ /* For image to/from buffer copies we always load to and store from RT0,
+ * even for depth/stencil aspects, because the hardware can't do raster
+ * stores or loads from/to the depth/stencil tile buffers.
+ */
+ bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
+ aspect == VK_IMAGE_ASPECT_COLOR_BIT;
+
+ const struct v3d_resource_slice *slice = &image->slices[mip_level];
+ cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
+ load.buffer_to_load = load_to_color_tlb ?
+ RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
+
+ load.address = v3dv_cl_address(image->mem->bo, layer_offset);
+
+ load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
+ is_copy_to_buffer,
+ is_copy_from_buffer);
+ load.memory_format = slice->tiling;
+
+ /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
+ * expects the depth value in the LSB bits of each 32-bit pixel.
+ * Unfortunately, the hardware seems to put the S8/X8 bits there and the
+ * depth bits on the MSB. To work around that we can reverse the channel
+ * order and then swap the R/B channels to get what we want.
+ *
+ * NOTE: reversing and swapping only gets us the behavior we want if the
+ * operations happen in that exact order, which seems to be the case when
+ * done on the tile buffer load operations. On the store, it seems the
+ * order is not the same. The order on the store is probably reversed so
+ * that reversing and swapping on both the load and the store preserves
+ * the original order of the channels in memory.
+ *
+ * Notice that we only need to do this when copying to a buffer, where
+ * depth and stencil aspects are copied as separate regions and
+ * the spec expects them to be tightly packed.
+ */
+ bool needs_rb_swap = false;
+ bool needs_chan_reverse = false;
+ if (is_copy_to_buffer &&
+ (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
+ (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
+ (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
+ needs_rb_swap = true;
+ needs_chan_reverse = true;
+ } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
+ (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
+ /* This is not a raw data copy (i.e. we are clearing the image),
+ * so we need to make sure we respect the format swizzle.
+ */
+ needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
+ }
+
+ load.r_b_swap = needs_rb_swap;
+ load.channel_reverse = needs_chan_reverse;
+
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
+ load.height_in_ub_or_stride =
+ slice->padded_height_of_output_image_in_uif_blocks;
+ } else if (slice->tiling == V3D_TILING_RASTER) {
+ load.height_in_ub_or_stride = slice->stride;
+ }
+
+ if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
+ load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
+ else
+ load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ }
+}
+
+static void
+emit_image_store(struct v3dv_device *device,
+ struct v3dv_cl *cl,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_image *image,
+ VkImageAspectFlags aspect,
+ uint32_t layer,
+ uint32_t mip_level,
+ bool is_copy_to_buffer,
+ bool is_copy_from_buffer,
+ bool is_multisample_resolve)
+{
+ uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
+
+ bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
+ aspect == VK_IMAGE_ASPECT_COLOR_BIT;
+
+ const struct v3d_resource_slice *slice = &image->slices[mip_level];
+ cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = store_from_color_tlb ?
+ RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
+
+ store.address = v3dv_cl_address(image->mem->bo, layer_offset);
+ store.clear_buffer_being_stored = false;
+
+ /* See rationale in emit_image_load() */
+ bool needs_rb_swap = false;
+ bool needs_chan_reverse = false;
+ if (is_copy_from_buffer &&
+ (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
+ (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
+ (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
+ needs_rb_swap = true;
+ needs_chan_reverse = true;
+ } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
+ (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
+ needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
+ }
+
+ store.r_b_swap = needs_rb_swap;
+ store.channel_reverse = needs_chan_reverse;
+
+ store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
+ is_copy_to_buffer,
+ is_copy_from_buffer);
+ store.memory_format = slice->tiling;
+ if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ slice->tiling == V3D_TILING_UIF_XOR) {
+ store.height_in_ub_or_stride =
+ slice->padded_height_of_output_image_in_uif_blocks;
+ } else if (slice->tiling == V3D_TILING_RASTER) {
+ store.height_in_ub_or_stride = slice->stride;
+ }
+
+ if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
+ store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
+ else if (is_multisample_resolve)
+ store.decimate_mode = V3D_DECIMATE_MODE_4X;
+ else
+ store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
+ }
+}
+
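+/* All of the *_per_tile_list() helpers below follow the same pattern:
+ * they build a generic tile list in the job's indirect CL (implicit tile
+ * coordinates, loads, END_OF_LOADS, a branch, stores, END_OF_TILE_MARKER
+ * and RETURN_FROM_SUB_LIST), and then link it into the RCL with
+ * START_ADDRESS_OF_GENERIC_TILE_LIST so it runs once per tile as the
+ * supertile coordinates emitted afterwards are processed.
+ */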
+static void
+emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_buffer *buffer,
+ struct v3dv_image *image,
+ uint32_t layer_offset,
+ const VkBufferImageCopy2KHR *region)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ /* Load image to TLB */
+ assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
+ layer_offset < region->imageSubresource.layerCount) ||
+ layer_offset < image->vk.extent.depth);
+
+ const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
+ region->imageSubresource.baseArrayLayer + layer_offset :
+ region->imageOffset.z + layer_offset;
+
+ emit_image_load(job->device, cl, framebuffer, image,
+ region->imageSubresource.aspectMask,
+ image_layer,
+ region->imageSubresource.mipLevel,
+ true, false);
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ /* Store TLB to buffer */
+ uint32_t width, height;
+ if (region->bufferRowLength == 0)
+ width = region->imageExtent.width;
+ else
+ width = region->bufferRowLength;
+
+ if (region->bufferImageHeight == 0)
+ height = region->imageExtent.height;
+ else
+ height = region->bufferImageHeight;
+
+ /* Handle copy from compressed format */
+ width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
+ height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
+
+ /* If we are storing stencil from a combined depth/stencil format the
+ * Vulkan spec states that the output buffer must have packed stencil
+ * values, where each stencil value is 1 byte.
+ */
+ uint32_t cpp =
+ region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
+ 1 : image->cpp;
+ uint32_t buffer_stride = width * cpp;
+ uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
+ height * buffer_stride * layer_offset;
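+
+ /* Illustrative example: copying a 100x50 region of a 4-cpp color image
+ * with bufferRowLength/bufferImageHeight of 0 gives buffer_stride =
+ * 100 * 4 = 400 bytes, and layer N starts at bufferOffset + 50 * 400 * N.
+ */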
+
+ uint32_t format = choose_tlb_format(framebuffer,
+ region->imageSubresource.aspectMask,
+ true, true, false);
+ bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;
+
+ emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
+ buffer_offset, buffer_stride, msaa, format);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+emit_copy_layer_to_buffer(struct v3dv_job *job,
+ struct v3dv_buffer *buffer,
+ struct v3dv_image *image,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t layer,
+ const VkBufferImageCopy2KHR *region)
+{
+ emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
+ image, layer, region);
+ emit_supertile_coordinates(job, framebuffer);
+}
+
+void
+v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
+ struct v3dv_buffer *buffer,
+ struct v3dv_image *image,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkBufferImageCopy2KHR *region)
+{
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, NULL);
+ for (int layer = 0; layer < job->frame_tiling.layers; layer++)
+ emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+static void
+emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ uint32_t layer_offset,
+ const VkImageResolve2KHR *region)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
+ layer_offset < region->srcSubresource.layerCount) ||
+ layer_offset < src->vk.extent.depth);
+
+ const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
+ region->srcSubresource.baseArrayLayer + layer_offset :
+ region->srcOffset.z + layer_offset;
+
+ emit_image_load(job->device, cl, framebuffer, src,
+ region->srcSubresource.aspectMask,
+ src_layer,
+ region->srcSubresource.mipLevel,
+ false, false);
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
+ layer_offset < region->dstSubresource.layerCount) ||
+ layer_offset < dst->vk.extent.depth);
+
+ const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
+ region->dstSubresource.baseArrayLayer + layer_offset :
+ region->dstOffset.z + layer_offset;
+
+ emit_image_store(job->device, cl, framebuffer, dst,
+ region->dstSubresource.aspectMask,
+ dst_layer,
+ region->dstSubresource.mipLevel,
+ false, false, true);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+emit_resolve_image_layer(struct v3dv_job *job,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t layer,
+ const VkImageResolve2KHR *region)
+{
+ emit_resolve_image_layer_per_tile_list(job, framebuffer,
+ dst, src, layer, region);
+ emit_supertile_coordinates(job, framebuffer);
+}
+
+void
+v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkImageResolve2KHR *region)
+{
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, NULL);
+ for (int layer = 0; layer < job->frame_tiling.layers; layer++)
+ emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+static void
+emit_copy_buffer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_bo *dst,
+ struct v3dv_bo *src,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ uint32_t stride,
+ uint32_t format)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ emit_linear_store(cl, RENDER_TARGET_0,
+ dst, dst_offset, stride, false, format);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+void
+v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
+ struct v3dv_bo *dst,
+ struct v3dv_bo *src,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t format,
+ uint32_t item_size)
+{
+ const uint32_t stride = job->frame_tiling.width * item_size;
+ emit_copy_buffer_per_tile_list(job, dst, src,
+ dst_offset, src_offset,
+ stride, format);
+ emit_supertile_coordinates(job, framebuffer);
+}
+
+void
+v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
+ struct v3dv_bo *dst,
+ struct v3dv_bo *src,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t format,
+ uint32_t item_size)
+{
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, NULL);
+
+ v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
+ framebuffer, format, item_size);
+
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+static void
+emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ uint32_t layer_offset,
+ const VkImageCopy2KHR *region)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
+ layer_offset < region->srcSubresource.layerCount) ||
+ layer_offset < src->vk.extent.depth);
+
+ const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
+ region->srcSubresource.baseArrayLayer + layer_offset :
+ region->srcOffset.z + layer_offset;
+
+ emit_image_load(job->device, cl, framebuffer, src,
+ region->srcSubresource.aspectMask,
+ src_layer,
+ region->srcSubresource.mipLevel,
+ false, false);
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
+ layer_offset < region->dstSubresource.layerCount) ||
+ layer_offset < dst->vk.extent.depth);
+
+ const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
+ region->dstSubresource.baseArrayLayer + layer_offset :
+ region->dstOffset.z + layer_offset;
+
+ emit_image_store(job->device, cl, framebuffer, dst,
+ region->dstSubresource.aspectMask,
+ dst_layer,
+ region->dstSubresource.mipLevel,
+ false, false, false);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+emit_copy_image_layer(struct v3dv_job *job,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t layer,
+ const VkImageCopy2KHR *region)
+{
+ emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
+ emit_supertile_coordinates(job, framebuffer);
+}
+
+void
+v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkImageCopy2KHR *region)
+{
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, NULL);
+ for (int layer = 0; layer < job->frame_tiling.layers; layer++)
+ emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+void
+v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *dst,
+ uint32_t dst_mip_level,
+ uint32_t dst_layer,
+ struct v3dv_image *src,
+ uint32_t src_mip_level,
+ uint32_t src_layer,
+ uint32_t width,
+ uint32_t height,
+ const struct v3dv_format *format)
+{
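+ /* The TFU job is submitted through its own kernel interface
+ * (drm_v3d_submit_tfu) rather than through a CL, so no render job is
+ * involved; this is presumably the cheapest path for tiling conversions
+ * and simple blits (e.g. mipmap generation).
+ */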
+ const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
+ const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];
+
+ assert(dst->mem && dst->mem->bo);
+ const struct v3dv_bo *dst_bo = dst->mem->bo;
+
+ assert(src->mem && src->mem->bo);
+ const struct v3dv_bo *src_bo = src->mem->bo;
+
+ struct drm_v3d_submit_tfu tfu = {
+ .ios = (height << 16) | width,
+ .bo_handles = {
+ dst_bo->handle,
+ src_bo->handle != dst_bo->handle ? src_bo->handle : 0
+ },
+ };
+
+ const uint32_t src_offset =
+ src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
+ tfu.iia |= src_offset;
+
+ uint32_t icfg;
+ if (src_slice->tiling == V3D_TILING_RASTER) {
+ icfg = V3D_TFU_ICFG_FORMAT_RASTER;
+ } else {
+ icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
+ (src_slice->tiling - V3D_TILING_LINEARTILE);
+ }
+ tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;
+
+ const uint32_t dst_offset =
+ dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
+ tfu.ioa |= dst_offset;
+
+ tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
+ (dst_slice->tiling - V3D_TILING_LINEARTILE)) <<
+ V3D_TFU_IOA_FORMAT_SHIFT;
+ tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;
+
+ switch (src_slice->tiling) {
+ case V3D_TILING_UIF_NO_XOR:
+ case V3D_TILING_UIF_XOR:
+ tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
+ break;
+ case V3D_TILING_RASTER:
+ tfu.iis |= src_slice->stride / src->cpp;
+ break;
+ default:
+ break;
+ }
+
+ /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
+ * OPAD field for the destination (how many extra UIF blocks beyond
+ * those necessary to cover the height).
+ */
+ if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR ||
+ dst_slice->tiling == V3D_TILING_UIF_XOR) {
+ uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
+ uint32_t implicit_padded_height = align(height, uif_block_h);
+ uint32_t icfg =
+ (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
+ tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
+ }
+
+ v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
+}
+
+static void
+emit_clear_image_layer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_image *image,
+ VkImageAspectFlags aspects,
+ uint32_t layer,
+ uint32_t level)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ emit_image_store(job->device, cl, framebuffer, image, aspects,
+ layer, level, false, false, false);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+emit_clear_image_layers(struct v3dv_job *job,
+ struct v3dv_image *image,
+ struct v3dv_meta_framebuffer *framebuffer,
+ VkImageAspectFlags aspects,
+ uint32_t min_layer,
+ uint32_t max_layer,
+ uint32_t level)
+{
+ for (uint32_t layer = min_layer; layer < max_layer; layer++) {
+ emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
+ layer, level);
+ emit_supertile_coordinates(job, framebuffer);
+ }
+}
+
+void
+v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *image,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const union v3dv_clear_value *clear_value,
+ VkImageAspectFlags aspects,
+ uint32_t min_layer,
+ uint32_t max_layer,
+ uint32_t level)
+{
+ const struct rcl_clear_info clear_info = {
+ .clear_value = clear_value,
+ .image = image,
+ .aspects = aspects,
+ .level = level,
+ };
+
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, clear_value);
+ emit_clear_image_layers(job, image, framebuffer, aspects,
+ min_layer, max_layer, level);
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+static void
+emit_fill_buffer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ uint32_t stride)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
+ V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+emit_fill_buffer(struct v3dv_job *job,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ struct v3dv_meta_framebuffer *framebuffer)
+{
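+ /* Each pixel holds one 32-bit word of the fill pattern (the store uses
+ * V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI), hence the stride of 4 bytes per
+ * pixel.
+ */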
+ const uint32_t stride = job->frame_tiling.width * 4;
+ emit_fill_buffer_per_tile_list(job, bo, offset, stride);
+ emit_supertile_coordinates(job, framebuffer);
+}
+
+void
+v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t data)
+{
+ const union v3dv_clear_value clear_value = {
+ .color = { data, 0, 0, 0 },
+ };
+
+ const struct rcl_clear_info clear_info = {
+ .clear_value = &clear_value,
+ .image = NULL,
+ .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
+ .level = 0,
+ };
+
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, &clear_value);
+ emit_fill_buffer(job, bo, offset, framebuffer);
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+static void
+emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
+ struct v3dv_meta_framebuffer *framebuffer,
+ struct v3dv_image *image,
+ struct v3dv_buffer *buffer,
+ uint32_t layer,
+ const VkBufferImageCopy2KHR *region)
+{
+ struct v3dv_cl *cl = &job->indirect;
+ v3dv_cl_ensure_space(cl, 200, 1);
+ v3dv_return_if_oom(NULL, job);
+
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
+
+ cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+ const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
+ assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
+ layer < image->vk.extent.depth);
+
+ /* Load TLB from buffer */
+ uint32_t width, height;
+ if (region->bufferRowLength == 0)
+ width = region->imageExtent.width;
+ else
+ width = region->bufferRowLength;
+
+ if (region->bufferImageHeight == 0)
+ height = region->imageExtent.height;
+ else
+ height = region->bufferImageHeight;
+
+ /* Handle copy to compressed format using a compatible format */
+ width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
+ height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
+
+ uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
+ 1 : image->cpp;
+ uint32_t buffer_stride = width * cpp;
+ uint32_t buffer_offset =
+ buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
+
+ uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
+ false, false, true);
+
+ emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
+ buffer_offset, buffer_stride, format);
+
+ /* Because we can't do raster loads/stores of Z/S formats we need to
+ * use a color tile buffer with a compatible RGBA color format instead.
+ * However, when we are uploading a single aspect to a combined
+ * depth/stencil image we have the problem that our tile buffer stores don't
+ * allow us to mask out the other aspect, so we always write all four RGBA
+ * channels to the image and we end up overwriting that other aspect with
+ * undefined values. To work around that, we first load the aspect we are
+ * not copying from the image memory into a proper Z/S tile buffer. Then we
+ * do our store from the color buffer for the aspect we are copying, and
+ * after that, we do another store from the Z/S tile buffer to restore the
+ * other aspect to its original value.
+ */
+ if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ emit_image_load(job->device, cl, framebuffer, image,
+ VK_IMAGE_ASPECT_STENCIL_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false);
+ } else {
+ assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
+ emit_image_load(job->device, cl, framebuffer, image,
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false);
+ }
+ }
+
+ cl_emit(cl, END_OF_LOADS, end);
+
+ cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ /* Store TLB to image */
+ emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, true, false);
+
+ if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+ if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ emit_image_store(job->device, cl, framebuffer, image,
+ VK_IMAGE_ASPECT_STENCIL_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false, false);
+ } else {
+ assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
+ emit_image_store(job->device, cl, framebuffer, image,
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+ imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
+ false, false, false);
+ }
+ }
+
+ cl_emit(cl, END_OF_TILE_MARKER, end);
+
+ cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(cl);
+ }
+}
+
+static void
+emit_copy_buffer_to_layer(struct v3dv_job *job,
+ struct v3dv_image *image,
+ struct v3dv_buffer *buffer,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t layer,
+ const VkBufferImageCopy2KHR *region)
+{
+ emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
+ layer, region);
+ emit_supertile_coordinates(job, framebuffer);
+}
+
+void
+v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *image,
+ struct v3dv_buffer *buffer,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkBufferImageCopy2KHR *region)
+{
+ struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
+ v3dv_return_if_oom(NULL, job);
+
+ emit_frame_setup(job, 0, NULL);
+ for (int layer = 0; layer < job->frame_tiling.layers; layer++)
+ emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
+
+/* Figure out a TLB size configuration for a number of pixels to process.
+ * Beware that we can't "render" more than 4096x4096 pixels in a single
+ * job; if the pixel count is larger than that, the caller might need to
+ * split the work and call this function multiple times.
+ */
+static void
+framebuffer_size_for_pixel_count(uint32_t num_pixels,
+ uint32_t *width,
+ uint32_t *height)
+{
+ assert(num_pixels > 0);
+
+ const uint32_t max_dim_pixels = 4096;
+ const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;
+
+ uint32_t w, h;
+ if (num_pixels > max_pixels) {
+ w = max_dim_pixels;
+ h = max_dim_pixels;
+ } else {
+ w = num_pixels;
+ h = 1;
+ while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
+ w >>= 1;
+ h <<= 1;
+ }
+ }
+ assert(w <= max_dim_pixels && h <= max_dim_pixels);
+ assert(w * h <= num_pixels);
+ assert(w > 0 && h > 0);
+
+ *width = w;
+ *height = h;
+}
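+
+/* Worked example: for num_pixels = 1000000 the loop above settles on
+ * w = 1953, h = 512 (999936 pixels), leaving 64 pixels for the caller to
+ * process in a follow-up job.
+ */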
+
+struct v3dv_job *
+v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_bo *dst,
+ uint32_t dst_offset,
+ struct v3dv_bo *src,
+ uint32_t src_offset,
+ const VkBufferCopy2KHR *region)
+{
+ const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
+ const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
+
+ /* Select appropriate pixel format for the copy operation based on the
+ * size to copy and the alignment of the source and destination offsets.
+ */
+ src_offset += region->srcOffset;
+ dst_offset += region->dstOffset;
+ uint32_t item_size = 4;
+ while (item_size > 1 &&
+ (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
+ item_size /= 2;
+ }
+
+ while (item_size > 1 && region->size % item_size != 0)
+ item_size /= 2;
+
+ assert(region->size % item_size == 0);
+ uint32_t num_items = region->size / item_size;
+ assert(num_items > 0);
+
+ uint32_t format;
+ VkFormat vk_format;
+ switch (item_size) {
+ case 4:
+ format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
+ vk_format = VK_FORMAT_R8G8B8A8_UINT;
+ break;
+ case 2:
+ format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
+ vk_format = VK_FORMAT_R8G8_UINT;
+ break;
+ default:
+ format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
+ vk_format = VK_FORMAT_R8_UINT;
+ break;
+ }
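+
+ /* Illustrative example (hypothetical offsets): src_offset = 4,
+ * dst_offset = 6 and size = 10 settle on item_size = 2 above, so the
+ * copy runs as 5 RG8UI items.
+ */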
+
+ struct v3dv_job *job = NULL;
+ while (num_items > 0) {
+ job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
+ if (!job)
+ return NULL;
+
+ uint32_t width, height;
+ framebuffer_size_for_pixel_count(num_items, &width, &height);
+
+ v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);
+
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
+ &job->frame_tiling);
+
+ v3dX(job_emit_binning_flush)(job);
+
+ v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
+ &framebuffer, format, item_size);
+
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+ const uint32_t items_copied = width * height;
+ const uint32_t bytes_copied = items_copied * item_size;
+ num_items -= items_copied;
+ src_offset += bytes_copied;
+ dst_offset += bytes_copied;
+ }
+
+ return job;
+}
+
+void
+v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t data)
+{
+ assert(size > 0 && size % 4 == 0);
+ assert(offset + size <= bo->size);
+
+ const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
+ const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
+ uint32_t num_items = size / 4;
+
+ while (num_items > 0) {
+ struct v3dv_job *job =
+ v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
+ if (!job)
+ return;
+
+ uint32_t width, height;
+ framebuffer_size_for_pixel_count(num_items, &width, &height);
+
+ v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);
+
+ struct v3dv_meta_framebuffer framebuffer;
+ v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
+ internal_type, &job->frame_tiling);
+
+ v3dX(job_emit_binning_flush)(job);
+
+ v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);
+
+ v3dv_cmd_buffer_finish_job(cmd_buffer);
+
+ const uint32_t items_copied = width * height;
+ const uint32_t bytes_copied = items_copied * 4;
+ num_items -= items_copied;
+ offset += bytes_copied;
+ }
+}
+
+void
+v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
+ VkFormat vk_format,
+ uint32_t internal_type,
+ const struct v3dv_frame_tiling *tiling)
+{
+ fb->internal_type = internal_type;
+
+ /* Supertile coverage always starts at 0,0 */
+ uint32_t supertile_w_in_pixels =
+ tiling->tile_width * tiling->supertile_width;
+ uint32_t supertile_h_in_pixels =
+ tiling->tile_height * tiling->supertile_height;
+
+ fb->min_x_supertile = 0;
+ fb->min_y_supertile = 0;
+ fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
+ fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
+
+ fb->vk_format = vk_format;
+ fb->format = v3dX(get_format)(vk_format);
+
+ fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
+ if (vk_format_is_depth_or_stencil(vk_format))
+ fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c b/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c
new file mode 100644
index 000000000..8623a4537
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_pipeline.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
+#include "vk_format_info.h"
+
+static uint8_t
+blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
+{
+ switch (factor) {
+ case VK_BLEND_FACTOR_ZERO:
+ case VK_BLEND_FACTOR_ONE:
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ case VK_BLEND_FACTOR_DST_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ return factor;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ *needs_constants = true;
+ return factor;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
+ V3D_BLEND_FACTOR_DST_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
+ V3D_BLEND_FACTOR_INV_DST_ALPHA;
+ case VK_BLEND_FACTOR_SRC1_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+ case VK_BLEND_FACTOR_SRC1_ALPHA:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+ assert(!"Invalid blend factor: dual source blending not supported.");
+ default:
+ assert(!"Unknown blend factor.");
+ }
+
+ /* This should be handled by the switch above; it is only here to avoid
+ * an "end of non-void function" warning.
+ */
+ unreachable("Unknown blend factor.");
+}
+
+static void
+pack_blend(struct v3dv_pipeline *pipeline,
+ const VkPipelineColorBlendStateCreateInfo *cb_info)
+{
+ /* By default, we are not enabling blending and all color channel writes are
+ * enabled. Color write enables are independent of whether blending is
+ * enabled or not.
+ *
+ * Vulkan specifies color write masks so that bits set correspond to
+ * enabled channels. Our hardware does it the other way around.
+ */
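+ /* For example, a Vulkan colorWriteMask of 0xf (all channels enabled)
+ * becomes a hardware write mask of 0x0 for that render target below.
+ */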
+ pipeline->blend.enables = 0;
+ pipeline->blend.color_write_masks = 0; /* All channels enabled */
+
+ if (!cb_info)
+ return;
+
+ assert(pipeline->subpass);
+ if (pipeline->subpass->color_count == 0)
+ return;
+
+ assert(pipeline->subpass->color_count == cb_info->attachmentCount);
+
+ pipeline->blend.needs_color_constants = false;
+ uint32_t color_write_masks = 0;
+ for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
+ const VkPipelineColorBlendAttachmentState *b_state =
+ &cb_info->pAttachments[i];
+
+ uint32_t attachment_idx =
+ pipeline->subpass->color_attachments[i].attachment;
+ if (attachment_idx == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
+
+ if (!b_state->blendEnable)
+ continue;
+
+ VkAttachmentDescription *desc =
+ &pipeline->pass->attachments[attachment_idx].desc;
+ const struct v3dv_format *format = v3dX(get_format)(desc->format);
+ bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
+
+ uint8_t rt_mask = 1 << i;
+ pipeline->blend.enables |= rt_mask;
+
+ v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
+ config.render_target_mask = rt_mask;
+
+ config.color_blend_mode = b_state->colorBlendOp;
+ config.color_blend_dst_factor =
+ blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
+ &pipeline->blend.needs_color_constants);
+ config.color_blend_src_factor =
+ blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
+ &pipeline->blend.needs_color_constants);
+
+ config.alpha_blend_mode = b_state->alphaBlendOp;
+ config.alpha_blend_dst_factor =
+ blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
+ &pipeline->blend.needs_color_constants);
+ config.alpha_blend_src_factor =
+ blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
+ &pipeline->blend.needs_color_constants);
+ }
+ }
+
+ pipeline->blend.color_write_masks = color_write_masks;
+}
+
+/* This requires that pack_blend() had been called before so we can set
+ * the overall blend enable bit in the CFG_BITS packet.
+ */
+static void
+pack_cfg_bits(struct v3dv_pipeline *pipeline,
+ const VkPipelineDepthStencilStateCreateInfo *ds_info,
+ const VkPipelineRasterizationStateCreateInfo *rs_info,
+ const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
+ const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+ assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
+
+ pipeline->msaa =
+ ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
+
+ v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
+ config.enable_forward_facing_primitive =
+ rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
+
+ config.enable_reverse_facing_primitive =
+ rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
+
+ /* Seems like the hardware is backwards regarding this setting... */
+ config.clockwise_primitives =
+ rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
+
+ config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable : false;
+
+ /* This is required to pass line rasterization tests in CTS while
+ * exposing, at least, a minimum of 4-bits of subpixel precision
+ * (the minimum requirement).
+ */
+ config.line_rasterization = 1; /* perp end caps */
+
+ if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
+ config.direct3d_wireframe_triangles_mode = true;
+ config.direct3d_point_fill_mode =
+ rs_info->polygonMode == VK_POLYGON_MODE_POINT;
+ }
+
+ config.rasterizer_oversample_mode = pipeline->msaa ? 1 : 0;
+
+ /* From the Vulkan spec:
+ *
+ * "Provoking Vertex:
+ *
+ * The vertex in a primitive from which flat shaded attribute
+ * values are taken. This is generally the “first” vertex in the
+ * primitive, and depends on the primitive topology."
+ *
+ * First vertex is the Direct3D style for provoking vertex. OpenGL uses
+ * the last vertex by default.
+ */
+ if (pv_info) {
+ config.direct3d_provoking_vertex =
+ pv_info->provokingVertexMode ==
+ VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
+ } else {
+ config.direct3d_provoking_vertex = true;
+ }
+
+ config.blend_enable = pipeline->blend.enables != 0;
+
+ /* Disable depth/stencil if we don't have a D/S attachment */
+ bool has_ds_attachment =
+ pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
+
+ if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
+ config.z_updates_enable = ds_info->depthWriteEnable;
+ config.depth_test_function = ds_info->depthCompareOp;
+ } else {
+ config.depth_test_function = VK_COMPARE_OP_ALWAYS;
+ }
+
+ /* EZ state will be updated at draw time based on bound pipeline state */
+ config.early_z_updates_enable = false;
+ config.early_z_enable = false;
+
+ config.stencil_enable =
+ ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;
+
+ pipeline->z_updates_enable = config.z_updates_enable;
+ };
+}
+
+static uint32_t
+translate_stencil_op(VkStencilOp op)
+{
+ switch (op) {
+ case VK_STENCIL_OP_KEEP:
+ return V3D_STENCIL_OP_KEEP;
+ case VK_STENCIL_OP_ZERO:
+ return V3D_STENCIL_OP_ZERO;
+ case VK_STENCIL_OP_REPLACE:
+ return V3D_STENCIL_OP_REPLACE;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+ return V3D_STENCIL_OP_INCR;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+ return V3D_STENCIL_OP_DECR;
+ case VK_STENCIL_OP_INVERT:
+ return V3D_STENCIL_OP_INVERT;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+ return V3D_STENCIL_OP_INCWRAP;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+ return V3D_STENCIL_OP_DECWRAP;
+ default:
+ unreachable("bad stencil op");
+ }
+}
+
+static void
+pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
+ uint8_t *stencil_cfg,
+ bool is_front,
+ bool is_back,
+ const VkStencilOpState *stencil_state)
+{
+ /* From the Vulkan spec:
+ *
+ * "Reference is an integer reference value that is used in the unsigned
+ * stencil comparison. The reference value used by stencil comparison
+ * must be within the range [0,2^s-1] , where s is the number of bits in
+ * the stencil framebuffer attachment, otherwise the reference value is
+ * considered undefined."
+ *
+ * In our case, 's' is always 8, so we clamp to that to prevent our packing
+ * functions to assert in debug mode if they see larger values.
+ *
+ * If we have dynamic state we need to make sure we set the corresponding
+ * state bits to 0, since cl_emit_with_prepacked ORs the new value with
+ * the old.
+ */
+ const uint8_t write_mask =
+ pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
+ 0 : stencil_state->writeMask & 0xff;
+
+ const uint8_t compare_mask =
+ pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
+ 0 : stencil_state->compareMask & 0xff;
+
+ const uint8_t reference =
+ pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_REFERENCE ?
+ 0 : stencil_state->reference & 0xff;
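+
+ /* For example, a static reference value of 0x1ff packs as 0xff here,
+ * while with the corresponding dynamic state bit set we pack 0 and the
+ * real value is ORed in at draw time (see the note above about
+ * cl_emit_with_prepacked).
+ */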
+
+ v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
+ config.front_config = is_front;
+ config.back_config = is_back;
+ config.stencil_write_mask = write_mask;
+ config.stencil_test_mask = compare_mask;
+ config.stencil_test_function = stencil_state->compareOp;
+ config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
+ config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
+ config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
+ config.stencil_ref_value = reference;
+ }
+}
+
+static void
+pack_stencil_cfg(struct v3dv_pipeline *pipeline,
+ const VkPipelineDepthStencilStateCreateInfo *ds_info)
+{
+ assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
+
+ if (!ds_info || !ds_info->stencilTestEnable)
+ return;
+
+ if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
+ return;
+
+ const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
+ V3DV_DYNAMIC_STENCIL_WRITE_MASK |
+ V3DV_DYNAMIC_STENCIL_REFERENCE;
+
+ /* If front != back or we have dynamic stencil state we can't emit a single
+ * packet for both faces.
+ */
+ bool needs_front_and_back = false;
+ if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
+ memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
+ needs_front_and_back = true;
+
+ /* If the front and back configurations are the same we can emit both with
+ * a single packet.
+ */
+ pipeline->emit_stencil_cfg[0] = true;
+ if (!needs_front_and_back) {
+ pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
+ true, true, &ds_info->front);
+ } else {
+ pipeline->emit_stencil_cfg[1] = true;
+ pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
+ true, false, &ds_info->front);
+ pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
+ false, true, &ds_info->back);
+ }
+}
+
+void
+v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
+ const VkPipelineColorBlendStateCreateInfo *cb_info,
+ const VkPipelineDepthStencilStateCreateInfo *ds_info,
+ const VkPipelineRasterizationStateCreateInfo *rs_info,
+ const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
+ const VkPipelineMultisampleStateCreateInfo *ms_info)
+{
+ pack_blend(pipeline, cb_info);
+ pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ms_info);
+ pack_stencil_cfg(pipeline, ds_info);
+}
+
+static void
+pack_shader_state_record(struct v3dv_pipeline *pipeline)
+{
+ assert(sizeof(pipeline->shader_state_record) ==
+ cl_packet_length(GL_SHADER_STATE_RECORD));
+
+ struct v3d_fs_prog_data *prog_data_fs =
+ pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
+
+ struct v3d_vs_prog_data *prog_data_vs =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
+
+ struct v3d_vs_prog_data *prog_data_vs_bin =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
+
+ /* Note: we are not packing addresses, as that requires the job (see
+ * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
+ * point, as they depend on dynamic info that can be set after the
+ * pipeline is created (like the viewport). They will need to be filled
+ * in later, so we are doing a partial prepacking here.
+ */
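+ /* The address and uniform fields left out here are presumably ORed into
+ * this prepacked record at draw time with cl_emit_with_prepacked, once
+ * the job and the dynamic state are known.
+ */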
+ v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
+ shader.enable_clipping = true;
+
+ if (!pipeline->has_gs) {
+ shader.point_size_in_shaded_vertex_data =
+ pipeline->topology == PIPE_PRIM_POINTS;
+ } else {
+ struct v3d_gs_prog_data *prog_data_gs =
+ pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
+ shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
+ }
+
+ /* Must be set if the shader modifies Z, discards, or modifies
+ * the sample mask. For any of these cases, the fragment
+ * shader needs to write the Z value (even just discards).
+ */
+ shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
+ /* Set if the EZ test must be disabled (due to shader side
+ * effects and the early_z flag not being present in the
+ * shader).
+ */
+ shader.turn_off_early_z_test = prog_data_fs->disable_ez;
+
+ shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+ prog_data_fs->uses_center_w;
+
+ /* The description for gl_SampleID states that if a fragment shader reads
+ * it, then we should automatically activate per-sample shading. However,
+ * the Vulkan spec also states that if a framebuffer has no attachments:
+ *
+ * "The subpass continues to use the width, height, and layers of the
+ * framebuffer to define the dimensions of the rendering area, and the
+ * rasterizationSamples from each pipeline’s
+ * VkPipelineMultisampleStateCreateInfo to define the number of
+ * samples used in rasterization."
+ *
+ * So in this scenario, if the pipeline doesn't enable multiple samples
+ * but the fragment shader accesses gl_SampleID we would be requested
+ * to do per-sample shading in single sample rasterization mode, which
+ * is pointless, so just disable it in that case.
+ */
+ shader.enable_sample_rate_shading =
+ pipeline->sample_rate_shading ||
+ (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
+
+ shader.any_shader_reads_hardware_written_primitive_id = false;
+
+ shader.do_scoreboard_wait_on_first_thread_switch =
+ prog_data_fs->lock_scoreboard_on_first_thrsw;
+ shader.disable_implicit_point_line_varyings =
+ !prog_data_fs->uses_implicit_point_line_varyings;
+
+ shader.number_of_varyings_in_fragment_shader =
+ prog_data_fs->num_inputs;
+
+ shader.coordinate_shader_propagate_nans = true;
+ shader.vertex_shader_propagate_nans = true;
+ shader.fragment_shader_propagate_nans = true;
+
+ /* Note: see previous note about addresses */
+ /* shader.coordinate_shader_code_address */
+ /* shader.vertex_shader_code_address */
+ /* shader.fragment_shader_code_address */
+
+ /* FIXME: Use combined input/output size flag in the common case (also
+ * on v3d, see v3dx_draw).
+ */
+ shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
+ prog_data_vs_bin->separate_segments;
+ shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
+ prog_data_vs->separate_segments;
+
+ shader.coordinate_shader_input_vpm_segment_size =
+ prog_data_vs_bin->separate_segments ?
+ prog_data_vs_bin->vpm_input_size : 1;
+ shader.vertex_shader_input_vpm_segment_size =
+ prog_data_vs->separate_segments ?
+ prog_data_vs->vpm_input_size : 1;
+
+ shader.coordinate_shader_output_vpm_segment_size =
+ prog_data_vs_bin->vpm_output_size;
+ shader.vertex_shader_output_vpm_segment_size =
+ prog_data_vs->vpm_output_size;
+
+ /* Note: see previous note about addresses */
+ /* shader.coordinate_shader_uniforms_address */
+ /* shader.vertex_shader_uniforms_address */
+ /* shader.fragment_shader_uniforms_address */
+
+ shader.min_coord_shader_input_segments_required_in_play =
+ pipeline->vpm_cfg_bin.As;
+ shader.min_vertex_shader_input_segments_required_in_play =
+ pipeline->vpm_cfg.As;
+
+ shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+ pipeline->vpm_cfg_bin.Ve;
+ shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
+ pipeline->vpm_cfg.Ve;
+
+ shader.coordinate_shader_4_way_threadable =
+ prog_data_vs_bin->base.threads == 4;
+ shader.vertex_shader_4_way_threadable =
+ prog_data_vs->base.threads == 4;
+ shader.fragment_shader_4_way_threadable =
+ prog_data_fs->base.threads == 4;
+
+ shader.coordinate_shader_start_in_final_thread_section =
+ prog_data_vs_bin->base.single_seg;
+ shader.vertex_shader_start_in_final_thread_section =
+ prog_data_vs->base.single_seg;
+ shader.fragment_shader_start_in_final_thread_section =
+ prog_data_fs->base.single_seg;
+
+ shader.vertex_id_read_by_coordinate_shader =
+ prog_data_vs_bin->uses_vid;
+ shader.base_instance_id_read_by_coordinate_shader =
+ prog_data_vs_bin->uses_biid;
+ shader.instance_id_read_by_coordinate_shader =
+ prog_data_vs_bin->uses_iid;
+ shader.vertex_id_read_by_vertex_shader =
+ prog_data_vs->uses_vid;
+ shader.base_instance_id_read_by_vertex_shader =
+ prog_data_vs->uses_biid;
+ shader.instance_id_read_by_vertex_shader =
+ prog_data_vs->uses_iid;
+
+ /* Note: see previous note about addresses */
+ /* shader.address_of_default_attribute_values */
+ }
+}
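+
+/* For illustration: a sketch of how the partial prepacking above is expected
+ * to be completed at draw time, assuming the cl_emit_with_prepacked() helper
+ * from v3dv_cl.h, which ORs the prepacked bytes with a freshly packed packet
+ * that fills in the remaining job-dependent fields:
+ *
+ *    cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD,
+ *                           pipeline->shader_state_record, shader) {
+ *       shader.coordinate_shader_code_address = ...;
+ *       shader.vertex_shader_code_address = ...;
+ *       shader.fragment_shader_code_address = ...;
+ *    }
+ */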
+
+static void
+pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
+{
+ assert(sizeof(pipeline->vcm_cache_size) ==
+ cl_packet_length(VCM_CACHE_SIZE));
+
+ v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
+ vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
+ vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
+ }
+}
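+
+/* The Vc (and As/Ve) values consumed here and in the shader state record
+ * are presumably computed at pipeline creation time by
+ * v3d_compute_vpm_config() in broadcom/common/v3d_util.c; this packet only
+ * stores them.
+ */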
+
+/* As defined in the GL_SHADER_STATE_ATTRIBUTE_RECORD packet. */
+static uint8_t
+get_attr_type(const struct util_format_description *desc)
+{
+ uint32_t r_size = desc->channel[0].size;
+ uint8_t attr_type = ATTRIBUTE_FLOAT;
+
+ switch (desc->channel[0].type) {
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if (r_size == 32) {
+ attr_type = ATTRIBUTE_FLOAT;
+ } else {
+ assert(r_size == 16);
+ attr_type = ATTRIBUTE_HALF_FLOAT;
+ }
+ break;
+
+ case UTIL_FORMAT_TYPE_SIGNED:
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ switch (r_size) {
+ case 32:
+ attr_type = ATTRIBUTE_INT;
+ break;
+ case 16:
+ attr_type = ATTRIBUTE_SHORT;
+ break;
+ case 10:
+ attr_type = ATTRIBUTE_INT2_10_10_10;
+ break;
+ case 8:
+ attr_type = ATTRIBUTE_BYTE;
+ break;
+ default:
+ fprintf(stderr,
+ "format %s unsupported\n",
+ desc->name);
+ attr_type = ATTRIBUTE_BYTE;
+ abort();
+ }
+ break;
+
+ default:
+ fprintf(stderr,
+ "format %s unsupported\n",
+ desc->name);
+ abort();
+ }
+
+ return attr_type;
+}
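+
+/* Worked examples, based on the util_format channel descriptions:
+ * VK_FORMAT_R32G32B32A32_SFLOAT (FLOAT, size 32) maps to ATTRIBUTE_FLOAT,
+ * VK_FORMAT_R16G16_SINT (SIGNED, size 16) maps to ATTRIBUTE_SHORT, and
+ * VK_FORMAT_A2B10G10R10_UNORM_PACK32 (UNSIGNED, size 10) maps to
+ * ATTRIBUTE_INT2_10_10_10.
+ */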
+
+static void
+pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
+ uint32_t index,
+ const VkVertexInputAttributeDescription *vi_desc)
+{
+ const uint32_t packet_length =
+ cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
+
+ const struct util_format_description *desc =
+ vk_format_description(vi_desc->format);
+
+ uint32_t binding = vi_desc->binding;
+
+ v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
+ GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
+
+ /* vec_size == 0 means 4 */
+ attr.vec_size = desc->nr_channels & 3;
+ attr.signed_int_type = (desc->channel[0].type ==
+ UTIL_FORMAT_TYPE_SIGNED);
+ attr.normalized_int_type = desc->channel[0].normalized;
+ attr.read_as_int_uint = desc->channel[0].pure_integer;
+
+ attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
+ 0xffff);
+ attr.stride = pipeline->vb[binding].stride;
+ attr.type = get_attr_type(desc);
+ }
+}
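+
+/* For example: a VK_FORMAT_R32G32B32A32_SFLOAT attribute has
+ * nr_channels == 4 and is packed as vec_size == 0 (the field encodes 4 as
+ * 0), while an R32G32 attribute is packed as vec_size == 2.
+ */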
+
+void
+v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
+ const VkPipelineVertexInputStateCreateInfo *vi_info,
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
+{
+ pack_shader_state_record(pipeline);
+ pack_vcm_cache_size(pipeline);
+
+ pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
+ for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *desc =
+ &vi_info->pVertexBindingDescriptions[i];
+
+ pipeline->vb[desc->binding].stride = desc->stride;
+ pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
+ }
+
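+ /* The loop above seeds instance_divisor with the VkVertexInputRate value
+ * (VK_VERTEX_INPUT_RATE_VERTEX == 0, VK_VERTEX_INPUT_RATE_INSTANCE == 1),
+ * so per-instance bindings get a default divisor of 1 unless one is
+ * provided through vd_info below.
+ */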
+ if (vd_info) {
+ for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
+ const VkVertexInputBindingDivisorDescriptionEXT *desc =
+ &vd_info->pVertexBindingDivisors[i];
+
+ pipeline->vb[desc->binding].instance_divisor = desc->divisor;
+ }
+ }
+
+ pipeline->va_count = 0;
+ struct v3d_vs_prog_data *prog_data_vs =
+ pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
+
+ for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
+ const VkVertexInputAttributeDescription *desc =
+ &vi_info->pVertexAttributeDescriptions[i];
+ uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
+
+ /* We use a custom driver_location_map instead of
+ * nir_find_variable_with_location because the nir shader is not
+ * available when the shader variant was retrieved from the cache.
+ */
+ uint32_t driver_location =
+ prog_data_vs->driver_location_map[location];
+
+ if (driver_location != -1) {
+ assert(driver_location < MAX_VERTEX_ATTRIBS);
+ pipeline->va[driver_location].offset = desc->offset;
+ pipeline->va[driver_location].binding = desc->binding;
+ pipeline->va[driver_location].vk_format = desc->format;
+
+ pack_shader_state_attribute_record(pipeline, driver_location, desc);
+
+ pipeline->va_count++;
+ }
+ }
+}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_private.h b/lib/mesa/src/broadcom/vulkan/v3dvx_private.h
new file mode 100644
index 000000000..ab134225a
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_private.h
@@ -0,0 +1,314 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* This file generates the per-v3d-version function prototypes. It must only
+ * be included from v3dv_private.h.
+ */
+
+#ifndef V3DV_PRIVATE_H
+#error This file is included by means other than v3dv_private.h
+#endif
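+
+/* For reference, assuming the v3dX() convention from
+ * broadcom/common/v3d_macros.h: each prototype below is expanded once per
+ * supported hardware version, e.g. with V3D_VERSION == 42
+ *
+ *    v3dX(job_emit_noop) -> v3d42_job_emit_noop
+ *
+ * and version-agnostic callers dispatch through the v3dv_X() helper in
+ * v3dv_private.h instead.
+ */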
+
+/* Used by v3dv_cmd_buffer */
+void
+v3dX(job_emit_binning_flush)(struct v3dv_job *job);
+
+void
+v3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect);
+
+void
+v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(job_emit_binning_prolog)(struct v3dv_job *job,
+ const struct v3dv_frame_tiling *tiling,
+ uint32_t layers);
+
+void
+v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
+ uint32_t cmd_buffer_count,
+ const VkCommandBuffer *cmd_buffers);
+
+void
+v3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_draw_info *info);
+
+void
+v3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer);
+
+void
+v3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer,
+ uint32_t indexCount,
+ uint32_t instanceCount,
+ uint32_t firstIndex,
+ int32_t vertexOffset,
+ uint32_t firstInstance);
+
+void
+v3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride);
+
+void
+v3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_buffer *buffer,
+ VkDeviceSize offset,
+ uint32_t drawCount,
+ uint32_t stride);
+
+void
+v3dX(get_hw_clear_color)(const VkClearColorValue *color,
+ uint32_t internal_type,
+ uint32_t internal_size,
+ uint32_t *hw_color);
+
+void
+v3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer,
+ int rt,
+ uint32_t *rt_bpp,
+ uint32_t *rt_type,
+ uint32_t *rt_clamp);
+
+/* Used by v3dv_device */
+
+void
+v3dX(pack_sampler_state)(struct v3dv_sampler *sampler,
+ const VkSamplerCreateInfo *pCreateInfo,
+ const VkSamplerCustomBorderColorCreateInfoEXT *bc_info);
+
+void
+v3dX(framebuffer_compute_internal_bpp_msaa)(const struct v3dv_framebuffer *framebuffer,
+ const struct v3dv_subpass *subpass,
+ uint8_t *max_bpp, bool *msaa);
+
+#ifdef DEBUG
+void
+v3dX(device_check_prepacked_sizes)(void);
+#endif
+
+/* Used by v3dv_format */
+const struct v3dv_format *
+v3dX(get_format)(VkFormat);
+
+void
+v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
+ uint32_t *type,
+ uint32_t *bpp);
+
+bool
+v3dX(format_supports_tlb_resolve)(const struct v3dv_format *format);
+
+bool
+v3dX(format_supports_blending)(const struct v3dv_format *format);
+
+bool
+v3dX(tfu_supports_tex_format)(uint32_t tex_format);
+
+/* Used by v3dv_image */
+
+void
+v3dX(pack_texture_shader_state)(struct v3dv_device *device,
+ struct v3dv_image_view *iview);
+
+void
+v3dX(pack_texture_shader_state_from_buffer_view)(struct v3dv_device *device,
+ struct v3dv_buffer_view *buffer_view);
+
+/* Used by v3dv_meta_* */
+
+uint32_t
+v3dX(zs_buffer_from_aspect_bits)(VkImageAspectFlags aspects);
+
+uint8_t
+v3dX(get_internal_depth_type)(VkFormat format);
+
+struct v3dv_meta_framebuffer;
+
+void
+v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
+ struct v3dv_buffer *buffer,
+ struct v3dv_image *image,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkBufferImageCopy2KHR *region);
+
+void
+v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkImageResolve2KHR *region);
+
+void
+v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
+ struct v3dv_bo *dst,
+ struct v3dv_bo *src,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t format,
+ uint32_t item_size);
+
+void
+v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
+ struct v3dv_bo *dst,
+ struct v3dv_bo *src,
+ uint32_t dst_offset,
+ uint32_t src_offset,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t format,
+ uint32_t item_size);
+
+void
+v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *dst,
+ struct v3dv_image *src,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkImageCopy2KHR *region);
+
+void
+v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_image *dst,
+ uint32_t dst_mip_level,
+ uint32_t dst_layer,
+ struct v3dv_image *src,
+ uint32_t src_mip_level,
+ uint32_t src_layer,
+ uint32_t width,
+ uint32_t height,
+ const struct v3dv_format *format);
+
+void
+v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *image,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const union v3dv_clear_value *clear_value,
+ VkImageAspectFlags aspects,
+ uint32_t min_layer,
+ uint32_t max_layer,
+ uint32_t level);
+
+void
+v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ struct v3dv_meta_framebuffer *framebuffer,
+ uint32_t data);
+
+void
+v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
+ struct v3dv_image *image,
+ struct v3dv_buffer *buffer,
+ struct v3dv_meta_framebuffer *framebuffer,
+ const VkBufferImageCopy2KHR *region);
+
+void
+v3dX(get_internal_type_bpp_for_image_aspects)(VkFormat vk_format,
+ VkImageAspectFlags aspect_mask,
+ uint32_t *internal_type,
+ uint32_t *internal_bpp);
+
+struct v3dv_job *
+v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_bo *dst,
+ uint32_t dst_offset,
+ struct v3dv_bo *src,
+ uint32_t src_offset,
+ const VkBufferCopy2KHR *region);
+
+void
+v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_bo *bo,
+ uint32_t offset,
+ uint32_t size,
+ uint32_t data);
+
+void
+v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
+ VkFormat vk_format,
+ uint32_t internal_type,
+ const struct v3dv_frame_tiling *tiling);
+
+/* Used by v3dv_pipeline */
+void
+v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
+ const VkPipelineColorBlendStateCreateInfo *cb_info,
+ const VkPipelineDepthStencilStateCreateInfo *ds_info,
+ const VkPipelineRasterizationStateCreateInfo *rs_info,
+ const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
+ const VkPipelineMultisampleStateCreateInfo *ms_info);
+void
+v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
+ const VkPipelineVertexInputStateCreateInfo *vi_info,
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info);
+/* Used by v3dv_queue */
+void
+v3dX(job_emit_noop)(struct v3dv_job *job);
+
+/* Used by v3dv_descriptor_set and other descriptor set utilities */
+uint32_t v3dX(descriptor_bo_size)(VkDescriptorType type);
+
+uint32_t v3dX(max_descriptor_bo_size)(void);
+
+uint32_t v3dX(combined_image_sampler_texture_state_offset)(void);
+
+uint32_t v3dX(combined_image_sampler_sampler_state_offset)(void);
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c b/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c
new file mode 100644
index 000000000..38f9efbfa
--- /dev/null
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_queue.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2021 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "v3dv_private.h"
+#include "broadcom/common/v3d_macros.h"
+#include "broadcom/cle/v3dx_pack.h"
+#include "broadcom/compiler/v3d_compiler.h"
+
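+/* Emits a minimal valid frame: a 1x1, single-tile render pass whose tile
+ * list loads and stores nothing. The queue presumably submits this when it
+ * needs a job that performs no actual work (e.g. to satisfy submission
+ * ordering or signaling), since the hardware still expects well-formed
+ * binning and rendering control lists.
+ */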
+void
+v3dX(job_emit_noop)(struct v3dv_job *job)
+{
+ v3dv_job_start_frame(job, 1, 1, 1, true, 1, V3D_INTERNAL_BPP_32, false);
+ v3dX(job_emit_binning_flush)(job);
+
+ struct v3dv_cl *rcl = &job->rcl;
+ v3dv_cl_ensure_space_with_branch(rcl, 200 + 1 * 256 *
+ cl_packet_length(SUPERTILE_COORDINATES));
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
+ config.early_z_disable = true;
+ config.image_width_pixels = 1;
+ config.image_height_pixels = 1;
+ config.number_of_render_targets = 1;
+ config.multisample_mode_4x = false;
+ config.maximum_bpp_of_all_render_targets = V3D_INTERNAL_BPP_32;
+ }
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
+ rt.render_target_0_internal_bpp = V3D_INTERNAL_BPP_32;
+ rt.render_target_0_internal_type = V3D_INTERNAL_TYPE_8;
+ rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
+ }
+
+ cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
+ clear.z_clear_value = 1.0f;
+ clear.stencil_clear_value = 0;
+ };
+
+ cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
+ init.use_auto_chained_tile_lists = true;
+ init.size_of_first_block_in_chained_tile_lists =
+ TILE_ALLOCATION_BLOCK_SIZE_64B;
+ }
+
+ cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+ list.address = v3dv_cl_address(job->tile_alloc, 0);
+ }
+
+ cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
+ config.number_of_bin_tile_lists = 1;
+ config.total_frame_width_in_tiles = 1;
+ config.total_frame_height_in_tiles = 1;
+ config.supertile_width_in_tiles = 1;
+ config.supertile_height_in_tiles = 1;
+ config.total_frame_width_in_supertiles = 1;
+ config.total_frame_height_in_supertiles = 1;
+ }
+
+ struct v3dv_cl *icl = &job->indirect;
+ v3dv_cl_ensure_space(icl, 200, 1);
+ struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(icl);
+
+ cl_emit(icl, TILE_COORDINATES_IMPLICIT, coords);
+
+ cl_emit(icl, END_OF_LOADS, end);
+
+ cl_emit(icl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+ cl_emit(icl, STORE_TILE_BUFFER_GENERAL, store) {
+ store.buffer_to_store = NONE;
+ }
+
+ cl_emit(icl, END_OF_TILE_MARKER, end);
+
+ cl_emit(icl, RETURN_FROM_SUB_LIST, ret);
+
+ cl_emit(rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+ branch.start = tile_list_start;
+ branch.end = v3dv_cl_get_address(icl);
+ }
+
+ cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
+ coords.column_number_in_supertiles = 0;
+ coords.row_number_in_supertiles = 0;
+ }
+
+ cl_emit(rcl, END_OF_RENDERING, end);
+}
diff --git a/lib/mesa/src/broadcom/vulkan/vk_format_info.h b/lib/mesa/src/broadcom/vulkan/vk_format_info.h
index 3490ededf..da85cb5b5 100644
--- a/lib/mesa/src/broadcom/vulkan/vk_format_info.h
+++ b/lib/mesa/src/broadcom/vulkan/vk_format_info.h
@@ -50,6 +50,24 @@ vk_format_is_uint(VkFormat format)
}
static inline bool
+vk_format_is_unorm(VkFormat format)
+{
+ return util_format_is_unorm(vk_format_to_pipe_format(format));
+}
+
+static inline bool
+vk_format_is_snorm(VkFormat format)
+{
+ return util_format_is_snorm(vk_format_to_pipe_format(format));
+}
+
+static inline bool
+vk_format_is_float(VkFormat format)
+{
+ return util_format_is_float(vk_format_to_pipe_format(format));
+}
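+
+/* For example: VK_FORMAT_R8G8B8A8_UNORM translates to
+ * PIPE_FORMAT_R8G8B8A8_UNORM, for which vk_format_is_unorm() returns true
+ * while vk_format_is_snorm() and vk_format_is_float() return false.
+ */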
+
+static inline bool
vk_format_is_srgb(VkFormat format)
{
return util_format_is_srgb(vk_format_to_pipe_format(format));