author    Jonathan Gray <jsg@cvs.openbsd.org>  2019-01-29 11:08:07 +0000
committer Jonathan Gray <jsg@cvs.openbsd.org>  2019-01-29 11:08:07 +0000
commit    6b139c2063623e9310025247cd966490b9aa57ea (patch)
tree      375acfd898ca3d721250aa17291bbb90a8d7250a /lib/mesa/src/amd
parent    cce99579dcfb1d54c54cff65573be3430e77f2c5 (diff)
Import Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src/amd')
-rw-r--r--  lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h          |  138
-rw-r--r--  lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h   |    8
-rw-r--r--  lib/mesa/src/amd/addrlib/meson.build                 |   63
-rw-r--r--  lib/mesa/src/amd/common/ac_shader_abi.h              |  103
-rw-r--r--  lib/mesa/src/amd/common/ac_shader_util.c             |  179
-rw-r--r--  lib/mesa/src/amd/common/ac_shader_util.h             |   48
-rw-r--r--  lib/mesa/src/amd/common/meson.build                  |   63
-rw-r--r--  lib/mesa/src/amd/meson.build                         |   27
-rw-r--r--  lib/mesa/src/amd/vulkan/meson.build                  |  178
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_android.c               |  379
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_debug.c                 |  122
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_extensions.c            |  500
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_extensions.h            |  127
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_extensions.py           |  214
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_icd.py                  |   47
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp         |  140
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c           | 3968
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_pass.c                  |  211
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_shader.c                |  581
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_shader.h                |  318
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_shader_helper.h         |   44
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_shader_info.c           |  532
-rw-r--r--  lib/mesa/src/amd/vulkan/radv_wsi_display.c           |  354
-rw-r--r--  lib/mesa/src/amd/vulkan/vk_format_layout.csv         |   20
-rw-r--r--  lib/mesa/src/amd/vulkan/vk_format_table.c            |  576
25 files changed, 8095 insertions(+), 845 deletions(-)
diff --git a/lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h b/lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h
new file mode 100644
index 000000000..7436c5493
--- /dev/null
+++ b/lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright © 2017 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef _AMDGPU_ASIC_ADDR_H
+#define _AMDGPU_ASIC_ADDR_H
+
+#define ATI_VENDOR_ID 0x1002
+#define AMD_VENDOR_ID 0x1022
+
+// AMDGPU_VENDOR_IS_AMD(vendorId)
+#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID))
+
+#define FAMILY_UNKNOWN 0x00
+#define FAMILY_TN 0x69
+#define FAMILY_SI 0x6E
+#define FAMILY_CI 0x78
+#define FAMILY_KV 0x7D
+#define FAMILY_VI 0x82
+#define FAMILY_POLARIS 0x82
+#define FAMILY_CZ 0x87
+#define FAMILY_AI 0x8D
+#define FAMILY_RV 0x8E
+
+// AMDGPU_FAMILY_IS(familyId, familyName)
+#define FAMILY_IS(f, fn) (f == FAMILY_##fn)
+#define FAMILY_IS_TN(f) FAMILY_IS(f, TN)
+#define FAMILY_IS_SI(f) FAMILY_IS(f, SI)
+#define FAMILY_IS_CI(f) FAMILY_IS(f, CI)
+#define FAMILY_IS_KV(f) FAMILY_IS(f, KV)
+#define FAMILY_IS_VI(f) FAMILY_IS(f, VI)
+#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS)
+#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ)
+#define FAMILY_IS_AI(f) FAMILY_IS(f, AI)
+#define FAMILY_IS_RV(f) FAMILY_IS(f, RV)
+
+#define AMDGPU_UNKNOWN 0xFF
+
+#define AMDGPU_TAHITI_RANGE 0x05, 0x14
+#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28
+#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C
+#define AMDGPU_OLAND_RANGE 0x3C, 0x46
+#define AMDGPU_HAINAN_RANGE 0x46, 0xFF
+
+#define AMDGPU_BONAIRE_RANGE 0x14, 0x28
+#define AMDGPU_HAWAII_RANGE 0x28, 0x3C
+
+#define AMDGPU_SPECTRE_RANGE 0x01, 0x41
+#define AMDGPU_SPOOKY_RANGE 0x41, 0x81
+#define AMDGPU_KALINDI_RANGE 0x81, 0xA1
+#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF
+
+#define AMDGPU_ICELAND_RANGE 0x01, 0x14
+#define AMDGPU_TONGA_RANGE 0x14, 0x28
+#define AMDGPU_FIJI_RANGE 0x3C, 0x50
+
+#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A
+#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64
+#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E
+#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF
+
+#define AMDGPU_CARRIZO_RANGE 0x01, 0x21
+#define AMDGPU_BRISTOL_RANGE 0x10, 0x21
+#define AMDGPU_STONEY_RANGE 0x61, 0xFF
+
+#define AMDGPU_VEGA10_RANGE 0x01, 0x14
+#define AMDGPU_VEGA12_RANGE 0x14, 0x28
+#define AMDGPU_VEGA20_RANGE 0x28, 0xFF
+
+#define AMDGPU_RAVEN_RANGE 0x01, 0x81
+#define AMDGPU_RAVEN2_RANGE 0x81, 0xFF
+
+#define AMDGPU_EXPAND_FIX(x) x
+#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
+#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
+
+
+// ASICREV_IS(eRevisionId, revisionName)
+#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
+#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI)
+#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN)
+#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE)
+#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND)
+#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN)
+
+#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE)
+#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII)
+
+#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE)
+#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY)
+#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI)
+#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI)
+
+#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND)
+#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA)
+#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI)
+
+#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10)
+#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11)
+#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12)
+#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM)
+
+#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO)
+#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL)
+#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY)
+
+#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10)
+#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10)
+#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12)
+#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12)
+#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20)
+
+#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN)
+#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2)
+
+#endif // _AMDGPU_ASIC_ADDR_H
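
Note: each *_RANGE macro above expands to a pair of literals ("min, max"), which AMDGPU_IN_RANGE forwards through __VA_ARGS__ into AMDGPU_RANGE_HELPER as two separate arguments (AMDGPU_EXPAND_FIX forces the extra expansion pass some preprocessors need). A minimal usage sketch, assuming a caller that has already parsed the family and external revision IDs:

    /* Sketch: ASICREV_IS_VEGA10_P(erev) expands to
     * ((erev >= 0x01) && (erev < 0x14)), meaningful only within FAMILY_AI. */
    #include <stdbool.h>
    #include <stdint.h>
    #include "amdgpu_asic_addr.h"

    static bool is_vega10(uint32_t family, uint32_t erev)
    {
        return FAMILY_IS_AI(family) && ASICREV_IS_VEGA10_P(erev);
    }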
diff --git a/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h b/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h
index cf67f602b..793edbc62 100644
--- a/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h
+++ b/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h
@@ -27,6 +27,14 @@
* of the Software.
*/
+#include "util/u_endian.h"
+
+#if defined(PIPE_ARCH_LITTLE_ENDIAN)
+#define LITTLEENDIAN_CPU
+#elif defined(PIPE_ARCH_BIG_ENDIAN)
+#define BIGENDIAN_CPU
+#endif
+
//
// Make sure the necessary endian defines are there.
//
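
Note: the added shim maps Mesa's PIPE_ARCH_* endian detection onto the LITTLEENDIAN_CPU/BIGENDIAN_CPU tokens that addrlib's register headers key their bitfield layouts on. An illustrative sketch of the pattern such headers use (field names and widths are made up, not the real si_gb_reg.h contents):

    /* Illustrative only: bitfields are declared in opposite order per
     * endianness so the packed 32-bit register value is the same either way. */
    union example_gb_reg {
        struct {
    #if defined(LITTLEENDIAN_CPU)
            unsigned pipe_config : 8;
            unsigned reserved    : 24;
    #elif defined(BIGENDIAN_CPU)
            unsigned reserved    : 24;
            unsigned pipe_config : 8;
    #else
    #error "LITTLEENDIAN_CPU or BIGENDIAN_CPU must be defined"
    #endif
        } bitfields;
        unsigned u32All;
    };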
diff --git a/lib/mesa/src/amd/addrlib/meson.build b/lib/mesa/src/amd/addrlib/meson.build
new file mode 100644
index 000000000..b9550afd2
--- /dev/null
+++ b/lib/mesa/src/amd/addrlib/meson.build
@@ -0,0 +1,63 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_addrlib = files(
+ 'addrinterface.cpp',
+ 'addrinterface.h',
+ 'addrtypes.h',
+ 'core/addrcommon.h',
+ 'core/addrelemlib.cpp',
+ 'core/addrelemlib.h',
+ 'core/addrlib.cpp',
+ 'core/addrlib.h',
+ 'core/addrlib1.cpp',
+ 'core/addrlib1.h',
+ 'core/addrlib2.cpp',
+ 'core/addrlib2.h',
+ 'core/addrobject.cpp',
+ 'core/addrobject.h',
+ 'gfx9/chip/gfx9_enum.h',
+ 'gfx9/coord.cpp',
+ 'gfx9/coord.h',
+ 'gfx9/gfx9addrlib.cpp',
+ 'gfx9/gfx9addrlib.h',
+ 'amdgpu_asic_addr.h',
+ 'inc/chip/gfx9/gfx9_gb_reg.h',
+ 'inc/chip/r800/si_gb_reg.h',
+ 'r800/chip/si_ci_vi_merged_enum.h',
+ 'r800/ciaddrlib.cpp',
+ 'r800/ciaddrlib.h',
+ 'r800/egbaddrlib.cpp',
+ 'r800/egbaddrlib.h',
+ 'r800/siaddrlib.cpp',
+ 'r800/siaddrlib.h',
+)
+
+libamdgpu_addrlib = static_library(
+ 'addrlib',
+ files_addrlib,
+ include_directories : [
+ include_directories(
+ 'core', 'inc/chip/gfx9', 'inc/chip/r800', 'gfx9/chip', 'r800/chip',
+ ),
+ inc_amd_common, inc_common, inc_src,
+ ],
+ cpp_args : cpp_vis_args,
+)
diff --git a/lib/mesa/src/amd/common/ac_shader_abi.h b/lib/mesa/src/amd/common/ac_shader_abi.h
index b04dc076d..ee18e6c19 100644
--- a/lib/mesa/src/amd/common/ac_shader_abi.h
+++ b/lib/mesa/src/amd/common/ac_shader_abi.h
@@ -26,6 +26,12 @@
#include <llvm-c/Core.h>
+#include "compiler/shader_enums.h"
+
+struct nir_variable;
+
+#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
+
enum ac_descriptor_type {
AC_DESC_IMAGE,
AC_DESC_FMASK,
@@ -42,10 +48,27 @@ struct ac_shader_abi {
LLVMValueRef draw_id;
LLVMValueRef vertex_id;
LLVMValueRef instance_id;
+ LLVMValueRef tcs_patch_id;
+ LLVMValueRef tcs_rel_ids;
+ LLVMValueRef tes_patch_id;
+ LLVMValueRef gs_prim_id;
+ LLVMValueRef gs_invocation_id;
LLVMValueRef frag_pos[4];
LLVMValueRef front_face;
LLVMValueRef ancillary;
LLVMValueRef sample_coverage;
+ LLVMValueRef prim_mask;
+ /* CS */
+ LLVMValueRef local_invocation_ids;
+ LLVMValueRef num_work_groups;
+ LLVMValueRef workgroup_ids[3];
+ LLVMValueRef tg_size;
+
+ /* Vulkan only */
+ LLVMValueRef push_constants;
+ LLVMValueRef view_index;
+
+ LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
/* For VS and PS: pre-loaded shader inputs.
*
@@ -54,10 +77,60 @@ struct ac_shader_abi {
*/
LLVMValueRef *inputs;
+ /* Varying -> attribute number mapping. Also NIR-only */
+ unsigned fs_input_attr_indices[MAX_VARYING];
+
void (*emit_outputs)(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs);
+ void (*emit_vertex)(struct ac_shader_abi *abi,
+ unsigned stream,
+ LLVMValueRef *addrs);
+
+ void (*emit_primitive)(struct ac_shader_abi *abi,
+ unsigned stream);
+
+ void (*emit_kill)(struct ac_shader_abi *abi, LLVMValueRef visible);
+
+ LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ unsigned vertex_index,
+ unsigned const_index,
+ LLVMTypeRef type);
+
+ LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,
+ LLVMTypeRef type,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact,
+ bool load_inputs);
+
+ void (*store_tcs_outputs)(struct ac_shader_abi *abi,
+ const struct nir_variable *var,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ LLVMValueRef src,
+ unsigned writemask);
+
+ LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
+ unsigned varying_id);
+
+
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
/**
@@ -87,14 +160,40 @@ struct ac_shader_abi {
unsigned constant_index,
LLVMValueRef index,
enum ac_descriptor_type desc_type,
- bool image, bool write);
+ bool image, bool write,
+ bool bindless);
+
+ /**
+ * Load a Vulkan-specific resource.
+ *
+ * \param index resource index
+ * \param desc_set descriptor set
+ * \param binding descriptor set binding
+ */
+ LLVMValueRef (*load_resource)(struct ac_shader_abi *abi,
+ LLVMValueRef index,
+ unsigned desc_set,
+ unsigned binding);
+
+ LLVMValueRef (*lookup_interp_param)(struct ac_shader_abi *abi,
+ enum glsl_interp_mode interp,
+ unsigned location);
+
+ LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi,
+ LLVMValueRef sample_id);
+
+ LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
	/* Whether to clamp the shadow reference value to [0,1] on VI. Radeonsi currently
* uses it due to promoting D16 to D32, but radv needs it off. */
bool clamp_shadow_reference;
/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
- * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
+ * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
bool gfx9_stride_size_workaround;
};
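
Note: ac_shader_abi is the contract between the shared ac_nir_to_llvm translator and its users (radeonsi and radv): each driver pre-loads the input LLVMValueRefs and fills in callbacks for anything driver-specific, such as descriptor and resource loads. A minimal sketch of wiring one callback, with hypothetical names rather than either driver's real code:

    #include "ac_shader_abi.h"

    struct my_ctx {
        struct ac_shader_abi abi; /* placed first so the callback can downcast */
        LLVMValueRef base_vertex;
    };

    static LLVMValueRef my_load_base_vertex(struct ac_shader_abi *abi)
    {
        struct my_ctx *ctx = (struct my_ctx *)abi;
        return ctx->base_vertex; /* SGPR pre-loaded at shader entry */
    }

    /* at context-creation time:
     *     ctx->abi.load_base_vertex = my_load_base_vertex; */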
diff --git a/lib/mesa/src/amd/common/ac_shader_util.c b/lib/mesa/src/amd/common/ac_shader_util.c
new file mode 100644
index 000000000..531395f4f
--- /dev/null
+++ b/lib/mesa/src/amd/common/ac_shader_util.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ac_nir_to_llvm.h"
+#include "ac_shader_util.h"
+#include "sid.h"
+
+unsigned
+ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask)
+{
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
+}
+
+unsigned
+ac_get_cb_shader_mask(unsigned spi_shader_col_format)
+{
+ unsigned i, cb_shader_mask = 0;
+
+ for (i = 0; i < 8; i++) {
+ switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
+ case V_028714_SPI_SHADER_ZERO:
+ break;
+ case V_028714_SPI_SHADER_32_R:
+ cb_shader_mask |= 0x1 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_32_GR:
+ cb_shader_mask |= 0x3 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_32_AR:
+ cb_shader_mask |= 0x9 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ case V_028714_SPI_SHADER_32_ABGR:
+ cb_shader_mask |= 0xf << (i * 4);
+ break;
+ default:
+ assert(0);
+ }
+ }
+ return cb_shader_mask;
+}
+
+/**
+ * Calculate the appropriate setting of VGT_GS_MODE when \p shader is a
+ * geometry shader.
+ */
+uint32_t
+ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
+{
+ unsigned cut_mode;
+
+ if (gs_max_vert_out <= 128) {
+ cut_mode = V_028A40_GS_CUT_128;
+ } else if (gs_max_vert_out <= 256) {
+ cut_mode = V_028A40_GS_CUT_256;
+ } else if (gs_max_vert_out <= 512) {
+ cut_mode = V_028A40_GS_CUT_512;
+ } else {
+ assert(gs_max_vert_out <= 1024);
+ cut_mode = V_028A40_GS_CUT_1024;
+ }
+
+ return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+ S_028A40_CUT_MODE(cut_mode)|
+ S_028A40_ES_WRITE_OPTIMIZE(chip_class <= VI) |
+ S_028A40_GS_WRITE_OPTIMIZE(1) |
+ S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
+}
+
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+ LLVMValueRef stencil, LLVMValueRef samplemask,
+ struct ac_export_args *args)
+{
+ unsigned mask = 0;
+ unsigned format = ac_get_spi_shader_z_format(depth != NULL,
+ stencil != NULL,
+ samplemask != NULL);
+
+ assert(depth || stencil || samplemask);
+
+ memset(args, 0, sizeof(*args));
+
+ args->valid_mask = 1; /* whether the EXEC mask is valid */
+ args->done = 1; /* DONE bit */
+
+ /* Specify the target we are exporting */
+ args->target = V_008DFC_SQ_EXP_MRTZ;
+
+ args->compr = 0; /* COMP flag */
+ args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
+ args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
+ args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
+ args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
+
+ if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
+ assert(!depth);
+ args->compr = 1; /* COMPR flag */
+
+ if (stencil) {
+ /* Stencil should be in X[23:16]. */
+ stencil = ac_to_integer(ctx, stencil);
+ stencil = LLVMBuildShl(ctx->builder, stencil,
+ LLVMConstInt(ctx->i32, 16, 0), "");
+ args->out[0] = ac_to_float(ctx, stencil);
+ mask |= 0x3;
+ }
+ if (samplemask) {
+ /* SampleMask should be in Y[15:0]. */
+ args->out[1] = samplemask;
+ mask |= 0xc;
+ }
+ } else {
+ if (depth) {
+ args->out[0] = depth;
+ mask |= 0x1;
+ }
+ if (stencil) {
+ args->out[1] = stencil;
+ mask |= 0x2;
+ }
+ if (samplemask) {
+ args->out[2] = samplemask;
+ mask |= 0x4;
+ }
+ }
+
+ /* SI (except OLAND and HAINAN) has a bug that it only looks
+ * at the X writemask component. */
+ if (ctx->chip_class == SI &&
+ ctx->family != CHIP_OLAND &&
+ ctx->family != CHIP_HAINAN)
+ mask |= 0x1;
+
+ /* Specify which components to enable */
+ args->enabled_channels = mask;
+}
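
Note: ac_get_cb_shader_mask derives the CB_SHADER_MASK register value from SPI_SHADER_COL_FORMAT: each MRT occupies one nibble in both words, and the mask nibble enables only the channels the chosen export format actually carries. A worked sketch (values illustrative):

    /* MRT0 exports R only as 32-bit, MRT1 exports full FP16 ABGR. */
    unsigned spi_shader_col_format =
            (V_028714_SPI_SHADER_32_R      << 0) |  /* nibble 0 */
            (V_028714_SPI_SHADER_FP16_ABGR << 4);   /* nibble 1 */
    unsigned cb_mask = ac_get_cb_shader_mask(spi_shader_col_format);
    /* cb_mask == 0x1 | (0xf << 4) == 0xf1: R for MRT0, RGBA for MRT1 */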
diff --git a/lib/mesa/src/amd/common/ac_shader_util.h b/lib/mesa/src/amd/common/ac_shader_util.h
new file mode 100644
index 000000000..e4cf2bf57
--- /dev/null
+++ b/lib/mesa/src/amd/common/ac_shader_util.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_UTIL_H
+#define AC_SHADER_UTIL_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "amd_family.h"
+#include "ac_llvm_build.h"
+
+unsigned
+ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
+ bool writes_samplemask);
+
+unsigned
+ac_get_cb_shader_mask(unsigned spi_shader_col_format);
+
+uint32_t
+ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
+
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+ LLVMValueRef stencil, LLVMValueRef samplemask,
+ struct ac_export_args *args);
+
+#endif
diff --git a/lib/mesa/src/amd/common/meson.build b/lib/mesa/src/amd/common/meson.build
new file mode 100644
index 000000000..6827a0209
--- /dev/null
+++ b/lib/mesa/src/amd/common/meson.build
@@ -0,0 +1,63 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+sid_tables_h = custom_target(
+ 'sid_tables_h',
+ input : ['sid_tables.py', 'sid.h', 'gfx9d.h'],
+ output : 'sid_tables.h',
+ command : [prog_python, '@INPUT@'],
+ capture : true,
+)
+
+amd_common_files = files(
+ 'ac_binary.c',
+ 'ac_binary.h',
+ 'ac_exp_param.h',
+ 'ac_llvm_build.c',
+ 'ac_llvm_build.h',
+ 'ac_llvm_helper.cpp',
+ 'ac_llvm_util.c',
+ 'ac_llvm_util.h',
+ 'ac_shader_abi.h',
+ 'ac_shader_util.c',
+ 'ac_shader_util.h',
+ 'ac_nir_to_llvm.c',
+ 'ac_nir_to_llvm.h',
+ 'ac_gpu_info.c',
+ 'ac_gpu_info.h',
+ 'ac_surface.c',
+ 'ac_surface.h',
+ 'ac_debug.c',
+ 'ac_debug.h',
+)
+
+libamd_common = static_library(
+ 'amd_common',
+ [amd_common_files, sid_tables_h],
+ include_directories : [
+ inc_common, inc_compiler, inc_mesa, inc_mapi, inc_amd,
+ ],
+ dependencies : [
+ dep_llvm, dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind,
+ idep_nir_headers,
+ ],
+ c_args : [c_vis_args],
+ cpp_args : [cpp_vis_args],
+)
diff --git a/lib/mesa/src/amd/meson.build b/lib/mesa/src/amd/meson.build
new file mode 100644
index 000000000..f96a9aac0
--- /dev/null
+++ b/lib/mesa/src/amd/meson.build
@@ -0,0 +1,27 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+inc_amd = include_directories('.')
+
+subdir('addrlib')
+subdir('common')
+if with_amd_vk
+ subdir('vulkan')
+endif
diff --git a/lib/mesa/src/amd/vulkan/meson.build b/lib/mesa/src/amd/vulkan/meson.build
new file mode 100644
index 000000000..cc2aa7fd1
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/meson.build
@@ -0,0 +1,178 @@
+# Copyright © 2017 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+radv_entrypoints = custom_target(
+ 'radv_entrypoints.[ch]',
+ input : ['radv_entrypoints_gen.py', vk_api_xml],
+ output : ['radv_entrypoints.h', 'radv_entrypoints.c'],
+ command : [
+ prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--outdir',
+ meson.current_build_dir()
+ ],
+ depend_files : files('radv_extensions.py'),
+)
+
+radv_extensions_c = custom_target(
+ 'radv_extensions.c',
+ input : ['radv_extensions.py', vk_api_xml],
+ output : ['radv_extensions.c', 'radv_extensions.h'],
+ command : [
+ prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--out-c', '@OUTPUT0@',
+ '--out-h', '@OUTPUT1@'
+ ],
+)
+
+vk_format_table_c = custom_target(
+ 'vk_format_table.c',
+ input : ['vk_format_table.py', 'vk_format_layout.csv'],
+ output : 'vk_format_table.c',
+ command : [prog_python, '@INPUT@'],
+ depend_files : files('vk_format_parse.py'),
+ capture : true,
+)
+
+libradv_files = files(
+ 'winsys/amdgpu/radv_amdgpu_bo.c',
+ 'winsys/amdgpu/radv_amdgpu_bo.h',
+ 'winsys/amdgpu/radv_amdgpu_cs.c',
+ 'winsys/amdgpu/radv_amdgpu_cs.h',
+ 'winsys/amdgpu/radv_amdgpu_surface.c',
+ 'winsys/amdgpu/radv_amdgpu_surface.h',
+ 'winsys/amdgpu/radv_amdgpu_winsys.c',
+ 'winsys/amdgpu/radv_amdgpu_winsys.h',
+ 'winsys/amdgpu/radv_amdgpu_winsys_public.h',
+ 'radv_cmd_buffer.c',
+ 'radv_cs.h',
+ 'radv_debug.c',
+ 'radv_debug.h',
+ 'radv_device.c',
+ 'radv_descriptor_set.c',
+ 'radv_descriptor_set.h',
+ 'radv_formats.c',
+ 'radv_image.c',
+ 'radv_llvm_helper.cpp',
+ 'radv_meta.c',
+ 'radv_meta.h',
+ 'radv_meta_blit.c',
+ 'radv_meta_blit2d.c',
+ 'radv_meta_buffer.c',
+ 'radv_meta_bufimage.c',
+ 'radv_meta_clear.c',
+ 'radv_meta_copy.c',
+ 'radv_meta_decompress.c',
+ 'radv_meta_fast_clear.c',
+ 'radv_meta_resolve.c',
+ 'radv_meta_resolve_cs.c',
+ 'radv_meta_resolve_fs.c',
+ 'radv_nir_to_llvm.c',
+ 'radv_pass.c',
+ 'radv_pipeline.c',
+ 'radv_pipeline_cache.c',
+ 'radv_private.h',
+ 'radv_radeon_winsys.h',
+ 'radv_shader.c',
+ 'radv_shader.h',
+ 'radv_shader_helper.h',
+ 'radv_shader_info.c',
+ 'radv_query.c',
+ 'radv_util.c',
+ 'radv_util.h',
+ 'radv_wsi.c',
+ 'si_cmd_buffer.c',
+ 'vk_format.h',
+)
+
+radv_deps = []
+radv_flags = []
+
+if with_platform_x11
+ radv_deps += dep_xcb_dri3
+ radv_flags += [
+ '-DVK_USE_PLATFORM_XCB_KHR',
+ '-DVK_USE_PLATFORM_XLIB_KHR',
+ ]
+ libradv_files += files('radv_wsi_x11.c')
+endif
+
+if with_platform_wayland
+ radv_deps += dep_wayland_client
+ radv_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR'
+ libradv_files += files('radv_wsi_wayland.c')
+endif
+
+if with_platform_drm
+ radv_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR'
+ libradv_files += files('radv_wsi_display.c')
+endif
+
+if with_xlib_lease
+ radv_deps += [dep_xcb_xrandr, dep_xlib_xrandr]
+ radv_flags += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT'
+endif
+
+libvulkan_radeon = shared_library(
+ 'vulkan_radeon',
+ [libradv_files, radv_entrypoints, radv_extensions_c, vk_format_table_c, sha1_h],
+ include_directories : [
+ inc_common, inc_amd, inc_amd_common, inc_compiler, inc_vulkan_util,
+ inc_vulkan_wsi,
+ ],
+ link_with : [
+ libamd_common, libamdgpu_addrlib, libvulkan_util, libvulkan_wsi,
+ libmesa_util,
+ ],
+ dependencies : [
+ dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
+ dep_valgrind, radv_deps,
+ idep_nir,
+ ],
+ c_args : [c_vis_args, no_override_init_args, radv_flags],
+ cpp_args : [cpp_vis_args, radv_flags],
+ link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+ install : true,
+)
+
+radeon_icd = custom_target(
+ 'radeon_icd',
+ input : 'radv_icd.py',
+ output : 'radeon_icd.@0@.json'.format(host_machine.cpu()),
+ command : [
+ prog_python, '@INPUT@',
+ '--lib-path', join_paths(get_option('prefix'), get_option('libdir')),
+ '--out', '@OUTPUT@',
+ ],
+ depend_files : files('radv_extensions.py'),
+ build_by_default : true,
+ install_dir : with_vulkan_icd_dir,
+ install : true,
+)
+
+radv_dev_icd = custom_target(
+ 'radv_dev_icd',
+ input : 'radv_icd.py',
+ output : 'dev_icd.json',
+ command : [
+ prog_python, '@INPUT@', '--lib-path', meson.current_build_dir(),
+ '--out', '@OUTPUT@'
+ ],
+ depend_files : files('radv_extensions.py'),
+ build_by_default : true,
+ install : false,
+)
diff --git a/lib/mesa/src/amd/vulkan/radv_android.c b/lib/mesa/src/amd/vulkan/radv_android.c
new file mode 100644
index 000000000..1a4425f26
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_android.c
@@ -0,0 +1,379 @@
+/*
+ * Copyright © 2017, Google Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <hardware/gralloc.h>
+#include <hardware/hardware.h>
+#include <hardware/hwvulkan.h>
+#include <vulkan/vk_android_native_buffer.h>
+#include <vulkan/vk_icd.h>
+#include <libsync.h>
+
+#include "radv_private.h"
+
+static int radv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
+static int radv_hal_close(struct hw_device_t *dev);
+
+static void UNUSED
+static_asserts(void)
+{
+ STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
+}
+
+PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
+ .common = {
+ .tag = HARDWARE_MODULE_TAG,
+ .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
+ .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
+ .id = HWVULKAN_HARDWARE_MODULE_ID,
+ .name = "AMD Vulkan HAL",
+ .author = "Google",
+ .methods = &(hw_module_methods_t) {
+ .open = radv_hal_open,
+ },
+ },
+};
+
+/* If any bits in test_mask are set, then unset them and return true. */
+static inline bool
+unmask32(uint32_t *inout_mask, uint32_t test_mask)
+{
+ uint32_t orig_mask = *inout_mask;
+ *inout_mask &= ~test_mask;
+ return *inout_mask != orig_mask;
+}
+
+static int
+radv_hal_open(const struct hw_module_t* mod, const char* id,
+ struct hw_device_t** dev)
+{
+ assert(mod == &HAL_MODULE_INFO_SYM.common);
+ assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
+
+ hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
+ if (!hal_dev)
+ return -1;
+
+ *hal_dev = (hwvulkan_device_t) {
+ .common = {
+ .tag = HARDWARE_DEVICE_TAG,
+ .version = HWVULKAN_DEVICE_API_VERSION_0_1,
+ .module = &HAL_MODULE_INFO_SYM.common,
+ .close = radv_hal_close,
+ },
+ .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
+ .CreateInstance = radv_CreateInstance,
+ .GetInstanceProcAddr = radv_GetInstanceProcAddr,
+ };
+
+ *dev = &hal_dev->common;
+ return 0;
+}
+
+static int
+radv_hal_close(struct hw_device_t *dev)
+{
+ /* hwvulkan.h claims that hw_device_t::close() is never called. */
+ return -1;
+}
+
+VkResult
+radv_image_from_gralloc(VkDevice device_h,
+ const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc,
+ VkImage *out_image_h)
+
+{
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ VkImage image_h = VK_NULL_HANDLE;
+ struct radv_image *image = NULL;
+ struct radv_bo *bo = NULL;
+ VkResult result;
+
+ if (gralloc_info->handle->numFds != 1) {
+ return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
+ "VkNativeBufferANDROID::handle::numFds is %d, "
+ "expected 1", gralloc_info->handle->numFds);
+ }
+
+ /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
+ * must exceed that of the gralloc handle, and we do not own the gralloc
+ * handle.
+ */
+ int dma_buf = gralloc_info->handle->data[0];
+
+ VkDeviceMemory memory_h;
+
+ const VkImportMemoryFdInfoKHR import_info = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+ .fd = dup(dma_buf),
+ };
+
+ /* Find the first VRAM memory type, or GART for PRIME images. */
+ int memory_type_index = -1;
+ for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+ bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (is_local) {
+ memory_type_index = i;
+ break;
+ }
+ }
+
+ /* fallback */
+ if (memory_type_index == -1)
+ memory_type_index = 0;
+
+ result = radv_AllocateMemory(device_h,
+ &(VkMemoryAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = &import_info,
+ /* Max buffer size, unused for imports */
+ .allocationSize = 0x7FFFFFFF,
+ .memoryTypeIndex = memory_type_index,
+ },
+ alloc,
+ &memory_h);
+ if (result != VK_SUCCESS)
+ return result;
+
+ struct radeon_bo_metadata md;
+ device->ws->buffer_get_metadata(radv_device_memory_from_handle(memory_h)->bo, &md);
+
+ bool is_scanout;
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* Copied from radeonsi, but is hacky so should be cleaned up. */
+ is_scanout = md.u.gfx9.swizzle_mode == 0 || md.u.gfx9.swizzle_mode % 4 == 2;
+ } else {
+ is_scanout = md.u.legacy.scanout;
+ }
+
+ VkImageCreateInfo updated_base_info = *base_info;
+
+ VkExternalMemoryImageCreateInfo external_memory_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+ .pNext = updated_base_info.pNext,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+
+ updated_base_info.pNext = &external_memory_info;
+
+ result = radv_image_create(device_h,
+ &(struct radv_image_create_info) {
+ .vk_info = &updated_base_info,
+ .scanout = is_scanout,
+ .no_metadata_planes = true},
+ alloc,
+ &image_h);
+
+ if (result != VK_SUCCESS)
+ goto fail_create_image;
+
+ image = radv_image_from_handle(image_h);
+
+ radv_BindImageMemory(device_h, image_h, memory_h, 0);
+
+ image->owned_memory = memory_h;
+ /* Don't clobber the out-parameter until success is certain. */
+ *out_image_h = image_h;
+
+ return VK_SUCCESS;
+
+fail_create_image:
+ radv_FreeMemory(device_h, memory_h, alloc);
+ return result;
+}
+
+VkResult radv_GetSwapchainGrallocUsageANDROID(
+ VkDevice device_h,
+ VkFormat format,
+ VkImageUsageFlags imageUsage,
+ int* grallocUsage)
+{
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ struct radv_physical_device *phys_dev = device->physical_device;
+ VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
+ VkResult result;
+
+ *grallocUsage = 0;
+
+ /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+ * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
+ * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
+ *
+ * TODO(jessehall): I think these are right, but haven't thought hard
+ * about it. Do we need to query the driver for support of any of
+ * these?
+ *
+ * Any disagreement between this function and the hardcoded
+ * VkSurfaceCapabilitiesKHR::supportedUsageFlags causes tests
+ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
+ */
+
+ const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = imageUsage,
+ };
+
+ VkImageFormatProperties2KHR image_format_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
+ };
+
+ /* Check that requested format and usage are supported. */
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
+ &image_format_info, &image_format_props);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, result,
+ "radv_GetPhysicalDeviceImageFormatProperties2 failed "
+ "inside %s", __func__);
+ }
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
+
+ /* All VkImageUsageFlags not explicitly checked here are unsupported for
+ * gralloc swapchains.
+ */
+ if (imageUsage != 0) {
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkImageUsageFlags(0x%x) for gralloc "
+ "swapchain", imageUsage);
+ }
+
+ /*
+ * FINISHME: Advertise all display-supported formats. Mostly
+ * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
+ * what we need for 30-bit colors.
+ */
+ if (format == VK_FORMAT_B8G8R8A8_UNORM ||
+ format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+ *grallocUsage |= GRALLOC_USAGE_HW_FB |
+ GRALLOC_USAGE_HW_COMPOSER |
+ GRALLOC_USAGE_EXTERNAL_DISP;
+ }
+
+ if (*grallocUsage == 0)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ return VK_SUCCESS;
+}
+
+VkResult
+radv_AcquireImageANDROID(
+ VkDevice device,
+ VkImage image_h,
+ int nativeFenceFd,
+ VkSemaphore semaphore,
+ VkFence fence)
+{
+ VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
+
+ if (semaphore != VK_NULL_HANDLE) {
+ int semaphore_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd;
+ semaphore_result = radv_ImportSemaphoreFdKHR(device,
+ &(VkImportSemaphoreFdInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
+ .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR,
+ .fd = semaphore_fd,
+ .semaphore = semaphore,
+ });
+ }
+
+ if (fence != VK_NULL_HANDLE) {
+ int fence_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd;
+ fence_result = radv_ImportFenceFdKHR(device,
+ &(VkImportFenceFdInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
+ .flags = VK_FENCE_IMPORT_TEMPORARY_BIT_KHR,
+ .fd = fence_fd,
+ .fence = fence,
+ });
+ }
+
+ close(nativeFenceFd);
+
+ if (semaphore_result != VK_SUCCESS)
+ return semaphore_result;
+ return fence_result;
+}
+
+VkResult
+radv_QueueSignalReleaseImageANDROID(
+ VkQueue _queue,
+ uint32_t waitSemaphoreCount,
+ const VkSemaphore* pWaitSemaphores,
+ VkImage image,
+ int* pNativeFenceFd)
+{
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ VkResult result = VK_SUCCESS;
+
+ if (waitSemaphoreCount == 0) {
+ if (pNativeFenceFd)
+ *pNativeFenceFd = -1;
+ return VK_SUCCESS;
+ }
+
+ int fd = -1;
+
+ for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
+ int tmp_fd;
+ result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device),
+ &(VkSemaphoreGetFdInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR,
+ .semaphore = pWaitSemaphores[i],
+ }, &tmp_fd);
+ if (result != VK_SUCCESS) {
+ if (fd >= 0)
+ close (fd);
+ return result;
+ }
+
+ if (fd < 0)
+ fd = tmp_fd;
+ else if (tmp_fd >= 0) {
+ sync_accumulate("radv", &fd, tmp_fd);
+ close(tmp_fd);
+ }
+ }
+
+ if (pNativeFenceFd) {
+ *pNativeFenceFd = fd;
+ } else if (fd >= 0) {
+ close(fd);
+ /* We still need to do the exports, to reset the semaphores, but
+ * otherwise we don't wait on them. */
+ }
+ return VK_SUCCESS;
+}
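
Note: unmask32() is the workhorse of the gralloc-usage translation above: it clears the tested bits from the mask and reports whether any were set, so each check consumes its bits and whatever survives to the end is, by construction, unsupported. A small sketch of the pattern (usage value illustrative):

    uint32_t usage = VK_IMAGE_USAGE_SAMPLED_BIT |
                     VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
    if (unmask32(&usage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                         VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
            ; /* render usage requested -> GRALLOC_USAGE_HW_RENDER */
    if (unmask32(&usage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                         VK_IMAGE_USAGE_SAMPLED_BIT |
                         VK_IMAGE_USAGE_STORAGE_BIT |
                         VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
            ; /* texture usage requested -> GRALLOC_USAGE_HW_TEXTURE */
    assert(usage == 0); /* any surviving bit would be rejected */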
diff --git a/lib/mesa/src/amd/vulkan/radv_debug.c b/lib/mesa/src/amd/vulkan/radv_debug.c
index b69c05b64..08fc80c12 100644
--- a/lib/mesa/src/amd/vulkan/radv_debug.c
+++ b/lib/mesa/src/amd/vulkan/radv_debug.c
@@ -29,6 +29,7 @@
#include <stdio.h>
#include <sys/utsname.h>
+#include "util/mesa-sha1.h"
#include "sid.h"
#include "gfx9d.h"
#include "ac_debug.h"
@@ -61,7 +62,8 @@ radv_init_trace(struct radv_device *device)
device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS);
+ RADEON_FLAG_CPU_ACCESS|
+ RADEON_FLAG_NO_INTERPROCESS_SHARING);
if (!device->trace_bo)
return false;
@@ -78,7 +80,7 @@ radv_init_trace(struct radv_device *device)
}
static void
-radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs)
+radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs)
{
const char *filename = getenv("RADV_TRACE_FILE");
FILE *f = fopen(filename, "w");
@@ -367,11 +369,9 @@ static void si_add_split_disasm(const char *disasm,
}
static void
-radv_dump_annotated_shader(struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader,
- gl_shader_stage stage,
- struct ac_wave_info *waves, unsigned num_waves,
- FILE *f)
+radv_dump_annotated_shader(struct radv_shader_variant *shader,
+ gl_shader_stage stage, struct ac_wave_info *waves,
+ unsigned num_waves, FILE *f)
{
uint64_t start_addr, end_addr;
unsigned i;
@@ -442,28 +442,22 @@ radv_dump_annotated_shader(struct radv_pipeline *pipeline,
static void
radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
- struct radv_shader_variant *compute_shader,
- FILE *f)
+ VkShaderStageFlagBits active_stages, FILE *f)
{
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
unsigned num_waves = ac_get_wave_info(waves);
- unsigned mask;
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
"\n\n", num_waves);
/* Dump annotated active graphics shaders. */
- mask = pipeline->active_stages;
- while (mask) {
- int stage = u_bit_scan(&mask);
+ while (active_stages) {
+ int stage = u_bit_scan(&active_stages);
- radv_dump_annotated_shader(pipeline, pipeline->shaders[stage],
+ radv_dump_annotated_shader(pipeline->shaders[stage],
stage, waves, num_waves, f);
}
- radv_dump_annotated_shader(pipeline, compute_shader,
- MESA_SHADER_COMPUTE, waves, num_waves, f);
-
/* Print waves executing shaders that are not currently bound. */
unsigned i;
bool found = false;
@@ -498,7 +492,13 @@ radv_dump_shader(struct radv_pipeline *pipeline,
fprintf(f, "%s:\n\n", radv_get_shader_name(shader, stage));
if (shader->spirv) {
- fprintf(f, "SPIRV:\n");
+ unsigned char sha1[21];
+ char sha1buf[41];
+
+ _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
+ _mesa_sha1_format(sha1buf, sha1);
+
+ fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
radv_print_spirv(shader->spirv, shader->spirv_size, f);
}
@@ -507,55 +507,59 @@ radv_dump_shader(struct radv_pipeline *pipeline,
nir_print_shader(shader->nir, f);
}
- fprintf(stderr, "DISASM:\n%s\n", shader->disasm_string);
+ fprintf(f, "LLVM IR:\n%s\n", shader->llvm_ir_string);
+ fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
radv_shader_dump_stats(pipeline->device, shader, stage, f);
}
static void
radv_dump_shaders(struct radv_pipeline *pipeline,
- struct radv_shader_variant *compute_shader, FILE *f)
+ VkShaderStageFlagBits active_stages, FILE *f)
{
- unsigned mask;
-
/* Dump active graphics shaders. */
- mask = pipeline->active_stages;
- while (mask) {
- int stage = u_bit_scan(&mask);
+ while (active_stages) {
+ int stage = u_bit_scan(&active_stages);
radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
}
+}
- radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f);
+static void
+radv_dump_pipeline_state(struct radv_pipeline *pipeline,
+ VkShaderStageFlagBits active_stages, FILE *f)
+{
+ radv_dump_shaders(pipeline, active_stages, f);
+ radv_dump_annotated_shaders(pipeline, active_stages, f);
+ radv_dump_descriptors(pipeline, f);
}
static void
radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline,
struct radv_pipeline *compute_pipeline, FILE *f)
{
- struct radv_shader_variant *compute_shader =
- compute_pipeline ? compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL;
+ VkShaderStageFlagBits active_stages;
- if (!graphics_pipeline)
- return;
+ if (graphics_pipeline) {
+ active_stages = graphics_pipeline->active_stages;
+ radv_dump_pipeline_state(graphics_pipeline, active_stages, f);
+ }
- radv_dump_shaders(graphics_pipeline, compute_shader, f);
- radv_dump_annotated_shaders(graphics_pipeline, compute_shader, f);
- radv_dump_descriptors(graphics_pipeline, f);
+ if (compute_pipeline) {
+ active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
+ radv_dump_pipeline_state(compute_pipeline, active_stages, f);
+ }
}
static void
radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f)
{
+ VkShaderStageFlagBits active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
+
if (!compute_pipeline)
return;
- radv_dump_shaders(compute_pipeline,
- compute_pipeline->shaders[MESA_SHADER_COMPUTE], f);
- radv_dump_annotated_shaders(compute_pipeline,
- compute_pipeline->shaders[MESA_SHADER_COMPUTE],
- f);
- radv_dump_descriptors(compute_pipeline, f);
+ radv_dump_pipeline_state(compute_pipeline, active_stages, f);
}
static struct radv_pipeline *
@@ -592,28 +596,32 @@ radv_dump_dmesg(FILE *f)
pclose(p);
}
-static void
+void
radv_dump_enabled_options(struct radv_device *device, FILE *f)
{
uint64_t mask;
- fprintf(f, "Enabled debug options: ");
+ if (device->instance->debug_flags) {
+ fprintf(f, "Enabled debug options: ");
- mask = device->instance->debug_flags;
- while (mask) {
- int i = u_bit_scan64(&mask);
- fprintf(f, "%s, ", radv_get_debug_option_name(i));
+ mask = device->instance->debug_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_debug_option_name(i));
+ }
+ fprintf(f, "\n");
}
- fprintf(f, "\n");
- fprintf(f, "Enabled perftest options: ");
+ if (device->instance->perftest_flags) {
+ fprintf(f, "Enabled perftest options: ");
- mask = device->instance->perftest_flags;
- while (mask) {
- int i = u_bit_scan64(&mask);
- fprintf(f, "%s, ", radv_get_perftest_option_name(i));
+ mask = device->instance->perftest_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_perftest_option_name(i));
+ }
+ fprintf(f, "\n");
}
- fprintf(f, "\n");
}
static void
@@ -630,11 +638,9 @@ radv_dump_device_name(struct radv_device *device, FILE *f)
snprintf(kernel_version, sizeof(kernel_version),
" / %s", uname_data.release);
- if (HAVE_LLVM > 0) {
- snprintf(llvm_string, sizeof(llvm_string),
- ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
- HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
- }
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
fprintf(f, "Device name: %s (%s DRM %i.%i.%i%s%s)\n\n",
chip_name, device->physical_device->name,
@@ -654,7 +660,7 @@ radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
}
void
-radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs)
+radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
struct radv_pipeline *graphics_pipeline, *compute_pipeline;
struct radv_device *device = queue->device;
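
Note: the refactor above replaces the separate compute_shader parameter with a VkShaderStageFlagBits mask iterated via u_bit_scan(). This works because Vulkan's stage bits line up with Mesa's gl_shader_stage numbering (vertex at bit 0, fragment at bit 4, compute at bit 5), so the scanned bit index doubles as the pipeline->shaders[] index. Sketch of the iteration:

    /* u_bit_scan() returns the lowest set bit's index and clears it,
     * so the loop visits each active stage exactly once, in stage order. */
    uint32_t active = VK_SHADER_STAGE_VERTEX_BIT |   /* bit 0 */
                      VK_SHADER_STAGE_FRAGMENT_BIT;  /* bit 4 */
    while (active) {
            int stage = u_bit_scan(&active); /* 0, then 4 */
            radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
    }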
diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.c b/lib/mesa/src/amd/vulkan/radv_extensions.c
index f9268dfbe..9294b0769 100644
--- a/lib/mesa/src/amd/vulkan/radv_extensions.c
+++ b/lib/mesa/src/amd/vulkan/radv_extensions.c
@@ -51,6 +51,18 @@
#else
# define VK_USE_PLATFORM_XLIB_KHR false
#endif
+#ifdef VK_USE_PLATFORM_DISPLAY_KHR
+# undef VK_USE_PLATFORM_DISPLAY_KHR
+# define VK_USE_PLATFORM_DISPLAY_KHR true
+#else
+# define VK_USE_PLATFORM_DISPLAY_KHR false
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
+# undef VK_USE_PLATFORM_XLIB_XRANDR_EXT
+# define VK_USE_PLATFORM_XLIB_XRANDR_EXT true
+#else
+# define VK_USE_PLATFORM_XLIB_XRANDR_EXT false
+#endif
/* And ANDROID too */
#ifdef ANDROID
@@ -60,348 +72,172 @@
# define ANDROID false
#endif
-#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR)
+#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR || VK_USE_PLATFORM_DISPLAY_KHR)
-bool
-radv_instance_extension_supported(const char *name)
-{
- if (strcmp(name, "VK_KHR_external_memory_capabilities") == 0)
- return true;
- if (strcmp(name, "VK_KHR_external_semaphore_capabilities") == 0)
- return true;
- if (strcmp(name, "VK_KHR_get_physical_device_properties2") == 0)
- return true;
- if (strcmp(name, "VK_KHR_surface") == 0)
- return RADV_HAS_SURFACE;
- if (strcmp(name, "VK_KHR_wayland_surface") == 0)
- return VK_USE_PLATFORM_WAYLAND_KHR;
- if (strcmp(name, "VK_KHR_xcb_surface") == 0)
- return VK_USE_PLATFORM_XCB_KHR;
- if (strcmp(name, "VK_KHR_xlib_surface") == 0)
- return VK_USE_PLATFORM_XLIB_KHR;
- return false;
-}
-VkResult radv_EnumerateInstanceExtensionProperties(
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
-{
- VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT] = {
+ {"VK_KHR_device_group_creation", 1},
+ {"VK_KHR_external_fence_capabilities", 1},
+ {"VK_KHR_external_memory_capabilities", 1},
+ {"VK_KHR_external_semaphore_capabilities", 1},
+ {"VK_KHR_get_display_properties2", 1},
+ {"VK_KHR_get_physical_device_properties2", 1},
+ {"VK_KHR_get_surface_capabilities2", 1},
+ {"VK_KHR_surface", 25},
+ {"VK_KHR_wayland_surface", 6},
+ {"VK_KHR_xcb_surface", 6},
+ {"VK_KHR_xlib_surface", 6},
+ {"VK_KHR_display", 23},
+ {"VK_EXT_direct_mode_display", 1},
+ {"VK_EXT_acquire_xlib_display", 1},
+ {"VK_EXT_display_surface_counter", 1},
+ {"VK_EXT_debug_report", 9},
+};
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_external_memory_capabilities",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_external_semaphore_capabilities",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_get_physical_device_properties2",
- .specVersion = 1,
- };
- }
- }
- if (RADV_HAS_SURFACE) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_surface",
- .specVersion = 25,
- };
- }
- }
- if (VK_USE_PLATFORM_WAYLAND_KHR) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_wayland_surface",
- .specVersion = 6,
- };
- }
- }
- if (VK_USE_PLATFORM_XCB_KHR) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_xcb_surface",
- .specVersion = 6,
- };
- }
- }
- if (VK_USE_PLATFORM_XLIB_KHR) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_xlib_surface",
- .specVersion = 6,
- };
- }
- }
+const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT] = {
+ {"VK_ANDROID_native_buffer", 5},
+ {"VK_KHR_16bit_storage", 1},
+ {"VK_KHR_bind_memory2", 1},
+ {"VK_KHR_create_renderpass2", 1},
+ {"VK_KHR_dedicated_allocation", 1},
+ {"VK_KHR_descriptor_update_template", 1},
+ {"VK_KHR_device_group", 1},
+ {"VK_KHR_draw_indirect_count", 1},
+ {"VK_KHR_driver_properties", 1},
+ {"VK_KHR_external_fence", 1},
+ {"VK_KHR_external_fence_fd", 1},
+ {"VK_KHR_external_memory", 1},
+ {"VK_KHR_external_memory_fd", 1},
+ {"VK_KHR_external_semaphore", 1},
+ {"VK_KHR_external_semaphore_fd", 1},
+ {"VK_KHR_get_memory_requirements2", 1},
+ {"VK_KHR_image_format_list", 1},
+ {"VK_KHR_incremental_present", 1},
+ {"VK_KHR_maintenance1", 1},
+ {"VK_KHR_maintenance2", 1},
+ {"VK_KHR_maintenance3", 1},
+ {"VK_KHR_push_descriptor", 1},
+ {"VK_KHR_relaxed_block_layout", 1},
+ {"VK_KHR_sampler_mirror_clamp_to_edge", 1},
+ {"VK_KHR_shader_draw_parameters", 1},
+ {"VK_KHR_storage_buffer_storage_class", 1},
+ {"VK_KHR_swapchain", 68},
+ {"VK_KHR_variable_pointers", 1},
+ {"VK_KHR_multiview", 1},
+ {"VK_EXT_calibrated_timestamps", 1},
+ {"VK_EXT_conditional_rendering", 1},
+ {"VK_EXT_conservative_rasterization", 1},
+ {"VK_EXT_display_control", 1},
+ {"VK_EXT_depth_range_unrestricted", 1},
+ {"VK_EXT_descriptor_indexing", 2},
+ {"VK_EXT_discard_rectangles", 1},
+ {"VK_EXT_external_memory_dma_buf", 1},
+ {"VK_EXT_external_memory_host", 1},
+ {"VK_EXT_global_priority", 1},
+ {"VK_EXT_pci_bus_info", 1},
+ {"VK_EXT_sampler_filter_minmax", 1},
+ {"VK_EXT_shader_viewport_index_layer", 1},
+ {"VK_EXT_shader_stencil_export", 1},
+ {"VK_EXT_transform_feedback", 1},
+ {"VK_EXT_vertex_attribute_divisor", 3},
+ {"VK_AMD_draw_indirect_count", 1},
+ {"VK_AMD_gcn_shader", 1},
+ {"VK_AMD_rasterization_order", 1},
+ {"VK_AMD_shader_core_properties", 1},
+ {"VK_AMD_shader_info", 1},
+ {"VK_AMD_shader_trinary_minmax", 1},
+ {"VK_GOOGLE_decorate_string", 1},
+ {"VK_GOOGLE_hlsl_functionality1", 1},
+};
- return vk_outarray_status(&out);
-}
+const struct radv_instance_extension_table radv_supported_instance_extensions = {
+ .KHR_device_group_creation = true,
+ .KHR_external_fence_capabilities = true,
+ .KHR_external_memory_capabilities = true,
+ .KHR_external_semaphore_capabilities = true,
+ .KHR_get_display_properties2 = VK_USE_PLATFORM_DISPLAY_KHR,
+ .KHR_get_physical_device_properties2 = true,
+ .KHR_get_surface_capabilities2 = RADV_HAS_SURFACE,
+ .KHR_surface = RADV_HAS_SURFACE,
+ .KHR_wayland_surface = VK_USE_PLATFORM_WAYLAND_KHR,
+ .KHR_xcb_surface = VK_USE_PLATFORM_XCB_KHR,
+ .KHR_xlib_surface = VK_USE_PLATFORM_XLIB_KHR,
+ .KHR_display = VK_USE_PLATFORM_DISPLAY_KHR,
+ .EXT_direct_mode_display = VK_USE_PLATFORM_DISPLAY_KHR,
+ .EXT_acquire_xlib_display = VK_USE_PLATFORM_XLIB_XRANDR_EXT,
+ .EXT_display_surface_counter = VK_USE_PLATFORM_DISPLAY_KHR,
+ .EXT_debug_report = true,
+};
-uint32_t
-radv_physical_device_api_version(struct radv_physical_device *dev)
+void radv_fill_device_extension_table(const struct radv_physical_device *device,
+ struct radv_device_extension_table* table)
{
- return VK_MAKE_VERSION(1, 0, 57);
+ table->ANDROID_native_buffer = ANDROID && device->rad_info.has_syncobj_wait_for_submit;
+ table->KHR_16bit_storage = HAVE_LLVM >= 0x0700;
+ table->KHR_bind_memory2 = true;
+ table->KHR_create_renderpass2 = true;
+ table->KHR_dedicated_allocation = true;
+ table->KHR_descriptor_update_template = true;
+ table->KHR_device_group = true;
+ table->KHR_draw_indirect_count = true;
+ table->KHR_driver_properties = true;
+ table->KHR_external_fence = device->rad_info.has_syncobj_wait_for_submit;
+ table->KHR_external_fence_fd = device->rad_info.has_syncobj_wait_for_submit;
+ table->KHR_external_memory = true;
+ table->KHR_external_memory_fd = true;
+ table->KHR_external_semaphore = device->rad_info.has_syncobj;
+ table->KHR_external_semaphore_fd = device->rad_info.has_syncobj;
+ table->KHR_get_memory_requirements2 = true;
+ table->KHR_image_format_list = true;
+ table->KHR_incremental_present = RADV_HAS_SURFACE;
+ table->KHR_maintenance1 = true;
+ table->KHR_maintenance2 = true;
+ table->KHR_maintenance3 = true;
+ table->KHR_push_descriptor = true;
+ table->KHR_relaxed_block_layout = true;
+ table->KHR_sampler_mirror_clamp_to_edge = true;
+ table->KHR_shader_draw_parameters = true;
+ table->KHR_storage_buffer_storage_class = true;
+ table->KHR_swapchain = RADV_HAS_SURFACE;
+ table->KHR_variable_pointers = true;
+ table->KHR_multiview = true;
+ table->EXT_calibrated_timestamps = true;
+ table->EXT_conditional_rendering = true;
+ table->EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9;
+ table->EXT_display_control = VK_USE_PLATFORM_DISPLAY_KHR;
+ table->EXT_depth_range_unrestricted = true;
+ table->EXT_descriptor_indexing = true;
+ table->EXT_discard_rectangles = true;
+ table->EXT_external_memory_dma_buf = true;
+ table->EXT_external_memory_host = device->rad_info.has_userptr;
+ table->EXT_global_priority = device->rad_info.has_ctx_priority;
+ table->EXT_pci_bus_info = false;
+ table->EXT_sampler_filter_minmax = device->rad_info.chip_class >= CIK;
+ table->EXT_shader_viewport_index_layer = true;
+ table->EXT_shader_stencil_export = true;
+ table->EXT_transform_feedback = true;
+ table->EXT_vertex_attribute_divisor = true;
+ table->AMD_draw_indirect_count = true;
+ table->AMD_gcn_shader = true;
+ table->AMD_rasterization_order = device->has_out_of_order_rast;
+ table->AMD_shader_core_properties = true;
+ table->AMD_shader_info = true;
+ table->AMD_shader_trinary_minmax = true;
+ table->GOOGLE_decorate_string = true;
+ table->GOOGLE_hlsl_functionality1 = true;
}
-bool
-radv_physical_device_extension_supported(struct radv_physical_device *device,
- const char *name)
+VkResult radv_EnumerateInstanceVersion(
+ uint32_t* pApiVersion)
{
- if (strcmp(name, "VK_KHR_bind_memory2") == 0)
- return true;
- if (strcmp(name, "VK_KHR_dedicated_allocation") == 0)
- return true;
- if (strcmp(name, "VK_KHR_descriptor_update_template") == 0)
- return true;
- if (strcmp(name, "VK_KHR_external_memory") == 0)
- return true;
- if (strcmp(name, "VK_KHR_external_memory_fd") == 0)
- return true;
- if (strcmp(name, "VK_KHR_external_semaphore") == 0)
- return device->rad_info.has_syncobj;
- if (strcmp(name, "VK_KHR_external_semaphore_fd") == 0)
- return device->rad_info.has_syncobj;
- if (strcmp(name, "VK_KHR_get_memory_requirements2") == 0)
- return true;
- if (strcmp(name, "VK_KHR_image_format_list") == 0)
- return true;
- if (strcmp(name, "VK_KHR_incremental_present") == 0)
- return true;
- if (strcmp(name, "VK_KHR_maintenance1") == 0)
- return true;
- if (strcmp(name, "VK_KHR_maintenance2") == 0)
- return true;
- if (strcmp(name, "VK_KHR_push_descriptor") == 0)
- return true;
- if (strcmp(name, "VK_KHR_relaxed_block_layout") == 0)
- return true;
- if (strcmp(name, "VK_KHR_sampler_mirror_clamp_to_edge") == 0)
- return true;
- if (strcmp(name, "VK_KHR_shader_draw_parameters") == 0)
- return true;
- if (strcmp(name, "VK_KHR_storage_buffer_storage_class") == 0)
- return true;
- if (strcmp(name, "VK_KHR_swapchain") == 0)
- return RADV_HAS_SURFACE;
- if (strcmp(name, "VK_KHR_variable_pointers") == 0)
- return true;
- if (strcmp(name, "VK_KHX_multiview") == 0)
- return false;
- if (strcmp(name, "VK_EXT_global_priority") == 0)
- return device->rad_info.has_ctx_priority;
- if (strcmp(name, "VK_AMD_draw_indirect_count") == 0)
- return true;
- if (strcmp(name, "VK_AMD_rasterization_order") == 0)
- return device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2;
- return false;
+ *pApiVersion = VK_MAKE_VERSION(1, 1, 70);
+ return VK_SUCCESS;
}
-VkResult radv_EnumerateDeviceExtensionProperties(
- VkPhysicalDevice physicalDevice,
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
+uint32_t
+radv_physical_device_api_version(struct radv_physical_device *dev)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
- (void)device;
-
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_bind_memory2",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_dedicated_allocation",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_descriptor_update_template",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_external_memory",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_external_memory_fd",
- .specVersion = 1,
- };
- }
- }
- if (device->rad_info.has_syncobj) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_external_semaphore",
- .specVersion = 1,
- };
- }
- }
- if (device->rad_info.has_syncobj) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_external_semaphore_fd",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_get_memory_requirements2",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_image_format_list",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_incremental_present",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_maintenance1",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_maintenance2",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_push_descriptor",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_relaxed_block_layout",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_shader_draw_parameters",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_storage_buffer_storage_class",
- .specVersion = 1,
- };
- }
- }
- if (RADV_HAS_SURFACE) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_swapchain",
- .specVersion = 68,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHR_variable_pointers",
- .specVersion = 1,
- };
- }
- }
- if (false) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_KHX_multiview",
- .specVersion = 1,
- };
- }
- }
- if (device->rad_info.has_ctx_priority) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_EXT_global_priority",
- .specVersion = 1,
- };
- }
- }
- if (true) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_AMD_draw_indirect_count",
- .specVersion = 1,
- };
- }
- }
- if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "VK_AMD_rasterization_order",
- .specVersion = 1,
- };
- }
- }
-
- return vk_outarray_status(&out);
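+	/* Note: 1.1 is only advertised when syncobj wait-for-submit works,
+	 * since the core 1.1 external fence/semaphore features depend on it
+	 * (see the KHR_external_fence entries in the table above). */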
+ if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit)
+ return VK_MAKE_VERSION(1, 1, 70);
+ return VK_MAKE_VERSION(1, 0, 68);
}
diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.h b/lib/mesa/src/amd/vulkan/radv_extensions.h
new file mode 100644
index 000000000..5f76d5d20
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_extensions.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef RADV_EXTENSIONS_H
+#define RADV_EXTENSIONS_H
+
+enum {
+ RADV_INSTANCE_EXTENSION_COUNT = 16,
+ RADV_DEVICE_EXTENSION_COUNT = 53,
+};
+
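+/* Note: these unions let generated code index extensions[i] while
+ * hand-written code reads the named members; both views alias the same
+ * bools, so the member order must match the extension tables. */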
+struct radv_instance_extension_table {
+ union {
+ bool extensions[RADV_INSTANCE_EXTENSION_COUNT];
+ struct {
+ bool KHR_device_group_creation;
+ bool KHR_external_fence_capabilities;
+ bool KHR_external_memory_capabilities;
+ bool KHR_external_semaphore_capabilities;
+ bool KHR_get_display_properties2;
+ bool KHR_get_physical_device_properties2;
+ bool KHR_get_surface_capabilities2;
+ bool KHR_surface;
+ bool KHR_wayland_surface;
+ bool KHR_xcb_surface;
+ bool KHR_xlib_surface;
+ bool KHR_display;
+ bool EXT_direct_mode_display;
+ bool EXT_acquire_xlib_display;
+ bool EXT_display_surface_counter;
+ bool EXT_debug_report;
+ };
+ };
+};
+
+struct radv_device_extension_table {
+ union {
+ bool extensions[RADV_DEVICE_EXTENSION_COUNT];
+ struct {
+ bool ANDROID_native_buffer;
+ bool KHR_16bit_storage;
+ bool KHR_bind_memory2;
+ bool KHR_create_renderpass2;
+ bool KHR_dedicated_allocation;
+ bool KHR_descriptor_update_template;
+ bool KHR_device_group;
+ bool KHR_draw_indirect_count;
+ bool KHR_driver_properties;
+ bool KHR_external_fence;
+ bool KHR_external_fence_fd;
+ bool KHR_external_memory;
+ bool KHR_external_memory_fd;
+ bool KHR_external_semaphore;
+ bool KHR_external_semaphore_fd;
+ bool KHR_get_memory_requirements2;
+ bool KHR_image_format_list;
+ bool KHR_incremental_present;
+ bool KHR_maintenance1;
+ bool KHR_maintenance2;
+ bool KHR_maintenance3;
+ bool KHR_push_descriptor;
+ bool KHR_relaxed_block_layout;
+ bool KHR_sampler_mirror_clamp_to_edge;
+ bool KHR_shader_draw_parameters;
+ bool KHR_storage_buffer_storage_class;
+ bool KHR_swapchain;
+ bool KHR_variable_pointers;
+ bool KHR_multiview;
+ bool EXT_calibrated_timestamps;
+ bool EXT_conditional_rendering;
+ bool EXT_conservative_rasterization;
+ bool EXT_display_control;
+ bool EXT_depth_range_unrestricted;
+ bool EXT_descriptor_indexing;
+ bool EXT_discard_rectangles;
+ bool EXT_external_memory_dma_buf;
+ bool EXT_external_memory_host;
+ bool EXT_global_priority;
+ bool EXT_pci_bus_info;
+ bool EXT_sampler_filter_minmax;
+ bool EXT_shader_viewport_index_layer;
+ bool EXT_shader_stencil_export;
+ bool EXT_transform_feedback;
+ bool EXT_vertex_attribute_divisor;
+ bool AMD_draw_indirect_count;
+ bool AMD_gcn_shader;
+ bool AMD_rasterization_order;
+ bool AMD_shader_core_properties;
+ bool AMD_shader_info;
+ bool AMD_shader_trinary_minmax;
+ bool GOOGLE_decorate_string;
+ bool GOOGLE_hlsl_functionality1;
+ };
+ };
+};
+
+extern const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT];
+extern const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT];
+extern const struct radv_instance_extension_table radv_supported_instance_extensions;
+
+
+struct radv_physical_device;
+
+void radv_fill_device_extension_table(const struct radv_physical_device *device,
+ struct radv_device_extension_table* table);
+#endif
diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.py b/lib/mesa/src/amd/vulkan/radv_extensions.py
index 43c0fa740..4a28f8bf4 100644
--- a/lib/mesa/src/amd/vulkan/radv_extensions.py
+++ b/lib/mesa/src/amd/vulkan/radv_extensions.py
@@ -31,7 +31,7 @@ import xml.etree.cElementTree as et
from mako.template import Template
-MAX_API_VERSION = '1.0.57'
+MAX_API_VERSION = '1.1.70'
class Extension:
def __init__(self, name, ext_version, enable):
@@ -50,21 +50,34 @@ class Extension:
# those extension strings, then tests dEQP-VK.api.info.instance.extensions
# and dEQP-VK.api.info.device fail due to the duplicated strings.
EXTENSIONS = [
+ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
+ Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'),
Extension('VK_KHR_bind_memory2', 1, True),
+ Extension('VK_KHR_create_renderpass2', 1, True),
Extension('VK_KHR_dedicated_allocation', 1, True),
Extension('VK_KHR_descriptor_update_template', 1, True),
+ Extension('VK_KHR_device_group', 1, True),
+ Extension('VK_KHR_device_group_creation', 1, True),
+ Extension('VK_KHR_draw_indirect_count', 1, True),
+ Extension('VK_KHR_driver_properties', 1, True),
+ Extension('VK_KHR_external_fence', 1, 'device->rad_info.has_syncobj_wait_for_submit'),
+ Extension('VK_KHR_external_fence_capabilities', 1, True),
+ Extension('VK_KHR_external_fence_fd', 1, 'device->rad_info.has_syncobj_wait_for_submit'),
Extension('VK_KHR_external_memory', 1, True),
Extension('VK_KHR_external_memory_capabilities', 1, True),
Extension('VK_KHR_external_memory_fd', 1, True),
Extension('VK_KHR_external_semaphore', 1, 'device->rad_info.has_syncobj'),
Extension('VK_KHR_external_semaphore_capabilities', 1, True),
Extension('VK_KHR_external_semaphore_fd', 1, 'device->rad_info.has_syncobj'),
+ Extension('VK_KHR_get_display_properties2', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_KHR_get_memory_requirements2', 1, True),
Extension('VK_KHR_get_physical_device_properties2', 1, True),
+ Extension('VK_KHR_get_surface_capabilities2', 1, 'RADV_HAS_SURFACE'),
Extension('VK_KHR_image_format_list', 1, True),
- Extension('VK_KHR_incremental_present', 1, True),
+ Extension('VK_KHR_incremental_present', 1, 'RADV_HAS_SURFACE'),
Extension('VK_KHR_maintenance1', 1, True),
Extension('VK_KHR_maintenance2', 1, True),
+ Extension('VK_KHR_maintenance3', 1, True),
Extension('VK_KHR_push_descriptor', 1, True),
Extension('VK_KHR_relaxed_block_layout', 1, True),
Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
@@ -76,10 +89,36 @@ EXTENSIONS = [
Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
- Extension('VK_KHX_multiview', 1, False),
+ Extension('VK_KHR_multiview', 1, True),
+ Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
+ Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
+ Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+ Extension('VK_EXT_calibrated_timestamps', 1, True),
+ Extension('VK_EXT_conditional_rendering', 1, True),
+ Extension('VK_EXT_conservative_rasterization', 1, 'device->rad_info.chip_class >= GFX9'),
+ Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
+ Extension('VK_EXT_display_control', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
+ Extension('VK_EXT_debug_report', 9, True),
+ Extension('VK_EXT_depth_range_unrestricted', 1, True),
+ Extension('VK_EXT_descriptor_indexing', 2, True),
+ Extension('VK_EXT_discard_rectangles', 1, True),
+ Extension('VK_EXT_external_memory_dma_buf', 1, True),
+ Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'),
Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
+ Extension('VK_EXT_pci_bus_info', 1, False),
+ Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
+ Extension('VK_EXT_shader_viewport_index_layer', 1, True),
+ Extension('VK_EXT_shader_stencil_export', 1, True),
+ Extension('VK_EXT_transform_feedback', 1, True),
+ Extension('VK_EXT_vertex_attribute_divisor', 3, True),
Extension('VK_AMD_draw_indirect_count', 1, True),
- Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
+ Extension('VK_AMD_gcn_shader', 1, True),
+ Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'),
+ Extension('VK_AMD_shader_core_properties', 1, True),
+ Extension('VK_AMD_shader_info', 1, True),
+ Extension('VK_AMD_shader_trinary_minmax', 1, True),
+ Extension('VK_GOOGLE_decorate_string', 1, True),
+ Extension('VK_GOOGLE_hlsl_functionality1', 1, True),
]
class VkVersion:
@@ -106,7 +145,8 @@ class VkVersion:
return '.'.join(ver_list)
def c_vk_version(self):
- ver_list = [str(self.major), str(self.minor), str(self.patch)]
+ patch = self.patch if self.patch is not None else 0
+ ver_list = [str(self.major), str(self.minor), str(patch)]
return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
def __int_ver(self):
@@ -114,14 +154,15 @@ class VkVersion:
patch = self.patch if self.patch is not None else 0
return (self.major << 22) | (self.minor << 12) | patch
- def __cmp__(self, other):
+ def __gt__(self, other):
# If only one of them has a patch version, "ignore" it by making
# other's patch version match self.
if (self.patch is None) != (other.patch is None):
other = copy.copy(other)
other.patch = self.patch
- return self.__int_ver().__cmp__(other.__int_ver())
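+	# Python 3 has no __cmp__; > is the only comparison this script needs.
+	# __int_ver packs like VK_MAKE_VERSION, e.g. 1.1.70 ->
+	# (1 << 22) | (1 << 12) | 70 = 4198470.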
+ return self.__int_ver() > other.__int_ver()
+
MAX_API_VERSION = VkVersion(MAX_API_VERSION)
@@ -139,31 +180,64 @@ def _init_exts_from_xml(xml):
if ext_name not in ext_name_map:
continue
- # Workaround for VK_ANDROID_native_buffer. Its <extension> element in
- # vk.xml lists it as supported="disabled" and provides only a stub
- # definition. Its <extension> element in Mesa's custom
- # vk_android_native_buffer.xml, though, lists it as
- # supported='android-vendor' and fully defines the extension. We want
- # to skip the <extension> element in vk.xml.
- if ext_elem.attrib['supported'] == 'disabled':
- assert ext_name == 'VK_ANDROID_native_buffer'
- continue
-
ext = ext_name_map[ext_name]
ext.type = ext_elem.attrib['type']
-_TEMPLATE = Template(COPYRIGHT + """
+_TEMPLATE_H = Template(COPYRIGHT + """
+#ifndef RADV_EXTENSIONS_H
+#define RADV_EXTENSIONS_H
+
+enum {
+ RADV_INSTANCE_EXTENSION_COUNT = ${len(instance_extensions)},
+ RADV_DEVICE_EXTENSION_COUNT = ${len(device_extensions)},
+};
+
+struct radv_instance_extension_table {
+ union {
+ bool extensions[RADV_INSTANCE_EXTENSION_COUNT];
+ struct {
+%for ext in instance_extensions:
+ bool ${ext.name[3:]};
+%endfor
+ };
+ };
+};
+
+struct radv_device_extension_table {
+ union {
+ bool extensions[RADV_DEVICE_EXTENSION_COUNT];
+ struct {
+%for ext in device_extensions:
+ bool ${ext.name[3:]};
+%endfor
+ };
+ };
+};
+
+extern const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT];
+extern const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT];
+extern const struct radv_instance_extension_table radv_supported_instance_extensions;
+
+
+struct radv_physical_device;
+
+void radv_fill_device_extension_table(const struct radv_physical_device *device,
+ struct radv_device_extension_table* table);
+#endif
+""")
+
+_TEMPLATE_C = Template(COPYRIGHT + """
#include "radv_private.h"
#include "vk_util.h"
/* Convert the VK_USE_PLATFORM_* defines to booleans */
-%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']:
-#ifdef VK_USE_PLATFORM_${platform}_KHR
-# undef VK_USE_PLATFORM_${platform}_KHR
-# define VK_USE_PLATFORM_${platform}_KHR true
+%for platform in ['ANDROID_KHR', 'WAYLAND_KHR', 'XCB_KHR', 'XLIB_KHR', 'DISPLAY_KHR', 'XLIB_XRANDR_EXT']:
+#ifdef VK_USE_PLATFORM_${platform}
+# undef VK_USE_PLATFORM_${platform}
+# define VK_USE_PLATFORM_${platform} true
#else
-# define VK_USE_PLATFORM_${platform}_KHR false
+# define VK_USE_PLATFORM_${platform} false
#endif
%endfor
@@ -177,84 +251,56 @@ _TEMPLATE = Template(COPYRIGHT + """
#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\
VK_USE_PLATFORM_XCB_KHR || \\
- VK_USE_PLATFORM_XLIB_KHR)
+ VK_USE_PLATFORM_XLIB_KHR || \\
+ VK_USE_PLATFORM_DISPLAY_KHR)
-bool
-radv_instance_extension_supported(const char *name)
-{
+
+const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT] = {
%for ext in instance_extensions:
- if (strcmp(name, "${ext.name}") == 0)
- return ${ext.enable};
+ {"${ext.name}", ${ext.ext_version}},
%endfor
- return false;
-}
+};
-VkResult radv_EnumerateInstanceExtensionProperties(
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
-{
- VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
+const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT] = {
+%for ext in device_extensions:
+ {"${ext.name}", ${ext.ext_version}},
+%endfor
+};
+const struct radv_instance_extension_table radv_supported_instance_extensions = {
%for ext in instance_extensions:
- if (${ext.enable}) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "${ext.name}",
- .specVersion = ${ext.ext_version},
- };
- }
- }
+ .${ext.name[3:]} = ${ext.enable},
%endfor
+};
- return vk_outarray_status(&out);
-}
-
-uint32_t
-radv_physical_device_api_version(struct radv_physical_device *dev)
-{
- return ${MAX_API_VERSION.c_vk_version()};
-}
-
-bool
-radv_physical_device_extension_supported(struct radv_physical_device *device,
- const char *name)
+void radv_fill_device_extension_table(const struct radv_physical_device *device,
+ struct radv_device_extension_table* table)
{
%for ext in device_extensions:
- if (strcmp(name, "${ext.name}") == 0)
- return ${ext.enable};
+ table->${ext.name[3:]} = ${ext.enable};
%endfor
- return false;
}
-VkResult radv_EnumerateDeviceExtensionProperties(
- VkPhysicalDevice physicalDevice,
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
+VkResult radv_EnumerateInstanceVersion(
+ uint32_t* pApiVersion)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount);
- (void)device;
-
-%for ext in device_extensions:
- if (${ext.enable}) {
- vk_outarray_append(&out, prop) {
- *prop = (VkExtensionProperties) {
- .extensionName = "${ext.name}",
- .specVersion = ${ext.ext_version},
- };
- }
- }
-%endfor
+ *pApiVersion = ${MAX_API_VERSION.c_vk_version()};
+ return VK_SUCCESS;
+}
- return vk_outarray_status(&out);
+uint32_t
+radv_physical_device_api_version(struct radv_physical_device *dev)
+{
+ if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit)
+ return VK_MAKE_VERSION(1, 1, 70);
+ return VK_MAKE_VERSION(1, 0, 68);
}
""")
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--out', help='Output C file.', required=True)
+ parser.add_argument('--out-c', help='Output C file.', required=True)
+ parser.add_argument('--out-h', help='Output H file.', required=True)
parser.add_argument('--xml',
help='Vulkan API XML file.',
required=True,
@@ -274,5 +320,7 @@ if __name__ == '__main__':
'device_extensions': [e for e in EXTENSIONS if e.type == 'device'],
}
- with open(args.out, 'w') as f:
- f.write(_TEMPLATE.render(**template_env))
+ with open(args.out_c, 'w') as f:
+ f.write(_TEMPLATE_C.render(**template_env))
+ with open(args.out_h, 'w') as f:
+ f.write(_TEMPLATE_H.render(**template_env))
diff --git a/lib/mesa/src/amd/vulkan/radv_icd.py b/lib/mesa/src/amd/vulkan/radv_icd.py
new file mode 100644
index 000000000..cc86bbfa5
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_icd.py
@@ -0,0 +1,47 @@
+# Copyright 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import json
+import os.path
+
+from radv_extensions import *
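+# Note: argparse is not imported here directly; it comes in through the
+# wildcard import from radv_extensions.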
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--out', help='Output json file.', required=True)
+ parser.add_argument('--lib-path', help='Path to libvulkan_radeon.so')
+ args = parser.parse_args()
+
+ path = 'libvulkan_radeon.so'
+ if args.lib_path:
+ path = os.path.join(args.lib_path, path)
+
+ json_data = {
+ 'file_format_version': '1.0.0',
+ 'ICD': {
+ 'library_path': path,
+ 'api_version': str(MAX_API_VERSION),
+ },
+ }
+
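+    # The Vulkan loader consumes this manifest; with MAX_API_VERSION = 1.1.70
+    # it looks like:
+    #   {"ICD": {"api_version": "1.1.70",
+    #            "library_path": "libvulkan_radeon.so"},
+    #    "file_format_version": "1.0.0"}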
+ with open(args.out, 'w') as f:
+        json.dump(json_data, f, indent=4, sort_keys=True, separators=(',', ': '))
diff --git a/lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp b/lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp
new file mode 100644
index 000000000..ed05e1197
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2018 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
+#include "radv_shader_helper.h"
+
+#include <list>
+class radv_llvm_per_thread_info {
+public:
+ radv_llvm_per_thread_info(enum radeon_family arg_family,
+ enum ac_target_machine_options arg_tm_options)
+ : family(arg_family), tm_options(arg_tm_options) {}
+
+ ~radv_llvm_per_thread_info()
+ {
+ ac_destroy_llvm_passes(passes);
+ ac_destroy_llvm_compiler(&llvm_info);
+ }
+
+ bool init(void)
+ {
+ if (!ac_init_llvm_compiler(&llvm_info,
+ true,
+ family,
+ tm_options))
+ return false;
+
+ passes = ac_create_llvm_passes(llvm_info.tm);
+ if (!passes)
+ return false;
+
+ return true;
+ }
+
+ bool compile_to_memory_buffer(LLVMModuleRef module,
+ struct ac_shader_binary *binary)
+ {
+ return ac_compile_module_to_binary(passes, module, binary);
+ }
+
+	bool is_same(enum radeon_family arg_family,
+		     enum ac_target_machine_options arg_tm_options) {
+		return arg_family == family &&
+		       arg_tm_options == tm_options;
+	}
+ struct ac_llvm_compiler llvm_info;
+private:
+ enum radeon_family family;
+ enum ac_target_machine_options tm_options;
+ struct ac_compiler_passes *passes;
+};
+
+/* We have to store a linked list per thread due to the possibility of multiple GPUs being required. */
+static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list;
+
+bool radv_compile_to_binary(struct ac_llvm_compiler *info,
+ LLVMModuleRef module,
+ struct ac_shader_binary *binary)
+{
+ radv_llvm_per_thread_info *thread_info = nullptr;
+
+ for (auto &I : radv_llvm_per_thread_list) {
+ if (I.llvm_info.tm == info->tm) {
+ thread_info = &I;
+ break;
+ }
+ }
+
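+	/* No cached per-thread compiler matches this target machine
+	 * (presumably the non-threaded path), so build throwaway passes
+	 * for this single compile. */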
+ if (!thread_info) {
+ struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
+ bool ret = ac_compile_module_to_binary(passes, module, binary);
+ ac_destroy_llvm_passes(passes);
+ return ret;
+ }
+
+ return thread_info->compile_to_memory_buffer(module, binary);
+}
+
+bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
+ bool okay_to_leak_target_library_info,
+ bool thread_compiler,
+ enum radeon_family family,
+ enum ac_target_machine_options tm_options)
+{
+ if (thread_compiler) {
+ for (auto &I : radv_llvm_per_thread_list) {
+ if (I.is_same(family, tm_options)) {
+ *info = I.llvm_info;
+ return true;
+ }
+ }
+
+ radv_llvm_per_thread_list.emplace_back(family, tm_options);
+ radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
+
+ if (!tinfo.init()) {
+ radv_llvm_per_thread_list.pop_back();
+ return false;
+ }
+
+ *info = tinfo.llvm_info;
+ return true;
+ }
+
+ if (!ac_init_llvm_compiler(info,
+ okay_to_leak_target_library_info,
+ family,
+ tm_options))
+ return false;
+ return true;
+}
+
+void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
+ bool thread_compiler)
+{
+ if (!thread_compiler)
+ ac_destroy_llvm_compiler(info);
+}
diff --git a/lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c b/lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c
new file mode 100644
index 000000000..8c21c4235
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c
@@ -0,0 +1,3968 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "radv_shader_helper.h"
+#include "nir/nir.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
+
+#include "sid.h"
+#include "gfx9d.h"
+#include "ac_binary.h"
+#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
+#include "ac_shader_abi.h"
+#include "ac_shader_util.h"
+#include "ac_exp_param.h"
+
+#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
+
+struct radv_shader_context {
+ struct ac_llvm_context ac;
+ const struct radv_nir_compiler_options *options;
+ struct radv_shader_variant_info *shader_info;
+ struct ac_shader_abi abi;
+
+ unsigned max_workgroup_size;
+ LLVMContextRef context;
+ LLVMValueRef main_function;
+
+ LLVMValueRef descriptor_sets[RADV_UD_MAX_SETS];
+ LLVMValueRef ring_offsets;
+
+ LLVMValueRef vertex_buffers;
+ LLVMValueRef rel_auto_id;
+ LLVMValueRef vs_prim_id;
+ LLVMValueRef es2gs_offset;
+
+ LLVMValueRef oc_lds;
+ LLVMValueRef merged_wave_info;
+ LLVMValueRef tess_factor_offset;
+ LLVMValueRef tes_rel_patch_id;
+ LLVMValueRef tes_u;
+ LLVMValueRef tes_v;
+
+ LLVMValueRef gs2vs_offset;
+ LLVMValueRef gs_wave_id;
+ LLVMValueRef gs_vtx_offset[6];
+
+ LLVMValueRef esgs_ring;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef hs_ring_tess_offchip;
+ LLVMValueRef hs_ring_tess_factor;
+
+ LLVMValueRef persp_sample, persp_center, persp_centroid;
+ LLVMValueRef linear_sample, linear_center, linear_centroid;
+
+ /* Streamout */
+ LLVMValueRef streamout_buffers;
+ LLVMValueRef streamout_write_idx;
+ LLVMValueRef streamout_config;
+ LLVMValueRef streamout_offset[4];
+
+ gl_shader_stage stage;
+
+ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
+
+ uint64_t input_mask;
+ uint64_t output_mask;
+
+ bool is_gs_copy_shader;
+ LLVMValueRef gs_next_vertex[4];
+ unsigned gs_max_out_vertices;
+
+ unsigned tes_primitive_mode;
+
+ uint32_t tcs_patch_outputs_read;
+ uint64_t tcs_outputs_read;
+ uint32_t tcs_vertices_per_patch;
+ uint32_t tcs_num_inputs;
+ uint32_t tcs_num_patches;
+ uint32_t max_gsvs_emit_size;
+ uint32_t gsvs_vertex_size;
+};
+
+enum radeon_llvm_calling_convention {
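+	/* These values match LLVM's CallingConv::AMDGPU_* IDs. */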
+ RADEON_LLVM_AMDGPU_VS = 87,
+ RADEON_LLVM_AMDGPU_GS = 88,
+ RADEON_LLVM_AMDGPU_PS = 89,
+ RADEON_LLVM_AMDGPU_CS = 90,
+ RADEON_LLVM_AMDGPU_HS = 93,
+};
+
+static inline struct radv_shader_context *
+radv_shader_context_from_abi(struct ac_shader_abi *abi)
+{
+ struct radv_shader_context *ctx = NULL;
+ return container_of(abi, ctx, abi);
+}
+
+struct ac_build_if_state
+{
+ struct radv_shader_context *ctx;
+ LLVMValueRef condition;
+ LLVMBasicBlockRef entry_block;
+ LLVMBasicBlockRef true_block;
+ LLVMBasicBlockRef false_block;
+ LLVMBasicBlockRef merge_block;
+};
+
+static LLVMBasicBlockRef
+ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
+{
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef new_block;
+
+ /* get current basic block */
+ current_block = LLVMGetInsertBlock(ctx->ac.builder);
+
+	/* check whether there's another block after this one */
+ next_block = LLVMGetNextBasicBlock(current_block);
+ if (next_block) {
+ /* insert the new block before the next block */
+ new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
+ }
+ else {
+ /* append new block after current block */
+ LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+ new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
+ }
+ return new_block;
+}
+
+static void
+ac_nir_build_if(struct ac_build_if_state *ifthen,
+ struct radv_shader_context *ctx,
+ LLVMValueRef condition)
+{
+ LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder);
+
+ memset(ifthen, 0, sizeof *ifthen);
+ ifthen->ctx = ctx;
+ ifthen->condition = condition;
+ ifthen->entry_block = block;
+
+ /* create endif/merge basic block for the phi functions */
+ ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
+
+ /* create/insert true_block before merge_block */
+ ifthen->true_block =
+ LLVMInsertBasicBlockInContext(ctx->context,
+ ifthen->merge_block,
+ "if-true-block");
+
+	/* subsequent code goes into the true block */
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block);
+}
+
+/**
+ * End a conditional.
+ */
+static void
+ac_nir_build_endif(struct ac_build_if_state *ifthen)
+{
+ LLVMBuilderRef builder = ifthen->ctx->ac.builder;
+
+ /* Insert branch to the merge block from current block */
+ LLVMBuildBr(builder, ifthen->merge_block);
+
+ /*
+ * Now patch in the various branch instructions.
+ */
+
+ /* Insert the conditional branch instruction at the end of entry_block */
+ LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
+ if (ifthen->false_block) {
+ /* we have an else clause */
+ LLVMBuildCondBr(builder, ifthen->condition,
+ ifthen->true_block, ifthen->false_block);
+ }
+ else {
+ /* no else clause */
+ LLVMBuildCondBr(builder, ifthen->condition,
+ ifthen->true_block, ifthen->merge_block);
+ }
+
+ /* Resume building code at end of the ifthen->merge_block */
+ LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
+}
+
+
+static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx)
+{
+ switch (ctx->stage) {
+ case MESA_SHADER_TESS_CTRL:
+ return ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
+ case MESA_SHADER_TESS_EVAL:
+ return ctx->tes_rel_patch_id;
+ default:
+ unreachable("Illegal stage");
+ }
+}
+
+static unsigned
+get_tcs_num_patches(struct radv_shader_context *ctx)
+{
+ unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+ unsigned num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ unsigned num_patches;
+ unsigned hardware_lds_size;
+
+ /* Ensure that we only need one wave per SIMD so we don't need to check
+	 * resource usage. Also ensures that the number of TCS input and output
+	 * vertices per threadgroup is at most 256.
+ */
+ num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;
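+	/* e.g. 4 input and 3 output control points: (64 / 4) * 4 = 64 patches */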
+ /* Make sure that the data fits in LDS. This assumes the shaders only
+ * use LDS for the inputs and outputs.
+ */
+ hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
+ num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
+ /* Make sure the output data fits in the offchip buffer */
+ num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size);
+ /* Not necessary for correctness, but improves performance. The
+ * specific value is taken from the proprietary driver.
+ */
+ num_patches = MIN2(num_patches, 40);
+
+ /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (ctx->options->chip_class == SI) {
+ unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+ num_patches = MIN2(num_patches, one_wave);
+ }
+ return num_patches;
+}
+
+static unsigned
+calculate_tess_lds_size(struct radv_shader_context *ctx)
+{
+ unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices;
+ unsigned num_tcs_output_cp;
+ unsigned num_tcs_outputs, num_tcs_patch_outputs;
+ unsigned input_vertex_size, output_vertex_size;
+ unsigned input_patch_size, output_patch_size;
+ unsigned pervertex_output_patch_size;
+ unsigned output_patch0_offset;
+ unsigned num_patches;
+ unsigned lds_size;
+
+ num_tcs_output_cp = ctx->tcs_vertices_per_patch;
+ num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+
+ input_vertex_size = ctx->tcs_num_inputs * 16;
+ output_vertex_size = num_tcs_outputs * 16;
+
+ input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+ pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+ output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+ num_patches = ctx->tcs_num_patches;
+ output_patch0_offset = input_patch_size * num_patches;
+
+ lds_size = output_patch0_offset + output_patch_size * num_patches;
+ return lds_size;
+}
+
+/* Tessellation shaders pass outputs to the next shader using LDS.
+ *
+ * LS outputs = TCS inputs
+ * TCS outputs = TES inputs
+ *
+ * The LDS layout is:
+ * - TCS inputs for patch 0
+ * - TCS inputs for patch 1
+ * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
+ * - ...
+ * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
+ * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
+ * - TCS outputs for patch 1
+ * - Per-patch TCS outputs for patch 1
+ * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
+ * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
+ * - ...
+ *
+ * All three shaders VS(LS), TCS, TES share the same LDS space.
+ */
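+/* Hypothetical example: with input_patch_size = 256 bytes and num_patches = 8,
+ * get_tcs_out_patch0_offset() below returns 256 * 8 / 4 = 512, i.e. the TCS
+ * outputs start at dword 512 of LDS. */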
+static LLVMValueRef
+get_tcs_in_patch_stride(struct radv_shader_context *ctx)
+{
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+
+ input_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, input_patch_size, false);
+}
+
+static LLVMValueRef
+get_tcs_out_patch_stride(struct radv_shader_context *ctx)
+{
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+ output_patch_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch_size, false);
+}
+
+static LLVMValueRef
+get_tcs_out_vertex_stride(struct radv_shader_context *ctx)
+{
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ output_vertex_size /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_vertex_size, false);
+}
+
+static LLVMValueRef
+get_tcs_out_patch0_offset(struct radv_shader_context *ctx)
+{
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
+}
+
+static LLVMValueRef
+get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx)
+{
+ assert (ctx->stage == MESA_SHADER_TESS_CTRL);
+ uint32_t input_vertex_size = ctx->tcs_num_inputs * 16;
+ uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size;
+ uint32_t output_patch0_offset = input_patch_size;
+
+ uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+ unsigned num_patches = ctx->tcs_num_patches;
+
+ output_patch0_offset *= num_patches;
+ output_patch0_offset += pervertex_output_patch_size;
+ output_patch0_offset /= 4;
+ return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false);
+}
+
+static LLVMValueRef
+get_tcs_in_current_patch_offset(struct radv_shader_context *ctx)
+{
+ LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+
+ return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
+}
+
+static LLVMValueRef
+get_tcs_out_current_patch_offset(struct radv_shader_context *ctx)
+{
+ LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
+ LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+
+ return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id,
+ patch0_offset);
+}
+
+static LLVMValueRef
+get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx)
+{
+ LLVMValueRef patch0_patch_data_offset =
+ get_tcs_out_patch0_patch_data_offset(ctx);
+ LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+
+ return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id,
+ patch0_patch_data_offset);
+}
+
+#define MAX_ARGS 64
+struct arg_info {
+ LLVMTypeRef types[MAX_ARGS];
+ LLVMValueRef *assign[MAX_ARGS];
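+	/* bit i set => argument i gets noalias/dereferenceable attributes */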
+ unsigned array_params_mask;
+ uint8_t count;
+ uint8_t sgpr_count;
+ uint8_t num_sgprs_used;
+ uint8_t num_vgprs_used;
+};
+
+enum ac_arg_regfile {
+ ARG_SGPR,
+ ARG_VGPR,
+};
+
+static void
+add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type,
+ LLVMValueRef *param_ptr)
+{
+ assert(info->count < MAX_ARGS);
+
+ info->assign[info->count] = param_ptr;
+ info->types[info->count] = type;
+ info->count++;
+
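+	/* SGPRs/VGPRs are 32 bits wide, so ac_get_type_size() in bytes
+	 * divided by 4 is the number of registers this argument needs. */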
+ if (regfile == ARG_SGPR) {
+ info->num_sgprs_used += ac_get_type_size(type) / 4;
+ info->sgpr_count++;
+ } else {
+ assert(regfile == ARG_VGPR);
+ info->num_vgprs_used += ac_get_type_size(type) / 4;
+ }
+}
+
+static inline void
+add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr)
+{
+ info->array_params_mask |= (1 << info->count);
+ add_arg(info, ARG_SGPR, type, param_ptr);
+}
+
+static void assign_arguments(LLVMValueRef main_function,
+ struct arg_info *info)
+{
+ unsigned i;
+ for (i = 0; i < info->count; i++) {
+ if (info->assign[i])
+ *info->assign[i] = LLVMGetParam(main_function, i);
+ }
+}
+
+static LLVMValueRef
+create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
+ LLVMBuilderRef builder, LLVMTypeRef *return_types,
+ unsigned num_return_elems,
+ struct arg_info *args,
+ unsigned max_workgroup_size,
+ const struct radv_nir_compiler_options *options)
+{
+ LLVMTypeRef main_function_type, ret_type;
+ LLVMBasicBlockRef main_function_body;
+
+ if (num_return_elems)
+ ret_type = LLVMStructTypeInContext(ctx, return_types,
+ num_return_elems, true);
+ else
+ ret_type = LLVMVoidTypeInContext(ctx);
+
+ /* Setup the function */
+ main_function_type =
+ LLVMFunctionType(ret_type, args->types, args->count, 0);
+ LLVMValueRef main_function =
+ LLVMAddFunction(module, "main", main_function_type);
+ main_function_body =
+ LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
+ LLVMPositionBuilderAtEnd(builder, main_function_body);
+
+ LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
+ for (unsigned i = 0; i < args->sgpr_count; ++i) {
+ ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
+
+ if (args->array_params_mask & (1 << i)) {
+ LLVMValueRef P = LLVMGetParam(main_function, i);
+ ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
+ ac_add_attr_dereferenceable(P, UINT64_MAX);
+ }
+ }
+
+ if (options->address32_hi) {
+ ac_llvm_add_target_dep_function_attr(main_function,
+ "amdgpu-32bit-address-high-bits",
+ options->address32_hi);
+ }
+
+ if (max_workgroup_size) {
+ ac_llvm_add_target_dep_function_attr(main_function,
+ "amdgpu-max-work-group-size",
+ max_workgroup_size);
+ }
+ if (options->unsafe_math) {
+ /* These were copied from some LLVM test. */
+ LLVMAddTargetDependentFunctionAttr(main_function,
+ "less-precise-fpmad",
+ "true");
+ LLVMAddTargetDependentFunctionAttr(main_function,
+ "no-infs-fp-math",
+ "true");
+ LLVMAddTargetDependentFunctionAttr(main_function,
+ "no-nans-fp-math",
+ "true");
+ LLVMAddTargetDependentFunctionAttr(main_function,
+ "unsafe-fp-math",
+ "true");
+ LLVMAddTargetDependentFunctionAttr(main_function,
+ "no-signed-zeros-fp-math",
+ "true");
+ }
+ return main_function;
+}
+
+
+static void
+set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
+ uint8_t num_sgprs, bool indirect)
+{
+ ud_info->sgpr_idx = *sgpr_idx;
+ ud_info->num_sgprs = num_sgprs;
+ ud_info->indirect = indirect;
+ *sgpr_idx += num_sgprs;
+}
+
+static void
+set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
+ uint8_t num_sgprs)
+{
+ struct radv_userdata_info *ud_info =
+ &ctx->shader_info->user_sgprs_locs.shader_data[idx];
+ assert(ud_info);
+
+ set_loc(ud_info, sgpr_idx, num_sgprs, false);
+}
+
+static void
+set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
+{
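+	/* The scratch/ring-offsets pointer always stays a full 64-bit
+	 * address (2 SGPRs), even with 32-bit user data pointers. */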
+ bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
+ idx != AC_UD_SCRATCH_RING_OFFSETS;
+
+ set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+}
+
+static void
+set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
+ bool indirect)
+{
+ struct radv_userdata_locations *locs =
+ &ctx->shader_info->user_sgprs_locs;
+ struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
+ assert(ud_info);
+
+ set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect);
+
+ if (!indirect)
+ locs->descriptor_sets_enabled |= 1 << idx;
+}
+
+struct user_sgpr_info {
+ bool need_ring_offsets;
+ bool indirect_all_descriptor_sets;
+};
+
+static bool needs_view_index_sgpr(struct radv_shader_context *ctx,
+ gl_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (ctx->shader_info->info.needs_multiview_view_index ||
+ (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
+ return true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
+ return true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ case MESA_SHADER_TESS_CTRL:
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
+}
+
+static uint8_t
+count_vs_user_sgprs(struct radv_shader_context *ctx)
+{
+ uint8_t count = 0;
+
+ if (ctx->shader_info->info.vs.has_vertex_buffers)
+ count += HAVE_32BIT_POINTERS ? 1 : 2;
+ count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
+
+ return count;
+}
+
+static void allocate_user_sgprs(struct radv_shader_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ bool needs_view_index,
+ struct user_sgpr_info *user_sgpr_info)
+{
+ uint8_t user_sgpr_count = 0;
+
+ memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
+
+	/* Until we sort out scratch/global buffers, always assign ring offsets for GS/VS/ES. */
+ if (stage == MESA_SHADER_GEOMETRY ||
+ stage == MESA_SHADER_VERTEX ||
+ stage == MESA_SHADER_TESS_CTRL ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ ctx->is_gs_copy_shader)
+ user_sgpr_info->need_ring_offsets = true;
+
+ if (stage == MESA_SHADER_FRAGMENT &&
+ ctx->shader_info->info.ps.needs_sample_positions)
+ user_sgpr_info->need_ring_offsets = true;
+
+ /* 2 user sgprs will nearly always be allocated for scratch/rings */
+ if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
+ user_sgpr_count += 2;
+ }
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (ctx->shader_info->info.cs.uses_grid_size)
+ user_sgpr_count += 3;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ user_sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
+ break;
+ case MESA_SHADER_VERTEX:
+ if (!ctx->is_gs_copy_shader)
+ user_sgpr_count += count_vs_user_sgprs(ctx);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ user_sgpr_count += count_vs_user_sgprs(ctx);
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ user_sgpr_count += count_vs_user_sgprs(ctx);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (needs_view_index)
+ user_sgpr_count++;
+
+ if (ctx->shader_info->info.loads_push_constants)
+ user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
+
+ uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
+ uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
+ uint32_t num_desc_set =
+ util_bitcount(ctx->shader_info->info.desc_set_used_mask);
+
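+	/* If the remaining user SGPRs cannot hold one pointer per used
+	 * descriptor set, fall back to a single pointer to an indirect
+	 * table of all sets. */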
+ if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) {
+ user_sgpr_info->indirect_all_descriptor_sets = true;
+ }
+}
+
+static void
+declare_global_input_sgprs(struct radv_shader_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ const struct user_sgpr_info *user_sgpr_info,
+ struct arg_info *args,
+ LLVMValueRef *desc_sets)
+{
+ LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
+ unsigned num_sets = ctx->options->layout ?
+ ctx->options->layout->num_sets : 0;
+ unsigned stage_mask = 1 << stage;
+
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
+
+ /* 1 for each descriptor set */
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
+ ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ add_array_arg(args, type,
+ &ctx->descriptor_sets[i]);
+ }
+ }
+ } else {
+ add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets);
+ }
+
+ if (ctx->shader_info->info.loads_push_constants) {
+ /* 1 for push constants and dynamic descriptors */
+ add_array_arg(args, type, &ctx->abi.push_constants);
+ }
+
+ if (ctx->shader_info->info.so.num_outputs) {
+ add_arg(args, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->ac.v4i32),
+ &ctx->streamout_buffers);
+ }
+}
+
+static void
+declare_vs_specific_input_sgprs(struct radv_shader_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ struct arg_info *args)
+{
+ if (!ctx->is_gs_copy_shader &&
+ (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers) {
+ add_arg(args, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->ac.v4i32),
+ &ctx->vertex_buffers);
+ }
+ add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
+ add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance);
+ if (ctx->shader_info->info.vs.needs_draw_id) {
+ add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id);
+ }
+ }
+}
+
+static void
+declare_vs_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args)
+{
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id);
+ if (!ctx->is_gs_copy_shader) {
+ if (ctx->options->key.vs.as_ls) {
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id);
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
+ } else {
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id);
+ }
+ add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */
+ }
+}
+
+static void
+declare_streamout_sgprs(struct radv_shader_context *ctx, gl_shader_stage stage,
+ struct arg_info *args)
+{
+ int i;
+
+ /* Streamout SGPRs. */
+ if (ctx->shader_info->info.so.num_outputs) {
+ assert(stage == MESA_SHADER_VERTEX ||
+ stage == MESA_SHADER_TESS_EVAL);
+
+ if (stage != MESA_SHADER_TESS_EVAL) {
+ add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->streamout_config);
+ } else {
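+			/* For TES the streamout config reuses the SGPR slot that
+			 * was declared just before, so rewrite that argument
+			 * instead of appending a new one. */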
+ args->assign[args->count - 1] = &ctx->streamout_config;
+ args->types[args->count - 1] = ctx->ac.i32;
+ }
+
+ add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->streamout_write_idx);
+ }
+
+ /* A streamout buffer offset is loaded if the stride is non-zero. */
+ for (i = 0; i < 4; i++) {
+ if (!ctx->shader_info->info.so.strides[i])
+ continue;
+
+ add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->streamout_offset[i]);
+ }
+}
+
+static void
+declare_tes_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args)
+{
+ add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u);
+ add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v);
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id);
+ add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id);
+}
+
+static void
+set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage,
+ const struct user_sgpr_info *user_sgpr_info,
+ LLVMValueRef desc_sets, uint8_t *user_sgpr_idx)
+{
+ unsigned num_sets = ctx->options->layout ?
+ ctx->options->layout->num_sets : 0;
+ unsigned stage_mask = 1 << stage;
+
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
+
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
+ ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_loc_desc(ctx, i, user_sgpr_idx, false);
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ } else {
+ set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+ user_sgpr_idx);
+
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
+ ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ ctx->descriptor_sets[i] =
+ ac_build_load_to_sgpr(&ctx->ac,
+ desc_sets,
+ LLVMConstInt(ctx->ac.i32, i, false));
+
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ ctx->shader_info->need_indirect_descriptor_sets = true;
+ }
+
+ if (ctx->shader_info->info.loads_push_constants) {
+ set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
+ }
+
+ if (ctx->streamout_buffers) {
+ set_loc_shader_ptr(ctx, AC_UD_STREAMOUT_BUFFERS,
+ user_sgpr_idx);
+ }
+}
+
+static void
+set_vs_specific_input_locs(struct radv_shader_context *ctx,
+ gl_shader_stage stage, bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ uint8_t *user_sgpr_idx)
+{
+ if (!ctx->is_gs_copy_shader &&
+ (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers) {
+ set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS,
+ user_sgpr_idx);
+ }
+
+ unsigned vs_num = 2;
+ if (ctx->shader_info->info.vs.needs_draw_id)
+ vs_num++;
+
+ set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+ user_sgpr_idx, vs_num);
+ }
+}
+
+static void set_llvm_calling_convention(LLVMValueRef func,
+ gl_shader_stage stage)
+{
+ enum radeon_llvm_calling_convention calling_conv;
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ calling_conv = RADEON_LLVM_AMDGPU_VS;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ calling_conv = RADEON_LLVM_AMDGPU_GS;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ calling_conv = RADEON_LLVM_AMDGPU_HS;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ calling_conv = RADEON_LLVM_AMDGPU_PS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ calling_conv = RADEON_LLVM_AMDGPU_CS;
+ break;
+ default:
+ unreachable("Unhandled shader type");
+ }
+
+ LLVMSetFunctionCallConv(func, calling_conv);
+}
+
+static void create_function(struct radv_shader_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage)
+{
+ uint8_t user_sgpr_idx;
+ struct user_sgpr_info user_sgpr_info;
+ struct arg_info args = {};
+ LLVMValueRef desc_sets;
+ bool needs_view_index = needs_view_index_sgpr(ctx, stage);
+ allocate_user_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, needs_view_index, &user_sgpr_info);
+
+ if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
+ add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+ &ctx->ring_offsets);
+ }
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+
+ if (ctx->shader_info->info.cs.uses_grid_size) {
+ add_arg(&args, ARG_SGPR, ctx->ac.v3i32,
+ &ctx->abi.num_work_groups);
+ }
+
+ for (int i = 0; i < 3; i++) {
+ ctx->abi.workgroup_ids[i] = NULL;
+ if (ctx->shader_info->info.cs.uses_block_id[i]) {
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.workgroup_ids[i]);
+ }
+ }
+
+ if (ctx->shader_info->info.cs.uses_local_invocation_idx)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.tg_size);
+ add_arg(&args, ARG_VGPR, ctx->ac.v3i32,
+ &ctx->abi.local_invocation_ids);
+ break;
+ case MESA_SHADER_VERTEX:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+ declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &args);
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+ if (ctx->options->key.vs.as_es) {
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->es2gs_offset);
+ } else if (ctx->options->key.vs.as_ls) {
+ /* no extra parameters */
+ } else {
+ declare_streamout_sgprs(ctx, stage, &args);
+ }
+
+ declare_vs_input_vgprs(ctx, &args);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ // First 6 system regs
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->merged_wave_info);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tess_factor_offset);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
+
+ declare_global_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_info, &args,
+ &desc_sets);
+ declare_vs_specific_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage, &args);
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.tcs_patch_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.tcs_rel_ids);
+
+ declare_vs_input_vgprs(ctx, &args);
+ } else {
+ declare_global_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_info, &args,
+ &desc_sets);
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tess_factor_offset);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.tcs_patch_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.tcs_rel_ids);
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+
+ if (ctx->options->key.tes.as_es) {
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->es2gs_offset);
+ } else {
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
+ declare_streamout_sgprs(ctx, stage, &args);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
+ }
+ declare_tes_input_vgprs(ctx, &args);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ // First 6 system regs
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->gs2vs_offset);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->merged_wave_info);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
+
+ declare_global_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_info, &args,
+ &desc_sets);
+
+ if (previous_stage != MESA_SHADER_TESS_EVAL) {
+ declare_vs_specific_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &args);
+ }
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[0]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[2]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.gs_prim_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.gs_invocation_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[4]);
+
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ declare_vs_input_vgprs(ctx, &args);
+ } else {
+ declare_tes_input_vgprs(ctx, &args);
+ }
+ } else {
+ declare_global_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_info, &args,
+ &desc_sets);
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[0]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[1]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.gs_prim_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[2]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[3]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[4]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[5]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.gs_invocation_id);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid);
+ add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ ctx->main_function = create_llvm_function(
+ ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
+ ctx->max_workgroup_size, ctx->options);
+ set_llvm_calling_convention(ctx->main_function, stage);
+
+ ctx->shader_info->num_input_vgprs = 0;
+ ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
+
+ ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
+
+ if (ctx->stage != MESA_SHADER_FRAGMENT)
+ ctx->shader_info->num_input_vgprs = args.num_vgprs_used;
+
+ assign_arguments(ctx->main_function, &args);
+
+ user_sgpr_idx = 0;
+
+ if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
+ set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx);
+ if (ctx->options->supports_spill) {
+ ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+ LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
+ NULL, 0, AC_FUNC_ATTR_READNONE);
+ ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+ ac_array_in_const_addr_space(ctx->ac.v4i32), "");
+ }
+ }
+
+ /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front
+ * (including the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart
+ * the count from 0. */
+ if (has_previous_stage)
+ user_sgpr_idx = 0;
+
+ set_global_input_locs(ctx, stage, has_previous_stage, previous_stage,
+ &user_sgpr_info, desc_sets, &user_sgpr_idx);
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (ctx->shader_info->info.cs.uses_grid_size) {
+ set_loc_shader(ctx, AC_UD_CS_GRID_SIZE,
+ &user_sgpr_idx, 3);
+ }
+ break;
+ case MESA_SHADER_VERTEX:
+ set_vs_specific_input_locs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_idx);
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ set_vs_specific_input_locs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_idx);
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ set_vs_specific_input_locs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_idx);
+ }
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ if (stage == MESA_SHADER_TESS_CTRL ||
+ (stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_ls) ||
+ /* GFX9 has the ESGS ring buffer in LDS. */
+ (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
+ ac_declare_lds_as_pointer(&ctx->ac);
+ }
+
+ ctx->shader_info->num_user_sgprs = user_sgpr_idx;
+}
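+
+/* A minimal sketch, assuming only the numbering described in the comment
+ * above (illustrative, with a hypothetical helper name): merged shaders
+ * reserve eight system SGPRs up front, so user SGPR index 0 lands on s8,
+ * while unmerged shaders count from s0.
+ */
+static inline unsigned
+example_user_sgpr_hw_reg(bool merged, unsigned user_sgpr_idx)
+{
+ return (merged ? 8 : 0) + user_sgpr_idx;
+}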
+
+
+static LLVMValueRef
+radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index,
+ unsigned desc_set, unsigned binding)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
+ struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
+ struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
+ unsigned base_offset = layout->binding[binding].offset;
+ LLVMValueRef offset, stride;
+
+ if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+ unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
+ layout->binding[binding].dynamic_offset_offset;
+ desc_ptr = ctx->abi.push_constants;
+ base_offset = pipeline_layout->push_constant_size + 16 * idx;
+ stride = LLVMConstInt(ctx->ac.i32, 16, false);
+ } else
+ stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
+
+ offset = ac_build_imad(&ctx->ac, index, stride,
+ LLVMConstInt(ctx->ac.i32, base_offset, false));
+
+ desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
+ desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32);
+ LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ return desc_ptr;
+}
+
+
+/* The offchip buffer layout for TCS->TES is
+ *
+ * - attribute 0 of patch 0 vertex 0
+ * - attribute 0 of patch 0 vertex 1
+ * - attribute 0 of patch 0 vertex 2
+ * ...
+ * - attribute 0 of patch 1 vertex 0
+ * - attribute 0 of patch 1 vertex 1
+ * ...
+ * - attribute 1 of patch 0 vertex 0
+ * - attribute 1 of patch 0 vertex 1
+ * ...
+ * - per patch attribute 0 of patch 0
+ * - per patch attribute 0 of patch 1
+ * ...
+ *
+ * Note that every attribute has 4 components.
+ */
+static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx)
+{
+ uint32_t num_patches = ctx->tcs_num_patches;
+ uint32_t num_tcs_outputs;
+ if (ctx->stage == MESA_SHADER_TESS_CTRL)
+ num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written);
+ else
+ num_tcs_outputs = ctx->options->key.tes.tcs_num_outputs;
+
+ uint32_t output_vertex_size = num_tcs_outputs * 16;
+ uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size;
+
+ return LLVMConstInt(ctx->ac.i32, pervertex_output_patch_size * num_patches, false);
+}
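+
+/* A standalone sketch of the attribute-major layout described in the comment
+ * above (illustrative names only): byte offset of per-vertex attribute "attr"
+ * for vertex "vertex" of patch "patch".
+ */
+static inline uint32_t
+example_offchip_attr_offset(uint32_t attr, uint32_t patch, uint32_t vertex,
+ uint32_t num_patches, uint32_t verts_per_patch)
+{
+ /* Every attribute occupies 4 components * 4 bytes = 16 bytes. */
+ return 16 * (attr * num_patches * verts_per_patch +
+ patch * verts_per_patch + vertex);
+}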
+
+static LLVMValueRef calc_param_stride(struct radv_shader_context *ctx,
+ LLVMValueRef vertex_index)
+{
+ LLVMValueRef param_stride;
+ if (vertex_index)
+ param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch * ctx->tcs_num_patches, false);
+ else
+ param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_num_patches, false);
+ return param_stride;
+}
+
+static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index)
+{
+ LLVMValueRef base_addr;
+ LLVMValueRef param_stride, constant16;
+ LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
+ LLVMValueRef vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false);
+ constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
+ param_stride = calc_param_stride(ctx, vertex_index);
+ if (vertex_index) {
+ base_addr = ac_build_imad(&ctx->ac, rel_patch_id,
+ vertices_per_patch, vertex_index);
+ } else {
+ base_addr = rel_patch_id;
+ }
+
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMBuildMul(ctx->ac.builder, param_index,
+ param_stride, ""), "");
+
+ base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, "");
+
+ if (!vertex_index) {
+ LLVMValueRef patch_data_offset = get_non_vertex_index_offset(ctx);
+
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ patch_data_offset, "");
+ }
+ return base_addr;
+}
+
+static LLVMValueRef get_tcs_tes_buffer_address_params(struct radv_shader_context *ctx,
+ unsigned param,
+ unsigned const_index,
+ bool is_compact,
+ LLVMValueRef vertex_index,
+ LLVMValueRef indir_index)
+{
+ LLVMValueRef param_index;
+
+ if (indir_index)
+ param_index = LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, param, false),
+ indir_index, "");
+ else {
+ if (const_index && !is_compact)
+ param += const_index;
+ param_index = LLVMConstInt(ctx->ac.i32, param, false);
+ }
+ return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
+}
+
+static LLVMValueRef
+get_dw_address(struct radv_shader_context *ctx,
+ LLVMValueRef dw_addr,
+ unsigned param,
+ unsigned const_index,
+ bool compact_const_index,
+ LLVMValueRef vertex_index,
+ LLVMValueRef stride,
+ LLVMValueRef indir_index)
+{
+ if (vertex_index) {
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMBuildMul(ctx->ac.builder,
+ vertex_index,
+ stride, ""), "");
+ }
+
+ if (indir_index)
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMBuildMul(ctx->ac.builder, indir_index,
+ LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
+ else if (const_index && !compact_const_index)
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, const_index * 4, false), "");
+
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, param * 4, false), "");
+
+ if (const_index && compact_const_index)
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, const_index, false), "");
+ return dw_addr;
+}
+
+static LLVMValueRef
+load_tcs_varyings(struct ac_shader_abi *abi,
+ LLVMTypeRef type,
+ LLVMValueRef vertex_index,
+ LLVMValueRef indir_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact,
+ bool load_input)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef dw_addr, stride;
+ LLVMValueRef value[4], result;
+ unsigned param = shader_io_get_unique_index(location);
+
+ if (load_input) {
+ uint32_t input_vertex_size = (ctx->tcs_num_inputs * 16) / 4;
+ stride = LLVMConstInt(ctx->ac.i32, input_vertex_size, false);
+ dw_addr = get_tcs_in_current_patch_offset(ctx);
+ } else {
+ if (!is_patch) {
+ stride = get_tcs_out_vertex_stride(ctx);
+ dw_addr = get_tcs_out_current_patch_offset(ctx);
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+ stride = NULL;
+ }
+ }
+
+ dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
+ indir_index);
+
+ for (unsigned i = 0; i < num_components + component; i++) {
+ value[i] = ac_lds_load(&ctx->ac, dw_addr);
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ ctx->ac.i32_1, "");
+ }
+ result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
+ return result;
+}
+
+static void
+store_tcs_output(struct ac_shader_abi *abi,
+ const nir_variable *var,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ LLVMValueRef src,
+ unsigned writemask)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ const unsigned location = var->data.location;
+ const unsigned component = var->data.location_frac;
+ const bool is_patch = var->data.patch;
+ const bool is_compact = var->data.compact;
+ LLVMValueRef dw_addr;
+ LLVMValueRef stride = NULL;
+ LLVMValueRef buf_addr = NULL;
+ unsigned param;
+ bool store_lds = true;
+
+ if (is_patch) {
+ if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0))))
+ store_lds = false;
+ } else {
+ if (!(ctx->tcs_outputs_read & (1ULL << location)))
+ store_lds = false;
+ }
+
+ param = shader_io_get_unique_index(location);
+ if (location == VARYING_SLOT_CLIP_DIST0 &&
+ is_compact && const_index > 3) {
+ const_index -= 3;
+ param++;
+ }
+
+ if (!is_patch) {
+ stride = get_tcs_out_vertex_stride(ctx);
+ dw_addr = get_tcs_out_current_patch_offset(ctx);
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+ }
+
+ dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
+ param_index);
+ buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
+ vertex_index, param_index);
+
+ bool is_tess_factor = false;
+ if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ is_tess_factor = true;
+
+ unsigned base = is_compact ? const_index : 0;
+ for (unsigned chan = 0; chan < 8; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+ LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
+ value = ac_to_integer(&ctx->ac, value);
+ value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+
+ if (store_lds || is_tess_factor) {
+ LLVMValueRef dw_addr_chan =
+ LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, chan, false), "");
+ ac_lds_store(&ctx->ac, dw_addr_chan, value);
+ }
+
+ if (!is_tess_factor && writemask != 0xF)
+ ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
+ buf_addr, ctx->oc_lds,
+ 4 * (base + chan), 1, 0, true, false);
+ }
+
+ if (writemask == 0xF) {
+ ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
+ buf_addr, ctx->oc_lds,
+ (base * 4), 1, 0, true, false);
+ }
+}
+
+static LLVMValueRef
+load_tes_input(struct ac_shader_abi *abi,
+ LLVMTypeRef type,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact,
+ bool load_input)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef buf_addr;
+ LLVMValueRef result;
+ unsigned param = shader_io_get_unique_index(location);
+
+ if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) {
+ const_index -= 3;
+ param++;
+ }
+
+ buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
+ is_compact, vertex_index, param_index);
+
+ LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false);
+ buf_addr = LLVMBuildAdd(ctx->ac.builder, buf_addr, comp_offset, "");
+
+ result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL,
+ buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
+ result = ac_trim_vector(&ctx->ac, result, num_components);
+ return result;
+}
+
+static LLVMValueRef
+load_gs_input(struct ac_shader_abi *abi,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ unsigned vertex_index,
+ unsigned const_index,
+ LLVMTypeRef type)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef vtx_offset;
+ unsigned param, vtx_offset_param;
+ LLVMValueRef value[4], result;
+
+ vtx_offset_param = vertex_index;
+ assert(vtx_offset_param < 6);
+ vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param],
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
+
+ param = shader_io_get_unique_index(location);
+
+ for (unsigned i = component; i < num_components + component; i++) {
+ if (ctx->ac.chip_class >= GFX9) {
+ LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
+ value[i] = ac_lds_load(&ctx->ac, dw_addr);
+ } else {
+ LLVMValueRef soffset =
+ LLVMConstInt(ctx->ac.i32,
+ (param * 4 + i + const_index) * 256,
+ false);
+
+ value[i] = ac_build_buffer_load(&ctx->ac,
+ ctx->esgs_ring, 1,
+ ctx->ac.i32_0,
+ vtx_offset, soffset,
+ 0, 1, 0, true, false);
+ }
+
+ if (ac_get_type_size(type) == 2) {
+ value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], ctx->ac.i32, "");
+ value[i] = LLVMBuildTrunc(ctx->ac.builder, value[i], ctx->ac.i16, "");
+ }
+ value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
+ }
+ result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
+ result = ac_to_integer(&ctx->ac, result);
+ return result;
+}
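+
+/* A small sketch of the pre-GFX9 ESGS addressing above (an assumption for
+ * illustration, not taken from the hardware docs): each dword slot is
+ * 64 lanes * 4 bytes = 256 bytes apart in the swizzled ring, which is where
+ * the "* 256" soffset scaling comes from.
+ */
+static inline uint32_t
+example_esgs_soffset(uint32_t param, uint32_t chan, uint32_t const_index)
+{
+ return (param * 4 + chan + const_index) * 64 /* lanes */ * 4 /* bytes */;
+}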
+
+
+static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ ac_build_kill_if_false(&ctx->ac, visible);
+}
+
+static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi,
+ enum glsl_interp_mode interp, unsigned location)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ switch (interp) {
+ case INTERP_MODE_FLAT:
+ default:
+ return NULL;
+ case INTERP_MODE_SMOOTH:
+ case INTERP_MODE_NONE:
+ if (location == INTERP_CENTER)
+ return ctx->persp_center;
+ else if (location == INTERP_CENTROID)
+ return ctx->persp_centroid;
+ else if (location == INTERP_SAMPLE)
+ return ctx->persp_sample;
+ break;
+ case INTERP_MODE_NOPERSPECTIVE:
+ if (location == INTERP_CENTER)
+ return ctx->linear_center;
+ else if (location == INTERP_CENTROID)
+ return ctx->linear_centroid;
+ else if (location == INTERP_SAMPLE)
+ return ctx->linear_sample;
+ break;
+ }
+ return NULL;
+}
+
+static uint32_t
+radv_get_sample_pos_offset(uint32_t num_samples)
+{
+ uint32_t sample_pos_offset = 0;
+
+ switch (num_samples) {
+ case 2:
+ sample_pos_offset = 1;
+ break;
+ case 4:
+ sample_pos_offset = 3;
+ break;
+ case 8:
+ sample_pos_offset = 7;
+ break;
+ case 16:
+ sample_pos_offset = 15;
+ break;
+ default:
+ break;
+ }
+ return sample_pos_offset;
+}
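+
+/* The offsets above follow a cumulative pattern (an observation, assuming the
+ * 1x/2x/4x/8x/16x sample-position groups are stored back to back): the group
+ * for N samples starts after the 1 + 2 + ... preceding entries, i.e. at N - 1.
+ */
+static inline uint32_t
+example_sample_pos_offset(uint32_t num_samples)
+{
+ return num_samples - 1; /* power-of-two sample counts only */
+}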
+
+static LLVMValueRef load_sample_position(struct ac_shader_abi *abi,
+ LLVMValueRef sample_id)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ LLVMValueRef result;
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false));
+
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ ac_array_in_const_addr_space(ctx->ac.v2f32), "");
+
+ uint32_t sample_pos_offset =
+ radv_get_sample_pos_offset(ctx->options->key.fs.num_samples);
+
+ sample_id =
+ LLVMBuildAdd(ctx->ac.builder, sample_id,
+ LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
+ result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
+
+ return result;
+}
+
+
+static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ uint8_t log2_ps_iter_samples;
+
+ if (ctx->shader_info->info.ps.force_persample) {
+ log2_ps_iter_samples =
+ util_logbase2(ctx->options->key.fs.num_samples);
+ } else {
+ log2_ps_iter_samples = ctx->options->key.fs.log2_ps_iter_samples;
+ }
+
+ /* The bit pattern matches that used by fixed function fragment
+ * processing. */
+ static const uint16_t ps_iter_masks[] = {
+ 0xffff, /* not used */
+ 0x5555,
+ 0x1111,
+ 0x0101,
+ 0x0001,
+ };
+ assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks));
+
+ uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples];
+
+ LLVMValueRef result, sample_id;
+ sample_id = ac_unpack_param(&ctx->ac, abi->ancillary, 8, 4);
+ sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, "");
+ result = LLVMBuildAnd(ctx->ac.builder, sample_id, abi->sample_coverage, "");
+ return result;
+}
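+
+/* A worked sketch of the mask computation above (values illustrative): with
+ * log2_ps_iter_samples == 2 the base pattern is 0x1111, one bit per group of
+ * four samples; shifting by the sample id selects that sample in every group
+ * before ANDing with the coverage mask.
+ */
+static inline uint32_t
+example_sample_mask_in(uint32_t sample_id, uint32_t coverage)
+{
+ return (0x1111u << sample_id) & coverage; /* 4x, per-sample shading */
+}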
+
+
+static void
+visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
+{
+ LLVMValueRef gs_next_vertex;
+ LLVMValueRef can_emit;
+ unsigned offset = 0;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ /* Write vertex attribute values to GSVS ring */
+ gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
+ ctx->gs_next_vertex[stream],
+ "");
+
+ /* If this thread has already emitted the declared maximum number of
+ * vertices, kill it: excessive vertex emissions are not supposed to
+ * have any effect, and GS threads have no externally observable
+ * effects other than emitting vertices.
+ */
+ can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
+ LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
+ ac_build_kill_if_false(&ctx->ac, can_emit);
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
+ uint8_t output_stream =
+ ctx->shader_info->info.gs.output_streams[i];
+ LLVMValueRef *out_ptr = &addrs[i * 4];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) ||
+ output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
+ out_ptr[j], "");
+ LLVMValueRef voffset =
+ LLVMConstInt(ctx->ac.i32, offset *
+ ctx->gs_max_out_vertices, false);
+
+ offset++;
+
+ voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
+ voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
+
+ out_val = ac_to_integer(&ctx->ac, out_val);
+ out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
+
+ ac_build_buffer_store_dword(&ctx->ac,
+ ctx->gsvs_ring[stream],
+ out_val, 1,
+ voffset, ctx->gs2vs_offset, 0,
+ 1, 1, true, true);
+ }
+ }
+
+ gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex,
+ ctx->ac.i32_1, "");
+ LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
+
+ ac_build_sendmsg(&ctx->ac,
+ AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
+ ctx->gs_wave_id);
+}
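+
+/* A minimal sketch of the GSVS ring addressing above (illustrative): outputs
+ * are stored component-major, with gs_max_out_vertices dword entries per
+ * component slot.
+ */
+static inline uint32_t
+example_gsvs_voffset(uint32_t comp_slot, uint32_t vertex,
+ uint32_t max_out_vertices)
+{
+ return (comp_slot * max_out_vertices + vertex) * 4; /* bytes */
+}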
+
+static void
+visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id);
+}
+
+static LLVMValueRef
+load_tess_coord(struct ac_shader_abi *abi)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ LLVMValueRef coord[4] = {
+ ctx->tes_u,
+ ctx->tes_v,
+ ctx->ac.f32_0,
+ ctx->ac.f32_0,
+ };
+
+ if (ctx->tes_primitive_mode == GL_TRIANGLES)
+ coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
+ LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
+
+ return ac_build_gather_values(&ctx->ac, coord, 3);
+}
+
+static LLVMValueRef
+load_patch_vertices_in(struct ac_shader_abi *abi)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false);
+}
+
+
+static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi)
+{
+ return abi->base_vertex;
+}
+
+static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
+ LLVMValueRef buffer_ptr, bool write)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef result;
+
+ LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
+ LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+
+ return result;
+}
+
+static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef result;
+
+ LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
+ LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+
+ return result;
+}
+
+static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
+ unsigned descriptor_set,
+ unsigned base_index,
+ unsigned constant_index,
+ LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool image, bool write,
+ bool bindless)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
+ struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout;
+ struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
+ unsigned offset = binding->offset;
+ unsigned stride = binding->size;
+ unsigned type_size;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMTypeRef type;
+
+ assert(base_index < layout->binding_count);
+
+ switch (desc_type) {
+ case AC_DESC_IMAGE:
+ type = ctx->ac.v8i32;
+ type_size = 32;
+ break;
+ case AC_DESC_FMASK:
+ type = ctx->ac.v8i32;
+ offset += 32;
+ type_size = 32;
+ break;
+ case AC_DESC_SAMPLER:
+ type = ctx->ac.v4i32;
+ if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ offset += 64;
+
+ type_size = 16;
+ break;
+ case AC_DESC_BUFFER:
+ type = ctx->ac.v4i32;
+ type_size = 16;
+ break;
+ default:
+ unreachable("invalid desc_type\n");
+ }
+
+ offset += constant_index * stride;
+
+ if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
+ (!index || binding->immutable_samplers_equal)) {
+ if (binding->immutable_samplers_equal)
+ constant_index = 0;
+
+ const uint32_t *samplers = radv_immutable_samplers(layout, binding);
+
+ LLVMValueRef constants[] = {
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
+ };
+ return ac_build_gather_values(&ctx->ac, constants, 4);
+ }
+
+ assert(stride % type_size == 0);
+
+ if (!index)
+ index = ctx->ac.i32_0;
+
+ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
+
+ list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(type), "");
+
+ return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+
+/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+ * so we may need to fix it up. */
+static LLVMValueRef
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
+ unsigned adjustment,
+ LLVMValueRef alpha)
+{
+ if (adjustment == RADV_ALPHA_ADJUST_NONE)
+ return alpha;
+
+ LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+ if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
+ alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+ else
+ alpha = ac_to_integer(&ctx->ac, alpha);
+
+ /* For the integer-like cases, do a natural sign extension.
+ *
+ * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+ * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+ * exponent.
+ */
+ alpha = LLVMBuildShl(ctx->ac.builder, alpha,
+ adjustment == RADV_ALPHA_ADJUST_SNORM ?
+ LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+ alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+ /* Convert back to the right type. */
+ if (adjustment == RADV_ALPHA_ADJUST_SNORM) {
+ LLVMValueRef clamp;
+ LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
+ alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
+ } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) {
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ }
+
+ return alpha;
+}
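+
+/* A tiny worked sketch of the shl/ashr sign extension above for the
+ * integer-like cases (illustrative): the 2-bit alpha 0b11 read as unsigned
+ * is 3; shifting it to the top two bits and arithmetic-shifting back yields
+ * -1, the intended signed value.
+ */
+static inline int32_t
+example_sext2(uint32_t alpha2)
+{
+ return (int32_t)(alpha2 << 30) >> 30;
+}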
+
+static void
+handle_vs_input_decl(struct radv_shader_context *ctx,
+ struct nir_variable *variable)
+{
+ LLVMValueRef t_list_ptr = ctx->vertex_buffers;
+ LLVMValueRef t_offset;
+ LLVMValueRef t_list;
+ LLVMValueRef input;
+ LLVMValueRef buffer_index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
+ uint8_t input_usage_mask =
+ ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
+ unsigned num_channels = util_last_bit(input_usage_mask);
+
+ variable->data.driver_location = variable->data.location * 4;
+
+ enum glsl_base_type type = glsl_get_base_type(variable->type);
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ LLVMValueRef output[4];
+ unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
+
+ if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+ uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
+
+ if (divisor) {
+ buffer_index = ctx->abi.instance_id;
+
+ if (divisor != 1) {
+ buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+ LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+ }
+
+ if (ctx->options->key.vs.as_ls) {
+ ctx->shader_info->vs.vgpr_comp_cnt =
+ MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
+ } else {
+ ctx->shader_info->vs.vgpr_comp_cnt =
+ MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
+ }
+ } else {
+ buffer_index = ctx->ac.i32_0;
+ }
+
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, "");
+ } else
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
+ ctx->abi.base_vertex, "");
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false);
+
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+
+ input = ac_build_buffer_load_format(&ctx->ac, t_list,
+ buffer_index,
+ ctx->ac.i32_0,
+ num_channels, false, true);
+
+ input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
+ output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ if (type == GLSL_TYPE_FLOAT16) {
+ output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, "");
+ output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, "");
+ }
+ }
+
+ unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3;
+ output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ output[chan] = ac_to_integer(&ctx->ac, output[chan]);
+ if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
+ output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, "");
+
+ ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan];
+ }
+ }
+}
+
+static void interp_fs_input(struct radv_shader_context *ctx,
+ unsigned attr,
+ LLVMValueRef interp_param,
+ LLVMValueRef prim_mask,
+ LLVMValueRef result[4])
+{
+ LLVMValueRef attr_number;
+ unsigned chan;
+ LLVMValueRef i, j;
+ bool interp = !LLVMIsUndef(interp_param);
+
+ attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
+
+ /* fs.constant returns the param from the middle vertex, so it's not
+ * really useful for flat shading. It's meant to be used for custom
+ * interpolation (but the intrinsic can't fetch from the other two
+ * vertices).
+ *
+ * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
+ * to do the right thing. The only reason we use fs.constant is that
+ * fs.interp cannot be used on integers, because they can be equal
+ * to NaN.
+ */
+ if (interp) {
+ interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param,
+ ctx->ac.v2f32, "");
+
+ i = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
+ ctx->ac.i32_0, "");
+ j = LLVMBuildExtractElement(ctx->ac.builder, interp_param,
+ ctx->ac.i32_1, "");
+ }
+
+ for (chan = 0; chan < 4; chan++) {
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
+
+ if (interp) {
+ result[chan] = ac_build_fs_interp(&ctx->ac,
+ llvm_chan,
+ attr_number,
+ prim_mask, i, j);
+ } else {
+ result[chan] = ac_build_fs_interp_mov(&ctx->ac,
+ LLVMConstInt(ctx->ac.i32, 2, false),
+ llvm_chan,
+ attr_number,
+ prim_mask);
+ result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
+ result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
+ }
+ }
+}
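+
+/* A scalar sketch of what the interpolation above computes per channel
+ * (assumed hardware behaviour, for illustration): a barycentric blend of the
+ * three vertex values using the (i, j) coordinates.
+ */
+static inline float
+example_fs_interp(float p0, float p1, float p2, float i, float j)
+{
+ return p0 + i * (p1 - p0) + j * (p2 - p0);
+}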
+
+static void
+handle_fs_input_decl(struct radv_shader_context *ctx,
+ struct nir_variable *variable)
+{
+ int idx = variable->data.location;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ LLVMValueRef interp = NULL;
+ uint64_t mask;
+
+ variable->data.driver_location = idx * 4;
+ mask = ((1ull << attrib_count) - 1) << variable->data.location;
+
+ if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
+ unsigned interp_type;
+ if (variable->data.sample)
+ interp_type = INTERP_SAMPLE;
+ else if (variable->data.centroid)
+ interp_type = INTERP_CENTROID;
+ else
+ interp_type = INTERP_CENTER;
+
+ interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
+ }
+ bool is_16bit = glsl_type_is_16bit(variable->type);
+ LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
+ if (interp == NULL)
+ interp = LLVMGetUndef(type);
+
+ for (unsigned i = 0; i < attrib_count; ++i)
+ ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
+
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ /* Do not account for the number of components inside the array
+ * of clip/cull distances because this might wrongly set other
+ * bits like primitive ID or layer.
+ */
+ mask = 1ull << VARYING_SLOT_CLIP_DIST0;
+ }
+
+ ctx->input_mask |= mask;
+}
+
+static void
+handle_vs_inputs(struct radv_shader_context *ctx,
+ struct nir_shader *nir)
+{
+ nir_foreach_variable(variable, &nir->inputs)
+ handle_vs_input_decl(ctx, variable);
+}
+
+static void
+prepare_interp_optimize(struct radv_shader_context *ctx,
+ struct nir_shader *nir)
+{
+ bool uses_center = false;
+ bool uses_centroid = false;
+ nir_foreach_variable(variable, &nir->inputs) {
+ if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
+ variable->data.sample)
+ continue;
+
+ if (variable->data.centroid)
+ uses_centroid = true;
+ else
+ uses_center = true;
+ }
+
+ if (uses_center && uses_centroid) {
+ LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ctx->abi.prim_mask, ctx->ac.i32_0, "");
+ ctx->persp_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->persp_center, ctx->persp_centroid, "");
+ ctx->linear_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->linear_center, ctx->linear_centroid, "");
+ }
+}
+
+static void
+handle_fs_inputs(struct radv_shader_context *ctx,
+ struct nir_shader *nir)
+{
+ prepare_interp_optimize(ctx, nir);
+
+ nir_foreach_variable(variable, &nir->inputs)
+ handle_fs_input_decl(ctx, variable);
+
+ unsigned index = 0;
+
+ if (ctx->shader_info->info.ps.uses_input_attachments ||
+ ctx->shader_info->info.needs_multiview_view_index) {
+ ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
+ ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = LLVMGetUndef(ctx->ac.i32);
+ }
+
+ for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
+ LLVMValueRef interp_param;
+ LLVMValueRef *inputs = ctx->inputs + ac_llvm_reg_index_soa(i, 0);
+
+ if (!(ctx->input_mask & (1ull << i)))
+ continue;
+
+ if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
+ i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
+ interp_param = *inputs;
+ interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
+ inputs);
+
+ if (LLVMIsUndef(interp_param))
+ ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
+ if (i >= VARYING_SLOT_VAR0)
+ ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
+ ++index;
+ } else if (i == VARYING_SLOT_CLIP_DIST0) {
+ int length = ctx->shader_info->info.ps.num_input_clips_culls;
+
+ for (unsigned j = 0; j < length; j += 4) {
+ inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j);
+
+ interp_param = *inputs;
+ interp_fs_input(ctx, index, interp_param,
+ ctx->abi.prim_mask, inputs);
+ ++index;
+ }
+ } else if (i == VARYING_SLOT_POS) {
+ for (unsigned j = 0; j < 3; ++j)
+ inputs[j] = ctx->abi.frag_pos[j];
+
+ inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
+ ctx->abi.frag_pos[3]);
+ }
+ }
+ ctx->shader_info->fs.num_interp = index;
+ ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
+
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ ctx->abi.view_index = ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+}
+
+static void
+scan_shader_output_decl(struct radv_shader_context *ctx,
+ struct nir_variable *variable,
+ struct nir_shader *shader,
+ gl_shader_stage stage)
+{
+ int idx = variable->data.location + variable->data.index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ uint64_t mask_attribs;
+
+ variable->data.driver_location = idx * 4;
+
+ /* tess ctrl has its own load/store paths for outputs */
+ if (stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ mask_attribs = ((1ull << attrib_count) - 1) << idx;
+ if (stage == MESA_SHADER_VERTEX ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY) {
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ if (stage == MESA_SHADER_VERTEX) {
+ ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
+ ctx->shader_info->vs.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size;
+ }
+ if (stage == MESA_SHADER_TESS_EVAL) {
+ ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
+ ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size;
+ }
+
+ mask_attribs = 1ull << idx;
+ }
+ }
+
+ ctx->output_mask |= mask_attribs;
+}
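+
+/* A small worked sketch of the clip/cull mask packing above (values
+ * illustrative): with 4 clip and 2 cull distances, clip_dist_mask is 0xf and
+ * cull_dist_mask is 0x3 shifted past the clip slots, i.e. 0x30.
+ */
+static inline uint32_t
+example_cull_dist_mask(uint32_t num_clips, uint32_t num_culls)
+{
+ return ((1u << num_culls) - 1) << num_clips;
+}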
+
+
+/* Initialize arguments for the shader export intrinsic */
+static void
+si_llvm_init_export_args(struct radv_shader_context *ctx,
+ LLVMValueRef *values,
+ unsigned enabled_channels,
+ unsigned target,
+ struct ac_export_args *args)
+{
+ /* Specify the channels that are enabled. */
+ args->enabled_channels = enabled_channels;
+
+ /* Specify whether the EXEC mask represents the valid mask */
+ args->valid_mask = 0;
+
+ /* Specify whether this is the last export */
+ args->done = 0;
+
+ /* Specify the target we are exporting */
+ args->target = target;
+
+ args->compr = false;
+ args->out[0] = LLVMGetUndef(ctx->ac.f32);
+ args->out[1] = LLVMGetUndef(ctx->ac.f32);
+ args->out[2] = LLVMGetUndef(ctx->ac.f32);
+ args->out[3] = LLVMGetUndef(ctx->ac.f32);
+
+ if (!values)
+ return;
+
+ bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
+ if (ctx->stage == MESA_SHADER_FRAGMENT) {
+ unsigned index = target - V_008DFC_SQ_EXP_MRT;
+ unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
+ bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
+ bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;
+ unsigned chan;
+
+ LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL;
+ LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
+ unsigned bits, bool hi) = NULL;
+
+ switch (col_format) {
+ case V_028714_SPI_SHADER_ZERO:
+ args->enabled_channels = 0; /* writemask */
+ args->target = V_008DFC_SQ_EXP_NULL;
+ break;
+
+ case V_028714_SPI_SHADER_32_R:
+ args->enabled_channels = 1;
+ args->out[0] = values[0];
+ break;
+
+ case V_028714_SPI_SHADER_32_GR:
+ args->enabled_channels = 0x3;
+ args->out[0] = values[0];
+ args->out[1] = values[1];
+ break;
+
+ case V_028714_SPI_SHADER_32_AR:
+ args->enabled_channels = 0x9;
+ args->out[0] = values[0];
+ args->out[3] = values[3];
+ break;
+
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ args->enabled_channels = 0x5;
+ packf = ac_build_cvt_pkrtz_f16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildFPExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.f32, "");
+ }
+ break;
+
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ args->enabled_channels = 0x5;
+ packf = ac_build_cvt_pknorm_u16;
+ break;
+
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ args->enabled_channels = 0x5;
+ packf = ac_build_cvt_pknorm_i16;
+ break;
+
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ args->enabled_channels = 0x5;
+ packi = ac_build_cvt_pk_u16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildZExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.i32, "");
+ }
+ break;
+
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ args->enabled_channels = 0x5;
+ packi = ac_build_cvt_pk_i16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildSExt(ctx->ac.builder,
+ values[chan],
+ ctx->ac.i32, "");
+ }
+ break;
+
+ default:
+ case V_028714_SPI_SHADER_32_ABGR:
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+ break;
+ }
+
+ /* Pack f16 or norm_i16/u16. */
+ if (packf) {
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {
+ values[2 * chan],
+ values[2 * chan + 1]
+ };
+ LLVMValueRef packed;
+
+ packed = packf(&ctx->ac, pack_args);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
+ }
+
+ /* Pack i16/u16. */
+ if (packi) {
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {
+ ac_to_integer(&ctx->ac, values[2 * chan]),
+ ac_to_integer(&ctx->ac, values[2 * chan + 1])
+ };
+ LLVMValueRef packed;
+
+ packed = packi(&ctx->ac, pack_args,
+ is_int8 ? 8 : is_int10 ? 10 : 16,
+ chan == 1);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
+ }
+ return;
+ }
+
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++) {
+ values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
+ args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
+ }
+ } else
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+
+ for (unsigned i = 0; i < 4; ++i) {
+ if (!(args->enabled_channels & (1 << i)))
+ continue;
+
+ args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
+ }
+}
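+
+/* A brief sketch of the 16-bit packing behind the compressed exports above
+ * (UNORM16 shown; illustrative only, inputs assumed already in [0, 1]): two
+ * normalized floats end up in one dword, which is why only two of the four
+ * out[] slots are populated when args->compr is set.
+ */
+static inline uint32_t
+example_pack_unorm16x2(float lo, float hi)
+{
+ uint32_t a = (uint32_t)(lo * 65535.0f + 0.5f);
+ uint32_t b = (uint32_t)(hi * 65535.0f + 0.5f);
+ return a | (b << 16);
+}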
+
+static void
+radv_export_param(struct radv_shader_context *ctx, unsigned index,
+ LLVMValueRef *values, unsigned enabled_channels)
+{
+ struct ac_export_args args;
+
+ si_llvm_init_export_args(ctx, values, enabled_channels,
+ V_008DFC_SQ_EXP_PARAM + index, &args);
+ ac_build_export(&ctx->ac, &args);
+}
+
+static LLVMValueRef
+radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
+{
+ LLVMValueRef output =
+ ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
+
+ return LLVMBuildLoad(ctx->ac.builder, output, "");
+}
+
+static void
+radv_emit_stream_output(struct radv_shader_context *ctx,
+ LLVMValueRef const *so_buffers,
+ LLVMValueRef const *so_write_offsets,
+ const struct radv_stream_output *output)
+{
+ unsigned num_comps = util_bitcount(output->component_mask);
+ unsigned loc = output->location;
+ unsigned buf = output->buffer;
+ unsigned offset = output->offset;
+ unsigned start;
+ LLVMValueRef out[4];
+
+ assert(num_comps && num_comps <= 4);
+ if (!num_comps || num_comps > 4)
+ return;
+
+ /* Get the first component. */
+ start = ffs(output->component_mask) - 1;
+
+ /* Load the output as int. */
+ for (int i = 0; i < num_comps; i++) {
+ out[i] = ac_to_integer(&ctx->ac,
+ radv_load_output(ctx, loc, start + i));
+ }
+
+ /* Pack the output. */
+ LLVMValueRef vdata = NULL;
+
+ switch (num_comps) {
+ case 1: /* as i32 */
+ vdata = out[0];
+ break;
+ case 2: /* as v2i32 */
+ case 3: /* as v4i32 (aligned to 4) */
+ out[3] = LLVMGetUndef(ctx->ac.i32);
+ /* fall through */
+ case 4: /* as v4i32 */
+ vdata = ac_build_gather_values(&ctx->ac, out,
+ util_next_power_of_two(num_comps));
+ break;
+ }
+
+ ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
+ vdata, num_comps, so_write_offsets[buf],
+ ctx->ac.i32_0, offset,
+ 1, 1, true, false);
+}
+
+static void
+radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
+{
+ struct ac_build_if_state if_ctx;
+ int i;
+
+ /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
+ assert(ctx->streamout_config);
+ LLVMValueRef so_vtx_count =
+ ac_build_bfe(&ctx->ac, ctx->streamout_config,
+ LLVMConstInt(ctx->ac.i32, 16, false),
+ LLVMConstInt(ctx->ac.i32, 7, false), false);
+
+ LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
+
+ /* can_emit = tid < so_vtx_count; */
+ LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+ tid, so_vtx_count, "");
+
+ /* Emit the streamout code conditionally. This actually avoids
+ * out-of-bounds buffer access. The hw tells us via the SGPR
+ * (so_vtx_count) which threads are allowed to emit streamout data.
+ */
+ ac_nir_build_if(&if_ctx, ctx, can_emit);
+ {
+ /* The buffer offset is computed as follows:
+ * ByteOffset = streamout_offset[buffer_id]*4 +
+ * (streamout_write_index + thread_id)*stride[buffer_id] +
+ * attrib_offset
+ */
+ LLVMValueRef so_write_index = ctx->streamout_write_idx;
+
+ /* Compute (streamout_write_index + thread_id). */
+ so_write_index =
+ LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, "");
+
+ /* Load the descriptor and compute the write offset for each
+ * enabled buffer.
+ */
+ LLVMValueRef so_write_offset[4] = {};
+ LLVMValueRef so_buffers[4] = {};
+ LLVMValueRef buf_ptr = ctx->streamout_buffers;
+
+ for (i = 0; i < 4; i++) {
+ uint16_t stride = ctx->shader_info->info.so.strides[i];
+
+ if (!stride)
+ continue;
+
+ LLVMValueRef offset =
+ LLVMConstInt(ctx->ac.i32, i, false);
+
+ so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac,
+ buf_ptr, offset);
+
+ LLVMValueRef so_offset = ctx->streamout_offset[i];
+
+ so_offset = LLVMBuildMul(ctx->ac.builder, so_offset,
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
+
+ so_write_offset[i] =
+ ac_build_imad(&ctx->ac, so_write_index,
+ LLVMConstInt(ctx->ac.i32,
+ stride * 4, false),
+ so_offset);
+ }
+
+ /* Write streamout data. */
+ for (i = 0; i < ctx->shader_info->info.so.num_outputs; i++) {
+ struct radv_stream_output *output =
+ &ctx->shader_info->info.so.outputs[i];
+
+ if (stream != output->stream)
+ continue;
+
+ radv_emit_stream_output(ctx, so_buffers,
+ so_write_offset, output);
+ }
+ }
+ ac_nir_build_endif(&if_ctx);
+}
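+
+/* A minimal sketch of the write-offset formula from the comment above
+ * (illustrative names; strides are in dwords, as in so.strides[]):
+ */
+static inline uint32_t
+example_so_byte_offset(uint32_t so_offset_dw, uint32_t write_index,
+ uint32_t thread_id, uint32_t stride_dw, uint32_t attrib_offset)
+{
+ return so_offset_dw * 4 +
+ (write_index + thread_id) * (stride_dw * 4) +
+ attrib_offset;
+}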
+
+static void
+handle_vs_outputs_post(struct radv_shader_context *ctx,
+ bool export_prim_id, bool export_layer_id,
+ struct radv_vs_output_info *outinfo)
+{
+ uint32_t param_count = 0;
+ unsigned target;
+ unsigned pos_idx, num_pos_exports = 0;
+ struct ac_export_args args, pos_args[4] = {};
+ LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
+ int i;
+
+ if (ctx->options->key.has_multiview_view_index) {
+ LLVMValueRef* tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ if (!*tmp_out) {
+ for (unsigned i = 0; i < 4; ++i)
+ ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
+ ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
+ }
+
+ LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, ctx->abi.view_index), *tmp_out);
+ ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
+ }
+
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
+ sizeof(outinfo->vs_output_param_offset));
+
+ if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
+ unsigned output_usage_mask, length;
+ LLVMValueRef slots[8];
+ unsigned j;
+
+ if (ctx->stage == MESA_SHADER_VERTEX &&
+ !ctx->is_gs_copy_shader) {
+ output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ output_usage_mask =
+ ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
+ } else {
+ assert(ctx->is_gs_copy_shader);
+ output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0];
+ }
+
+ length = util_last_bit(output_usage_mask);
+
+ i = VARYING_SLOT_CLIP_DIST0;
+ for (j = 0; j < length; j++)
+ slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
+
+ for (i = length; i < 8; i++)
+ slots[i] = LLVMGetUndef(ctx->ac.f32);
+
+ if (length > 4) {
+ target = V_008DFC_SQ_EXP_POS + 3;
+ si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args);
+ memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
+ &args, sizeof(args));
+ }
+
+ target = V_008DFC_SQ_EXP_POS + 2;
+ si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args);
+ memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
+ &args, sizeof(args));
+
+ /* Export the clip/cull distances values to the next stage. */
+ radv_export_param(ctx, param_count, &slots[0], 0xf);
+ outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++;
+ if (length > 4) {
+ radv_export_param(ctx, param_count, &slots[4], 0xf);
+ outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++;
+ }
+ }
+
+ LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1};
+ if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
+ for (unsigned j = 0; j < 4; j++)
+ pos_values[j] = radv_load_output(ctx, VARYING_SLOT_POS, j);
+ }
+ si_llvm_init_export_args(ctx, pos_values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]);
+
+ if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
+ outinfo->writes_pointsize = true;
+ psize_value = radv_load_output(ctx, VARYING_SLOT_PSIZ, 0);
+ }
+
+ if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
+ outinfo->writes_layer = true;
+ layer_value = radv_load_output(ctx, VARYING_SLOT_LAYER, 0);
+ }
+
+ if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
+ outinfo->writes_viewport_index = true;
+ viewport_index_value = radv_load_output(ctx, VARYING_SLOT_VIEWPORT, 0);
+ }
+
+ if (ctx->shader_info->info.so.num_outputs &&
+ !ctx->is_gs_copy_shader) {
+ /* The GS copy shader emission already emits streamout. */
+ radv_emit_streamout(ctx, 0);
+ }
+
+ if (outinfo->writes_pointsize ||
+ outinfo->writes_layer ||
+ outinfo->writes_viewport_index) {
+		pos_args[1].enabled_channels = ((outinfo->writes_pointsize ? 1 : 0) |
+						(outinfo->writes_layer ? 4 : 0));
+ pos_args[1].valid_mask = 0;
+ pos_args[1].done = 0;
+ pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
+ pos_args[1].compr = 0;
+ pos_args[1].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[1].out[3] = ctx->ac.f32_0; /* W */
+
+		if (outinfo->writes_pointsize)
+			pos_args[1].out[0] = psize_value;
+		if (outinfo->writes_layer)
+			pos_args[1].out[2] = layer_value;
+		if (outinfo->writes_viewport_index) {
+ if (ctx->options->chip_class >= GFX9) {
+ /* GFX9 has the layer in out.z[10:0] and the viewport
+ * index in out.z[19:16].
+ */
+ LLVMValueRef v = viewport_index_value;
+ v = ac_to_integer(&ctx->ac, v);
+ v = LLVMBuildShl(ctx->ac.builder, v,
+ LLVMConstInt(ctx->ac.i32, 16, false),
+ "");
+ v = LLVMBuildOr(ctx->ac.builder, v,
+ ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
+
+ pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
+ pos_args[1].enabled_channels |= 1 << 2;
+ } else {
+ pos_args[1].out[3] = viewport_index_value;
+ pos_args[1].enabled_channels |= 1 << 3;
+ }
+ }
+ }
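+	/* Count the enabled position exports so the last one can set the DONE bit. */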
+ for (i = 0; i < 4; i++) {
+ if (pos_args[i].out[0])
+ num_pos_exports++;
+ }
+
+ pos_idx = 0;
+ for (i = 0; i < 4; i++) {
+ if (!pos_args[i].out[0])
+ continue;
+
+ /* Specify the target we are exporting */
+ pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
+ if (pos_idx == num_pos_exports)
+ pos_args[i].done = 1;
+ ac_build_export(&ctx->ac, &pos_args[i]);
+ }
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef values[4];
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i != VARYING_SLOT_LAYER &&
+ i != VARYING_SLOT_PRIMITIVE_ID &&
+ i < VARYING_SLOT_VAR0)
+ continue;
+
+ for (unsigned j = 0; j < 4; j++)
+ values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
+
+ unsigned output_usage_mask;
+
+ if (ctx->stage == MESA_SHADER_VERTEX &&
+ !ctx->is_gs_copy_shader) {
+ output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[i];
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ output_usage_mask =
+ ctx->shader_info->info.tes.output_usage_mask[i];
+ } else {
+ assert(ctx->is_gs_copy_shader);
+ output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
+ }
+
+ radv_export_param(ctx, param_count, values, output_usage_mask);
+
+ outinfo->vs_output_param_offset[i] = param_count++;
+ }
+
+ if (export_prim_id) {
+ LLVMValueRef values[4];
+
+ values[0] = ctx->vs_prim_id;
+ ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2,
+ ctx->shader_info->vs.vgpr_comp_cnt);
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
+ outinfo->export_prim_id = true;
+ }
+
+ if (export_layer_id && layer_value) {
+ LLVMValueRef values[4];
+
+ values[0] = layer_value;
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count++;
+ }
+
+ outinfo->pos_exports = num_pos_exports;
+ outinfo->param_exports = param_count;
+}
+
+static void
+handle_es_outputs_post(struct radv_shader_context *ctx,
+ struct radv_es_output_info *outinfo)
+{
+ int j;
+ uint64_t max_output_written = 0;
+ LLVMValueRef lds_base = NULL;
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask;
+ int param_index;
+ int length = 4;
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[i];
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+ output_usage_mask =
+ ctx->shader_info->info.tes.output_usage_mask[i];
+ }
+
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = util_last_bit(output_usage_mask);
+
+ param_index = shader_io_get_unique_index(i);
+
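+		/* Clip/cull distance arrays longer than 4 components take up a second slot. */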
+ max_output_written = MAX2(param_index + (length > 4), max_output_written);
+ }
+
+ outinfo->esgs_itemsize = (max_output_written + 1) * 16;
+
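+	/* On GFX9 the ES is merged with the GS and its outputs go through LDS,
+	 * so compute this thread's base LDS address; older chips use the ESGS
+	 * ring buffer instead. */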
+ if (ctx->ac.chip_class >= GFX9) {
+ unsigned itemsize_dw = outinfo->esgs_itemsize / 4;
+ LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
+ LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4);
+ vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
+ LLVMBuildMul(ctx->ac.builder, wave_idx,
+ LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
+ lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
+ LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
+ }
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef dw_addr = NULL;
+ LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
+ unsigned output_usage_mask;
+ int param_index;
+ int length = 4;
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[i];
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+ output_usage_mask =
+ ctx->shader_info->info.tes.output_usage_mask[i];
+ }
+
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = util_last_bit(output_usage_mask);
+
+ param_index = shader_io_get_unique_index(i);
+
+ if (lds_base) {
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, param_index * 4, false),
+ "");
+ }
+
+ for (j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
+ out_val = ac_to_integer(&ctx->ac, out_val);
+ out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
+
+ if (ctx->ac.chip_class >= GFX9) {
+ LLVMValueRef dw_addr_offset =
+ LLVMBuildAdd(ctx->ac.builder, dw_addr,
+ LLVMConstInt(ctx->ac.i32,
+ j, false), "");
+
+ ac_lds_store(&ctx->ac, dw_addr_offset, out_val);
+ } else {
+ ac_build_buffer_store_dword(&ctx->ac,
+ ctx->esgs_ring,
+ out_val, 1,
+ NULL, ctx->es2gs_offset,
+ (4 * param_index + j) * 4,
+ 1, 1, true, true);
+ }
+ }
+ }
+}
+
+static void
+handle_ls_outputs_post(struct radv_shader_context *ctx)
+{
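+	/* LS outputs are stored to LDS for the HS to read; each vertex
+	 * occupies num_tcs_inputs * 4 dwords. */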
+ LLVMValueRef vertex_id = ctx->rel_auto_id;
+ uint32_t num_tcs_inputs = util_last_bit64(ctx->shader_info->info.vs.ls_outputs_written);
+ LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false);
+ LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
+ vertex_dw_stride, "");
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->info.vs.output_usage_mask[i];
+ LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
+ int length = 4;
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = util_last_bit(output_usage_mask);
+
+ int param = shader_io_get_unique_index(i);
+ LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
+ LLVMConstInt(ctx->ac.i32, param * 4, false),
+ "");
+ for (unsigned j = 0; j < length; j++) {
+ LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
+ value = ac_to_integer(&ctx->ac, value);
+ value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+ ac_lds_store(&ctx->ac, dw_addr, value);
+ dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
+ }
+ }
+}
+
+static void
+write_tess_factors(struct radv_shader_context *ctx)
+{
+ unsigned stride, outer_comps, inner_comps;
+ struct ac_build_if_state if_ctx, inner_if_ctx;
+ LLVMValueRef invocation_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5);
+ LLVMValueRef rel_patch_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
+ unsigned tess_inner_index = 0, tess_outer_index;
+ LLVMValueRef lds_base, lds_inner = NULL, lds_outer, byteoffset, buffer;
+ LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
+ int i;
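+	/* Wait until all TCS invocations have written their outputs to LDS
+	 * before the tess levels are read back. */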
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+
+ switch (ctx->options->key.tcs.primitive_mode) {
+ case GL_ISOLINES:
+ stride = 2;
+ outer_comps = 2;
+ inner_comps = 0;
+ break;
+ case GL_TRIANGLES:
+ stride = 4;
+ outer_comps = 3;
+ inner_comps = 1;
+ break;
+ case GL_QUADS:
+ stride = 6;
+ outer_comps = 4;
+ inner_comps = 2;
+ break;
+ default:
+ return;
+ }
+
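+	/* Only the first invocation of each patch writes the tess factors. */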
+ ac_nir_build_if(&if_ctx, ctx,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
+ invocation_id, ctx->ac.i32_0, ""));
+
+ lds_base = get_tcs_out_current_patch_data_offset(ctx);
+
+ if (inner_comps) {
+ tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
+ lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
+ }
+
+ tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
+ lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
+
+ for (i = 0; i < 4; i++) {
+ inner[i] = LLVMGetUndef(ctx->ac.i32);
+ outer[i] = LLVMGetUndef(ctx->ac.i32);
+ }
+
+	/* GL_ISOLINES: the tess factor buffer expects the two outer levels in
+	 * reversed order. */
+ if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
+ outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
+ lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer,
+ ctx->ac.i32_1, "");
+ outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
+ } else {
+ for (i = 0; i < outer_comps; i++) {
+ outer[i] = out[i] =
+ ac_lds_load(&ctx->ac, lds_outer);
+ lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer,
+ ctx->ac.i32_1, "");
+ }
+ for (i = 0; i < inner_comps; i++) {
+ inner[i] = out[outer_comps+i] =
+ ac_lds_load(&ctx->ac, lds_inner);
+ lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_inner,
+ ctx->ac.i32_1, "");
+ }
+ }
+
+ /* Convert the outputs to vectors for stores. */
+ vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
+ vec1 = NULL;
+
+ if (stride > 4)
+ vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
+
+ buffer = ctx->hs_ring_tess_factor;
+ tf_base = ctx->tess_factor_offset;
+ byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
+ LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
+ unsigned tf_offset = 0;
+
+ if (ctx->options->chip_class <= VI) {
+ ac_nir_build_if(&inner_if_ctx, ctx,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
+ rel_patch_id, ctx->ac.i32_0, ""));
+
+ /* Store the dynamic HS control word. */
+ ac_build_buffer_store_dword(&ctx->ac, buffer,
+ LLVMConstInt(ctx->ac.i32, 0x80000000, false),
+ 1, ctx->ac.i32_0, tf_base,
+ 0, 1, 0, true, false);
+ tf_offset += 4;
+
+ ac_nir_build_endif(&inner_if_ctx);
+ }
+
+ /* Store the tessellation factors. */
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
+ MIN2(stride, 4), byteoffset, tf_base,
+ tf_offset, 1, 0, true, false);
+ if (vec1)
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
+ stride - 4, byteoffset, tf_base,
+ 16 + tf_offset, 1, 0, true, false);
+
+	/* Store the tess levels to the offchip buffer for the TES to read,
+	 * but only if the TES actually reads them. */
+ if (ctx->options->key.tcs.tes_reads_tess_factors) {
+ LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
+ LLVMValueRef tf_inner_offset;
+ unsigned param_outer, param_inner;
+
+ param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
+ tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
+ LLVMConstInt(ctx->ac.i32, param_outer, 0));
+
+ outer_vec = ac_build_gather_values(&ctx->ac, outer,
+ util_next_power_of_two(outer_comps));
+
+ ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
+ outer_comps, tf_outer_offset,
+ ctx->oc_lds, 0, 1, 0, true, false);
+ if (inner_comps) {
+ param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
+ tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
+ LLVMConstInt(ctx->ac.i32, param_inner, 0));
+
+ inner_vec = inner_comps == 1 ? inner[0] :
+ ac_build_gather_values(&ctx->ac, inner, inner_comps);
+ ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
+ inner_comps, tf_inner_offset,
+ ctx->oc_lds, 0, 1, 0, true, false);
+ }
+ }
+ ac_nir_build_endif(&if_ctx);
+}
+
+static void
+handle_tcs_outputs_post(struct radv_shader_context *ctx)
+{
+ write_tess_factors(ctx);
+}
+
+static bool
+si_export_mrt_color(struct radv_shader_context *ctx,
+ LLVMValueRef *color, unsigned index,
+ struct ac_export_args *args)
+{
+ /* Export */
+ si_llvm_init_export_args(ctx, color, 0xf,
+ V_008DFC_SQ_EXP_MRT + index, args);
+ if (!args->enabled_channels)
+ return false; /* unnecessary NULL export */
+
+ return true;
+}
+
+static void
+radv_export_mrt_z(struct radv_shader_context *ctx,
+ LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask)
+{
+ struct ac_export_args args;
+
+ ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
+
+ ac_build_export(&ctx->ac, &args);
+}
+
+static void
+handle_fs_outputs_post(struct radv_shader_context *ctx)
+{
+ unsigned index = 0;
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ struct ac_export_args color_args[8];
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef values[4];
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i < FRAG_RESULT_DATA0)
+ continue;
+
+ for (unsigned j = 0; j < 4; j++)
+ values[j] = ac_to_float(&ctx->ac,
+ radv_load_output(ctx, i, j));
+
+ bool ret = si_export_mrt_color(ctx, values,
+ i - FRAG_RESULT_DATA0,
+ &color_args[index]);
+ if (ret)
+ index++;
+ }
+
+ /* Process depth, stencil, samplemask. */
+ if (ctx->shader_info->info.ps.writes_z) {
+ depth = ac_to_float(&ctx->ac,
+ radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
+ }
+ if (ctx->shader_info->info.ps.writes_stencil) {
+ stencil = ac_to_float(&ctx->ac,
+ radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
+ }
+ if (ctx->shader_info->info.ps.writes_sample_mask) {
+ samplemask = ac_to_float(&ctx->ac,
+ radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
+ }
+
+	/* Set the DONE bit on the last non-null color export only if no
+	 * depth, stencil or sample mask export follows.
+	 */
+ if (index > 0 &&
+ !ctx->shader_info->info.ps.writes_z &&
+ !ctx->shader_info->info.ps.writes_stencil &&
+ !ctx->shader_info->info.ps.writes_sample_mask) {
+ unsigned last = index - 1;
+
+ color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
+ color_args[last].done = 1; /* DONE bit */
+ }
+
+ /* Export PS outputs. */
+ for (unsigned i = 0; i < index; i++)
+ ac_build_export(&ctx->ac, &color_args[i]);
+
+ if (depth || stencil || samplemask)
+ radv_export_mrt_z(ctx, depth, stencil, samplemask);
+ else if (!index)
+ ac_build_export_null(&ctx->ac);
+}
+
+static void
+emit_gs_epilogue(struct radv_shader_context *ctx)
+{
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
+}
+
+static void
+handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
+ LLVMValueRef *addrs)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ switch (ctx->stage) {
+ case MESA_SHADER_VERTEX:
+ if (ctx->options->key.vs.as_ls)
+ handle_ls_outputs_post(ctx);
+ else if (ctx->options->key.vs.as_es)
+ handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
+ else
+ handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
+ ctx->options->key.vs.export_layer_id,
+ &ctx->shader_info->vs.outinfo);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ handle_fs_outputs_post(ctx);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ emit_gs_epilogue(ctx);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ handle_tcs_outputs_post(ctx);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->options->key.tes.as_es)
+ handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
+ else
+ handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
+ ctx->options->key.tes.export_layer_id,
+ &ctx->shader_info->tes.outinfo);
+ break;
+ default:
+ break;
+ }
+}
+
+static void ac_llvm_finalize_module(struct radv_shader_context *ctx,
+ LLVMPassManagerRef passmgr,
+ const struct radv_nir_compiler_options *options)
+{
+ LLVMRunPassManager(passmgr, ctx->ac.module);
+ LLVMDisposeBuilder(ctx->ac.builder);
+
+ ac_llvm_context_dispose(&ctx->ac);
+}
+
+static void
+ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
+{
+ struct radv_vs_output_info *outinfo;
+
+ switch (ctx->stage) {
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_GEOMETRY:
+ return;
+ case MESA_SHADER_VERTEX:
+ if (ctx->options->key.vs.as_ls ||
+ ctx->options->key.vs.as_es)
+ return;
+ outinfo = &ctx->shader_info->vs.outinfo;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+		if (ctx->options->key.tes.as_es)
+ return;
+ outinfo = &ctx->shader_info->tes.outinfo;
+ break;
+ default:
+ unreachable("Unhandled shader type");
+ }
+
+ ac_optimize_vs_outputs(&ctx->ac,
+ ctx->main_function,
+ outinfo->vs_output_param_offset,
+ VARYING_SLOT_MAX,
+ &outinfo->param_exports);
+}
+
+static void
+ac_setup_rings(struct radv_shader_context *ctx)
+{
+ if (ctx->options->chip_class <= VI &&
+ (ctx->stage == MESA_SHADER_GEOMETRY ||
+ ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) {
+ unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
+ : RING_ESGS_VS;
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
+
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
+ ctx->ring_offsets,
+ offset);
+ }
+
+ if (ctx->is_gs_copy_shader) {
+ ctx->gsvs_ring[0] =
+ ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
+ LLVMConstInt(ctx->ac.i32,
+ RING_GSVS_VS, false));
+ }
+
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ /* The conceptual layout of the GSVS ring is
+		 *   v0c0 .. vLc0 v0c1 .. vLc1 ..
+ * but the real memory layout is swizzled across
+ * threads:
+ * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+ * t16v0c0 ..
+ * Override the buffer descriptor accordingly.
+ */
+ LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
+ uint64_t stream_offset = 0;
+ unsigned num_records = 64;
+ LLVMValueRef base_ring;
+
+ base_ring =
+ ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
+ LLVMConstInt(ctx->ac.i32,
+ RING_GSVS_GS, false));
+
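+		/* Build one descriptor per vertex stream: offset the base
+		 * address and patch the stride and record count. */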
+ for (unsigned stream = 0; stream < 4; stream++) {
+ unsigned num_components, stride;
+ LLVMValueRef ring, tmp;
+
+ num_components =
+ ctx->shader_info->info.gs.num_stream_output_components[stream];
+
+ if (!num_components)
+ continue;
+
+ stride = 4 * num_components * ctx->gs_max_out_vertices;
+
+ /* Limit on the stride field for <= CIK. */
+ assert(stride < (1 << 14));
+
+ ring = LLVMBuildBitCast(ctx->ac.builder,
+ base_ring, v2i64, "");
+ tmp = LLVMBuildExtractElement(ctx->ac.builder,
+ ring, ctx->ac.i32_0, "");
+ tmp = LLVMBuildAdd(ctx->ac.builder, tmp,
+ LLVMConstInt(ctx->ac.i64,
+ stream_offset, 0), "");
+ ring = LLVMBuildInsertElement(ctx->ac.builder,
+ ring, tmp, ctx->ac.i32_0, "");
+
+ stream_offset += stride * 64;
+
+ ring = LLVMBuildBitCast(ctx->ac.builder, ring,
+ ctx->ac.v4i32, "");
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, ring,
+ ctx->ac.i32_1, "");
+ tmp = LLVMBuildOr(ctx->ac.builder, tmp,
+ LLVMConstInt(ctx->ac.i32,
+ S_008F04_STRIDE(stride), false), "");
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp,
+ ctx->ac.i32_1, "");
+
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
+ LLVMConstInt(ctx->ac.i32,
+ num_records, false),
+ LLVMConstInt(ctx->ac.i32, 2, false), "");
+
+ ctx->gsvs_ring[stream] = ring;
+ }
+ }
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL ||
+ ctx->stage == MESA_SHADER_TESS_EVAL) {
+ ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
+ ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
+ }
+}
+
+static unsigned
+ac_nir_get_max_workgroup_size(enum chip_class chip_class,
+ const struct nir_shader *nir)
+{
+ switch (nir->info.stage) {
+ case MESA_SHADER_TESS_CTRL:
+ return chip_class >= CIK ? 128 : 64;
+ case MESA_SHADER_GEOMETRY:
+ return chip_class >= GFX9 ? 128 : 64;
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ return 0;
+ }
+
+ unsigned max_workgroup_size = nir->info.cs.local_size[0] *
+ nir->info.cs.local_size[1] *
+ nir->info.cs.local_size[2];
+ return max_workgroup_size;
+}
+
+/* Fix up for the HW not emitting the TCS input registers when there are no HS threads. */
+static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
+{
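+	/* merged_wave_info[8:15] holds the wave's HS thread count; when it is
+	 * zero, the LS input values arrive in the VGPRs that normally hold
+	 * the TCS system values. */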
+ LLVMValueRef count = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 8, 8);
+ LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
+ ctx->ac.i32_0, "");
+ ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
+ ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
+ ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
+}
+
+static void prepare_gs_input_vgprs(struct radv_shader_context *ctx)
+{
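+	/* The six ES vertex offsets arrive packed as 16-bit pairs in three
+	 * VGPRs; unpack them into separate 32-bit values. */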
+	for (int i = 5; i >= 0; --i) {
+ ctx->gs_vtx_offset[i] = ac_unpack_param(&ctx->ac, ctx->gs_vtx_offset[i & ~1],
+ (i & 1) * 16, 16);
+ }
+
+ ctx->gs_wave_id = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 16, 8);
+}
+
+static
+LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
+ struct nir_shader *const *shaders,
+ int shader_count,
+ struct radv_shader_variant_info *shader_info,
+ const struct radv_nir_compiler_options *options)
+{
+ struct radv_shader_context ctx = {0};
+ unsigned i;
+ ctx.options = options;
+ ctx.shader_info = shader_info;
+
+ ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+ ctx.context = ctx.ac.context;
+ ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
+
+ enum ac_float_mode float_mode =
+ options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
+ AC_FLOAT_MODE_DEFAULT;
+
+ ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
+
+ memset(shader_info, 0, sizeof(*shader_info));
+
+	for (int i = 0; i < shader_count; ++i)
+ radv_nir_shader_info_pass(shaders[i], options, &shader_info->info);
+
+ for (i = 0; i < RADV_UD_MAX_SETS; i++)
+ shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
+ for (i = 0; i < AC_UD_MAX_UD; i++)
+ shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
+
+ ctx.max_workgroup_size = 0;
+ for (int i = 0; i < shader_count; ++i) {
+ ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
+ ac_nir_get_max_workgroup_size(ctx.options->chip_class,
+ shaders[i]));
+ }
+
+ create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
+ shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
+
+ ctx.abi.inputs = &ctx.inputs[0];
+ ctx.abi.emit_outputs = handle_shader_outputs_post;
+ ctx.abi.emit_vertex = visit_emit_vertex;
+ ctx.abi.load_ubo = radv_load_ubo;
+ ctx.abi.load_ssbo = radv_load_ssbo;
+ ctx.abi.load_sampler_desc = radv_get_sampler_desc;
+ ctx.abi.load_resource = radv_load_resource;
+ ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
+
+ if (shader_count >= 2)
+ ac_init_exec_full_mask(&ctx.ac);
+
+ if (ctx.ac.chip_class == GFX9 &&
+ shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
+ ac_nir_fixup_ls_hs_input_vgprs(&ctx);
+
+	for (int i = 0; i < shader_count; ++i) {
+ ctx.stage = shaders[i]->info.stage;
+ ctx.output_mask = 0;
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+			for (int j = 0; j < 4; j++) {
+				ctx.gs_next_vertex[j] =
+ ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ }
+ ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
+ ctx.abi.load_inputs = load_gs_input;
+ ctx.abi.emit_primitive = visit_end_primitive;
+ } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+ ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
+ ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read;
+ ctx.abi.load_tess_varyings = load_tcs_varyings;
+ ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
+ ctx.abi.store_tcs_outputs = store_tcs_output;
+ ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
+ if (shader_count == 1)
+ ctx.tcs_num_inputs = ctx.options->key.tcs.num_inputs;
+ else
+ ctx.tcs_num_inputs = util_last_bit64(shader_info->info.vs.ls_outputs_written);
+ ctx.tcs_num_patches = get_tcs_num_patches(&ctx);
+ } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
+ ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
+ ctx.abi.load_tess_varyings = load_tes_input;
+ ctx.abi.load_tess_coord = load_tess_coord;
+ ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
+ ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out;
+ ctx.tcs_num_patches = ctx.options->key.tes.num_patches;
+ } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
+ if (shader_info->info.vs.needs_instance_id) {
+ if (ctx.options->key.vs.as_ls) {
+ ctx.shader_info->vs.vgpr_comp_cnt =
+ MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt);
+ } else {
+ ctx.shader_info->vs.vgpr_comp_cnt =
+ MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt);
+ }
+ }
+ ctx.abi.load_base_vertex = radv_load_base_vertex;
+ } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
+ shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
+ ctx.abi.lookup_interp_param = lookup_interp_param;
+ ctx.abi.load_sample_position = load_sample_position;
+ ctx.abi.load_sample_mask_in = load_sample_mask_in;
+ ctx.abi.emit_kill = radv_emit_kill;
+ }
+
+ if (i)
+ ac_emit_barrier(&ctx.ac, ctx.stage);
+
+ nir_foreach_variable(variable, &shaders[i]->outputs)
+ scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned addclip = shaders[i]->info.clip_distance_array_size +
+ shaders[i]->info.cull_distance_array_size > 4;
+ ctx.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
+ ctx.max_gsvs_emit_size = ctx.gsvs_vertex_size *
+ shaders[i]->info.gs.vertices_out;
+ }
+
+ ac_setup_rings(&ctx);
+
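+		/* For merged shaders (GFX9), wrap each stage's body in a
+		 * branch so that only the threads belonging to this stage
+		 * execute it. */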
+ LLVMBasicBlockRef merge_block;
+ if (shader_count >= 2) {
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+ LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+ merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+
+ LLVMValueRef count = ac_unpack_param(&ctx.ac, ctx.merged_wave_info, 8 * i, 8);
+ LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
+ LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
+ thread_id, count, "");
+ LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
+
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
+ }
+
+ if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT)
+ handle_fs_inputs(&ctx, shaders[i]);
+		else if (shaders[i]->info.stage == MESA_SHADER_VERTEX)
+ handle_vs_inputs(&ctx, shaders[i]);
+		else if (shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
+ prepare_gs_input_vgprs(&ctx);
+
+ ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]);
+
+ if (shader_count >= 2) {
+ LLVMBuildBr(ctx.ac.builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
+ }
+
+ if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+ shader_info->gs.gsvs_vertex_size = ctx.gsvs_vertex_size;
+ shader_info->gs.max_gsvs_emit_size = ctx.max_gsvs_emit_size;
+ } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+ shader_info->tcs.num_patches = ctx.tcs_num_patches;
+ shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx);
+ }
+ }
+
+ LLVMBuildRetVoid(ctx.ac.builder);
+
+ if (options->dump_preoptir)
+ ac_dump_module(ctx.ac.module);
+
+ ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
+
+ if (shader_count == 1)
+ ac_nir_eliminate_const_vs_outputs(&ctx);
+
+ if (options->dump_shader) {
+ ctx.shader_info->private_mem_vgprs =
+ ac_count_scratch_private_memory(ctx.main_function);
+ }
+
+ return ctx.ac.module;
+}
+
+static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+ unsigned *retval = (unsigned *)context;
+ LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+ char *description = LLVMGetDiagInfoDescription(di);
+
+ if (severity == LLVMDSError) {
+ *retval = 1;
+ fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
+ description);
+ }
+
+ LLVMDisposeMessage(description);
+}
+
+static unsigned ac_llvm_compile(LLVMModuleRef M,
+ struct ac_shader_binary *binary,
+ struct ac_llvm_compiler *ac_llvm)
+{
+ unsigned retval = 0;
+ LLVMContextRef llvm_ctx;
+
+	/* Set up the diagnostic handler. */
+ llvm_ctx = LLVMGetModuleContext(M);
+
+ LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
+ &retval);
+
+	/* Compile the IR. */
+ if (!radv_compile_to_binary(ac_llvm, M, binary))
+ retval = 1;
+ return retval;
+}
+
+static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
+ LLVMModuleRef llvm_module,
+ struct ac_shader_binary *binary,
+ struct ac_shader_config *config,
+ struct radv_shader_variant_info *shader_info,
+ gl_shader_stage stage,
+ const struct radv_nir_compiler_options *options)
+{
+ if (options->dump_shader)
+ ac_dump_module(llvm_module);
+
+ memset(binary, 0, sizeof(*binary));
+
+ if (options->record_llvm_ir) {
+ char *llvm_ir = LLVMPrintModuleToString(llvm_module);
+ binary->llvm_ir_string = strdup(llvm_ir);
+ LLVMDisposeMessage(llvm_ir);
+ }
+
+ int v = ac_llvm_compile(llvm_module, binary, ac_llvm);
+ if (v) {
+ fprintf(stderr, "compile failed\n");
+ }
+
+ if (options->dump_shader)
+ fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
+
+ ac_shader_binary_read_config(binary, config, 0, options->supports_spill);
+
+ LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
+ LLVMDisposeModule(llvm_module);
+ LLVMContextDispose(ctx);
+
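+	/* Reconstruct the number of PS input VGPRs from the SPI_PS_INPUT_ADDR
+	 * bits enabled by the compiled binary. */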
+ if (stage == MESA_SHADER_FRAGMENT) {
+ shader_info->num_input_vgprs = 0;
+ if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 2;
+ if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 3;
+ if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 2;
+ if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
+ shader_info->num_input_vgprs += 1;
+ }
+ config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
+
+ /* +3 for scratch wave offset and VCC */
+ config->num_sgprs = MAX2(config->num_sgprs,
+ shader_info->num_input_sgprs + 3);
+
+ /* Enable 64-bit and 16-bit denormals, because there is no performance
+ * cost.
+ *
+ * If denormals are enabled, all floating-point output modifiers are
+ * ignored.
+ *
+ * Don't enable denormals for 32-bit floats, because:
+ * - Floating-point output modifiers would be ignored by the hw.
+ * - Some opcodes don't support denormals, such as v_mad_f32. We would
+ * have to stop using those.
+ * - SI & CI would be very slow.
+ */
+ config->float_mode |= V_00B028_FP_64_DENORMS;
+}
+
+static void
+ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_shader *nir, const struct radv_nir_compiler_options *options)
+{
+ switch (nir->info.stage) {
+ case MESA_SHADER_COMPUTE:
+ for (int i = 0; i < 3; ++i)
+ shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
+ break;
+ case MESA_SHADER_FRAGMENT:
+ shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ shader_info->gs.vertices_in = nir->info.gs.vertices_in;
+ shader_info->gs.vertices_out = nir->info.gs.vertices_out;
+ shader_info->gs.output_prim = nir->info.gs.output_primitive;
+ shader_info->gs.invocations = nir->info.gs.invocations;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
+ shader_info->tes.spacing = nir->info.tess.spacing;
+ shader_info->tes.ccw = nir->info.tess.ccw;
+ shader_info->tes.point_mode = nir->info.tess.point_mode;
+ shader_info->tes.as_es = options->key.tes.as_es;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ break;
+ case MESA_SHADER_VERTEX:
+ shader_info->vs.as_es = options->key.vs.as_es;
+ shader_info->vs.as_ls = options->key.vs.as_ls;
+		/* In LS mode we need at least 1 VGPR component; the invocation ID needs 2 (handled elsewhere). */
+ if (options->key.vs.as_ls)
+ shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
+ break;
+ default:
+ break;
+ }
+}
+
+void
+radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
+ struct ac_shader_binary *binary,
+ struct ac_shader_config *config,
+ struct radv_shader_variant_info *shader_info,
+ struct nir_shader *const *nir,
+ int nir_count,
+ const struct radv_nir_compiler_options *options)
+{
+ LLVMModuleRef llvm_module;
+
+ llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info,
+ options);
+
+ ac_compile_llvm_module(ac_llvm, llvm_module, binary, config, shader_info,
+ nir[0]->info.stage, options);
+
+ for (int i = 0; i < nir_count; ++i)
+ ac_fill_shader_info(shader_info, nir[i], options);
+
+ /* Determine the ES type (VS or TES) for the GS on GFX9. */
+ if (options->chip_class == GFX9) {
+ if (nir_count == 2 &&
+ nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
+ shader_info->gs.es_type = nir[0]->info.stage;
+ }
+ }
+}
+
+static void
+ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
+{
+ LLVMValueRef vtx_offset =
+ LLVMBuildMul(ctx->ac.builder, ctx->abi.vertex_id,
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
+ LLVMValueRef stream_id;
+
+ /* Fetch the vertex stream ID. */
+ if (ctx->shader_info->info.so.num_outputs) {
+ stream_id =
+ ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2);
+ } else {
+ stream_id = ctx->ac.i32_0;
+ }
+
+ LLVMBasicBlockRef end_bb;
+ LLVMValueRef switch_inst;
+
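+	/* Build a switch on the stream ID; each case copies one stream's
+	 * outputs from the GSVS ring into the output variables. */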
+ end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context,
+ ctx->main_function, "end");
+ switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
+
+ for (unsigned stream = 0; stream < 4; stream++) {
+ unsigned num_components =
+ ctx->shader_info->info.gs.num_stream_output_components[stream];
+ LLVMBasicBlockRef bb;
+ unsigned offset;
+
+ if (!num_components)
+ continue;
+
+ if (stream > 0 && !ctx->shader_info->info.so.num_outputs)
+ continue;
+
+ bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out");
+ LLVMAddCase(switch_inst, LLVMConstInt(ctx->ac.i32, stream, 0), bb);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, bb);
+
+ offset = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask =
+ ctx->shader_info->info.gs.output_usage_mask[i];
+ unsigned output_stream =
+ ctx->shader_info->info.gs.output_streams[i];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) ||
+ output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++) {
+ LLVMValueRef value, soffset;
+
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ soffset = LLVMConstInt(ctx->ac.i32,
+ offset *
+ ctx->gs_max_out_vertices * 16 * 4, false);
+
+ offset++;
+
+ value = ac_build_buffer_load(&ctx->ac,
+ ctx->gsvs_ring[0],
+ 1, ctx->ac.i32_0,
+ vtx_offset, soffset,
+ 0, 1, 1, true, false);
+
+ LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ if (ac_get_type_size(type) == 2) {
+ value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+ value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, "");
+ }
+
+ LLVMBuildStore(ctx->ac.builder,
+ ac_to_float(&ctx->ac, value), ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ }
+ }
+
+ if (ctx->shader_info->info.so.num_outputs)
+ radv_emit_streamout(ctx, stream);
+
+ if (stream == 0) {
+ handle_vs_outputs_post(ctx, false, false,
+ &ctx->shader_info->vs.outinfo);
+ }
+
+ LLVMBuildBr(ctx->ac.builder, end_bb);
+ }
+
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
+}
+
+void
+radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
+ struct nir_shader *geom_shader,
+ struct ac_shader_binary *binary,
+ struct ac_shader_config *config,
+ struct radv_shader_variant_info *shader_info,
+ const struct radv_nir_compiler_options *options)
+{
+ struct radv_shader_context ctx = {0};
+ ctx.options = options;
+ ctx.shader_info = shader_info;
+
+ ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+ ctx.context = ctx.ac.context;
+ ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
+
+ ctx.is_gs_copy_shader = true;
+
+ enum ac_float_mode float_mode =
+ options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
+ AC_FLOAT_MODE_DEFAULT;
+
+ ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
+ ctx.stage = MESA_SHADER_VERTEX;
+
+ radv_nir_shader_info_pass(geom_shader, options, &shader_info->info);
+
+ create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
+
+ ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
+ ac_setup_rings(&ctx);
+
+ nir_foreach_variable(variable, &geom_shader->outputs) {
+ scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
+ ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader,
+ variable, MESA_SHADER_VERTEX);
+ }
+
+ ac_gs_copy_shader_emit(&ctx);
+
+ LLVMBuildRetVoid(ctx.ac.builder);
+
+ ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
+
+ ac_compile_llvm_module(ac_llvm, ctx.ac.module, binary, config, shader_info,
+ MESA_SHADER_VERTEX, options);
+}
diff --git a/lib/mesa/src/amd/vulkan/radv_pass.c b/lib/mesa/src/amd/vulkan/radv_pass.c
index a52dae39d..9cd1b31a0 100644
--- a/lib/mesa/src/amd/vulkan/radv_pass.c
+++ b/lib/mesa/src/amd/vulkan/radv_pass.c
@@ -38,7 +38,7 @@ VkResult radv_CreateRenderPass(
struct radv_render_pass *pass;
size_t size;
size_t attachments_offset;
- VkRenderPassMultiviewCreateInfoKHX *multiview_info = NULL;
+ VkRenderPassMultiviewCreateInfoKHR *multiview_info = NULL;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
@@ -50,7 +50,7 @@ VkResult radv_CreateRenderPass(
pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
memset(pass, 0, size);
pass->attachment_count = pCreateInfo->attachmentCount;
@@ -59,8 +59,8 @@ VkResult radv_CreateRenderPass(
vk_foreach_struct(ext, pCreateInfo->pNext) {
switch(ext->sType) {
- case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX:
- multiview_info = ( VkRenderPassMultiviewCreateInfoKHX*)ext;
+ case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR:
+ multiview_info = ( VkRenderPassMultiviewCreateInfoKHR*)ext;
break;
default:
break;
@@ -80,25 +80,25 @@ VkResult radv_CreateRenderPass(
// att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
}
uint32_t subpass_attachment_count = 0;
- VkAttachmentReference *p;
+ struct radv_subpass_attachment *p;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
subpass_attachment_count +=
desc->inputAttachmentCount +
desc->colorAttachmentCount +
- /* Count colorAttachmentCount again for resolve_attachments */
- desc->colorAttachmentCount;
+ (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+ (desc->pDepthStencilAttachment != NULL);
}
if (subpass_attachment_count) {
pass->subpass_attachments =
vk_alloc2(&device->alloc, pAllocator,
- subpass_attachment_count * sizeof(VkAttachmentReference), 8,
+ subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass->subpass_attachments == NULL) {
vk_free2(&device->alloc, pAllocator, pass);
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
} else
pass->subpass_attachments = NULL;
@@ -106,6 +106,7 @@ VkResult radv_CreateRenderPass(
p = pass->subpass_attachments;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
+ uint32_t color_sample_count = 1, depth_sample_count = 1;
struct radv_subpass *subpass = &pass->subpasses[i];
subpass->input_count = desc->inputAttachmentCount;
@@ -118,8 +119,10 @@ VkResult radv_CreateRenderPass(
p += desc->inputAttachmentCount;
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
- subpass->input_attachments[j]
- = desc->pInputAttachments[j];
+ subpass->input_attachments[j] = (struct radv_subpass_attachment) {
+ .attachment = desc->pInputAttachments[j].attachment,
+ .layout = desc->pInputAttachments[j].layout,
+ };
if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask;
}
@@ -130,10 +133,171 @@ VkResult radv_CreateRenderPass(
p += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
- subpass->color_attachments[j]
- = desc->pColorAttachments[j];
- if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
+ subpass->color_attachments[j] = (struct radv_subpass_attachment) {
+ .attachment = desc->pColorAttachments[j].attachment,
+ .layout = desc->pColorAttachments[j].layout,
+ };
+ if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) {
pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask;
+ color_sample_count = pCreateInfo->pAttachments[desc->pColorAttachments[j].attachment].samples;
+ }
+ }
+ }
+
+ subpass->has_resolve = false;
+ if (desc->pResolveAttachments) {
+ subpass->resolve_attachments = p;
+ p += desc->colorAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+ uint32_t a = desc->pResolveAttachments[j].attachment;
+ subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
+ .attachment = desc->pResolveAttachments[j].attachment,
+ .layout = desc->pResolveAttachments[j].layout,
+ };
+ if (a != VK_ATTACHMENT_UNUSED) {
+ subpass->has_resolve = true;
+ pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask;
+ }
+ }
+ }
+
+ if (desc->pDepthStencilAttachment) {
+ subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
+ .attachment = desc->pDepthStencilAttachment->attachment,
+ .layout = desc->pDepthStencilAttachment->layout,
+ };
+ if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
+ pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask;
+ depth_sample_count = pCreateInfo->pAttachments[desc->pDepthStencilAttachment->attachment].samples;
+ }
+ } else {
+ subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
+ }
+
+ subpass->max_sample_count = MAX2(color_sample_count,
+ depth_sample_count);
+ }
+
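+	/* Record subpass dependencies as barriers; a dependency whose
+	 * dstSubpass is VK_SUBPASS_EXTERNAL becomes the render pass end
+	 * barrier. */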
+ for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
+ uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass;
+ if (dst == VK_SUBPASS_EXTERNAL) {
+ pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask;
+ pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask;
+ pass->end_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask;
+ } else {
+ pass->subpasses[dst].start_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask;
+ pass->subpasses[dst].start_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask;
+ pass->subpasses[dst].start_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask;
+ }
+ }
+
+ *pRenderPass = radv_render_pass_to_handle(pass);
+
+ return VK_SUCCESS;
+}
+
+VkResult radv_CreateRenderPass2KHR(
+ VkDevice _device,
+ const VkRenderPassCreateInfo2KHR* pCreateInfo,
+ const VkAllocationCallbacks* pAllocator,
+ VkRenderPass* pRenderPass)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_render_pass *pass;
+ size_t size;
+ size_t attachments_offset;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);
+
+ size = sizeof(*pass);
+ size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
+ attachments_offset = size;
+ size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
+
+ pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pass == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ memset(pass, 0, size);
+ pass->attachment_count = pCreateInfo->attachmentCount;
+ pass->subpass_count = pCreateInfo->subpassCount;
+ pass->attachments = (void *) pass + attachments_offset;
+
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ struct radv_render_pass_attachment *att = &pass->attachments[i];
+
+ att->format = pCreateInfo->pAttachments[i].format;
+ att->samples = pCreateInfo->pAttachments[i].samples;
+ att->load_op = pCreateInfo->pAttachments[i].loadOp;
+ att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+ att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
+ att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
+ // att->store_op = pCreateInfo->pAttachments[i].storeOp;
+ // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+ }
+ uint32_t subpass_attachment_count = 0;
+ struct radv_subpass_attachment *p;
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
+
+ subpass_attachment_count +=
+ desc->inputAttachmentCount +
+ desc->colorAttachmentCount +
+ (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+ (desc->pDepthStencilAttachment != NULL);
+ }
+
+ if (subpass_attachment_count) {
+ pass->subpass_attachments =
+ vk_alloc2(&device->alloc, pAllocator,
+ subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pass->subpass_attachments == NULL) {
+ vk_free2(&device->alloc, pAllocator, pass);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ } else
+ pass->subpass_attachments = NULL;
+
+ p = pass->subpass_attachments;
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
+ uint32_t color_sample_count = 1, depth_sample_count = 1;
+ struct radv_subpass *subpass = &pass->subpasses[i];
+
+ subpass->input_count = desc->inputAttachmentCount;
+ subpass->color_count = desc->colorAttachmentCount;
+ subpass->view_mask = desc->viewMask;
+
+ if (desc->inputAttachmentCount > 0) {
+ subpass->input_attachments = p;
+ p += desc->inputAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
+ subpass->input_attachments[j] = (struct radv_subpass_attachment) {
+ .attachment = desc->pInputAttachments[j].attachment,
+ .layout = desc->pInputAttachments[j].layout,
+ };
+ if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
+ pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask;
+ }
+ }
+
+ if (desc->colorAttachmentCount > 0) {
+ subpass->color_attachments = p;
+ p += desc->colorAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+ subpass->color_attachments[j] = (struct radv_subpass_attachment) {
+ .attachment = desc->pColorAttachments[j].attachment,
+ .layout = desc->pColorAttachments[j].layout,
+ };
+ if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) {
+ pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask;
+ color_sample_count = pCreateInfo->pAttachments[desc->pColorAttachments[j].attachment].samples;
+ }
}
}
@@ -144,8 +308,10 @@ VkResult radv_CreateRenderPass(
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
uint32_t a = desc->pResolveAttachments[j].attachment;
- subpass->resolve_attachments[j]
- = desc->pResolveAttachments[j];
+ subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
+ .attachment = desc->pResolveAttachments[j].attachment,
+ .layout = desc->pResolveAttachments[j].layout,
+ };
if (a != VK_ATTACHMENT_UNUSED) {
subpass->has_resolve = true;
pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask;
@@ -154,13 +320,20 @@ VkResult radv_CreateRenderPass(
}
if (desc->pDepthStencilAttachment) {
- subpass->depth_stencil_attachment =
- *desc->pDepthStencilAttachment;
- if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED)
+ subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
+ .attachment = desc->pDepthStencilAttachment->attachment,
+ .layout = desc->pDepthStencilAttachment->layout,
+ };
+ if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask;
+ depth_sample_count = pCreateInfo->pAttachments[desc->pDepthStencilAttachment->attachment].samples;
+ }
} else {
subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
}
+
+ subpass->max_sample_count = MAX2(color_sample_count,
+ depth_sample_count);
}
for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
diff --git a/lib/mesa/src/amd/vulkan/radv_shader.c b/lib/mesa/src/amd/vulkan/radv_shader.c
index 83e2e675e..f98ca6b4e 100644
--- a/lib/mesa/src/amd/vulkan/radv_shader.c
+++ b/lib/mesa/src/amd/vulkan/radv_shader.c
@@ -30,12 +30,14 @@
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
+#include "radv_shader_helper.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "spirv/nir_spirv.h"
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Support.h>
#include "sid.h"
#include "gfx9d.h"
@@ -46,10 +48,14 @@
#include "util/debug.h"
#include "ac_exp_param.h"
+#include "util/string_buffer.h"
+
static const struct nir_shader_compiler_options nir_options = {
.vertex_id_zero_based = true,
.lower_scmp = true,
.lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_device_index_to_zero = true,
.lower_fsat = true,
.lower_fdiv = true,
.lower_sub = true,
@@ -64,6 +70,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_ffma = true,
+ .lower_fpow = true,
.max_unroll_iterations = 32
};
@@ -83,7 +90,7 @@ VkResult radv_CreateShaderModule(
sizeof(*module) + pCreateInfo->codeSize, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (module == NULL)
- return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
module->nir = NULL;
module->size = pCreateInfo->codeSize;
@@ -110,55 +117,33 @@ void radv_DestroyShaderModule(
vk_free2(&device->alloc, pAllocator, module);
}
-bool
-radv_lower_indirect_derefs(struct nir_shader *nir,
- struct radv_physical_device *device)
-{
- /* While it would be nice not to have this flag, we are constrained
- * by the reality that LLVM 5.0 doesn't have working VGPR indexing
- * on GFX9.
- */
- bool llvm_has_working_vgpr_indexing =
- device->rad_info.chip_class <= VI;
-
- /* TODO: Indirect indexing of GS inputs is unimplemented.
- *
- * TCS and TES load inputs directly from LDS or offchip memory, so
- * indirect indexing is trivial.
- */
- nir_variable_mode indirect_mask = 0;
- if (nir->info.stage == MESA_SHADER_GEOMETRY ||
- (nir->info.stage != MESA_SHADER_TESS_CTRL &&
- nir->info.stage != MESA_SHADER_TESS_EVAL &&
- !llvm_has_working_vgpr_indexing)) {
- indirect_mask |= nir_var_shader_in;
- }
- if (!llvm_has_working_vgpr_indexing &&
- nir->info.stage != MESA_SHADER_TESS_CTRL)
- indirect_mask |= nir_var_shader_out;
-
- /* TODO: We shouldn't need to do this, however LLVM isn't currently
- * smart enough to handle indirects without causing excess spilling
- * causing the gpu to hang.
- *
- * See the following thread for more details of the problem:
- * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
- */
- indirect_mask |= nir_var_local;
-
- return nir_lower_indirect_derefs(nir, indirect_mask);
-}
-
void
-radv_optimize_nir(struct nir_shader *shader)
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
+ bool allow_copies)
{
bool progress;
do {
progress = false;
+ NIR_PASS(progress, shader, nir_split_array_vars, nir_var_local);
+ NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_local);
+
NIR_PASS_V(shader, nir_lower_vars_to_ssa);
- NIR_PASS_V(shader, nir_lower_64bit_pack);
+ NIR_PASS_V(shader, nir_lower_pack);
+
+ if (allow_copies) {
+ /* Only run this pass in the first call to
+ * radv_optimize_nir. Later calls assume that we've
+ * lowered away any copy_deref instructions and we
+ * don't want to introduce any more.
+ */
+ NIR_PASS(progress, shader, nir_opt_find_array_copies);
+ }
+
+ NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
+ NIR_PASS(progress, shader, nir_opt_dead_write_vars);
+
NIR_PASS_V(shader, nir_lower_alu_to_scalar);
NIR_PASS_V(shader, nir_lower_phis_to_scalar);
@@ -182,7 +167,10 @@ radv_optimize_nir(struct nir_shader *shader)
if (shader->options->max_unroll_iterations) {
NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
}
- } while (progress);
+ } while (progress && !optimize_conservatively);
+
+ NIR_PASS(progress, shader, nir_opt_shrink_load);
+ NIR_PASS(progress, shader, nir_opt_move_load_ubo);
}
nir_shader *
@@ -190,12 +178,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
struct radv_shader_module *module,
const char *entrypoint_name,
gl_shader_stage stage,
- const VkSpecializationInfo *spec_info)
+ const VkSpecializationInfo *spec_info,
+ const VkPipelineCreateFlags flags)
{
- if (strcmp(entrypoint_name, "main") != 0) {
- radv_finishme("Multiple shaders per module not really supported");
- }
-
nir_shader *nir;
nir_function *entry_point;
if (module->nir) {
@@ -204,7 +189,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
* and just use the NIR shader */
nir = module->nir;
nir->options = &nir_options;
- nir_validate_shader(nir);
+ nir_validate_shader(nir, "in internal shader");
assert(exec_list_length(&nir->functions) == 1);
struct exec_node *node = exec_list_get_head(&nir->functions);
@@ -233,22 +218,42 @@ radv_shader_compile_to_nir(struct radv_device *device,
spec_entries[i].data32 = *(const uint32_t *)data;
}
}
- const struct nir_spirv_supported_extensions supported_ext = {
- .draw_parameters = true,
- .float64 = true,
- .image_read_without_format = true,
- .image_write_without_format = true,
- .tessellation = true,
- .int64 = true,
- .multiview = true,
- .variable_pointers = true,
+ const struct spirv_to_nir_options spirv_options = {
+ .caps = {
+ .device_group = true,
+ .draw_parameters = true,
+ .float64 = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .tessellation = true,
+ .int64 = true,
+ .int16 = true,
+ .multiview = true,
+ .subgroup_arithmetic = true,
+ .subgroup_ballot = true,
+ .subgroup_basic = true,
+ .subgroup_quad = true,
+ .subgroup_shuffle = true,
+ .subgroup_vote = true,
+ .variable_pointers = true,
+ .gcn_shader = true,
+ .trinary_minmax = true,
+ .shader_viewport_index_layer = true,
+ .descriptor_array_dynamic_indexing = true,
+ .runtime_descriptor_array = true,
+ .stencil_export = true,
+ .storage_16bit = true,
+ .geometry_streams = true,
+ .transform_feedback = true,
+ },
};
entry_point = spirv_to_nir(spirv, module->size / 4,
spec_entries, num_spec_entries,
- stage, entrypoint_name, &supported_ext, &nir_options);
+ stage, entrypoint_name,
+ &spirv_options, &nir_options);
nir = entry_point->shader;
assert(nir->info.stage == stage);
- nir_validate_shader(nir);
+ nir_validate_shader(nir, "after spirv_to_nir");
free(spec_entries);
@@ -259,6 +264,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
NIR_PASS_V(nir, nir_lower_returns);
NIR_PASS_V(nir, nir_inline_functions);
+ NIR_PASS_V(nir, nir_copy_prop);
/* Pick off the single entrypoint that we want */
foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
@@ -268,13 +274,25 @@ radv_shader_compile_to_nir(struct radv_device *device,
assert(exec_list_length(&nir->functions) == 1);
entry_point->name = ralloc_strdup(entry_point, "main");
- NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+ /* Make sure we lower constant initializers on output variables so that
+ * nir_remove_dead_variables below sees the corresponding stores
+ */
+ NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out);
/* Now that we've deleted all but the main function, we can go ahead and
* lower the rest of the constant initializers.
*/
NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+
+ /* Split member structs. We do this before lower_io_to_temporaries so that
+ * it doesn't lower system values to temporaries by accident.
+ */
+ NIR_PASS_V(nir, nir_split_var_copies);
+ NIR_PASS_V(nir, nir_split_per_member_structs);
+
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value);
+
NIR_PASS_V(nir, nir_lower_system_values);
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
}
@@ -291,11 +309,48 @@ radv_shader_compile_to_nir(struct radv_device *device,
nir_lower_tex(nir, &tex_options);
nir_lower_vars_to_ssa(nir);
- nir_lower_var_copies(nir);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_GEOMETRY) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, true);
+	} else if (nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, false);
+ }
+
+ nir_split_var_copies(nir);
+
nir_lower_global_vars_to_local(nir);
nir_remove_dead_variables(nir, nir_var_local);
- radv_lower_indirect_derefs(nir, device->physical_device);
- radv_optimize_nir(nir);
+ nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
+ .subgroup_size = 64,
+ .ballot_bit_size = 64,
+ .lower_to_scalar = 1,
+ .lower_subgroup_masks = 1,
+ .lower_shuffle = 1,
+ .lower_shuffle_to_32bit = 1,
+ .lower_vote_eq_to_ballot = 1,
+ });
+
+ nir_lower_load_const_to_scalar(nir);
+
+ if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+ radv_optimize_nir(nir, false, true);
+
+ /* We call nir_lower_var_copies() after the first radv_optimize_nir()
+ * to remove any copies introduced by nir_opt_find_array_copies().
+ */
+ nir_lower_var_copies(nir);
+
+ /* Indirect lowering must be called after the radv_optimize_nir() loop
+ * has been called at least once. Otherwise indirect lowering can
+ * bloat the instruction count of the loop and cause it to be
+ * considered too large for unrolling.
+ */
+ ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class);
+ radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false);
return nir;
}
@@ -331,7 +386,10 @@ radv_alloc_shader_memory(struct radv_device *device,
slab->size = 256 * 1024;
slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
- RADEON_DOMAIN_VRAM, 0);
+ RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ (device->physical_device->cpdma_prefetch_writes_memory ?
+ 0 : RADEON_FLAG_READ_ONLY));
slab->ptr = (char*)device->ws->buffer_map(slab->bo);
list_inithead(&slab->shaders);
@@ -355,6 +413,16 @@ radv_destroy_shader_slabs(struct radv_device *device)
mtx_destroy(&device->shader_slab_mutex);
}
+/* For the UMR disassembler. */
+#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
+#define DEBUGGER_NUM_MARKERS 5
+
+static unsigned
+radv_get_shader_binary_size(struct ac_shader_binary *binary)
+{
+ return binary->code_size + DEBUGGER_NUM_MARKERS * 4;
+}
+
static void
radv_fill_shader_variant(struct radv_device *device,
struct radv_shader_variant *variant,
@@ -362,16 +430,20 @@ radv_fill_shader_variant(struct radv_device *device,
gl_shader_stage stage)
{
bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
+ struct radv_shader_info *info = &variant->info.info;
unsigned vgpr_comp_cnt = 0;
- if (scratch_enabled && !device->llvm_supports_spill)
- radv_finishme("shader scratch support only available with LLVM 4.0");
-
- variant->code_size = binary->code_size;
+ variant->code_size = radv_get_shader_binary_size(binary);
variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled);
-
- variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
+ S_00B12C_USER_SGPR_MSB(variant->info.num_user_sgprs >> 5) |
+ S_00B12C_SCRATCH_EN(scratch_enabled) |
+ S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
+ S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+ S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
+ S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+ S_00B12C_SO_EN(!!info->so.num_outputs);
+
+ variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) |
S_00B848_DX10_CLAMP(1) |
S_00B848_FLOAT_MODE(variant->config.float_mode);
@@ -382,10 +454,11 @@ radv_fill_shader_variant(struct radv_device *device,
variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
break;
case MESA_SHADER_TESS_CTRL:
- if (device->physical_device->rad_info.chip_class >= GFX9)
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
- else
+ } else {
variant->rsrc2 |= S_00B12C_OC_LDS_EN(1);
+ }
break;
case MESA_SHADER_VERTEX:
case MESA_SHADER_GEOMETRY:
@@ -395,9 +468,12 @@ radv_fill_shader_variant(struct radv_device *device,
break;
case MESA_SHADER_COMPUTE:
variant->rsrc2 |=
- S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) |
- S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) |
- S_00B84C_TG_SIZE_EN(1) |
+ S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
+ S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+ S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
+ S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 :
+ info->cs.uses_thread_id[1] ? 1 : 0) |
+ S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
S_00B84C_LDS_SIZE(variant->config.lds_size);
break;
default:
@@ -407,18 +483,81 @@ radv_fill_shader_variant(struct radv_device *device,
if (device->physical_device->rad_info.chip_class >= GFX9 &&
stage == MESA_SHADER_GEOMETRY) {
- /* TODO: Figure out how many we actually need. */
- variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(3);
- variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(3) |
- S_00B22C_OC_LDS_EN(1);
+ unsigned es_type = variant->info.gs.es_type;
+ unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
+
+ if (es_type == MESA_SHADER_VERTEX) {
+ es_vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt;
+ } else if (es_type == MESA_SHADER_TESS_EVAL) {
+ es_vgpr_comp_cnt = 3;
+ } else {
+ unreachable("invalid shader ES type");
+ }
+
+ /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
+ * VGPR[0:4] are always loaded.
+ */
+ if (info->uses_invocation_id) {
+ gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+ } else if (info->uses_prim_id) {
+ gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+ } else if (variant->info.gs.vertices_in >= 3) {
+ gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+ } else {
+ gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ }
+
+ variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt);
+ variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+ S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
} else if (device->physical_device->rad_info.chip_class >= GFX9 &&
- stage == MESA_SHADER_TESS_CTRL)
+ stage == MESA_SHADER_TESS_CTRL) {
variant->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
- else
+ } else {
variant->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
+ }
void *ptr = radv_alloc_shader_memory(device, variant);
memcpy(ptr, binary->code, binary->code_size);
+
+ /* Add end-of-code markers for the UMR disassembler. */
+ uint32_t *ptr32 = (uint32_t *)ptr + binary->code_size / 4;
+ for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
+ ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
+}
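
A worked reading of the resource-register packing above in radv_fill_shader_variant(), with hypothetical register counts; the hardware allocates VGPRs in units of 4 and SGPRs in units of 8, which is why the fields encode (count - 1) / granularity:

/* num_vgprs = 24:  S_00B848_VGPRS((24 - 1) / 4) = S_00B848_VGPRS(5)
 * num_sgprs = 32:  S_00B848_SGPRS((32 - 1) / 8) = S_00B848_SGPRS(3)
 * i.e. allocations round up to the next multiple of 4 VGPRs / 8 SGPRs. */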
+
+static void radv_init_llvm_target()
+{
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetMC();
+ LLVMInitializeAMDGPUAsmPrinter();
+
+ /* For inline assembly. */
+ LLVMInitializeAMDGPUAsmParser();
+
+ /* Workaround for bug in llvm 4.0 that causes image intrinsics
+ * to disappear.
+ * https://reviews.llvm.org/D26348
+ *
+ * Workaround for bug in llvm that causes the GPU to hang in presence
+ * of nested loops because there is an exec mask issue. The proper
+ * solution is to fix LLVM but this might require a bunch of work.
+ * https://bugs.llvm.org/show_bug.cgi?id=37744
+ *
+ * "mesa" is the prefix for error messages.
+ */
+ const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false",
+ "-amdgpu-skip-threshold=1" };
+ LLVMParseCommandLineOptions(3, argv, NULL);
+}
+
+static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+
+static void radv_init_llvm_once(void)
+{
+ call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
}
static struct radv_shader_variant *
@@ -427,42 +566,55 @@ shader_variant_create(struct radv_device *device,
struct nir_shader * const *shaders,
int shader_count,
gl_shader_stage stage,
- struct ac_nir_compiler_options *options,
+ struct radv_nir_compiler_options *options,
bool gs_copy_shader,
void **code_out,
unsigned *code_size_out)
{
enum radeon_family chip_family = device->physical_device->rad_info.family;
- bool dump_shaders = device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS;
enum ac_target_machine_options tm_options = 0;
struct radv_shader_variant *variant;
struct ac_shader_binary binary;
- LLVMTargetMachineRef tm;
-
+ struct ac_llvm_compiler ac_llvm;
+ bool thread_compiler;
variant = calloc(1, sizeof(struct radv_shader_variant));
if (!variant)
return NULL;
options->family = chip_family;
options->chip_class = device->physical_device->rad_info.chip_class;
+ options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
+ options->dump_preoptir = options->dump_shader &&
+ device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
+ options->record_llvm_ir = device->keep_shader_info;
+ options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
+ options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
+ options->address32_hi = device->physical_device->rad_info.address32_hi;
if (options->supports_spill)
tm_options |= AC_TM_SUPPORTS_SPILL;
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
tm_options |= AC_TM_SISCHED;
- tm = ac_create_target_machine(chip_family, tm_options);
-
+ if (options->check_ir)
+ tm_options |= AC_TM_CHECK_IR;
+
+ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+ radv_init_llvm_once();
+ radv_init_llvm_compiler(&ac_llvm, false,
+ thread_compiler,
+ chip_family, tm_options);
if (gs_copy_shader) {
assert(shader_count == 1);
- ac_create_gs_copy_shader(tm, *shaders, &binary, &variant->config,
- &variant->info, options, dump_shaders);
+ radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
+ &variant->config, &variant->info,
+ options);
} else {
- ac_compile_nir_shader(tm, &binary, &variant->config,
- &variant->info, shaders, shader_count, options,
- dump_shaders);
+ radv_compile_nir_shader(&ac_llvm, &binary, &variant->config,
+ &variant->info, shaders, shader_count,
+ options);
}
- LLVMDisposeTargetMachine(tm);
+ radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
radv_fill_shader_variant(device, variant, &binary, stage);
@@ -477,8 +629,9 @@ shader_variant_create(struct radv_device *device,
free(binary.relocs);
variant->ref_count = 1;
- if (device->trace_bo) {
+ if (device->keep_shader_info) {
variant->disasm_string = binary.disasm_string;
+ variant->llvm_ir_string = binary.llvm_ir_string;
if (!gs_copy_shader && !module->nir) {
variant->nir = *shaders;
variant->spirv = (uint32_t *)module->data;
@@ -497,18 +650,18 @@ radv_shader_variant_create(struct radv_device *device,
struct nir_shader *const *shaders,
int shader_count,
struct radv_pipeline_layout *layout,
- const struct ac_shader_variant_key *key,
+ const struct radv_shader_variant_key *key,
void **code_out,
unsigned *code_size_out)
{
- struct ac_nir_compiler_options options = {0};
+ struct radv_nir_compiler_options options = {0};
options.layout = layout;
if (key)
options.key = *key;
options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
- options.supports_spill = device->llvm_supports_spill;
+ options.supports_spill = true;
return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
&options, false, code_out, code_size_out);
@@ -521,7 +674,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
unsigned *code_size_out,
bool multiview)
{
- struct ac_nir_compiler_options options = {0};
+ struct radv_nir_compiler_options options = {0};
options.key.has_multiview_view_index = multiview;
@@ -542,48 +695,10 @@ radv_shader_variant_destroy(struct radv_device *device,
ralloc_free(variant->nir);
free(variant->disasm_string);
+ free(variant->llvm_ir_string);
free(variant);
}
-uint32_t
-radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
- bool has_gs, bool has_tess)
-{
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- case MESA_SHADER_VERTEX:
- if (chip_class >= GFX9) {
- return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
- has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B130_SPI_SHADER_USER_DATA_VS_0;
- }
- if (has_tess)
- return R_00B530_SPI_SHADER_USER_DATA_LS_0;
- else
- return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case MESA_SHADER_GEOMETRY:
- return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case MESA_SHADER_COMPUTE:
- return R_00B900_COMPUTE_USER_DATA_0;
- case MESA_SHADER_TESS_CTRL:
- return chip_class >= GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
- R_00B430_SPI_SHADER_USER_DATA_HS_0;
- case MESA_SHADER_TESS_EVAL:
- if (chip_class >= GFX9) {
- return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B130_SPI_SHADER_USER_DATA_VS_0;
- }
- if (has_gs)
- return R_00B330_SPI_SHADER_USER_DATA_ES_0;
- else
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- default:
- unreachable("unknown shader");
- }
-}
-
const char *
radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
{
@@ -599,27 +714,18 @@ radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
};
}
-void
-radv_shader_dump_stats(struct radv_device *device,
- struct radv_shader_variant *variant,
- gl_shader_stage stage,
- FILE *file)
+static void
+generate_shader_stats(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ gl_shader_stage stage,
+ struct _mesa_string_buffer *buf)
{
unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
struct ac_shader_config *conf;
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
- switch (device->physical_device->rad_info.family) {
- /* These always have 8 waves: */
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- max_simd_waves = 8;
- break;
- default:
- max_simd_waves = 10;
- }
+ max_simd_waves = ac_get_max_simd_waves(device->physical_device->rad_info.family);
conf = &variant->config;
@@ -629,15 +735,15 @@ radv_shader_dump_stats(struct radv_device *device,
lds_increment);
}
- if (conf->num_sgprs) {
- if (device->physical_device->rad_info.chip_class >= VI)
- max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
- else
- max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
- }
+ if (conf->num_sgprs)
+ max_simd_waves =
+ MIN2(max_simd_waves,
+ radv_get_num_physical_sgprs(device->physical_device) / conf->num_sgprs);
if (conf->num_vgprs)
- max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+ max_simd_waves =
+ MIN2(max_simd_waves,
+ RADV_NUM_PHYSICAL_VGPRS / conf->num_vgprs);
/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
* that PS can use.
@@ -645,27 +751,140 @@ radv_shader_dump_stats(struct radv_device *device,
if (lds_per_wave)
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+ if (stage == MESA_SHADER_FRAGMENT) {
+ _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
+ "SPI_PS_INPUT_ADDR = 0x%04x\n"
+ "SPI_PS_INPUT_ENA = 0x%04x\n",
+ conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+ }
+
+ _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
+ "SGPRS: %d\n"
+ "VGPRS: %d\n"
+ "Spilled SGPRs: %d\n"
+ "Spilled VGPRs: %d\n"
+ "PrivMem VGPRS: %d\n"
+ "Code Size: %d bytes\n"
+ "LDS: %d blocks\n"
+ "Scratch: %d bytes per wave\n"
+ "Max Waves: %d\n"
+ "********************\n\n\n",
+ conf->num_sgprs, conf->num_vgprs,
+ conf->spilled_sgprs, conf->spilled_vgprs,
+ variant->info.private_mem_vgprs, variant->code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave,
+ max_simd_waves);
+}
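
A worked example (hypothetical counts) of how the three limits above combine; the SGPR budget comes from radv_get_num_physical_sgprs() (800 on VI and later, 512 before) and the VGPR budget from RADV_NUM_PHYSICAL_VGPRS (256):

/* GFX8, non-Polaris part, so max_simd_waves starts at 10:
 *   num_sgprs    = 40   -> 800 / 40     = 20 waves (not limiting)
 *   num_vgprs    = 64   -> 256 / 64     =  4 waves (limiting)
 *   lds_per_wave = 4096 -> 16384 / 4096 =  4 waves
 * max_simd_waves = MIN2(MIN2(MIN2(10, 20), 4), 4) = 4 */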
+
+void
+radv_shader_dump_stats(struct radv_device *device,
+ struct radv_shader_variant *variant,
+ gl_shader_stage stage,
+ FILE *file)
+{
+ struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256);
+
+ generate_shader_stats(device, variant, stage, buf);
+
fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
+ fprintf(file, "%s", buf->buf);
- if (stage == MESA_SHADER_FRAGMENT) {
- fprintf(file, "*** SHADER CONFIG ***\n"
- "SPI_PS_INPUT_ADDR = 0x%04x\n"
- "SPI_PS_INPUT_ENA = 0x%04x\n",
- conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+ _mesa_string_buffer_destroy(buf);
+}
+
+VkResult
+radv_GetShaderInfoAMD(VkDevice _device,
+ VkPipeline _pipeline,
+ VkShaderStageFlagBits shaderStage,
+ VkShaderInfoTypeAMD infoType,
+ size_t* pInfoSize,
+ void* pInfo)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
+ struct radv_shader_variant *variant = pipeline->shaders[stage];
+ struct _mesa_string_buffer *buf;
+ VkResult result = VK_SUCCESS;
+
+ /* Spec doesn't indicate what to do if the stage is invalid, so just
+ * return no info for this. */
+ if (!variant)
+ return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
+
+ switch (infoType) {
+ case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
+ if (!pInfo) {
+ *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
+ } else {
+ unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+ struct ac_shader_config *conf = &variant->config;
+
+ VkShaderStatisticsInfoAMD statistics = {};
+ statistics.shaderStageMask = shaderStage;
+ statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
+ statistics.numPhysicalSgprs = radv_get_num_physical_sgprs(device->physical_device);
+ statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
+
+ if (stage == MESA_SHADER_COMPUTE) {
+ unsigned *local_size = variant->nir->info.cs.local_size;
+ unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
+
+ statistics.numAvailableVgprs = statistics.numPhysicalVgprs /
+ ceil((double)workgroup_size / statistics.numPhysicalVgprs);
+
+ statistics.computeWorkGroupSize[0] = local_size[0];
+ statistics.computeWorkGroupSize[1] = local_size[1];
+ statistics.computeWorkGroupSize[2] = local_size[2];
+ } else {
+ statistics.numAvailableVgprs = statistics.numPhysicalVgprs;
+ }
+
+ statistics.resourceUsage.numUsedVgprs = conf->num_vgprs;
+ statistics.resourceUsage.numUsedSgprs = conf->num_sgprs;
+ statistics.resourceUsage.ldsSizePerLocalWorkGroup = 32768;
+ statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size * lds_multiplier;
+ statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
+
+ size_t size = *pInfoSize;
+ *pInfoSize = sizeof(statistics);
+
+ memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
+
+ if (size < *pInfoSize)
+ result = VK_INCOMPLETE;
+ }
+
+ break;
+ case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
+ buf = _mesa_string_buffer_create(NULL, 1024);
+
+ _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage));
+ _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
+ generate_shader_stats(device, variant, stage, buf);
+
+ /* Need to include the null terminator. */
+ size_t length = buf->length + 1;
+
+ if (!pInfo) {
+ *pInfoSize = length;
+ } else {
+ size_t size = *pInfoSize;
+ *pInfoSize = length;
+
+ memcpy(pInfo, buf->buf, MIN2(size, length));
+
+ if (size < length)
+ result = VK_INCOMPLETE;
+ }
+
+ _mesa_string_buffer_destroy(buf);
+ break;
+ default:
+ /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
+ result = VK_ERROR_FEATURE_NOT_PRESENT;
+ break;
}
- fprintf(file, "*** SHADER STATS ***\n"
- "SGPRS: %d\n"
- "VGPRS: %d\n"
- "Spilled SGPRs: %d\n"
- "Spilled VGPRs: %d\n"
- "Code Size: %d bytes\n"
- "LDS: %d blocks\n"
- "Scratch: %d bytes per wave\n"
- "Max Waves: %d\n"
- "********************\n\n\n",
- conf->num_sgprs, conf->num_vgprs,
- conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
- conf->lds_size, conf->scratch_bytes_per_wave,
- max_simd_waves);
+ return result;
}
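
A hedged usage sketch of the two-call idiom the function above implements: a NULL pInfo queries the size, a second call fills the buffer, and VK_INCOMPLETE flags a short buffer. vkGetShaderInfoAMD is an extension entry point, so a real application would fetch it through vkGetDeviceProcAddr; the direct call and the helper name below are illustrative only.

#include <stdlib.h>
#include <vulkan/vulkan.h>

/* Assumes `device` and `pipeline` exist and VK_AMD_shader_info is enabled. */
static char *get_fs_disassembly(VkDevice device, VkPipeline pipeline)
{
	size_t size = 0;

	/* First call with pInfo == NULL: query the required size. */
	vkGetShaderInfoAMD(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
	                   VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL);

	char *disasm = malloc(size);
	if (!disasm)
		return NULL;

	/* Second call fills the buffer; the string is NUL-terminated because
	 * the reported size includes the terminator. */
	if (vkGetShaderInfoAMD(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
	                       VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD,
	                       &size, disasm) != VK_SUCCESS) {
		free(disasm);
		return NULL;
	}
	return disasm;
}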
diff --git a/lib/mesa/src/amd/vulkan/radv_shader.h b/lib/mesa/src/amd/vulkan/radv_shader.h
index 6e4e9966c..a1d38b3ce 100644
--- a/lib/mesa/src/amd/vulkan/radv_shader.h
+++ b/lib/mesa/src/amd/vulkan/radv_shader.h
@@ -28,10 +28,26 @@
#ifndef RADV_SHADER_H
#define RADV_SHADER_H
+#include "radv_debug.h"
#include "radv_private.h"
#include "nir/nir.h"
+/* descriptor index into scratch ring offsets */
+#define RING_SCRATCH 0
+#define RING_ESGS_VS 1
+#define RING_ESGS_GS 2
+#define RING_GSVS_VS 3
+#define RING_GSVS_GS 4
+#define RING_HS_TESS_FACTOR 5
+#define RING_HS_TESS_OFFCHIP 6
+#define RING_PS_SAMPLE_POSITIONS 7
+
+// Match MAX_SETS from radv_descriptor_set.h
+#define RADV_UD_MAX_SETS MAX_SETS
+
+#define RADV_NUM_PHYSICAL_VGPRS 256
+
struct radv_shader_module {
struct nir_shader *nir;
unsigned char sha1[20];
@@ -39,6 +55,241 @@ struct radv_shader_module {
char data[0];
};
+enum {
+ RADV_ALPHA_ADJUST_NONE = 0,
+ RADV_ALPHA_ADJUST_SNORM = 1,
+ RADV_ALPHA_ADJUST_SINT = 2,
+ RADV_ALPHA_ADJUST_SSCALED = 3,
+};
+
+struct radv_vs_variant_key {
+ uint32_t instance_rate_inputs;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+
+	/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+	 * so we may need to fix it up. */
+ uint64_t alpha_adjust;
+
+ uint32_t as_es:1;
+ uint32_t as_ls:1;
+ uint32_t export_prim_id:1;
+ uint32_t export_layer_id:1;
+};
+
+struct radv_tes_variant_key {
+ uint32_t as_es:1;
+ uint32_t export_prim_id:1;
+ uint32_t export_layer_id:1;
+ uint8_t num_patches;
+ uint8_t tcs_num_outputs;
+};
+
+struct radv_tcs_variant_key {
+ struct radv_vs_variant_key vs_key;
+ unsigned primitive_mode;
+ unsigned input_vertices;
+ unsigned num_inputs;
+ uint32_t tes_reads_tess_factors:1;
+};
+
+struct radv_fs_variant_key {
+ uint32_t col_format;
+ uint8_t log2_ps_iter_samples;
+ uint8_t num_samples;
+ uint32_t is_int8;
+ uint32_t is_int10;
+};
+
+struct radv_shader_variant_key {
+ union {
+ struct radv_vs_variant_key vs;
+ struct radv_fs_variant_key fs;
+ struct radv_tes_variant_key tes;
+ struct radv_tcs_variant_key tcs;
+ };
+ bool has_multiview_view_index;
+};
+
+struct radv_nir_compiler_options {
+ struct radv_pipeline_layout *layout;
+ struct radv_shader_variant_key key;
+ bool unsafe_math;
+ bool supports_spill;
+ bool clamp_shadow_reference;
+ bool dump_shader;
+ bool dump_preoptir;
+ bool record_llvm_ir;
+ bool check_ir;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ uint32_t tess_offchip_block_dw_size;
+ uint32_t address32_hi;
+};
+
+enum radv_ud_index {
+ AC_UD_SCRATCH_RING_OFFSETS = 0,
+ AC_UD_PUSH_CONSTANTS = 1,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
+ AC_UD_VIEW_INDEX = 3,
+ AC_UD_STREAMOUT_BUFFERS = 4,
+ AC_UD_SHADER_START = 5,
+ AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+ AC_UD_VS_MAX_UD,
+ AC_UD_PS_MAX_UD,
+ AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
+ AC_UD_CS_MAX_UD,
+ AC_UD_GS_MAX_UD,
+ AC_UD_TCS_MAX_UD,
+ AC_UD_TES_MAX_UD,
+ AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
+};
+
+struct radv_stream_output {
+ uint8_t location;
+ uint8_t buffer;
+ uint16_t offset;
+ uint8_t component_mask;
+ uint8_t stream;
+};
+
+struct radv_streamout_info {
+ uint16_t num_outputs;
+ struct radv_stream_output outputs[MAX_SO_OUTPUTS];
+ uint16_t strides[MAX_SO_BUFFERS];
+ uint32_t enabled_stream_buffers_mask;
+};
+
+struct radv_shader_info {
+ bool loads_push_constants;
+ uint32_t desc_set_used_mask;
+ bool needs_multiview_view_index;
+ bool uses_invocation_id;
+ bool uses_prim_id;
+ struct {
+ uint64_t ls_outputs_written;
+ uint8_t input_usage_mask[VERT_ATTRIB_MAX];
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ bool has_vertex_buffers; /* needs vertex buffers and base/start */
+ bool needs_draw_id;
+ bool needs_instance_id;
+ } vs;
+ struct {
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ uint8_t num_stream_output_components[4];
+ uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
+ uint8_t max_stream;
+ } gs;
+ struct {
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ } tes;
+ struct {
+ bool force_persample;
+ bool needs_sample_positions;
+ bool uses_input_attachments;
+ bool writes_memory;
+ bool writes_z;
+ bool writes_stencil;
+ bool writes_sample_mask;
+ bool has_pcoord;
+ bool prim_id_input;
+ bool layer_input;
+ uint8_t num_input_clips_culls;
+ } ps;
+ struct {
+ bool uses_grid_size;
+ bool uses_block_id[3];
+ bool uses_thread_id[3];
+ bool uses_local_invocation_idx;
+ } cs;
+ struct {
+ uint64_t outputs_written;
+ uint64_t patch_outputs_written;
+ } tcs;
+
+ struct radv_streamout_info so;
+};
+
+struct radv_userdata_info {
+ int8_t sgpr_idx;
+ uint8_t num_sgprs;
+ bool indirect;
+};
+
+struct radv_userdata_locations {
+ struct radv_userdata_info descriptor_sets[RADV_UD_MAX_SETS];
+ struct radv_userdata_info shader_data[AC_UD_MAX_UD];
+ uint32_t descriptor_sets_enabled;
+};
+
+struct radv_vs_output_info {
+ uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
+ uint8_t clip_dist_mask;
+ uint8_t cull_dist_mask;
+ uint8_t param_exports;
+ bool writes_pointsize;
+ bool writes_layer;
+ bool writes_viewport_index;
+ bool export_prim_id;
+ unsigned pos_exports;
+};
+
+struct radv_es_output_info {
+ uint32_t esgs_itemsize;
+};
+
+struct radv_shader_variant_info {
+ struct radv_userdata_locations user_sgprs_locs;
+ struct radv_shader_info info;
+ unsigned num_user_sgprs;
+ unsigned num_input_sgprs;
+ unsigned num_input_vgprs;
+ unsigned private_mem_vgprs;
+ bool need_indirect_descriptor_sets;
+ struct {
+ struct {
+ struct radv_vs_output_info outinfo;
+ struct radv_es_output_info es_info;
+ unsigned vgpr_comp_cnt;
+ bool as_es;
+ bool as_ls;
+ } vs;
+ struct {
+ unsigned num_interp;
+ uint32_t input_mask;
+ uint32_t flat_shaded_mask;
+ bool can_discard;
+ bool early_fragment_test;
+ } fs;
+ struct {
+ unsigned block_size[3];
+ } cs;
+ struct {
+ unsigned vertices_in;
+ unsigned vertices_out;
+ unsigned output_prim;
+ unsigned invocations;
+ unsigned gsvs_vertex_size;
+ unsigned max_gsvs_emit_size;
+ unsigned es_type; /* GFX9: VS or TES */
+ } gs;
+ struct {
+ unsigned tcs_vertices_out;
+ uint32_t num_patches;
+ uint32_t lds_size;
+ } tcs;
+ struct {
+ struct radv_vs_output_info outinfo;
+ struct radv_es_output_info es_info;
+ bool as_es;
+ unsigned primitive_mode;
+ enum gl_tess_spacing spacing;
+ bool ccw;
+ bool point_mode;
+ } tes;
+ };
+};
+
struct radv_shader_variant {
uint32_t ref_count;
@@ -46,7 +297,7 @@ struct radv_shader_variant {
uint64_t bo_offset;
struct ac_shader_config config;
uint32_t code_size;
- struct ac_shader_variant_info info;
+ struct radv_shader_variant_info info;
unsigned rsrc1;
unsigned rsrc2;
@@ -55,6 +306,7 @@ struct radv_shader_variant {
uint32_t spirv_size;
struct nir_shader *nir;
char *disasm_string;
+ char *llvm_ir_string;
struct list_head slab_list;
};
@@ -68,14 +320,16 @@ struct radv_shader_slab {
};
void
-radv_optimize_nir(struct nir_shader *shader);
+radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
+ bool allow_copies);
nir_shader *
radv_shader_compile_to_nir(struct radv_device *device,
struct radv_shader_module *module,
const char *entrypoint_name,
gl_shader_stage stage,
- const VkSpecializationInfo *spec_info);
+ const VkSpecializationInfo *spec_info,
+ const VkPipelineCreateFlags flags);
void *
radv_alloc_shader_memory(struct radv_device *device,
@@ -90,7 +344,7 @@ radv_shader_variant_create(struct radv_device *device,
struct nir_shader *const *shaders,
int shader_count,
struct radv_pipeline_layout *layout,
- const struct ac_shader_variant_key *key,
+ const struct radv_shader_variant_key *key,
void **code_out,
unsigned *code_size_out);
@@ -103,14 +357,6 @@ void
radv_shader_variant_destroy(struct radv_device *device,
struct radv_shader_variant *variant);
-bool
-radv_lower_indirect_derefs(struct nir_shader *nir,
- struct radv_physical_device *device);
-
-uint32_t
-radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
- bool has_gs, bool has_tess);
-
const char *
radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage);
@@ -120,4 +366,52 @@ radv_shader_dump_stats(struct radv_device *device,
gl_shader_stage stage,
FILE *file);
+static inline bool
+radv_can_dump_shader(struct radv_device *device,
+ struct radv_shader_module *module,
+ bool is_gs_copy_shader)
+{
+ if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
+ return false;
+
+ /* Only dump non-meta shaders, useful for debugging purposes. */
+ return (module && !module->nir) || is_gs_copy_shader;
+}
+
+static inline bool
+radv_can_dump_shader_stats(struct radv_device *device,
+ struct radv_shader_module *module)
+{
+ /* Only dump non-meta shader stats. */
+ return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS &&
+ module && !module->nir;
+}
+
+static inline unsigned shader_io_get_unique_index(gl_varying_slot slot)
+{
+	/* handle patch indices separately */
+ if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
+ return 0;
+ if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
+ return 1;
+ if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
+ return 2 + (slot - VARYING_SLOT_PATCH0);
+ if (slot == VARYING_SLOT_POS)
+ return 0;
+ if (slot == VARYING_SLOT_PSIZ)
+ return 1;
+ if (slot == VARYING_SLOT_CLIP_DIST0)
+ return 2;
+ /* 3 is reserved for clip dist as well */
+ if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
+ return 4 + (slot - VARYING_SLOT_VAR0);
+ unreachable("illegal slot in get unique index\n");
+}
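
Worked values of the mapping above, read straight off the branches:

/* Per-vertex outputs:
 *   VARYING_SLOT_POS        -> 0
 *   VARYING_SLOT_PSIZ       -> 1
 *   VARYING_SLOT_CLIP_DIST0 -> 2  (3 stays reserved for the second
 *                                  clip/cull distance slot)
 *   VARYING_SLOT_VAR0..31   -> 4..35
 * Patch outputs live in their own small namespace:
 *   TESS_LEVEL_OUTER -> 0, TESS_LEVEL_INNER -> 1, PATCH0 + n -> 2 + n */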
+
+static inline uint32_t
+radv_get_num_physical_sgprs(struct radv_physical_device *physical_device)
+{
+ return physical_device->rad_info.chip_class >= VI ? 800 : 512;
+}
+
#endif
diff --git a/lib/mesa/src/amd/vulkan/radv_shader_helper.h b/lib/mesa/src/amd/vulkan/radv_shader_helper.h
new file mode 100644
index 000000000..3c81f5be5
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_shader_helper.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2018 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#ifndef RADV_SHADER_HELPER_H
+#define RADV_SHADER_HELPER_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
+ bool okay_to_leak_target_library_info,
+ bool thread_compiler,
+ enum radeon_family family,
+ enum ac_target_machine_options tm_options);
+void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
+ bool thread_compiler);
+
+bool radv_compile_to_binary(struct ac_llvm_compiler *info,
+ LLVMModuleRef module,
+ struct ac_shader_binary *binary);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/lib/mesa/src/amd/vulkan/radv_shader_info.c b/lib/mesa/src/amd/vulkan/radv_shader_info.c
new file mode 100644
index 000000000..f7888ec6a
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_shader_info.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright © 2017 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "nir/nir.h"
+#include "nir/nir_deref.h"
+#include "nir/nir_xfb_info.h"
+
+static void mark_sampler_desc(const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ info->desc_set_used_mask |= (1 << var->data.descriptor_set);
+}
+
+static void mark_ls_output(struct radv_shader_info *info,
+ uint32_t param, int num_slots)
+{
+ uint64_t mask = (1ull << num_slots) - 1ull;
+ info->vs.ls_outputs_written |= (mask << param);
+}
+
+static void mark_tess_output(struct radv_shader_info *info,
+ bool is_patch, uint32_t param, int num_slots)
+{
+ uint64_t mask = (1ull << num_slots) - 1ull;
+ if (is_patch)
+ info->tcs.patch_outputs_written |= (mask << param);
+ else
+ info->tcs.outputs_written |= (mask << param);
+}
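
A worked instance of the bit-range arithmetic shared by mark_ls_output() and mark_tess_output():

/* num_slots = 2, param = 5:
 *   mask          = (1ull << 2) - 1 = 0b11
 *   mask << param = 0b11 << 5       = 0x60
 * so bits 5 and 6 of the destination bitfield get set. */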
+
+static void
+get_deref_offset(nir_deref_instr *instr,
+ unsigned *const_out)
+{
+ nir_variable *var = nir_deref_instr_get_variable(instr);
+ nir_deref_path path;
+ unsigned idx_lvl = 1;
+
+ if (var->data.compact) {
+ assert(instr->deref_type == nir_deref_type_array);
+ nir_const_value *v = nir_src_as_const_value(instr->arr.index);
+ assert(v);
+ *const_out = v->u32[0];
+ return;
+ }
+
+ nir_deref_path_init(&path, instr, NULL);
+
+ uint32_t const_offset = 0;
+
+ for (; path.path[idx_lvl]; ++idx_lvl) {
+ const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
+ if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
+ unsigned index = path.path[idx_lvl]->strct.index;
+
+ for (unsigned i = 0; i < index; i++) {
+ const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
+ const_offset += glsl_count_attribute_slots(ft, false);
+ }
+		} else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
+ unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, false);
+ nir_const_value *v = nir_src_as_const_value(path.path[idx_lvl]->arr.index);
+ if (v)
+ const_offset += v->u32[0] * size;
+ } else
+ unreachable("Uhandled deref type in get_deref_instr_offset");
+ }
+
+ *const_out = const_offset;
+
+ nir_deref_path_finish(&path);
+}
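
A hypothetical walk through the loop above for a simple constant array deref, assuming each element occupies one attribute slot:

/* For NIR equivalent to:  vec4 a[3];  ... = a[2];
 * the array level contributes v->u32[0] * size = 2 * 1, so *const_out = 2.
 * A non-constant index (v == NULL) contributes nothing, leaving only the
 * constant part of the offset. */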
+
+static void
+gather_intrinsic_load_deref_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
+{
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX: {
+ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+
+ if (var->data.mode == nir_var_shader_in) {
+ unsigned idx = var->data.location;
+ uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
+
+ info->vs.input_usage_mask[idx] |=
+ mask << var->data.location_frac;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static void
+set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ uint8_t *output_usage_mask)
+{
+ nir_deref_instr *deref_instr =
+ nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref_instr);
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ unsigned idx = var->data.location;
+ unsigned comp = var->data.location_frac;
+ unsigned const_offset = 0;
+
+ get_deref_offset(deref_instr, &const_offset);
+
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+	/* Special case for clip/cull distances because they are
+	 * combined into a single array that contains both.
+ */
+ output_usage_mask[idx] |= 1 << const_offset;
+ return;
+ }
+
+ for (unsigned i = 0; i < attrib_count; i++) {
+ output_usage_mask[idx + i + const_offset] |=
+ instr->const_index[0] << comp;
+ }
+}
+
+static void
+gather_intrinsic_store_deref_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
+{
+ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+
+ if (var->data.mode == nir_var_shader_out) {
+ unsigned idx = var->data.location;
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ set_output_usage_mask(nir, instr,
+ info->vs.output_usage_mask);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ set_output_usage_mask(nir, instr,
+ info->gs.output_usage_mask);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ set_output_usage_mask(nir, instr,
+ info->tes.output_usage_mask);
+ break;
+ case MESA_SHADER_TESS_CTRL: {
+ unsigned param = shader_io_get_unique_index(idx);
+ const struct glsl_type *type = var->type;
+
+ if (!var->data.patch)
+ type = glsl_get_array_element(var->type);
+
+ unsigned slots =
+ var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
+ : glsl_count_attribute_slots(type, false);
+
+ if (idx == VARYING_SLOT_CLIP_DIST0)
+ slots = (nir->info.clip_distance_array_size +
+ nir->info.cull_distance_array_size > 4) ? 2 : 1;
+
+ mark_tess_output(info, var->data.patch, param, slots);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+static void
+gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_interp_deref_at_sample:
+ info->ps.needs_sample_positions = true;
+ break;
+ case nir_intrinsic_load_draw_id:
+ info->vs.needs_draw_id = true;
+ break;
+ case nir_intrinsic_load_instance_id:
+ info->vs.needs_instance_id = true;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ info->cs.uses_grid_size = true;
+ break;
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id: {
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ if (instr->intrinsic == nir_intrinsic_load_work_group_id)
+ info->cs.uses_block_id[i] = true;
+ else
+ info->cs.uses_thread_id[i] = true;
+ }
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_index:
+ case nir_intrinsic_load_subgroup_id:
+ case nir_intrinsic_load_num_subgroups:
+ info->cs.uses_local_invocation_idx = true;
+ break;
+ case nir_intrinsic_load_sample_id:
+ info->ps.force_persample = true;
+ break;
+ case nir_intrinsic_load_sample_pos:
+ info->ps.force_persample = true;
+ break;
+ case nir_intrinsic_load_view_index:
+ info->needs_multiview_view_index = true;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.layer_input = true;
+ break;
+ case nir_intrinsic_load_invocation_id:
+ info->uses_invocation_id = true;
+ break;
+ case nir_intrinsic_load_primitive_id:
+ info->uses_prim_id = true;
+ break;
+ case nir_intrinsic_load_push_constant:
+ info->loads_push_constants = true;
+ break;
+ case nir_intrinsic_vulkan_resource_index:
+ info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
+ break;
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_min:
+ case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size: {
+ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+ const struct glsl_type *type = glsl_without_array(var->type);
+
+ enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
+ if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
+ dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
+ info->ps.layer_input = true;
+ info->ps.uses_input_attachments = true;
+ }
+ mark_sampler_desc(var, info);
+
+		if (instr->intrinsic == nir_intrinsic_image_deref_store ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_min ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_max ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
+		    instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.writes_memory = true;
+ }
+ break;
+ }
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.writes_memory = true;
+ break;
+ case nir_intrinsic_load_deref:
+ gather_intrinsic_load_deref_info(nir, instr, info);
+ break;
+ case nir_intrinsic_store_deref:
+ gather_intrinsic_store_deref_info(nir, instr, info);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr,
+ struct radv_shader_info *info)
+{
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_texture_deref:
+ mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
+ break;
+ case nir_tex_src_sampler_deref:
+ mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+gather_info_block(const nir_shader *nir, const nir_block *block,
+ struct radv_shader_info *info)
+{
+ nir_foreach_instr(instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic:
+ gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
+ break;
+ case nir_instr_type_tex:
+ gather_tex_info(nir, nir_instr_as_tex(instr), info);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ int idx = var->data.location;
+
+ if (idx >= VERT_ATTRIB_GENERIC0 && idx <= VERT_ATTRIB_GENERIC15)
+ info->vs.has_vertex_buffers = true;
+}
+
+static void
+gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ const struct glsl_type *type = glsl_without_array(var->type);
+ int idx = var->data.location;
+
+ switch (idx) {
+ case VARYING_SLOT_PNTC:
+ info->ps.has_pcoord = true;
+ break;
+ case VARYING_SLOT_PRIMITIVE_ID:
+ info->ps.prim_id_input = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ info->ps.layer_input = true;
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ info->ps.num_input_clips_culls = attrib_count;
+ break;
+ default:
+ break;
+ }
+
+ if (glsl_get_base_type(type) == GLSL_TYPE_FLOAT) {
+ if (var->data.sample)
+ info->ps.force_persample = true;
+ }
+}
+
+static void
+gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ gather_info_input_decl_vs(nir, var, info);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ gather_info_input_decl_ps(nir, var, info);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ int idx = var->data.location;
+ unsigned param = shader_io_get_unique_index(idx);
+ int num_slots = glsl_count_attribute_slots(var->type, false);
+ if (idx == VARYING_SLOT_CLIP_DIST0)
+ num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 2 : 1;
+ mark_ls_output(info, param, num_slots);
+}
+
+static void
+gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ int idx = var->data.location;
+
+ switch (idx) {
+ case FRAG_RESULT_DEPTH:
+ info->ps.writes_z = true;
+ break;
+ case FRAG_RESULT_STENCIL:
+ info->ps.writes_stencil = true;
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ info->ps.writes_sample_mask = true;
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info)
+{
+ unsigned num_components = glsl_get_component_slots(var->type);
+ unsigned stream = var->data.stream;
+ unsigned idx = var->data.location;
+
+ assert(stream < 4);
+
+ info->gs.max_stream = MAX2(info->gs.max_stream, stream);
+ info->gs.num_stream_output_components[stream] += num_components;
+ info->gs.output_streams[idx] = stream;
+}
+
+static void
+gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
+ struct radv_shader_info *info,
+ const struct radv_nir_compiler_options *options)
+{
+ switch (nir->info.stage) {
+ case MESA_SHADER_FRAGMENT:
+ gather_info_output_decl_ps(nir, var, info);
+ break;
+ case MESA_SHADER_VERTEX:
+ if (options->key.vs.as_ls)
+ gather_info_output_decl_ls(nir, var, info);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ gather_info_output_decl_gs(nir, var, info);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
+{
+ nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
+ struct radv_streamout_info *so = &info->so;
+
+ if (!xfb)
+ return;
+
+ assert(xfb->output_count < MAX_SO_OUTPUTS);
+ so->num_outputs = xfb->output_count;
+
+ for (unsigned i = 0; i < xfb->output_count; i++) {
+ struct radv_stream_output *output = &so->outputs[i];
+
+ output->buffer = xfb->outputs[i].buffer;
+ output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
+ output->offset = xfb->outputs[i].offset;
+ output->location = xfb->outputs[i].location;
+ output->component_mask = xfb->outputs[i].component_mask;
+
+ so->enabled_stream_buffers_mask |=
+ (1 << output->buffer) << (output->stream * 4);
+
+	}
+ for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
+ so->strides[i] = xfb->strides[i] / 4;
+ }
+
+ ralloc_free(xfb);
+}
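
A worked example of the mask built above; each vertex stream owns a four-bit nibble of enabled_stream_buffers_mask:

/* An output on stream 1 writing transform-feedback buffer 2:
 *   (1 << 2) << (1 * 4) = 0x4 << 4 = 0x40
 * i.e. bit 6 set means "buffer 2 enabled on stream 1". Strides are stored
 * in dwords, hence the xfb->strides[i] / 4 conversion above. */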
+
+void
+radv_nir_shader_info_pass(const struct nir_shader *nir,
+ const struct radv_nir_compiler_options *options,
+ struct radv_shader_info *info)
+{
+ struct nir_function *func =
+ (struct nir_function *)exec_list_get_head_const(&nir->functions);
+
+ if (options->layout && options->layout->dynamic_offset_count)
+ info->loads_push_constants = true;
+
+ nir_foreach_variable(variable, &nir->inputs)
+ gather_info_input_decl(nir, variable, info);
+
+ nir_foreach_block(block, func->impl) {
+ gather_info_block(nir, block, info);
+ }
+
+ nir_foreach_variable(variable, &nir->outputs)
+ gather_info_output_decl(nir, variable, info, options);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ nir->info.stage == MESA_SHADER_GEOMETRY)
+ gather_xfb_info(nir, info);
+}
diff --git a/lib/mesa/src/amd/vulkan/radv_wsi_display.c b/lib/mesa/src/amd/vulkan/radv_wsi_display.c
new file mode 100644
index 000000000..d8743a06e
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/radv_wsi_display.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright © 2017 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "radv_private.h"
+#include "radv_cs.h"
+#include "util/disk_cache.h"
+#include "util/strtod.h"
+#include "vk_util.h"
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
+#include "ac_llvm_util.h"
+#include "vk_format.h"
+#include "sid.h"
+#include "util/debug.h"
+#include "wsi_common_display.h"
+
+#define MM_PER_PIXEL (1.0/96.0 * 25.4)
+
+VkResult
+radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPropertiesKHR *properties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_physical_device_display_properties(
+ physical_device,
+ &pdevice->wsi_device,
+ property_count,
+ properties);
+}
+
+VkResult
+radv_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayProperties2KHR *properties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_physical_device_display_properties2(
+ physical_device,
+ &pdevice->wsi_device,
+ property_count,
+ properties);
+}
+
+VkResult
+radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(
+ VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPlanePropertiesKHR *properties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_physical_device_display_plane_properties(
+ physical_device,
+ &pdevice->wsi_device,
+ property_count,
+ properties);
+}
+
+VkResult
+radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(
+ VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPlaneProperties2KHR *properties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_physical_device_display_plane_properties2(
+ physical_device,
+ &pdevice->wsi_device,
+ property_count,
+ properties);
+}
+
+VkResult
+radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device,
+ uint32_t plane_index,
+ uint32_t *display_count,
+ VkDisplayKHR *displays)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_display_plane_supported_displays(
+ physical_device,
+ &pdevice->wsi_device,
+ plane_index,
+ display_count,
+ displays);
+}
+
+VkResult
+radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device,
+ VkDisplayKHR display,
+ uint32_t *property_count,
+ VkDisplayModePropertiesKHR *properties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_display_mode_properties(physical_device,
+ &pdevice->wsi_device,
+ display,
+ property_count,
+ properties);
+}
+
+VkResult
+radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device,
+ VkDisplayKHR display,
+ uint32_t *property_count,
+ VkDisplayModeProperties2KHR *properties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_get_display_mode_properties2(physical_device,
+ &pdevice->wsi_device,
+ display,
+ property_count,
+ properties);
+}
+
+VkResult
+radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device,
+ VkDisplayKHR display,
+ const VkDisplayModeCreateInfoKHR *create_info,
+ const VkAllocationCallbacks *allocator,
+ VkDisplayModeKHR *mode)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_create_display_mode(physical_device,
+ &pdevice->wsi_device,
+ display,
+ create_info,
+ allocator,
+ mode);
+}
+
+VkResult
+radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device,
+ VkDisplayModeKHR mode_khr,
+ uint32_t plane_index,
+ VkDisplayPlaneCapabilitiesKHR *capabilities)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_get_display_plane_capabilities(physical_device,
+ &pdevice->wsi_device,
+ mode_khr,
+ plane_index,
+ capabilities);
+}
+
+VkResult
+radv_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device,
+ const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
+ VkDisplayPlaneCapabilities2KHR *capabilities)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_get_display_plane_capabilities2(physical_device,
+ &pdevice->wsi_device,
+ pDisplayPlaneInfo,
+ capabilities);
+}
+
+VkResult
+radv_CreateDisplayPlaneSurfaceKHR(
+ VkInstance _instance,
+ const VkDisplaySurfaceCreateInfoKHR *create_info,
+ const VkAllocationCallbacks *allocator,
+ VkSurfaceKHR *surface)
+{
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ const VkAllocationCallbacks *alloc;
+
+ if (allocator)
+ alloc = allocator;
+ else
+ alloc = &instance->alloc;
+
+ return wsi_create_display_surface(_instance, alloc,
+ create_info, surface);
+}
+
+VkResult
+radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device,
+ VkDisplayKHR display)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_release_display(physical_device,
+ &pdevice->wsi_device,
+ display);
+}
+
+#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
+VkResult
+radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device,
+ Display *dpy,
+ VkDisplayKHR display)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_acquire_xlib_display(physical_device,
+ &pdevice->wsi_device,
+ dpy,
+ display);
+}
+
+VkResult
+radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device,
+ Display *dpy,
+ RROutput output,
+ VkDisplayKHR *display)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_get_randr_output_display(physical_device,
+ &pdevice->wsi_device,
+ dpy,
+ output,
+ display);
+}
+#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */
+
+/* VK_EXT_display_control */
+
+VkResult
+radv_DisplayPowerControlEXT(VkDevice _device,
+ VkDisplayKHR display,
+ const VkDisplayPowerInfoEXT *display_power_info)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
+ return wsi_display_power_control(_device,
+ &device->physical_device->wsi_device,
+ display,
+ display_power_info);
+}
+
+VkResult
+radv_RegisterDeviceEventEXT(VkDevice _device,
+ const VkDeviceEventInfoEXT *device_event_info,
+ const VkAllocationCallbacks *allocator,
+ VkFence *_fence)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_fence *fence;
+ VkResult ret;
+
+ fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!fence)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ fence->fence = NULL;
+ fence->submitted = true;
+ fence->signalled = false;
+ fence->syncobj = 0;
+ fence->temp_syncobj = 0;
+
+ ret = wsi_register_device_event(_device,
+ &device->physical_device->wsi_device,
+ device_event_info,
+ allocator,
+ &fence->fence_wsi);
+ if (ret == VK_SUCCESS)
+ *_fence = radv_fence_to_handle(fence);
+ else
+ vk_free2(&device->instance->alloc, allocator, fence);
+ return ret;
+}
+
+VkResult
+radv_RegisterDisplayEventEXT(VkDevice _device,
+ VkDisplayKHR display,
+ const VkDisplayEventInfoEXT *display_event_info,
+ const VkAllocationCallbacks *allocator,
+ VkFence *_fence)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_fence *fence;
+ VkResult ret;
+
+ fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!fence)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ fence->fence = NULL;
+ fence->submitted = true;
+ fence->signalled = false;
+ fence->syncobj = 0;
+ fence->temp_syncobj = 0;
+
+ ret = wsi_register_display_event(_device,
+ &device->physical_device->wsi_device,
+ display,
+ display_event_info,
+ allocator,
+ &fence->fence_wsi);
+ if (ret == VK_SUCCESS)
+ *_fence = radv_fence_to_handle(fence);
+ else
+ vk_free2(&device->instance->alloc, allocator, fence);
+ return ret;
+}
+
+VkResult
+radv_GetSwapchainCounterEXT(VkDevice _device,
+ VkSwapchainKHR swapchain,
+ VkSurfaceCounterFlagBitsEXT flag_bits,
+ uint64_t *value)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
+ return wsi_get_swapchain_counter(_device,
+ &device->physical_device->wsi_device,
+ swapchain,
+ flag_bits,
+ value);
+}
+
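Every entry point in radv_wsi_display.c above follows the same thin-wrapper shape: unwrap the dispatchable handle with RADV_FROM_HANDLE, then delegate to the shared wsi_display_*/wsi_* helper, passing the physical device's wsi_device. A minimal sketch of that pattern, with a hypothetical radv_FooKHR and wsi_foo standing in for any of the real pairs above:

VkResult
radv_FooKHR(VkPhysicalDevice physical_device,
            uint32_t *count,
            VkDisplayKHR *displays)
{
   /* Unwrap the Vulkan handle into the driver's radv_physical_device. */
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);

   /* All real work happens in the shared WSI code; the driver only
    * contributes its per-device wsi_device state. */
   return wsi_foo(physical_device, &pdevice->wsi_device, count, displays);
}

The only wrappers that do more are the two fence-registration entry points, which allocate a radv_fence around the WSI fence so the result can be handed back to the application as a VkFence.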
diff --git a/lib/mesa/src/amd/vulkan/vk_format_layout.csv b/lib/mesa/src/amd/vulkan/vk_format_layout.csv
index ae9ceda08..f9c2e6f7c 100644
--- a/lib/mesa/src/amd/vulkan/vk_format_layout.csv
+++ b/lib/mesa/src/amd/vulkan/vk_format_layout.csv
@@ -148,16 +148,16 @@ VK_FORMAT_BC6H_UFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1
VK_FORMAT_BC6H_SFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1, rgb
VK_FORMAT_BC7_UNORM_BLOCK , bptc, 4, 4, x128, , , , xyzw, rgb
VK_FORMAT_BC7_SRGB_BLOCK , bptc, 4, 4, x128, , , , xyzw, srgb
-VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
-VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
-VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
-VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
-VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
-VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
-VK_FORMAT_EAC_R11_UNORM_BLOCK,
-VK_FORMAT_EAC_R11_SNORM_BLOCK,
-VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
-VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
+VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK , etc, 4, 4, x64, , , , xyz1, rgb
+VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK , etc, 4, 4, x64, , , , xyz1, srgb
+VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK , etc, 4, 4, x64, , , , xyzw, rgb
+VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK , etc, 4, 4, x64, , , , xyzw, srgb
+VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK , etc, 4, 4, x128, , , , xyzw, rgb
+VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK , etc, 4, 4, x128, , , , xyzw, srgb
+VK_FORMAT_EAC_R11_UNORM_BLOCK , etc, 4, 4, x64, , , , x001, rgb
+VK_FORMAT_EAC_R11_SNORM_BLOCK , etc, 4, 4, x64, , , , x001, rgb
+VK_FORMAT_EAC_R11G11_UNORM_BLOCK , etc, 4, 4, x128, , , , xy01, rgb
+VK_FORMAT_EAC_R11G11_SNORM_BLOCK , etc, 4, 4, x128, , , , xy01, rgb
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
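The columns in the rows added above line up with the generated descriptions in vk_format_table.c below; reading one row (this column interpretation is inferred from the generated output, not from vk_format_table.py itself):

VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK , etc, 4, 4, x64, , , , xyz1, rgb

gives: the format name; the layout (etc); the block width and height in texels (4x4); channel 0 as one opaque 64-bit field (x64, while the alpha-carrying ETC2 and two-channel EAC blocks use x128); three unused channel columns; the swizzle (xyz1: r, g, b taken from the block with alpha forced to 1, versus xyzw for formats with real alpha and x001 for single-channel EAC); and the colorspace (rgb or srgb).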
diff --git a/lib/mesa/src/amd/vulkan/vk_format_table.c b/lib/mesa/src/amd/vulkan/vk_format_table.c
index de0808dc2..0b04ce97b 100644
--- a/lib/mesa/src/amd/vulkan/vk_format_table.c
+++ b/lib/mesa/src/amd/vulkan/vk_format_table.c
@@ -1,4 +1,4 @@
-/* This file is autogenerated by u_format_table.py from u_format.csv. Do not edit directly. */
+/* This file is autogenerated by vk_format_table.py from vk_format_layout.csv. Do not edit directly. */
/**************************************************************************
*
@@ -30,7 +30,7 @@
#include "stdbool.h"
#include "vk_format.h"
-const struct vk_format_description
+static const struct vk_format_description
vk_format_undefined_description = {
VK_FORMAT_UNDEFINED,
"VK_FORMAT_UNDEFINED",
@@ -56,7 +56,7 @@ vk_format_undefined_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r4g4_unorm_pack8_description = {
VK_FORMAT_R4G4_UNORM_PACK8,
"VK_FORMAT_R4G4_UNORM_PACK8",
@@ -100,7 +100,7 @@ vk_format_r4g4_unorm_pack8_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r4g4b4a4_unorm_pack16_description = {
VK_FORMAT_R4G4B4A4_UNORM_PACK16,
"VK_FORMAT_R4G4B4A4_UNORM_PACK16",
@@ -144,7 +144,7 @@ vk_format_r4g4b4a4_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b4g4r4a4_unorm_pack16_description = {
VK_FORMAT_B4G4R4A4_UNORM_PACK16,
"VK_FORMAT_B4G4R4A4_UNORM_PACK16",
@@ -188,7 +188,7 @@ vk_format_b4g4r4a4_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r5g6b5_unorm_pack16_description = {
VK_FORMAT_R5G6B5_UNORM_PACK16,
"VK_FORMAT_R5G6B5_UNORM_PACK16",
@@ -232,7 +232,7 @@ vk_format_r5g6b5_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b5g6r5_unorm_pack16_description = {
VK_FORMAT_B5G6R5_UNORM_PACK16,
"VK_FORMAT_B5G6R5_UNORM_PACK16",
@@ -276,7 +276,7 @@ vk_format_b5g6r5_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r5g5b5a1_unorm_pack16_description = {
VK_FORMAT_R5G5B5A1_UNORM_PACK16,
"VK_FORMAT_R5G5B5A1_UNORM_PACK16",
@@ -320,7 +320,7 @@ vk_format_r5g5b5a1_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b5g5r5a1_unorm_pack16_description = {
VK_FORMAT_B5G5R5A1_UNORM_PACK16,
"VK_FORMAT_B5G5R5A1_UNORM_PACK16",
@@ -364,7 +364,7 @@ vk_format_b5g5r5a1_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a1r5g5b5_unorm_pack16_description = {
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
"VK_FORMAT_A1R5G5B5_UNORM_PACK16",
@@ -408,7 +408,7 @@ vk_format_a1r5g5b5_unorm_pack16_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_unorm_description = {
VK_FORMAT_R8_UNORM,
"VK_FORMAT_R8_UNORM",
@@ -434,7 +434,7 @@ vk_format_r8_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_snorm_description = {
VK_FORMAT_R8_SNORM,
"VK_FORMAT_R8_SNORM",
@@ -460,7 +460,7 @@ vk_format_r8_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_uscaled_description = {
VK_FORMAT_R8_USCALED,
"VK_FORMAT_R8_USCALED",
@@ -486,7 +486,7 @@ vk_format_r8_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_sscaled_description = {
VK_FORMAT_R8_SSCALED,
"VK_FORMAT_R8_SSCALED",
@@ -512,7 +512,7 @@ vk_format_r8_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_uint_description = {
VK_FORMAT_R8_UINT,
"VK_FORMAT_R8_UINT",
@@ -538,7 +538,7 @@ vk_format_r8_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_sint_description = {
VK_FORMAT_R8_SINT,
"VK_FORMAT_R8_SINT",
@@ -564,7 +564,7 @@ vk_format_r8_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8_srgb_description = {
VK_FORMAT_R8_SRGB,
"VK_FORMAT_R8_SRGB",
@@ -590,7 +590,7 @@ vk_format_r8_srgb_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_unorm_description = {
VK_FORMAT_R8G8_UNORM,
"VK_FORMAT_R8G8_UNORM",
@@ -634,7 +634,7 @@ vk_format_r8g8_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_snorm_description = {
VK_FORMAT_R8G8_SNORM,
"VK_FORMAT_R8G8_SNORM",
@@ -678,7 +678,7 @@ vk_format_r8g8_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_uscaled_description = {
VK_FORMAT_R8G8_USCALED,
"VK_FORMAT_R8G8_USCALED",
@@ -722,7 +722,7 @@ vk_format_r8g8_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_sscaled_description = {
VK_FORMAT_R8G8_SSCALED,
"VK_FORMAT_R8G8_SSCALED",
@@ -766,7 +766,7 @@ vk_format_r8g8_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_uint_description = {
VK_FORMAT_R8G8_UINT,
"VK_FORMAT_R8G8_UINT",
@@ -810,7 +810,7 @@ vk_format_r8g8_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_sint_description = {
VK_FORMAT_R8G8_SINT,
"VK_FORMAT_R8G8_SINT",
@@ -854,7 +854,7 @@ vk_format_r8g8_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8_srgb_description = {
VK_FORMAT_R8G8_SRGB,
"VK_FORMAT_R8G8_SRGB",
@@ -898,7 +898,7 @@ vk_format_r8g8_srgb_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_unorm_description = {
VK_FORMAT_R8G8B8_UNORM,
"VK_FORMAT_R8G8B8_UNORM",
@@ -942,7 +942,7 @@ vk_format_r8g8b8_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_snorm_description = {
VK_FORMAT_R8G8B8_SNORM,
"VK_FORMAT_R8G8B8_SNORM",
@@ -986,7 +986,7 @@ vk_format_r8g8b8_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_uscaled_description = {
VK_FORMAT_R8G8B8_USCALED,
"VK_FORMAT_R8G8B8_USCALED",
@@ -1030,7 +1030,7 @@ vk_format_r8g8b8_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_sscaled_description = {
VK_FORMAT_R8G8B8_SSCALED,
"VK_FORMAT_R8G8B8_SSCALED",
@@ -1074,7 +1074,7 @@ vk_format_r8g8b8_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_uint_description = {
VK_FORMAT_R8G8B8_UINT,
"VK_FORMAT_R8G8B8_UINT",
@@ -1118,7 +1118,7 @@ vk_format_r8g8b8_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_sint_description = {
VK_FORMAT_R8G8B8_SINT,
"VK_FORMAT_R8G8B8_SINT",
@@ -1162,7 +1162,7 @@ vk_format_r8g8b8_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8_srgb_description = {
VK_FORMAT_R8G8B8_SRGB,
"VK_FORMAT_R8G8B8_SRGB",
@@ -1206,7 +1206,7 @@ vk_format_r8g8b8_srgb_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_unorm_description = {
VK_FORMAT_B8G8R8_UNORM,
"VK_FORMAT_B8G8R8_UNORM",
@@ -1250,7 +1250,7 @@ vk_format_b8g8r8_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_snorm_description = {
VK_FORMAT_B8G8R8_SNORM,
"VK_FORMAT_B8G8R8_SNORM",
@@ -1294,7 +1294,7 @@ vk_format_b8g8r8_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_uscaled_description = {
VK_FORMAT_B8G8R8_USCALED,
"VK_FORMAT_B8G8R8_USCALED",
@@ -1338,7 +1338,7 @@ vk_format_b8g8r8_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_sscaled_description = {
VK_FORMAT_B8G8R8_SSCALED,
"VK_FORMAT_B8G8R8_SSCALED",
@@ -1382,7 +1382,7 @@ vk_format_b8g8r8_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_uint_description = {
VK_FORMAT_B8G8R8_UINT,
"VK_FORMAT_B8G8R8_UINT",
@@ -1426,7 +1426,7 @@ vk_format_b8g8r8_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_sint_description = {
VK_FORMAT_B8G8R8_SINT,
"VK_FORMAT_B8G8R8_SINT",
@@ -1470,7 +1470,7 @@ vk_format_b8g8r8_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8_srgb_description = {
VK_FORMAT_B8G8R8_SRGB,
"VK_FORMAT_B8G8R8_SRGB",
@@ -1514,7 +1514,7 @@ vk_format_b8g8r8_srgb_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_unorm_description = {
VK_FORMAT_R8G8B8A8_UNORM,
"VK_FORMAT_R8G8B8A8_UNORM",
@@ -1558,7 +1558,7 @@ vk_format_r8g8b8a8_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_snorm_description = {
VK_FORMAT_R8G8B8A8_SNORM,
"VK_FORMAT_R8G8B8A8_SNORM",
@@ -1602,7 +1602,7 @@ vk_format_r8g8b8a8_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_uscaled_description = {
VK_FORMAT_R8G8B8A8_USCALED,
"VK_FORMAT_R8G8B8A8_USCALED",
@@ -1646,7 +1646,7 @@ vk_format_r8g8b8a8_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_sscaled_description = {
VK_FORMAT_R8G8B8A8_SSCALED,
"VK_FORMAT_R8G8B8A8_SSCALED",
@@ -1690,7 +1690,7 @@ vk_format_r8g8b8a8_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_uint_description = {
VK_FORMAT_R8G8B8A8_UINT,
"VK_FORMAT_R8G8B8A8_UINT",
@@ -1734,7 +1734,7 @@ vk_format_r8g8b8a8_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_sint_description = {
VK_FORMAT_R8G8B8A8_SINT,
"VK_FORMAT_R8G8B8A8_SINT",
@@ -1778,7 +1778,7 @@ vk_format_r8g8b8a8_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r8g8b8a8_srgb_description = {
VK_FORMAT_R8G8B8A8_SRGB,
"VK_FORMAT_R8G8B8A8_SRGB",
@@ -1822,7 +1822,7 @@ vk_format_r8g8b8a8_srgb_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_unorm_description = {
VK_FORMAT_B8G8R8A8_UNORM,
"VK_FORMAT_B8G8R8A8_UNORM",
@@ -1866,7 +1866,7 @@ vk_format_b8g8r8a8_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_snorm_description = {
VK_FORMAT_B8G8R8A8_SNORM,
"VK_FORMAT_B8G8R8A8_SNORM",
@@ -1910,7 +1910,7 @@ vk_format_b8g8r8a8_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_uscaled_description = {
VK_FORMAT_B8G8R8A8_USCALED,
"VK_FORMAT_B8G8R8A8_USCALED",
@@ -1954,7 +1954,7 @@ vk_format_b8g8r8a8_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_sscaled_description = {
VK_FORMAT_B8G8R8A8_SSCALED,
"VK_FORMAT_B8G8R8A8_SSCALED",
@@ -1998,7 +1998,7 @@ vk_format_b8g8r8a8_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_uint_description = {
VK_FORMAT_B8G8R8A8_UINT,
"VK_FORMAT_B8G8R8A8_UINT",
@@ -2042,7 +2042,7 @@ vk_format_b8g8r8a8_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_sint_description = {
VK_FORMAT_B8G8R8A8_SINT,
"VK_FORMAT_B8G8R8A8_SINT",
@@ -2086,7 +2086,7 @@ vk_format_b8g8r8a8_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b8g8r8a8_srgb_description = {
VK_FORMAT_B8G8R8A8_SRGB,
"VK_FORMAT_B8G8R8A8_SRGB",
@@ -2130,7 +2130,7 @@ vk_format_b8g8r8a8_srgb_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_unorm_pack32_description = {
VK_FORMAT_A8B8G8R8_UNORM_PACK32,
"VK_FORMAT_A8B8G8R8_UNORM_PACK32",
@@ -2174,7 +2174,7 @@ vk_format_a8b8g8r8_unorm_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_snorm_pack32_description = {
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
"VK_FORMAT_A8B8G8R8_SNORM_PACK32",
@@ -2218,7 +2218,7 @@ vk_format_a8b8g8r8_snorm_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_uscaled_pack32_description = {
VK_FORMAT_A8B8G8R8_USCALED_PACK32,
"VK_FORMAT_A8B8G8R8_USCALED_PACK32",
@@ -2262,7 +2262,7 @@ vk_format_a8b8g8r8_uscaled_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_sscaled_pack32_description = {
VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
"VK_FORMAT_A8B8G8R8_SSCALED_PACK32",
@@ -2306,7 +2306,7 @@ vk_format_a8b8g8r8_sscaled_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_uint_pack32_description = {
VK_FORMAT_A8B8G8R8_UINT_PACK32,
"VK_FORMAT_A8B8G8R8_UINT_PACK32",
@@ -2350,7 +2350,7 @@ vk_format_a8b8g8r8_uint_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_sint_pack32_description = {
VK_FORMAT_A8B8G8R8_SINT_PACK32,
"VK_FORMAT_A8B8G8R8_SINT_PACK32",
@@ -2394,7 +2394,7 @@ vk_format_a8b8g8r8_sint_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a8b8g8r8_srgb_pack32_description = {
VK_FORMAT_A8B8G8R8_SRGB_PACK32,
"VK_FORMAT_A8B8G8R8_SRGB_PACK32",
@@ -2438,7 +2438,7 @@ vk_format_a8b8g8r8_srgb_pack32_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2r10g10b10_unorm_pack32_description = {
VK_FORMAT_A2R10G10B10_UNORM_PACK32,
"VK_FORMAT_A2R10G10B10_UNORM_PACK32",
@@ -2482,7 +2482,7 @@ vk_format_a2r10g10b10_unorm_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2r10g10b10_snorm_pack32_description = {
VK_FORMAT_A2R10G10B10_SNORM_PACK32,
"VK_FORMAT_A2R10G10B10_SNORM_PACK32",
@@ -2526,7 +2526,7 @@ vk_format_a2r10g10b10_snorm_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2r10g10b10_uscaled_pack32_description = {
VK_FORMAT_A2R10G10B10_USCALED_PACK32,
"VK_FORMAT_A2R10G10B10_USCALED_PACK32",
@@ -2570,7 +2570,7 @@ vk_format_a2r10g10b10_uscaled_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2r10g10b10_sscaled_pack32_description = {
VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
"VK_FORMAT_A2R10G10B10_SSCALED_PACK32",
@@ -2614,7 +2614,7 @@ vk_format_a2r10g10b10_sscaled_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2r10g10b10_uint_pack32_description = {
VK_FORMAT_A2R10G10B10_UINT_PACK32,
"VK_FORMAT_A2R10G10B10_UINT_PACK32",
@@ -2658,7 +2658,7 @@ vk_format_a2r10g10b10_uint_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2r10g10b10_sint_pack32_description = {
VK_FORMAT_A2R10G10B10_SINT_PACK32,
"VK_FORMAT_A2R10G10B10_SINT_PACK32",
@@ -2702,7 +2702,7 @@ vk_format_a2r10g10b10_sint_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2b10g10r10_unorm_pack32_description = {
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
"VK_FORMAT_A2B10G10R10_UNORM_PACK32",
@@ -2746,7 +2746,7 @@ vk_format_a2b10g10r10_unorm_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2b10g10r10_snorm_pack32_description = {
VK_FORMAT_A2B10G10R10_SNORM_PACK32,
"VK_FORMAT_A2B10G10R10_SNORM_PACK32",
@@ -2790,7 +2790,7 @@ vk_format_a2b10g10r10_snorm_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2b10g10r10_uscaled_pack32_description = {
VK_FORMAT_A2B10G10R10_USCALED_PACK32,
"VK_FORMAT_A2B10G10R10_USCALED_PACK32",
@@ -2834,7 +2834,7 @@ vk_format_a2b10g10r10_uscaled_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2b10g10r10_sscaled_pack32_description = {
VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
"VK_FORMAT_A2B10G10R10_SSCALED_PACK32",
@@ -2878,7 +2878,7 @@ vk_format_a2b10g10r10_sscaled_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2b10g10r10_uint_pack32_description = {
VK_FORMAT_A2B10G10R10_UINT_PACK32,
"VK_FORMAT_A2B10G10R10_UINT_PACK32",
@@ -2922,7 +2922,7 @@ vk_format_a2b10g10r10_uint_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_a2b10g10r10_sint_pack32_description = {
VK_FORMAT_A2B10G10R10_SINT_PACK32,
"VK_FORMAT_A2B10G10R10_SINT_PACK32",
@@ -2966,7 +2966,7 @@ vk_format_a2b10g10r10_sint_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_unorm_description = {
VK_FORMAT_R16_UNORM,
"VK_FORMAT_R16_UNORM",
@@ -2992,7 +2992,7 @@ vk_format_r16_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_snorm_description = {
VK_FORMAT_R16_SNORM,
"VK_FORMAT_R16_SNORM",
@@ -3018,7 +3018,7 @@ vk_format_r16_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_uscaled_description = {
VK_FORMAT_R16_USCALED,
"VK_FORMAT_R16_USCALED",
@@ -3044,7 +3044,7 @@ vk_format_r16_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_sscaled_description = {
VK_FORMAT_R16_SSCALED,
"VK_FORMAT_R16_SSCALED",
@@ -3070,7 +3070,7 @@ vk_format_r16_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_uint_description = {
VK_FORMAT_R16_UINT,
"VK_FORMAT_R16_UINT",
@@ -3096,7 +3096,7 @@ vk_format_r16_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_sint_description = {
VK_FORMAT_R16_SINT,
"VK_FORMAT_R16_SINT",
@@ -3122,7 +3122,7 @@ vk_format_r16_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16_sfloat_description = {
VK_FORMAT_R16_SFLOAT,
"VK_FORMAT_R16_SFLOAT",
@@ -3148,7 +3148,7 @@ vk_format_r16_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_unorm_description = {
VK_FORMAT_R16G16_UNORM,
"VK_FORMAT_R16G16_UNORM",
@@ -3192,7 +3192,7 @@ vk_format_r16g16_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_snorm_description = {
VK_FORMAT_R16G16_SNORM,
"VK_FORMAT_R16G16_SNORM",
@@ -3236,7 +3236,7 @@ vk_format_r16g16_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_uscaled_description = {
VK_FORMAT_R16G16_USCALED,
"VK_FORMAT_R16G16_USCALED",
@@ -3280,7 +3280,7 @@ vk_format_r16g16_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_sscaled_description = {
VK_FORMAT_R16G16_SSCALED,
"VK_FORMAT_R16G16_SSCALED",
@@ -3324,7 +3324,7 @@ vk_format_r16g16_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_uint_description = {
VK_FORMAT_R16G16_UINT,
"VK_FORMAT_R16G16_UINT",
@@ -3368,7 +3368,7 @@ vk_format_r16g16_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_sint_description = {
VK_FORMAT_R16G16_SINT,
"VK_FORMAT_R16G16_SINT",
@@ -3412,7 +3412,7 @@ vk_format_r16g16_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16_sfloat_description = {
VK_FORMAT_R16G16_SFLOAT,
"VK_FORMAT_R16G16_SFLOAT",
@@ -3456,7 +3456,7 @@ vk_format_r16g16_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_unorm_description = {
VK_FORMAT_R16G16B16_UNORM,
"VK_FORMAT_R16G16B16_UNORM",
@@ -3500,7 +3500,7 @@ vk_format_r16g16b16_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_snorm_description = {
VK_FORMAT_R16G16B16_SNORM,
"VK_FORMAT_R16G16B16_SNORM",
@@ -3544,7 +3544,7 @@ vk_format_r16g16b16_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_uscaled_description = {
VK_FORMAT_R16G16B16_USCALED,
"VK_FORMAT_R16G16B16_USCALED",
@@ -3588,7 +3588,7 @@ vk_format_r16g16b16_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_sscaled_description = {
VK_FORMAT_R16G16B16_SSCALED,
"VK_FORMAT_R16G16B16_SSCALED",
@@ -3632,7 +3632,7 @@ vk_format_r16g16b16_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_uint_description = {
VK_FORMAT_R16G16B16_UINT,
"VK_FORMAT_R16G16B16_UINT",
@@ -3676,7 +3676,7 @@ vk_format_r16g16b16_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_sint_description = {
VK_FORMAT_R16G16B16_SINT,
"VK_FORMAT_R16G16B16_SINT",
@@ -3720,7 +3720,7 @@ vk_format_r16g16b16_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16_sfloat_description = {
VK_FORMAT_R16G16B16_SFLOAT,
"VK_FORMAT_R16G16B16_SFLOAT",
@@ -3764,7 +3764,7 @@ vk_format_r16g16b16_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_unorm_description = {
VK_FORMAT_R16G16B16A16_UNORM,
"VK_FORMAT_R16G16B16A16_UNORM",
@@ -3808,7 +3808,7 @@ vk_format_r16g16b16a16_unorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_snorm_description = {
VK_FORMAT_R16G16B16A16_SNORM,
"VK_FORMAT_R16G16B16A16_SNORM",
@@ -3852,7 +3852,7 @@ vk_format_r16g16b16a16_snorm_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_uscaled_description = {
VK_FORMAT_R16G16B16A16_USCALED,
"VK_FORMAT_R16G16B16A16_USCALED",
@@ -3896,7 +3896,7 @@ vk_format_r16g16b16a16_uscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_sscaled_description = {
VK_FORMAT_R16G16B16A16_SSCALED,
"VK_FORMAT_R16G16B16A16_SSCALED",
@@ -3940,7 +3940,7 @@ vk_format_r16g16b16a16_sscaled_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_uint_description = {
VK_FORMAT_R16G16B16A16_UINT,
"VK_FORMAT_R16G16B16A16_UINT",
@@ -3984,7 +3984,7 @@ vk_format_r16g16b16a16_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_sint_description = {
VK_FORMAT_R16G16B16A16_SINT,
"VK_FORMAT_R16G16B16A16_SINT",
@@ -4028,7 +4028,7 @@ vk_format_r16g16b16a16_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r16g16b16a16_sfloat_description = {
VK_FORMAT_R16G16B16A16_SFLOAT,
"VK_FORMAT_R16G16B16A16_SFLOAT",
@@ -4072,7 +4072,7 @@ vk_format_r16g16b16a16_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32_uint_description = {
VK_FORMAT_R32_UINT,
"VK_FORMAT_R32_UINT",
@@ -4098,7 +4098,7 @@ vk_format_r32_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32_sint_description = {
VK_FORMAT_R32_SINT,
"VK_FORMAT_R32_SINT",
@@ -4124,7 +4124,7 @@ vk_format_r32_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32_sfloat_description = {
VK_FORMAT_R32_SFLOAT,
"VK_FORMAT_R32_SFLOAT",
@@ -4150,7 +4150,7 @@ vk_format_r32_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32_uint_description = {
VK_FORMAT_R32G32_UINT,
"VK_FORMAT_R32G32_UINT",
@@ -4194,7 +4194,7 @@ vk_format_r32g32_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32_sint_description = {
VK_FORMAT_R32G32_SINT,
"VK_FORMAT_R32G32_SINT",
@@ -4238,7 +4238,7 @@ vk_format_r32g32_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32_sfloat_description = {
VK_FORMAT_R32G32_SFLOAT,
"VK_FORMAT_R32G32_SFLOAT",
@@ -4282,7 +4282,7 @@ vk_format_r32g32_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32b32_uint_description = {
VK_FORMAT_R32G32B32_UINT,
"VK_FORMAT_R32G32B32_UINT",
@@ -4326,7 +4326,7 @@ vk_format_r32g32b32_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32b32_sint_description = {
VK_FORMAT_R32G32B32_SINT,
"VK_FORMAT_R32G32B32_SINT",
@@ -4370,7 +4370,7 @@ vk_format_r32g32b32_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32b32_sfloat_description = {
VK_FORMAT_R32G32B32_SFLOAT,
"VK_FORMAT_R32G32B32_SFLOAT",
@@ -4414,7 +4414,7 @@ vk_format_r32g32b32_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32b32a32_uint_description = {
VK_FORMAT_R32G32B32A32_UINT,
"VK_FORMAT_R32G32B32A32_UINT",
@@ -4458,7 +4458,7 @@ vk_format_r32g32b32a32_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32b32a32_sint_description = {
VK_FORMAT_R32G32B32A32_SINT,
"VK_FORMAT_R32G32B32A32_SINT",
@@ -4502,7 +4502,7 @@ vk_format_r32g32b32a32_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r32g32b32a32_sfloat_description = {
VK_FORMAT_R32G32B32A32_SFLOAT,
"VK_FORMAT_R32G32B32A32_SFLOAT",
@@ -4546,7 +4546,7 @@ vk_format_r32g32b32a32_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64_uint_description = {
VK_FORMAT_R64_UINT,
"VK_FORMAT_R64_UINT",
@@ -4572,7 +4572,7 @@ vk_format_r64_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64_sint_description = {
VK_FORMAT_R64_SINT,
"VK_FORMAT_R64_SINT",
@@ -4598,7 +4598,7 @@ vk_format_r64_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64_sfloat_description = {
VK_FORMAT_R64_SFLOAT,
"VK_FORMAT_R64_SFLOAT",
@@ -4624,7 +4624,7 @@ vk_format_r64_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64_uint_description = {
VK_FORMAT_R64G64_UINT,
"VK_FORMAT_R64G64_UINT",
@@ -4668,7 +4668,7 @@ vk_format_r64g64_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64_sint_description = {
VK_FORMAT_R64G64_SINT,
"VK_FORMAT_R64G64_SINT",
@@ -4712,7 +4712,7 @@ vk_format_r64g64_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64_sfloat_description = {
VK_FORMAT_R64G64_SFLOAT,
"VK_FORMAT_R64G64_SFLOAT",
@@ -4756,7 +4756,7 @@ vk_format_r64g64_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64b64_uint_description = {
VK_FORMAT_R64G64B64_UINT,
"VK_FORMAT_R64G64B64_UINT",
@@ -4800,7 +4800,7 @@ vk_format_r64g64b64_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64b64_sint_description = {
VK_FORMAT_R64G64B64_SINT,
"VK_FORMAT_R64G64B64_SINT",
@@ -4844,7 +4844,7 @@ vk_format_r64g64b64_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64b64_sfloat_description = {
VK_FORMAT_R64G64B64_SFLOAT,
"VK_FORMAT_R64G64B64_SFLOAT",
@@ -4888,7 +4888,7 @@ vk_format_r64g64b64_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64b64a64_uint_description = {
VK_FORMAT_R64G64B64A64_UINT,
"VK_FORMAT_R64G64B64A64_UINT",
@@ -4932,7 +4932,7 @@ vk_format_r64g64b64a64_uint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64b64a64_sint_description = {
VK_FORMAT_R64G64B64A64_SINT,
"VK_FORMAT_R64G64B64A64_SINT",
@@ -4976,7 +4976,7 @@ vk_format_r64g64b64a64_sint_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_r64g64b64a64_sfloat_description = {
VK_FORMAT_R64G64B64A64_SFLOAT,
"VK_FORMAT_R64G64B64A64_SFLOAT",
@@ -5020,7 +5020,7 @@ vk_format_r64g64b64a64_sfloat_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_b10g11r11_ufloat_pack32_description = {
VK_FORMAT_B10G11R11_UFLOAT_PACK32,
"VK_FORMAT_B10G11R11_UFLOAT_PACK32",
@@ -5046,7 +5046,7 @@ vk_format_b10g11r11_ufloat_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_e5b9g9r9_ufloat_pack32_description = {
VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
"VK_FORMAT_E5B9G9R9_UFLOAT_PACK32",
@@ -5072,7 +5072,7 @@ vk_format_e5b9g9r9_ufloat_pack32_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_d16_unorm_description = {
VK_FORMAT_D16_UNORM,
"VK_FORMAT_D16_UNORM",
@@ -5098,7 +5098,7 @@ vk_format_d16_unorm_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_x8_d24_unorm_pack32_description = {
VK_FORMAT_X8_D24_UNORM_PACK32,
"VK_FORMAT_X8_D24_UNORM_PACK32",
@@ -5142,7 +5142,7 @@ vk_format_x8_d24_unorm_pack32_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_d32_sfloat_description = {
VK_FORMAT_D32_SFLOAT,
"VK_FORMAT_D32_SFLOAT",
@@ -5168,7 +5168,7 @@ vk_format_d32_sfloat_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_s8_uint_description = {
VK_FORMAT_S8_UINT,
"VK_FORMAT_S8_UINT",
@@ -5194,7 +5194,7 @@ vk_format_s8_uint_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_d16_unorm_s8_uint_description = {
VK_FORMAT_D16_UNORM_S8_UINT,
"VK_FORMAT_D16_UNORM_S8_UINT",
@@ -5238,7 +5238,7 @@ vk_format_d16_unorm_s8_uint_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_d24_unorm_s8_uint_description = {
VK_FORMAT_D24_UNORM_S8_UINT,
"VK_FORMAT_D24_UNORM_S8_UINT",
@@ -5282,7 +5282,7 @@ vk_format_d24_unorm_s8_uint_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_d32_sfloat_s8_uint_description = {
VK_FORMAT_D32_SFLOAT_S8_UINT,
"VK_FORMAT_D32_SFLOAT_S8_UINT",
@@ -5326,7 +5326,7 @@ vk_format_d32_sfloat_s8_uint_description = {
VK_FORMAT_COLORSPACE_ZS,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc1_rgb_unorm_block_description = {
VK_FORMAT_BC1_RGB_UNORM_BLOCK,
"VK_FORMAT_BC1_RGB_UNORM_BLOCK",
@@ -5352,7 +5352,7 @@ vk_format_bc1_rgb_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc1_rgb_srgb_block_description = {
VK_FORMAT_BC1_RGB_SRGB_BLOCK,
"VK_FORMAT_BC1_RGB_SRGB_BLOCK",
@@ -5378,7 +5378,7 @@ vk_format_bc1_rgb_srgb_block_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc1_rgba_unorm_block_description = {
VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
"VK_FORMAT_BC1_RGBA_UNORM_BLOCK",
@@ -5404,7 +5404,7 @@ vk_format_bc1_rgba_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc1_rgba_srgb_block_description = {
VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
"VK_FORMAT_BC1_RGBA_SRGB_BLOCK",
@@ -5430,7 +5430,7 @@ vk_format_bc1_rgba_srgb_block_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc2_unorm_block_description = {
VK_FORMAT_BC2_UNORM_BLOCK,
"VK_FORMAT_BC2_UNORM_BLOCK",
@@ -5456,7 +5456,7 @@ vk_format_bc2_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc2_srgb_block_description = {
VK_FORMAT_BC2_SRGB_BLOCK,
"VK_FORMAT_BC2_SRGB_BLOCK",
@@ -5482,7 +5482,7 @@ vk_format_bc2_srgb_block_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc3_unorm_block_description = {
VK_FORMAT_BC3_UNORM_BLOCK,
"VK_FORMAT_BC3_UNORM_BLOCK",
@@ -5508,7 +5508,7 @@ vk_format_bc3_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc3_srgb_block_description = {
VK_FORMAT_BC3_SRGB_BLOCK,
"VK_FORMAT_BC3_SRGB_BLOCK",
@@ -5534,7 +5534,7 @@ vk_format_bc3_srgb_block_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc4_unorm_block_description = {
VK_FORMAT_BC4_UNORM_BLOCK,
"VK_FORMAT_BC4_UNORM_BLOCK",
@@ -5560,7 +5560,7 @@ vk_format_bc4_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc4_snorm_block_description = {
VK_FORMAT_BC4_SNORM_BLOCK,
"VK_FORMAT_BC4_SNORM_BLOCK",
@@ -5586,7 +5586,7 @@ vk_format_bc4_snorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc5_unorm_block_description = {
VK_FORMAT_BC5_UNORM_BLOCK,
"VK_FORMAT_BC5_UNORM_BLOCK",
@@ -5612,7 +5612,7 @@ vk_format_bc5_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc5_snorm_block_description = {
VK_FORMAT_BC5_SNORM_BLOCK,
"VK_FORMAT_BC5_SNORM_BLOCK",
@@ -5638,7 +5638,7 @@ vk_format_bc5_snorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc6h_ufloat_block_description = {
VK_FORMAT_BC6H_UFLOAT_BLOCK,
"VK_FORMAT_BC6H_UFLOAT_BLOCK",
@@ -5664,7 +5664,7 @@ vk_format_bc6h_ufloat_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc6h_sfloat_block_description = {
VK_FORMAT_BC6H_SFLOAT_BLOCK,
"VK_FORMAT_BC6H_SFLOAT_BLOCK",
@@ -5690,7 +5690,7 @@ vk_format_bc6h_sfloat_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc7_unorm_block_description = {
VK_FORMAT_BC7_UNORM_BLOCK,
"VK_FORMAT_BC7_UNORM_BLOCK",
@@ -5716,7 +5716,7 @@ vk_format_bc7_unorm_block_description = {
VK_FORMAT_COLORSPACE_RGB,
};
-const struct vk_format_description
+static const struct vk_format_description
vk_format_bc7_srgb_block_description = {
VK_FORMAT_BC7_SRGB_BLOCK,
"VK_FORMAT_BC7_SRGB_BLOCK",
@@ -5742,6 +5742,266 @@ vk_format_bc7_srgb_block_description = {
VK_FORMAT_COLORSPACE_SRGB,
};
+static const struct vk_format_description
+vk_format_etc2_r8g8b8_unorm_block_description = {
+ VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
+ "VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK",
+ "etc2_r8g8b8_unorm_block",
+ {4, 4, 64}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_Y, /* g */
+ VK_SWIZZLE_Z, /* b */
+ VK_SWIZZLE_1 /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
+static const struct vk_format_description
+vk_format_etc2_r8g8b8_srgb_block_description = {
+ VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
+ "VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK",
+ "etc2_r8g8b8_srgb_block",
+ {4, 4, 64}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* sr */
+ VK_SWIZZLE_Y, /* sg */
+ VK_SWIZZLE_Z, /* sb */
+ VK_SWIZZLE_1 /* a */
+ },
+ VK_FORMAT_COLORSPACE_SRGB,
+};
+
+static const struct vk_format_description
+vk_format_etc2_r8g8b8a1_unorm_block_description = {
+ VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
+ "VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK",
+ "etc2_r8g8b8a1_unorm_block",
+ {4, 4, 64}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_Y, /* g */
+ VK_SWIZZLE_Z, /* b */
+ VK_SWIZZLE_W /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
+static const struct vk_format_description
+vk_format_etc2_r8g8b8a1_srgb_block_description = {
+ VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
+ "VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK",
+ "etc2_r8g8b8a1_srgb_block",
+ {4, 4, 64}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* sr */
+ VK_SWIZZLE_Y, /* sg */
+ VK_SWIZZLE_Z, /* sb */
+ VK_SWIZZLE_W /* a */
+ },
+ VK_FORMAT_COLORSPACE_SRGB,
+};
+
+static const struct vk_format_description
+vk_format_etc2_r8g8b8a8_unorm_block_description = {
+ VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
+ "VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK",
+ "etc2_r8g8b8a8_unorm_block",
+ {4, 4, 128}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_Y, /* g */
+ VK_SWIZZLE_Z, /* b */
+ VK_SWIZZLE_W /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
+static const struct vk_format_description
+vk_format_etc2_r8g8b8a8_srgb_block_description = {
+ VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
+ "VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK",
+ "etc2_r8g8b8a8_srgb_block",
+ {4, 4, 128}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* sr */
+ VK_SWIZZLE_Y, /* sg */
+ VK_SWIZZLE_Z, /* sb */
+ VK_SWIZZLE_W /* a */
+ },
+ VK_FORMAT_COLORSPACE_SRGB,
+};
+
+static const struct vk_format_description
+vk_format_eac_r11_unorm_block_description = {
+ VK_FORMAT_EAC_R11_UNORM_BLOCK,
+ "VK_FORMAT_EAC_R11_UNORM_BLOCK",
+ "eac_r11_unorm_block",
+ {4, 4, 64}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_0, /* g */
+ VK_SWIZZLE_0, /* b */
+ VK_SWIZZLE_1 /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
+static const struct vk_format_description
+vk_format_eac_r11_snorm_block_description = {
+ VK_FORMAT_EAC_R11_SNORM_BLOCK,
+ "VK_FORMAT_EAC_R11_SNORM_BLOCK",
+ "eac_r11_snorm_block",
+ {4, 4, 64}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_0, /* g */
+ VK_SWIZZLE_0, /* b */
+ VK_SWIZZLE_1 /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
+static const struct vk_format_description
+vk_format_eac_r11g11_unorm_block_description = {
+ VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
+ "VK_FORMAT_EAC_R11G11_UNORM_BLOCK",
+ "eac_r11g11_unorm_block",
+ {4, 4, 128}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_Y, /* g */
+ VK_SWIZZLE_0, /* b */
+ VK_SWIZZLE_1 /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
+static const struct vk_format_description
+vk_format_eac_r11g11_snorm_block_description = {
+ VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
+ "VK_FORMAT_EAC_R11G11_SNORM_BLOCK",
+ "eac_r11g11_snorm_block",
+ {4, 4, 128}, /* block */
+ VK_FORMAT_LAYOUT_ETC,
+ 1, /* nr_channels */
+ false, /* is_array */
+ false, /* is_bitmask */
+ false, /* is_mixed */
+ {
+ {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+ },
+ {
+ VK_SWIZZLE_X, /* r */
+ VK_SWIZZLE_Y, /* g */
+ VK_SWIZZLE_0, /* b */
+ VK_SWIZZLE_1 /* a */
+ },
+ VK_FORMAT_COLORSPACE_RGB,
+};
+
const struct vk_format_description *
vk_format_description(VkFormat format)
{
@@ -6044,6 +6304,26 @@ vk_format_description(VkFormat format)
return &vk_format_bc7_unorm_block_description;
case VK_FORMAT_BC7_SRGB_BLOCK:
return &vk_format_bc7_srgb_block_description;
+ case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
+ return &vk_format_etc2_r8g8b8_unorm_block_description;
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ return &vk_format_etc2_r8g8b8_srgb_block_description;
+ case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
+ return &vk_format_etc2_r8g8b8a1_unorm_block_description;
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ return &vk_format_etc2_r8g8b8a1_srgb_block_description;
+ case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
+ return &vk_format_etc2_r8g8b8a8_unorm_block_description;
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return &vk_format_etc2_r8g8b8a8_srgb_block_description;
+ case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+ return &vk_format_eac_r11_unorm_block_description;
+ case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+ return &vk_format_eac_r11_snorm_block_description;
+ case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+ return &vk_format_eac_r11g11_unorm_block_description;
+ case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+ return &vk_format_eac_r11g11_snorm_block_description;
default:
return NULL;
}
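With the switch extended, the ETC2/EAC formats resolve like any other entry in the table. A minimal usage sketch, assuming the block struct exposes width/height/bits fields as in gallium's u_format (the {4, 4, 64} initializers above suggest exactly that layout):

#include "vk_format.h"

/* Sketch: bytes needed for one row of ETC2 blocks spanning width_px texels. */
static unsigned
etc2_block_row_size(unsigned width_px)
{
   const struct vk_format_description *desc =
      vk_format_description(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK);

   /* desc->block is {4, 4, 64}: 4x4 texels packed into a 64-bit block. */
   unsigned blocks_across = (width_px + desc->block.width - 1) / desc->block.width;
   return blocks_across * (desc->block.bits / 8);
}

vk_format_description() returns NULL for formats outside the table (the default case above), so real callers check the result before dereferencing.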