author     Jonathan Gray <jsg@cvs.openbsd.org>  2019-01-29 11:08:07 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>  2019-01-29 11:08:07 +0000
commit     6b139c2063623e9310025247cd966490b9aa57ea (patch)
tree       375acfd898ca3d721250aa17291bbb90a8d7250a /lib/mesa/src/amd
parent     cce99579dcfb1d54c54cff65573be3430e77f2c5 (diff)
Import Mesa 18.3.2
Diffstat (limited to 'lib/mesa/src/amd')
25 files changed, 8095 insertions, 845 deletions
diff --git a/lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h b/lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h new file mode 100644 index 000000000..7436c5493 --- /dev/null +++ b/lib/mesa/src/amd/addrlib/amdgpu_asic_addr.h @@ -0,0 +1,138 @@ +/* + * Copyright © 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ + +#ifndef _AMDGPU_ASIC_ADDR_H +#define _AMDGPU_ASIC_ADDR_H + +#define ATI_VENDOR_ID 0x1002 +#define AMD_VENDOR_ID 0x1022 + +// AMDGPU_VENDOR_IS_AMD(vendorId) +#define AMDGPU_VENDOR_IS_AMD(v) ((v == ATI_VENDOR_ID) || (v == AMD_VENDOR_ID)) + +#define FAMILY_UNKNOWN 0x00 +#define FAMILY_TN 0x69 +#define FAMILY_SI 0x6E +#define FAMILY_CI 0x78 +#define FAMILY_KV 0x7D +#define FAMILY_VI 0x82 +#define FAMILY_POLARIS 0x82 +#define FAMILY_CZ 0x87 +#define FAMILY_AI 0x8D +#define FAMILY_RV 0x8E + +// AMDGPU_FAMILY_IS(familyId, familyName) +#define FAMILY_IS(f, fn) (f == FAMILY_##fn) +#define FAMILY_IS_TN(f) FAMILY_IS(f, TN) +#define FAMILY_IS_SI(f) FAMILY_IS(f, SI) +#define FAMILY_IS_CI(f) FAMILY_IS(f, CI) +#define FAMILY_IS_KV(f) FAMILY_IS(f, KV) +#define FAMILY_IS_VI(f) FAMILY_IS(f, VI) +#define FAMILY_IS_POLARIS(f) FAMILY_IS(f, POLARIS) +#define FAMILY_IS_CZ(f) FAMILY_IS(f, CZ) +#define FAMILY_IS_AI(f) FAMILY_IS(f, AI) +#define FAMILY_IS_RV(f) FAMILY_IS(f, RV) + +#define AMDGPU_UNKNOWN 0xFF + +#define AMDGPU_TAHITI_RANGE 0x05, 0x14 +#define AMDGPU_PITCAIRN_RANGE 0x15, 0x28 +#define AMDGPU_CAPEVERDE_RANGE 0x29, 0x3C +#define AMDGPU_OLAND_RANGE 0x3C, 0x46 +#define AMDGPU_HAINAN_RANGE 0x46, 0xFF + +#define AMDGPU_BONAIRE_RANGE 0x14, 0x28 +#define AMDGPU_HAWAII_RANGE 0x28, 0x3C + +#define AMDGPU_SPECTRE_RANGE 0x01, 0x41 +#define AMDGPU_SPOOKY_RANGE 0x41, 0x81 +#define AMDGPU_KALINDI_RANGE 0x81, 0xA1 +#define AMDGPU_GODAVARI_RANGE 0xA1, 0xFF + +#define AMDGPU_ICELAND_RANGE 0x01, 0x14 +#define AMDGPU_TONGA_RANGE 0x14, 0x28 +#define AMDGPU_FIJI_RANGE 0x3C, 0x50 + +#define AMDGPU_POLARIS10_RANGE 0x50, 0x5A +#define AMDGPU_POLARIS11_RANGE 0x5A, 0x64 +#define AMDGPU_POLARIS12_RANGE 0x64, 0x6E +#define AMDGPU_VEGAM_RANGE 0x6E, 0xFF + +#define AMDGPU_CARRIZO_RANGE 0x01, 0x21 +#define AMDGPU_BRISTOL_RANGE 0x10, 0x21 +#define AMDGPU_STONEY_RANGE 0x61, 0xFF + +#define AMDGPU_VEGA10_RANGE 0x01, 0x14 +#define AMDGPU_VEGA12_RANGE 0x14, 0x28 +#define AMDGPU_VEGA20_RANGE 0x28, 0xFF + +#define AMDGPU_RAVEN_RANGE 0x01, 0x81 +#define AMDGPU_RAVEN2_RANGE 0x81, 0xFF + 
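+/* Each *_RANGE macro above expands to an inclusive lower and an exclusive
+ * upper bound ("min, max"); AMDGPU_IN_RANGE() below splices that pair in
+ * through __VA_ARGS__, so e.g. ASICREV_IS(r, TAHITI) ends up as
+ * ((r >= 0x05) && (r < 0x14)). */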
+#define AMDGPU_EXPAND_FIX(x) x +#define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max)) +#define AMDGPU_IN_RANGE(val, ...) AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__)) + + +// ASICREV_IS(eRevisionId, revisionName) +#define ASICREV_IS(r, rn) AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE) +#define ASICREV_IS_TAHITI_P(r) ASICREV_IS(r, TAHITI) +#define ASICREV_IS_PITCAIRN_PM(r) ASICREV_IS(r, PITCAIRN) +#define ASICREV_IS_CAPEVERDE_M(r) ASICREV_IS(r, CAPEVERDE) +#define ASICREV_IS_OLAND_M(r) ASICREV_IS(r, OLAND) +#define ASICREV_IS_HAINAN_V(r) ASICREV_IS(r, HAINAN) + +#define ASICREV_IS_BONAIRE_M(r) ASICREV_IS(r, BONAIRE) +#define ASICREV_IS_HAWAII_P(r) ASICREV_IS(r, HAWAII) + +#define ASICREV_IS_SPECTRE(r) ASICREV_IS(r, SPECTRE) +#define ASICREV_IS_SPOOKY(r) ASICREV_IS(r, SPOOKY) +#define ASICREV_IS_KALINDI(r) ASICREV_IS(r, KALINDI) +#define ASICREV_IS_KALINDI_GODAVARI(r) ASICREV_IS(r, GODAVARI) + +#define ASICREV_IS_ICELAND_M(r) ASICREV_IS(r, ICELAND) +#define ASICREV_IS_TONGA_P(r) ASICREV_IS(r, TONGA) +#define ASICREV_IS_FIJI_P(r) ASICREV_IS(r, FIJI) + +#define ASICREV_IS_POLARIS10_P(r) ASICREV_IS(r, POLARIS10) +#define ASICREV_IS_POLARIS11_M(r) ASICREV_IS(r, POLARIS11) +#define ASICREV_IS_POLARIS12_V(r) ASICREV_IS(r, POLARIS12) +#define ASICREV_IS_VEGAM_P(r) ASICREV_IS(r, VEGAM) + +#define ASICREV_IS_CARRIZO(r) ASICREV_IS(r, CARRIZO) +#define ASICREV_IS_CARRIZO_BRISTOL(r) ASICREV_IS(r, BRISTOL) +#define ASICREV_IS_STONEY(r) ASICREV_IS(r, STONEY) + +#define ASICREV_IS_VEGA10_M(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA10_P(r) ASICREV_IS(r, VEGA10) +#define ASICREV_IS_VEGA12_P(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA12_p(r) ASICREV_IS(r, VEGA12) +#define ASICREV_IS_VEGA20_P(r) ASICREV_IS(r, VEGA20) + +#define ASICREV_IS_RAVEN(r) ASICREV_IS(r, RAVEN) +#define ASICREV_IS_RAVEN2(r) ASICREV_IS(r, RAVEN2) + +#endif // _AMDGPU_ASIC_ADDR_H diff --git a/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h b/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h index cf67f602b..793edbc62 100644 --- a/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h +++ b/lib/mesa/src/amd/addrlib/inc/chip/r800/si_gb_reg.h @@ -27,6 +27,14 @@ * of the Software. */ +#include "util/u_endian.h" + +#if defined(PIPE_ARCH_LITTLE_ENDIAN) +#define LITTLEENDIAN_CPU +#elif defined(PIPE_ARCH_BIG_ENDIAN) +#define BIGENDIAN_CPU +#endif + // // Make sure the necessary endian defines are there. // diff --git a/lib/mesa/src/amd/addrlib/meson.build b/lib/mesa/src/amd/addrlib/meson.build new file mode 100644 index 000000000..b9550afd2 --- /dev/null +++ b/lib/mesa/src/amd/addrlib/meson.build @@ -0,0 +1,63 @@ +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +files_addrlib = files( + 'addrinterface.cpp', + 'addrinterface.h', + 'addrtypes.h', + 'core/addrcommon.h', + 'core/addrelemlib.cpp', + 'core/addrelemlib.h', + 'core/addrlib.cpp', + 'core/addrlib.h', + 'core/addrlib1.cpp', + 'core/addrlib1.h', + 'core/addrlib2.cpp', + 'core/addrlib2.h', + 'core/addrobject.cpp', + 'core/addrobject.h', + 'gfx9/chip/gfx9_enum.h', + 'gfx9/coord.cpp', + 'gfx9/coord.h', + 'gfx9/gfx9addrlib.cpp', + 'gfx9/gfx9addrlib.h', + 'amdgpu_asic_addr.h', + 'inc/chip/gfx9/gfx9_gb_reg.h', + 'inc/chip/r800/si_gb_reg.h', + 'r800/chip/si_ci_vi_merged_enum.h', + 'r800/ciaddrlib.cpp', + 'r800/ciaddrlib.h', + 'r800/egbaddrlib.cpp', + 'r800/egbaddrlib.h', + 'r800/siaddrlib.cpp', + 'r800/siaddrlib.h', +) + +libamdgpu_addrlib = static_library( + 'addrlib', + files_addrlib, + include_directories : [ + include_directories( + 'core', 'inc/chip/gfx9', 'inc/chip/r800', 'gfx9/chip', 'r800/chip', + ), + inc_amd_common, inc_common, inc_src, + ], + cpp_args : cpp_vis_args, +) diff --git a/lib/mesa/src/amd/common/ac_shader_abi.h b/lib/mesa/src/amd/common/ac_shader_abi.h index b04dc076d..ee18e6c19 100644 --- a/lib/mesa/src/amd/common/ac_shader_abi.h +++ b/lib/mesa/src/amd/common/ac_shader_abi.h @@ -26,6 +26,12 @@ #include <llvm-c/Core.h> +#include "compiler/shader_enums.h" + +struct nir_variable; + +#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) + enum ac_descriptor_type { AC_DESC_IMAGE, AC_DESC_FMASK, @@ -42,10 +48,27 @@ struct ac_shader_abi { LLVMValueRef draw_id; LLVMValueRef vertex_id; LLVMValueRef instance_id; + LLVMValueRef tcs_patch_id; + LLVMValueRef tcs_rel_ids; + LLVMValueRef tes_patch_id; + LLVMValueRef gs_prim_id; + LLVMValueRef gs_invocation_id; LLVMValueRef frag_pos[4]; LLVMValueRef front_face; LLVMValueRef ancillary; LLVMValueRef sample_coverage; + LLVMValueRef prim_mask; + /* CS */ + LLVMValueRef local_invocation_ids; + LLVMValueRef num_work_groups; + LLVMValueRef workgroup_ids[3]; + LLVMValueRef tg_size; + + /* Vulkan only */ + LLVMValueRef push_constants; + LLVMValueRef view_index; + + LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4]; /* For VS and PS: pre-loaded shader inputs. * @@ -54,10 +77,60 @@ struct ac_shader_abi { */ LLVMValueRef *inputs; + /* Varying -> attribute number mapping. 
Also NIR-only */ + unsigned fs_input_attr_indices[MAX_VARYING]; + void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs); + void (*emit_vertex)(struct ac_shader_abi *abi, + unsigned stream, + LLVMValueRef *addrs); + + void (*emit_primitive)(struct ac_shader_abi *abi, + unsigned stream); + + void (*emit_kill)(struct ac_shader_abi *abi, LLVMValueRef visible); + + LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + unsigned vertex_index, + unsigned const_index, + LLVMTypeRef type); + + LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, + LLVMTypeRef type, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact, + bool load_inputs); + + void (*store_tcs_outputs)(struct ac_shader_abi *abi, + const struct nir_variable *var, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + LLVMValueRef src, + unsigned writemask); + + LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, + unsigned varying_id); + + LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index); /** @@ -87,14 +160,40 @@ struct ac_shader_abi { unsigned constant_index, LLVMValueRef index, enum ac_descriptor_type desc_type, - bool image, bool write); + bool image, bool write, + bool bindless); + + /** + * Load a Vulkan-specific resource. + * + * \param index resource index + * \param desc_set descriptor set + * \param binding descriptor set binding + */ + LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, + LLVMValueRef index, + unsigned desc_set, + unsigned binding); + + LLVMValueRef (*lookup_interp_param)(struct ac_shader_abi *abi, + enum glsl_interp_mode interp, + unsigned location); + + LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, + LLVMValueRef sample_id); + + LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi); + + LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi); /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently * uses it due to promoting D16 to D32, but radv needs it off. */ bool clamp_shadow_reference; /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0 - * and LLVM optimizes an indexed load with constant index to IDXEN=0. */ + * and LLVM optimizes an indexed load with constant index to IDXEN=0. */ bool gfx9_stride_size_workaround; }; diff --git a/lib/mesa/src/amd/common/ac_shader_util.c b/lib/mesa/src/amd/common/ac_shader_util.c new file mode 100644 index 000000000..531395f4f --- /dev/null +++ b/lib/mesa/src/amd/common/ac_shader_util.c @@ -0,0 +1,179 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "ac_nir_to_llvm.h" +#include "ac_shader_util.h" +#include "sid.h" + +unsigned +ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, + bool writes_samplemask) +{ + if (writes_z) { + /* Z needs 32 bits. */ + if (writes_samplemask) + return V_028710_SPI_SHADER_32_ABGR; + else if (writes_stencil) + return V_028710_SPI_SHADER_32_GR; + else + return V_028710_SPI_SHADER_32_R; + } else if (writes_stencil || writes_samplemask) { + /* Both stencil and sample mask need only 16 bits. */ + return V_028710_SPI_SHADER_UINT16_ABGR; + } else { + return V_028710_SPI_SHADER_ZERO; + } +} + +unsigned +ac_get_cb_shader_mask(unsigned spi_shader_col_format) +{ + unsigned i, cb_shader_mask = 0; + + for (i = 0; i < 8; i++) { + switch ((spi_shader_col_format >> (i * 4)) & 0xf) { + case V_028714_SPI_SHADER_ZERO: + break; + case V_028714_SPI_SHADER_32_R: + cb_shader_mask |= 0x1 << (i * 4); + break; + case V_028714_SPI_SHADER_32_GR: + cb_shader_mask |= 0x3 << (i * 4); + break; + case V_028714_SPI_SHADER_32_AR: + cb_shader_mask |= 0x9 << (i * 4); + break; + case V_028714_SPI_SHADER_FP16_ABGR: + case V_028714_SPI_SHADER_UNORM16_ABGR: + case V_028714_SPI_SHADER_SNORM16_ABGR: + case V_028714_SPI_SHADER_UINT16_ABGR: + case V_028714_SPI_SHADER_SINT16_ABGR: + case V_028714_SPI_SHADER_32_ABGR: + cb_shader_mask |= 0xf << (i * 4); + break; + default: + assert(0); + } + } + return cb_shader_mask; +} + +/** + * Calculate the appropriate setting of VGT_GS_MODE when \p shader is a + * geometry shader. + */ +uint32_t +ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class) +{ + unsigned cut_mode; + + if (gs_max_vert_out <= 128) { + cut_mode = V_028A40_GS_CUT_128; + } else if (gs_max_vert_out <= 256) { + cut_mode = V_028A40_GS_CUT_256; + } else if (gs_max_vert_out <= 512) { + cut_mode = V_028A40_GS_CUT_512; + } else { + assert(gs_max_vert_out <= 1024); + cut_mode = V_028A40_GS_CUT_1024; + } + + return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | + S_028A40_CUT_MODE(cut_mode)| + S_028A40_ES_WRITE_OPTIMIZE(chip_class <= VI) | + S_028A40_GS_WRITE_OPTIMIZE(1) | + S_028A40_ONCHIP(chip_class >= GFX9 ? 
1 : 0); +} + +void +ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, + LLVMValueRef stencil, LLVMValueRef samplemask, + struct ac_export_args *args) +{ + unsigned mask = 0; + unsigned format = ac_get_spi_shader_z_format(depth != NULL, + stencil != NULL, + samplemask != NULL); + + assert(depth || stencil || samplemask); + + memset(args, 0, sizeof(*args)); + + args->valid_mask = 1; /* whether the EXEC mask is valid */ + args->done = 1; /* DONE bit */ + + /* Specify the target we are exporting */ + args->target = V_008DFC_SQ_EXP_MRTZ; + + args->compr = 0; /* COMP flag */ + args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */ + args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */ + args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */ + args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */ + + if (format == V_028710_SPI_SHADER_UINT16_ABGR) { + assert(!depth); + args->compr = 1; /* COMPR flag */ + + if (stencil) { + /* Stencil should be in X[23:16]. */ + stencil = ac_to_integer(ctx, stencil); + stencil = LLVMBuildShl(ctx->builder, stencil, + LLVMConstInt(ctx->i32, 16, 0), ""); + args->out[0] = ac_to_float(ctx, stencil); + mask |= 0x3; + } + if (samplemask) { + /* SampleMask should be in Y[15:0]. */ + args->out[1] = samplemask; + mask |= 0xc; + } + } else { + if (depth) { + args->out[0] = depth; + mask |= 0x1; + } + if (stencil) { + args->out[1] = stencil; + mask |= 0x2; + } + if (samplemask) { + args->out[2] = samplemask; + mask |= 0x4; + } + } + + /* SI (except OLAND and HAINAN) has a bug that it only looks + * at the X writemask component. */ + if (ctx->chip_class == SI && + ctx->family != CHIP_OLAND && + ctx->family != CHIP_HAINAN) + mask |= 0x1; + + /* Specify which components to enable */ + args->enabled_channels = mask; +} diff --git a/lib/mesa/src/amd/common/ac_shader_util.h b/lib/mesa/src/amd/common/ac_shader_util.h new file mode 100644 index 000000000..e4cf2bf57 --- /dev/null +++ b/lib/mesa/src/amd/common/ac_shader_util.h @@ -0,0 +1,48 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef AC_SHADER_UTIL_H +#define AC_SHADER_UTIL_H + +#include <stdbool.h> +#include <stdint.h> + +#include "amd_family.h" +#include "ac_llvm_build.h" + +unsigned +ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, + bool writes_samplemask); + +unsigned +ac_get_cb_shader_mask(unsigned spi_shader_col_format); + +uint32_t +ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class); + +void +ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, + LLVMValueRef stencil, LLVMValueRef samplemask, + struct ac_export_args *args); + +#endif diff --git a/lib/mesa/src/amd/common/meson.build b/lib/mesa/src/amd/common/meson.build new file mode 100644 index 000000000..6827a0209 --- /dev/null +++ b/lib/mesa/src/amd/common/meson.build @@ -0,0 +1,63 @@ +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
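+# sid_tables.py generates sid_tables.h from the register headers sid.h and
+# gfx9d.h; the script writes the tables to stdout, which 'capture : true'
+# redirects into the declared output file.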
+ +sid_tables_h = custom_target( + 'sid_tables_h', + input : ['sid_tables.py', 'sid.h', 'gfx9d.h'], + output : 'sid_tables.h', + command : [prog_python, '@INPUT@'], + capture : true, +) + +amd_common_files = files( + 'ac_binary.c', + 'ac_binary.h', + 'ac_exp_param.h', + 'ac_llvm_build.c', + 'ac_llvm_build.h', + 'ac_llvm_helper.cpp', + 'ac_llvm_util.c', + 'ac_llvm_util.h', + 'ac_shader_abi.h', + 'ac_shader_util.c', + 'ac_shader_util.h', + 'ac_nir_to_llvm.c', + 'ac_nir_to_llvm.h', + 'ac_gpu_info.c', + 'ac_gpu_info.h', + 'ac_surface.c', + 'ac_surface.h', + 'ac_debug.c', + 'ac_debug.h', +) + +libamd_common = static_library( + 'amd_common', + [amd_common_files, sid_tables_h], + include_directories : [ + inc_common, inc_compiler, inc_mesa, inc_mapi, inc_amd, + ], + dependencies : [ + dep_llvm, dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind, + idep_nir_headers, + ], + c_args : [c_vis_args], + cpp_args : [cpp_vis_args], +) diff --git a/lib/mesa/src/amd/meson.build b/lib/mesa/src/amd/meson.build new file mode 100644 index 000000000..f96a9aac0 --- /dev/null +++ b/lib/mesa/src/amd/meson.build @@ -0,0 +1,27 @@ +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +inc_amd = include_directories('.') + +subdir('addrlib') +subdir('common') +if with_amd_vk + subdir('vulkan') +endif diff --git a/lib/mesa/src/amd/vulkan/meson.build b/lib/mesa/src/amd/vulkan/meson.build new file mode 100644 index 000000000..cc2aa7fd1 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/meson.build @@ -0,0 +1,178 @@ +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +radv_entrypoints = custom_target( + 'radv_entrypoints.[ch]', + input : ['radv_entrypoints_gen.py', vk_api_xml], + output : ['radv_entrypoints.h', 'radv_entrypoints.c'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--outdir', + meson.current_build_dir() + ], + depend_files : files('radv_extensions.py'), +) + +radv_extensions_c = custom_target( + 'radv_extensions.c', + input : ['radv_extensions.py', vk_api_xml], + output : ['radv_extensions.c', 'radv_extensions.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--out-c', '@OUTPUT0@', + '--out-h', '@OUTPUT1@' + ], +) + +vk_format_table_c = custom_target( + 'vk_format_table.c', + input : ['vk_format_table.py', 'vk_format_layout.csv'], + output : 'vk_format_table.c', + command : [prog_python, '@INPUT@'], + depend_files : files('vk_format_parse.py'), + capture : true, +) + +libradv_files = files( + 'winsys/amdgpu/radv_amdgpu_bo.c', + 'winsys/amdgpu/radv_amdgpu_bo.h', + 'winsys/amdgpu/radv_amdgpu_cs.c', + 'winsys/amdgpu/radv_amdgpu_cs.h', + 'winsys/amdgpu/radv_amdgpu_surface.c', + 'winsys/amdgpu/radv_amdgpu_surface.h', + 'winsys/amdgpu/radv_amdgpu_winsys.c', + 'winsys/amdgpu/radv_amdgpu_winsys.h', + 'winsys/amdgpu/radv_amdgpu_winsys_public.h', + 'radv_cmd_buffer.c', + 'radv_cs.h', + 'radv_debug.c', + 'radv_debug.h', + 'radv_device.c', + 'radv_descriptor_set.c', + 'radv_descriptor_set.h', + 'radv_formats.c', + 'radv_image.c', + 'radv_llvm_helper.cpp', + 'radv_meta.c', + 'radv_meta.h', + 'radv_meta_blit.c', + 'radv_meta_blit2d.c', + 'radv_meta_buffer.c', + 'radv_meta_bufimage.c', + 'radv_meta_clear.c', + 'radv_meta_copy.c', + 'radv_meta_decompress.c', + 'radv_meta_fast_clear.c', + 'radv_meta_resolve.c', + 'radv_meta_resolve_cs.c', + 'radv_meta_resolve_fs.c', + 'radv_nir_to_llvm.c', + 'radv_pass.c', + 'radv_pipeline.c', + 'radv_pipeline_cache.c', + 'radv_private.h', + 'radv_radeon_winsys.h', + 'radv_shader.c', + 'radv_shader.h', + 'radv_shader_helper.h', + 'radv_shader_info.c', + 'radv_query.c', + 'radv_util.c', + 'radv_util.h', + 'radv_wsi.c', + 'si_cmd_buffer.c', + 'vk_format.h', +) + +radv_deps = [] +radv_flags = [] + +if with_platform_x11 + radv_deps += dep_xcb_dri3 + radv_flags += [ + '-DVK_USE_PLATFORM_XCB_KHR', + '-DVK_USE_PLATFORM_XLIB_KHR', + ] + libradv_files += files('radv_wsi_x11.c') +endif + +if with_platform_wayland + radv_deps += dep_wayland_client + radv_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR' + libradv_files += files('radv_wsi_wayland.c') +endif + +if with_platform_drm + radv_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR' + libradv_files += files('radv_wsi_display.c') +endif + +if with_xlib_lease + radv_deps += [dep_xcb_xrandr, dep_xlib_xrandr] + radv_flags += '-DVK_USE_PLATFORM_XLIB_XRANDR_EXT' +endif + +libvulkan_radeon = shared_library( + 'vulkan_radeon', + [libradv_files, radv_entrypoints, radv_extensions_c, vk_format_table_c, sha1_h], + include_directories : [ + inc_common, inc_amd, inc_amd_common, inc_compiler, inc_vulkan_util, + inc_vulkan_wsi, + ], + link_with : [ + libamd_common, libamdgpu_addrlib, libvulkan_util, libvulkan_wsi, + libmesa_util, + ], + dependencies : [ + dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m, + dep_valgrind, radv_deps, + idep_nir, + ], + c_args : [c_vis_args, no_override_init_args, 
radv_flags], + cpp_args : [cpp_vis_args, radv_flags], + link_args : [ld_args_bsymbolic, ld_args_gc_sections], + install : true, +) + +radeon_icd = custom_target( + 'radeon_icd', + input : 'radv_icd.py', + output : 'radeon_icd.@0@.json'.format(host_machine.cpu()), + command : [ + prog_python, '@INPUT@', + '--lib-path', join_paths(get_option('prefix'), get_option('libdir')), + '--out', '@OUTPUT@', + ], + depend_files : files('radv_extensions.py'), + build_by_default : true, + install_dir : with_vulkan_icd_dir, + install : true, +) + +radv_dev_icd = custom_target( + 'radv_dev_icd', + input : 'radv_icd.py', + output : 'dev_icd.json', + command : [ + prog_python, '@INPUT@', '--lib-path', meson.current_build_dir(), + '--out', '@OUTPUT@' + ], + depend_files : files('radv_extensions.py'), + build_by_default : true, + install : false, +) diff --git a/lib/mesa/src/amd/vulkan/radv_android.c b/lib/mesa/src/amd/vulkan/radv_android.c new file mode 100644 index 000000000..1a4425f26 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_android.c @@ -0,0 +1,379 @@ +/* + * Copyright © 2017, Google Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <hardware/gralloc.h> +#include <hardware/hardware.h> +#include <hardware/hwvulkan.h> +#include <vulkan/vk_android_native_buffer.h> +#include <vulkan/vk_icd.h> +#include <libsync.h> + +#include "radv_private.h" + +static int radv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev); +static int radv_hal_close(struct hw_device_t *dev); + +static void UNUSED +static_asserts(void) +{ + STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC); +} + +PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = { + .common = { + .tag = HARDWARE_MODULE_TAG, + .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1, + .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0), + .id = HWVULKAN_HARDWARE_MODULE_ID, + .name = "AMD Vulkan HAL", + .author = "Google", + .methods = &(hw_module_methods_t) { + .open = radv_hal_open, + }, + }, +}; + +/* If any bits in test_mask are set, then unset them and return true. 
*/ +static inline bool +unmask32(uint32_t *inout_mask, uint32_t test_mask) +{ + uint32_t orig_mask = *inout_mask; + *inout_mask &= ~test_mask; + return *inout_mask != orig_mask; +} + +static int +radv_hal_open(const struct hw_module_t* mod, const char* id, + struct hw_device_t** dev) +{ + assert(mod == &HAL_MODULE_INFO_SYM.common); + assert(strcmp(id, HWVULKAN_DEVICE_0) == 0); + + hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev)); + if (!hal_dev) + return -1; + + *hal_dev = (hwvulkan_device_t) { + .common = { + .tag = HARDWARE_DEVICE_TAG, + .version = HWVULKAN_DEVICE_API_VERSION_0_1, + .module = &HAL_MODULE_INFO_SYM.common, + .close = radv_hal_close, + }, + .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties, + .CreateInstance = radv_CreateInstance, + .GetInstanceProcAddr = radv_GetInstanceProcAddr, + }; + + *dev = &hal_dev->common; + return 0; +} + +static int +radv_hal_close(struct hw_device_t *dev) +{ + /* hwvulkan.h claims that hw_device_t::close() is never called. */ + return -1; +} + +VkResult +radv_image_from_gralloc(VkDevice device_h, + const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, + VkImage *out_image_h) + +{ + RADV_FROM_HANDLE(radv_device, device, device_h); + VkImage image_h = VK_NULL_HANDLE; + struct radv_image *image = NULL; + struct radv_bo *bo = NULL; + VkResult result; + + if (gralloc_info->handle->numFds != 1) { + return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR, + "VkNativeBufferANDROID::handle::numFds is %d, " + "expected 1", gralloc_info->handle->numFds); + } + + /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf + * must exceed that of the gralloc handle, and we do not own the gralloc + * handle. + */ + int dma_buf = gralloc_info->handle->data[0]; + + VkDeviceMemory memory_h; + + const VkImportMemoryFdInfoKHR import_info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + .fd = dup(dma_buf), + }; + + /* Find the first VRAM memory type, or GART for PRIME images. */ + int memory_type_index = -1; + for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { + bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (is_local) { + memory_type_index = i; + break; + } + } + + /* fallback */ + if (memory_type_index == -1) + memory_type_index = 0; + + result = radv_AllocateMemory(device_h, + &(VkMemoryAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &import_info, + /* Max buffer size, unused for imports */ + .allocationSize = 0x7FFFFFFF, + .memoryTypeIndex = memory_type_index, + }, + alloc, + &memory_h); + if (result != VK_SUCCESS) + return result; + + struct radeon_bo_metadata md; + device->ws->buffer_get_metadata(radv_device_memory_from_handle(memory_h)->bo, &md); + + bool is_scanout; + if (device->physical_device->rad_info.chip_class >= GFX9) { + /* Copied from radeonsi, but is hacky so should be cleaned up. 
*/ + is_scanout = md.u.gfx9.swizzle_mode == 0 || md.u.gfx9.swizzle_mode % 4 == 2; + } else { + is_scanout = md.u.legacy.scanout; + } + + VkImageCreateInfo updated_base_info = *base_info; + + VkExternalMemoryImageCreateInfo external_memory_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = updated_base_info.pNext, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + updated_base_info.pNext = &external_memory_info; + + result = radv_image_create(device_h, + &(struct radv_image_create_info) { + .vk_info = &updated_base_info, + .scanout = is_scanout, + .no_metadata_planes = true}, + alloc, + &image_h); + + if (result != VK_SUCCESS) + goto fail_create_image; + + image = radv_image_from_handle(image_h); + + radv_BindImageMemory(device_h, image_h, memory_h, 0); + + image->owned_memory = memory_h; + /* Don't clobber the out-parameter until success is certain. */ + *out_image_h = image_h; + + return VK_SUCCESS; + +fail_create_image: + radv_FreeMemory(device_h, memory_h, alloc); + return result; +} + +VkResult radv_GetSwapchainGrallocUsageANDROID( + VkDevice device_h, + VkFormat format, + VkImageUsageFlags imageUsage, + int* grallocUsage) +{ + RADV_FROM_HANDLE(radv_device, device, device_h); + struct radv_physical_device *phys_dev = device->physical_device; + VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev); + VkResult result; + + *grallocUsage = 0; + + /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags + * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. + * The relevant code in libvulkan/swapchain.cpp contains this fun comment: + * + * TODO(jessehall): I think these are right, but haven't thought hard + * about it. Do we need to query the driver for support of any of + * these? + * + * Any disagreement between this function and the hardcoded + * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests + * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. + */ + + const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = imageUsage, + }; + + VkImageFormatProperties2KHR image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR, + }; + + /* Check that requested format and usage are supported. */ + result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, + &image_format_info, &image_format_props); + if (result != VK_SUCCESS) { + return vk_errorf(device->instance, result, + "radv_GetPhysicalDeviceImageFormatProperties2 failed " + "inside %s", __func__); + } + + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) + *grallocUsage |= GRALLOC_USAGE_HW_RENDER; + + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; + + /* All VkImageUsageFlags not explicitly checked here are unsupported for + * gralloc swapchains. + */ + if (imageUsage != 0) { + return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "unsupported VkImageUsageFlags(0x%x) for gralloc " + "swapchain", imageUsage); + } + + /* + * FINISHME: Advertise all display-supported formats. Mostly + * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check + * what we need for 30-bit colors. 
+ */ + if (format == VK_FORMAT_B8G8R8A8_UNORM || + format == VK_FORMAT_B5G6R5_UNORM_PACK16) { + *grallocUsage |= GRALLOC_USAGE_HW_FB | + GRALLOC_USAGE_HW_COMPOSER | + GRALLOC_USAGE_EXTERNAL_DISP; + } + + if (*grallocUsage == 0) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + return VK_SUCCESS; +} + +VkResult +radv_AcquireImageANDROID( + VkDevice device, + VkImage image_h, + int nativeFenceFd, + VkSemaphore semaphore, + VkFence fence) +{ + VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS; + + if (semaphore != VK_NULL_HANDLE) { + int semaphore_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd; + semaphore_result = radv_ImportSemaphoreFdKHR(device, + &(VkImportSemaphoreFdInfoKHR) { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR, + .fd = semaphore_fd, + .semaphore = semaphore, + }); + } + + if (fence != VK_NULL_HANDLE) { + int fence_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd; + fence_result = radv_ImportFenceFdKHR(device, + &(VkImportFenceFdInfoKHR) { + .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT_KHR, + .fd = fence_fd, + .fence = fence, + }); + } + + close(nativeFenceFd); + + if (semaphore_result != VK_SUCCESS) + return semaphore_result; + return fence_result; +} + +VkResult +radv_QueueSignalReleaseImageANDROID( + VkQueue _queue, + uint32_t waitSemaphoreCount, + const VkSemaphore* pWaitSemaphores, + VkImage image, + int* pNativeFenceFd) +{ + RADV_FROM_HANDLE(radv_queue, queue, _queue); + VkResult result = VK_SUCCESS; + + if (waitSemaphoreCount == 0) { + if (pNativeFenceFd) + *pNativeFenceFd = -1; + return VK_SUCCESS; + } + + int fd = -1; + + for (uint32_t i = 0; i < waitSemaphoreCount; ++i) { + int tmp_fd; + result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device), + &(VkSemaphoreGetFdInfoKHR) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR, + .semaphore = pWaitSemaphores[i], + }, &tmp_fd); + if (result != VK_SUCCESS) { + if (fd >= 0) + close (fd); + return result; + } + + if (fd < 0) + fd = tmp_fd; + else if (tmp_fd >= 0) { + sync_accumulate("radv", &fd, tmp_fd); + close(tmp_fd); + } + } + + if (pNativeFenceFd) { + *pNativeFenceFd = fd; + } else if (fd >= 0) { + close(fd); + /* We still need to do the exports, to reset the semaphores, but + * otherwise we don't wait on them. 
*/ + } + return VK_SUCCESS; +} diff --git a/lib/mesa/src/amd/vulkan/radv_debug.c b/lib/mesa/src/amd/vulkan/radv_debug.c index b69c05b64..08fc80c12 100644 --- a/lib/mesa/src/amd/vulkan/radv_debug.c +++ b/lib/mesa/src/amd/vulkan/radv_debug.c @@ -29,6 +29,7 @@ #include <stdio.h> #include <sys/utsname.h> +#include "util/mesa-sha1.h" #include "sid.h" #include "gfx9d.h" #include "ac_debug.h" @@ -61,7 +62,8 @@ radv_init_trace(struct radv_device *device) device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS); + RADEON_FLAG_CPU_ACCESS| + RADEON_FLAG_NO_INTERPROCESS_SHARING); if (!device->trace_bo) return false; @@ -78,7 +80,7 @@ radv_init_trace(struct radv_device *device) } static void -radv_dump_trace(struct radv_device *device, struct radeon_winsys_cs *cs) +radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs) { const char *filename = getenv("RADV_TRACE_FILE"); FILE *f = fopen(filename, "w"); @@ -367,11 +369,9 @@ static void si_add_split_disasm(const char *disasm, } static void -radv_dump_annotated_shader(struct radv_pipeline *pipeline, - struct radv_shader_variant *shader, - gl_shader_stage stage, - struct ac_wave_info *waves, unsigned num_waves, - FILE *f) +radv_dump_annotated_shader(struct radv_shader_variant *shader, + gl_shader_stage stage, struct ac_wave_info *waves, + unsigned num_waves, FILE *f) { uint64_t start_addr, end_addr; unsigned i; @@ -442,28 +442,22 @@ radv_dump_annotated_shader(struct radv_pipeline *pipeline, static void radv_dump_annotated_shaders(struct radv_pipeline *pipeline, - struct radv_shader_variant *compute_shader, - FILE *f) + VkShaderStageFlagBits active_stages, FILE *f) { struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; unsigned num_waves = ac_get_wave_info(waves); - unsigned mask; fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); /* Dump annotated active graphics shaders. */ - mask = pipeline->active_stages; - while (mask) { - int stage = u_bit_scan(&mask); + while (active_stages) { + int stage = u_bit_scan(&active_stages); - radv_dump_annotated_shader(pipeline, pipeline->shaders[stage], + radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f); } - radv_dump_annotated_shader(pipeline, compute_shader, - MESA_SHADER_COMPUTE, waves, num_waves, f); - /* Print waves executing shaders that are not currently bound. */ unsigned i; bool found = false; @@ -498,7 +492,13 @@ radv_dump_shader(struct radv_pipeline *pipeline, fprintf(f, "%s:\n\n", radv_get_shader_name(shader, stage)); if (shader->spirv) { - fprintf(f, "SPIRV:\n"); + unsigned char sha1[21]; + char sha1buf[41]; + + _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1); + _mesa_sha1_format(sha1buf, sha1); + + fprintf(f, "SPIRV (sha1: %s):\n", sha1buf); radv_print_spirv(shader->spirv, shader->spirv_size, f); } @@ -507,55 +507,59 @@ radv_dump_shader(struct radv_pipeline *pipeline, nir_print_shader(shader->nir, f); } - fprintf(stderr, "DISASM:\n%s\n", shader->disasm_string); + fprintf(f, "LLVM IR:\n%s\n", shader->llvm_ir_string); + fprintf(f, "DISASM:\n%s\n", shader->disasm_string); radv_shader_dump_stats(pipeline->device, shader, stage, f); } static void radv_dump_shaders(struct radv_pipeline *pipeline, - struct radv_shader_variant *compute_shader, FILE *f) + VkShaderStageFlagBits active_stages, FILE *f) { - unsigned mask; - /* Dump active graphics shaders. 
*/ - mask = pipeline->active_stages; - while (mask) { - int stage = u_bit_scan(&mask); + while (active_stages) { + int stage = u_bit_scan(&active_stages); radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f); } +} - radv_dump_shader(pipeline, compute_shader, MESA_SHADER_COMPUTE, f); +static void +radv_dump_pipeline_state(struct radv_pipeline *pipeline, + VkShaderStageFlagBits active_stages, FILE *f) +{ + radv_dump_shaders(pipeline, active_stages, f); + radv_dump_annotated_shaders(pipeline, active_stages, f); + radv_dump_descriptors(pipeline, f); } static void radv_dump_graphics_state(struct radv_pipeline *graphics_pipeline, struct radv_pipeline *compute_pipeline, FILE *f) { - struct radv_shader_variant *compute_shader = - compute_pipeline ? compute_pipeline->shaders[MESA_SHADER_COMPUTE] : NULL; + VkShaderStageFlagBits active_stages; - if (!graphics_pipeline) - return; + if (graphics_pipeline) { + active_stages = graphics_pipeline->active_stages; + radv_dump_pipeline_state(graphics_pipeline, active_stages, f); + } - radv_dump_shaders(graphics_pipeline, compute_shader, f); - radv_dump_annotated_shaders(graphics_pipeline, compute_shader, f); - radv_dump_descriptors(graphics_pipeline, f); + if (compute_pipeline) { + active_stages = VK_SHADER_STAGE_COMPUTE_BIT; + radv_dump_pipeline_state(compute_pipeline, active_stages, f); + } } static void radv_dump_compute_state(struct radv_pipeline *compute_pipeline, FILE *f) { + VkShaderStageFlagBits active_stages = VK_SHADER_STAGE_COMPUTE_BIT; + if (!compute_pipeline) return; - radv_dump_shaders(compute_pipeline, - compute_pipeline->shaders[MESA_SHADER_COMPUTE], f); - radv_dump_annotated_shaders(compute_pipeline, - compute_pipeline->shaders[MESA_SHADER_COMPUTE], - f); - radv_dump_descriptors(compute_pipeline, f); + radv_dump_pipeline_state(compute_pipeline, active_stages, f); } static struct radv_pipeline * @@ -592,28 +596,32 @@ radv_dump_dmesg(FILE *f) pclose(p); } -static void +void radv_dump_enabled_options(struct radv_device *device, FILE *f) { uint64_t mask; - fprintf(f, "Enabled debug options: "); + if (device->instance->debug_flags) { + fprintf(f, "Enabled debug options: "); - mask = device->instance->debug_flags; - while (mask) { - int i = u_bit_scan64(&mask); - fprintf(f, "%s, ", radv_get_debug_option_name(i)); + mask = device->instance->debug_flags; + while (mask) { + int i = u_bit_scan64(&mask); + fprintf(f, "%s, ", radv_get_debug_option_name(i)); + } + fprintf(f, "\n"); } - fprintf(f, "\n"); - fprintf(f, "Enabled perftest options: "); + if (device->instance->perftest_flags) { + fprintf(f, "Enabled perftest options: "); - mask = device->instance->perftest_flags; - while (mask) { - int i = u_bit_scan64(&mask); - fprintf(f, "%s, ", radv_get_perftest_option_name(i)); + mask = device->instance->perftest_flags; + while (mask) { + int i = u_bit_scan64(&mask); + fprintf(f, "%s, ", radv_get_perftest_option_name(i)); + } + fprintf(f, "\n"); } - fprintf(f, "\n"); } static void @@ -630,11 +638,9 @@ radv_dump_device_name(struct radv_device *device, FILE *f) snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); - if (HAVE_LLVM > 0) { - snprintf(llvm_string, sizeof(llvm_string), - ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, - HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); - } + snprintf(llvm_string, sizeof(llvm_string), + ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, + HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); fprintf(f, "Device name: %s (%s DRM %i.%i.%i%s%s)\n\n", chip_name, device->physical_device->name, @@ -654,7 +660,7 @@ 
radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring) } void -radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_winsys_cs *cs) +radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) { struct radv_pipeline *graphics_pipeline, *compute_pipeline; struct radv_device *device = queue->device; diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.c b/lib/mesa/src/amd/vulkan/radv_extensions.c index f9268dfbe..9294b0769 100644 --- a/lib/mesa/src/amd/vulkan/radv_extensions.c +++ b/lib/mesa/src/amd/vulkan/radv_extensions.c @@ -51,6 +51,18 @@ #else # define VK_USE_PLATFORM_XLIB_KHR false #endif +#ifdef VK_USE_PLATFORM_DISPLAY_KHR +# undef VK_USE_PLATFORM_DISPLAY_KHR +# define VK_USE_PLATFORM_DISPLAY_KHR true +#else +# define VK_USE_PLATFORM_DISPLAY_KHR false +#endif +#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT +# undef VK_USE_PLATFORM_XLIB_XRANDR_EXT +# define VK_USE_PLATFORM_XLIB_XRANDR_EXT true +#else +# define VK_USE_PLATFORM_XLIB_XRANDR_EXT false +#endif /* And ANDROID too */ #ifdef ANDROID @@ -60,348 +72,172 @@ # define ANDROID false #endif -#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR) +#define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || VK_USE_PLATFORM_XCB_KHR || VK_USE_PLATFORM_XLIB_KHR || VK_USE_PLATFORM_DISPLAY_KHR) -bool -radv_instance_extension_supported(const char *name) -{ - if (strcmp(name, "VK_KHR_external_memory_capabilities") == 0) - return true; - if (strcmp(name, "VK_KHR_external_semaphore_capabilities") == 0) - return true; - if (strcmp(name, "VK_KHR_get_physical_device_properties2") == 0) - return true; - if (strcmp(name, "VK_KHR_surface") == 0) - return RADV_HAS_SURFACE; - if (strcmp(name, "VK_KHR_wayland_surface") == 0) - return VK_USE_PLATFORM_WAYLAND_KHR; - if (strcmp(name, "VK_KHR_xcb_surface") == 0) - return VK_USE_PLATFORM_XCB_KHR; - if (strcmp(name, "VK_KHR_xlib_surface") == 0) - return VK_USE_PLATFORM_XLIB_KHR; - return false; -} -VkResult radv_EnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); +const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT] = { + {"VK_KHR_device_group_creation", 1}, + {"VK_KHR_external_fence_capabilities", 1}, + {"VK_KHR_external_memory_capabilities", 1}, + {"VK_KHR_external_semaphore_capabilities", 1}, + {"VK_KHR_get_display_properties2", 1}, + {"VK_KHR_get_physical_device_properties2", 1}, + {"VK_KHR_get_surface_capabilities2", 1}, + {"VK_KHR_surface", 25}, + {"VK_KHR_wayland_surface", 6}, + {"VK_KHR_xcb_surface", 6}, + {"VK_KHR_xlib_surface", 6}, + {"VK_KHR_display", 23}, + {"VK_EXT_direct_mode_display", 1}, + {"VK_EXT_acquire_xlib_display", 1}, + {"VK_EXT_display_surface_counter", 1}, + {"VK_EXT_debug_report", 9}, +}; - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_external_memory_capabilities", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_external_semaphore_capabilities", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_get_physical_device_properties2", - .specVersion = 1, - }; - } - } - if (RADV_HAS_SURFACE) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_surface", 
- .specVersion = 25, - }; - } - } - if (VK_USE_PLATFORM_WAYLAND_KHR) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_wayland_surface", - .specVersion = 6, - }; - } - } - if (VK_USE_PLATFORM_XCB_KHR) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_xcb_surface", - .specVersion = 6, - }; - } - } - if (VK_USE_PLATFORM_XLIB_KHR) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_xlib_surface", - .specVersion = 6, - }; - } - } +const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT] = { + {"VK_ANDROID_native_buffer", 5}, + {"VK_KHR_16bit_storage", 1}, + {"VK_KHR_bind_memory2", 1}, + {"VK_KHR_create_renderpass2", 1}, + {"VK_KHR_dedicated_allocation", 1}, + {"VK_KHR_descriptor_update_template", 1}, + {"VK_KHR_device_group", 1}, + {"VK_KHR_draw_indirect_count", 1}, + {"VK_KHR_driver_properties", 1}, + {"VK_KHR_external_fence", 1}, + {"VK_KHR_external_fence_fd", 1}, + {"VK_KHR_external_memory", 1}, + {"VK_KHR_external_memory_fd", 1}, + {"VK_KHR_external_semaphore", 1}, + {"VK_KHR_external_semaphore_fd", 1}, + {"VK_KHR_get_memory_requirements2", 1}, + {"VK_KHR_image_format_list", 1}, + {"VK_KHR_incremental_present", 1}, + {"VK_KHR_maintenance1", 1}, + {"VK_KHR_maintenance2", 1}, + {"VK_KHR_maintenance3", 1}, + {"VK_KHR_push_descriptor", 1}, + {"VK_KHR_relaxed_block_layout", 1}, + {"VK_KHR_sampler_mirror_clamp_to_edge", 1}, + {"VK_KHR_shader_draw_parameters", 1}, + {"VK_KHR_storage_buffer_storage_class", 1}, + {"VK_KHR_swapchain", 68}, + {"VK_KHR_variable_pointers", 1}, + {"VK_KHR_multiview", 1}, + {"VK_EXT_calibrated_timestamps", 1}, + {"VK_EXT_conditional_rendering", 1}, + {"VK_EXT_conservative_rasterization", 1}, + {"VK_EXT_display_control", 1}, + {"VK_EXT_depth_range_unrestricted", 1}, + {"VK_EXT_descriptor_indexing", 2}, + {"VK_EXT_discard_rectangles", 1}, + {"VK_EXT_external_memory_dma_buf", 1}, + {"VK_EXT_external_memory_host", 1}, + {"VK_EXT_global_priority", 1}, + {"VK_EXT_pci_bus_info", 1}, + {"VK_EXT_sampler_filter_minmax", 1}, + {"VK_EXT_shader_viewport_index_layer", 1}, + {"VK_EXT_shader_stencil_export", 1}, + {"VK_EXT_transform_feedback", 1}, + {"VK_EXT_vertex_attribute_divisor", 3}, + {"VK_AMD_draw_indirect_count", 1}, + {"VK_AMD_gcn_shader", 1}, + {"VK_AMD_rasterization_order", 1}, + {"VK_AMD_shader_core_properties", 1}, + {"VK_AMD_shader_info", 1}, + {"VK_AMD_shader_trinary_minmax", 1}, + {"VK_GOOGLE_decorate_string", 1}, + {"VK_GOOGLE_hlsl_functionality1", 1}, +}; - return vk_outarray_status(&out); -} +const struct radv_instance_extension_table radv_supported_instance_extensions = { + .KHR_device_group_creation = true, + .KHR_external_fence_capabilities = true, + .KHR_external_memory_capabilities = true, + .KHR_external_semaphore_capabilities = true, + .KHR_get_display_properties2 = VK_USE_PLATFORM_DISPLAY_KHR, + .KHR_get_physical_device_properties2 = true, + .KHR_get_surface_capabilities2 = RADV_HAS_SURFACE, + .KHR_surface = RADV_HAS_SURFACE, + .KHR_wayland_surface = VK_USE_PLATFORM_WAYLAND_KHR, + .KHR_xcb_surface = VK_USE_PLATFORM_XCB_KHR, + .KHR_xlib_surface = VK_USE_PLATFORM_XLIB_KHR, + .KHR_display = VK_USE_PLATFORM_DISPLAY_KHR, + .EXT_direct_mode_display = VK_USE_PLATFORM_DISPLAY_KHR, + .EXT_acquire_xlib_display = VK_USE_PLATFORM_XLIB_XRANDR_EXT, + .EXT_display_surface_counter = VK_USE_PLATFORM_DISPLAY_KHR, + .EXT_debug_report = true, +}; -uint32_t -radv_physical_device_api_version(struct 
radv_physical_device *dev) +void radv_fill_device_extension_table(const struct radv_physical_device *device, + struct radv_device_extension_table* table) { - return VK_MAKE_VERSION(1, 0, 57); + table->ANDROID_native_buffer = ANDROID && device->rad_info.has_syncobj_wait_for_submit; + table->KHR_16bit_storage = HAVE_LLVM >= 0x0700; + table->KHR_bind_memory2 = true; + table->KHR_create_renderpass2 = true; + table->KHR_dedicated_allocation = true; + table->KHR_descriptor_update_template = true; + table->KHR_device_group = true; + table->KHR_draw_indirect_count = true; + table->KHR_driver_properties = true; + table->KHR_external_fence = device->rad_info.has_syncobj_wait_for_submit; + table->KHR_external_fence_fd = device->rad_info.has_syncobj_wait_for_submit; + table->KHR_external_memory = true; + table->KHR_external_memory_fd = true; + table->KHR_external_semaphore = device->rad_info.has_syncobj; + table->KHR_external_semaphore_fd = device->rad_info.has_syncobj; + table->KHR_get_memory_requirements2 = true; + table->KHR_image_format_list = true; + table->KHR_incremental_present = RADV_HAS_SURFACE; + table->KHR_maintenance1 = true; + table->KHR_maintenance2 = true; + table->KHR_maintenance3 = true; + table->KHR_push_descriptor = true; + table->KHR_relaxed_block_layout = true; + table->KHR_sampler_mirror_clamp_to_edge = true; + table->KHR_shader_draw_parameters = true; + table->KHR_storage_buffer_storage_class = true; + table->KHR_swapchain = RADV_HAS_SURFACE; + table->KHR_variable_pointers = true; + table->KHR_multiview = true; + table->EXT_calibrated_timestamps = true; + table->EXT_conditional_rendering = true; + table->EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9; + table->EXT_display_control = VK_USE_PLATFORM_DISPLAY_KHR; + table->EXT_depth_range_unrestricted = true; + table->EXT_descriptor_indexing = true; + table->EXT_discard_rectangles = true; + table->EXT_external_memory_dma_buf = true; + table->EXT_external_memory_host = device->rad_info.has_userptr; + table->EXT_global_priority = device->rad_info.has_ctx_priority; + table->EXT_pci_bus_info = false; + table->EXT_sampler_filter_minmax = device->rad_info.chip_class >= CIK; + table->EXT_shader_viewport_index_layer = true; + table->EXT_shader_stencil_export = true; + table->EXT_transform_feedback = true; + table->EXT_vertex_attribute_divisor = true; + table->AMD_draw_indirect_count = true; + table->AMD_gcn_shader = true; + table->AMD_rasterization_order = device->has_out_of_order_rast; + table->AMD_shader_core_properties = true; + table->AMD_shader_info = true; + table->AMD_shader_trinary_minmax = true; + table->GOOGLE_decorate_string = true; + table->GOOGLE_hlsl_functionality1 = true; } -bool -radv_physical_device_extension_supported(struct radv_physical_device *device, - const char *name) +VkResult radv_EnumerateInstanceVersion( + uint32_t* pApiVersion) { - if (strcmp(name, "VK_KHR_bind_memory2") == 0) - return true; - if (strcmp(name, "VK_KHR_dedicated_allocation") == 0) - return true; - if (strcmp(name, "VK_KHR_descriptor_update_template") == 0) - return true; - if (strcmp(name, "VK_KHR_external_memory") == 0) - return true; - if (strcmp(name, "VK_KHR_external_memory_fd") == 0) - return true; - if (strcmp(name, "VK_KHR_external_semaphore") == 0) - return device->rad_info.has_syncobj; - if (strcmp(name, "VK_KHR_external_semaphore_fd") == 0) - return device->rad_info.has_syncobj; - if (strcmp(name, "VK_KHR_get_memory_requirements2") == 0) - return true; - if (strcmp(name, "VK_KHR_image_format_list") == 0) - return 
true; - if (strcmp(name, "VK_KHR_incremental_present") == 0) - return true; - if (strcmp(name, "VK_KHR_maintenance1") == 0) - return true; - if (strcmp(name, "VK_KHR_maintenance2") == 0) - return true; - if (strcmp(name, "VK_KHR_push_descriptor") == 0) - return true; - if (strcmp(name, "VK_KHR_relaxed_block_layout") == 0) - return true; - if (strcmp(name, "VK_KHR_sampler_mirror_clamp_to_edge") == 0) - return true; - if (strcmp(name, "VK_KHR_shader_draw_parameters") == 0) - return true; - if (strcmp(name, "VK_KHR_storage_buffer_storage_class") == 0) - return true; - if (strcmp(name, "VK_KHR_swapchain") == 0) - return RADV_HAS_SURFACE; - if (strcmp(name, "VK_KHR_variable_pointers") == 0) - return true; - if (strcmp(name, "VK_KHX_multiview") == 0) - return false; - if (strcmp(name, "VK_EXT_global_priority") == 0) - return device->rad_info.has_ctx_priority; - if (strcmp(name, "VK_AMD_draw_indirect_count") == 0) - return true; - if (strcmp(name, "VK_AMD_rasterization_order") == 0) - return device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2; - return false; + *pApiVersion = VK_MAKE_VERSION(1, 1, 70); + return VK_SUCCESS; } -VkResult radv_EnumerateDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) +uint32_t +radv_physical_device_api_version(struct radv_physical_device *dev) { - RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); - VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); - (void)device; - - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_bind_memory2", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_dedicated_allocation", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_descriptor_update_template", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_external_memory", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_external_memory_fd", - .specVersion = 1, - }; - } - } - if (device->rad_info.has_syncobj) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_external_semaphore", - .specVersion = 1, - }; - } - } - if (device->rad_info.has_syncobj) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_external_semaphore_fd", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_get_memory_requirements2", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_image_format_list", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_incremental_present", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_maintenance1", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_maintenance2", - 
.specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_push_descriptor", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_relaxed_block_layout", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_sampler_mirror_clamp_to_edge", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_shader_draw_parameters", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_storage_buffer_storage_class", - .specVersion = 1, - }; - } - } - if (RADV_HAS_SURFACE) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_swapchain", - .specVersion = 68, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHR_variable_pointers", - .specVersion = 1, - }; - } - } - if (false) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_KHX_multiview", - .specVersion = 1, - }; - } - } - if (device->rad_info.has_ctx_priority) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_EXT_global_priority", - .specVersion = 1, - }; - } - } - if (true) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_AMD_draw_indirect_count", - .specVersion = 1, - }; - } - } - if (device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "VK_AMD_rasterization_order", - .specVersion = 1, - }; - } - } - - return vk_outarray_status(&out); + if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit) + return VK_MAKE_VERSION(1, 1, 70); + return VK_MAKE_VERSION(1, 0, 68); } diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.h b/lib/mesa/src/amd/vulkan/radv_extensions.h new file mode 100644 index 000000000..5f76d5d20 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_extensions.h @@ -0,0 +1,127 @@ +/* + * Copyright 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef RADV_EXTENSIONS_H +#define RADV_EXTENSIONS_H + +enum { + RADV_INSTANCE_EXTENSION_COUNT = 16, + RADV_DEVICE_EXTENSION_COUNT = 53, +}; + +struct radv_instance_extension_table { + union { + bool extensions[RADV_INSTANCE_EXTENSION_COUNT]; + struct { + bool KHR_device_group_creation; + bool KHR_external_fence_capabilities; + bool KHR_external_memory_capabilities; + bool KHR_external_semaphore_capabilities; + bool KHR_get_display_properties2; + bool KHR_get_physical_device_properties2; + bool KHR_get_surface_capabilities2; + bool KHR_surface; + bool KHR_wayland_surface; + bool KHR_xcb_surface; + bool KHR_xlib_surface; + bool KHR_display; + bool EXT_direct_mode_display; + bool EXT_acquire_xlib_display; + bool EXT_display_surface_counter; + bool EXT_debug_report; + }; + }; +}; + +struct radv_device_extension_table { + union { + bool extensions[RADV_DEVICE_EXTENSION_COUNT]; + struct { + bool ANDROID_native_buffer; + bool KHR_16bit_storage; + bool KHR_bind_memory2; + bool KHR_create_renderpass2; + bool KHR_dedicated_allocation; + bool KHR_descriptor_update_template; + bool KHR_device_group; + bool KHR_draw_indirect_count; + bool KHR_driver_properties; + bool KHR_external_fence; + bool KHR_external_fence_fd; + bool KHR_external_memory; + bool KHR_external_memory_fd; + bool KHR_external_semaphore; + bool KHR_external_semaphore_fd; + bool KHR_get_memory_requirements2; + bool KHR_image_format_list; + bool KHR_incremental_present; + bool KHR_maintenance1; + bool KHR_maintenance2; + bool KHR_maintenance3; + bool KHR_push_descriptor; + bool KHR_relaxed_block_layout; + bool KHR_sampler_mirror_clamp_to_edge; + bool KHR_shader_draw_parameters; + bool KHR_storage_buffer_storage_class; + bool KHR_swapchain; + bool KHR_variable_pointers; + bool KHR_multiview; + bool EXT_calibrated_timestamps; + bool EXT_conditional_rendering; + bool EXT_conservative_rasterization; + bool EXT_display_control; + bool EXT_depth_range_unrestricted; + bool EXT_descriptor_indexing; + bool EXT_discard_rectangles; + bool EXT_external_memory_dma_buf; + bool EXT_external_memory_host; + bool EXT_global_priority; + bool EXT_pci_bus_info; + bool EXT_sampler_filter_minmax; + bool EXT_shader_viewport_index_layer; + bool EXT_shader_stencil_export; + bool EXT_transform_feedback; + bool EXT_vertex_attribute_divisor; + bool AMD_draw_indirect_count; + bool AMD_gcn_shader; + bool AMD_rasterization_order; + bool AMD_shader_core_properties; + bool AMD_shader_info; + bool AMD_shader_trinary_minmax; + bool GOOGLE_decorate_string; + bool GOOGLE_hlsl_functionality1; + }; + }; +}; + +extern const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT]; +extern const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT]; +extern const struct radv_instance_extension_table radv_supported_instance_extensions; + + +struct radv_physical_device; + +void radv_fill_device_extension_table(const struct radv_physical_device *device, + struct radv_device_extension_table* table); +#endif diff --git a/lib/mesa/src/amd/vulkan/radv_extensions.py b/lib/mesa/src/amd/vulkan/radv_extensions.py index 43c0fa740..4a28f8bf4 100644 --- a/lib/mesa/src/amd/vulkan/radv_extensions.py +++ b/lib/mesa/src/amd/vulkan/radv_extensions.py @@ -31,7 +31,7 @@ import xml.etree.cElementTree as et from mako.template import Template -MAX_API_VERSION = '1.0.57' +MAX_API_VERSION = '1.1.70' class Extension: def __init__(self, name, ext_version, enable): @@ -50,21 +50,34 @@ class Extension: # those extension strings, then tests 
dEQP-VK.api.info.instance.extensions # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ + Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'), + Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'), Extension('VK_KHR_bind_memory2', 1, True), + Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), Extension('VK_KHR_descriptor_update_template', 1, True), + Extension('VK_KHR_device_group', 1, True), + Extension('VK_KHR_device_group_creation', 1, True), + Extension('VK_KHR_draw_indirect_count', 1, True), + Extension('VK_KHR_driver_properties', 1, True), + Extension('VK_KHR_external_fence', 1, 'device->rad_info.has_syncobj_wait_for_submit'), + Extension('VK_KHR_external_fence_capabilities', 1, True), + Extension('VK_KHR_external_fence_fd', 1, 'device->rad_info.has_syncobj_wait_for_submit'), Extension('VK_KHR_external_memory', 1, True), Extension('VK_KHR_external_memory_capabilities', 1, True), Extension('VK_KHR_external_memory_fd', 1, True), Extension('VK_KHR_external_semaphore', 1, 'device->rad_info.has_syncobj'), Extension('VK_KHR_external_semaphore_capabilities', 1, True), Extension('VK_KHR_external_semaphore_fd', 1, 'device->rad_info.has_syncobj'), + Extension('VK_KHR_get_display_properties2', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_KHR_get_memory_requirements2', 1, True), Extension('VK_KHR_get_physical_device_properties2', 1, True), + Extension('VK_KHR_get_surface_capabilities2', 1, 'RADV_HAS_SURFACE'), Extension('VK_KHR_image_format_list', 1, True), - Extension('VK_KHR_incremental_present', 1, True), + Extension('VK_KHR_incremental_present', 1, 'RADV_HAS_SURFACE'), Extension('VK_KHR_maintenance1', 1, True), Extension('VK_KHR_maintenance2', 1, True), + Extension('VK_KHR_maintenance3', 1, True), Extension('VK_KHR_push_descriptor', 1, True), Extension('VK_KHR_relaxed_block_layout', 1, True), Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True), @@ -76,10 +89,36 @@ EXTENSIONS = [ Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'), Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'), Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), - Extension('VK_KHX_multiview', 1, False), + Extension('VK_KHR_multiview', 1, True), + Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), + Extension('VK_EXT_calibrated_timestamps', 1, True), + Extension('VK_EXT_conditional_rendering', 1, True), + Extension('VK_EXT_conservative_rasterization', 1, 'device->rad_info.chip_class >= GFX9'), + Extension('VK_EXT_display_surface_counter', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_display_control', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), + Extension('VK_EXT_debug_report', 9, True), + Extension('VK_EXT_depth_range_unrestricted', 1, True), + Extension('VK_EXT_descriptor_indexing', 2, True), + Extension('VK_EXT_discard_rectangles', 1, True), + Extension('VK_EXT_external_memory_dma_buf', 1, True), + Extension('VK_EXT_external_memory_host', 1, 'device->rad_info.has_userptr'), Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'), + Extension('VK_EXT_pci_bus_info', 1, False), + Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), + Extension('VK_EXT_shader_viewport_index_layer', 1, True), + 
Extension('VK_EXT_shader_stencil_export', 1, True), + Extension('VK_EXT_transform_feedback', 1, True), + Extension('VK_EXT_vertex_attribute_divisor', 3, True), Extension('VK_AMD_draw_indirect_count', 1, True), - Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'), + Extension('VK_AMD_gcn_shader', 1, True), + Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), + Extension('VK_AMD_shader_core_properties', 1, True), + Extension('VK_AMD_shader_info', 1, True), + Extension('VK_AMD_shader_trinary_minmax', 1, True), + Extension('VK_GOOGLE_decorate_string', 1, True), + Extension('VK_GOOGLE_hlsl_functionality1', 1, True), ] class VkVersion: @@ -106,7 +145,8 @@ class VkVersion: return '.'.join(ver_list) def c_vk_version(self): - ver_list = [str(self.major), str(self.minor), str(self.patch)] + patch = self.patch if self.patch is not None else 0 + ver_list = [str(self.major), str(self.minor), str(patch)] return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')' def __int_ver(self): @@ -114,14 +154,15 @@ class VkVersion: patch = self.patch if self.patch is not None else 0 return (self.major << 22) | (self.minor << 12) | patch - def __cmp__(self, other): + def __gt__(self, other): # If only one of them has a patch version, "ignore" it by making # other's patch version match self. if (self.patch is None) != (other.patch is None): other = copy.copy(other) other.patch = self.patch - return self.__int_ver().__cmp__(other.__int_ver()) + return self.__int_ver() > other.__int_ver() + MAX_API_VERSION = VkVersion(MAX_API_VERSION) @@ -139,31 +180,64 @@ def _init_exts_from_xml(xml): if ext_name not in ext_name_map: continue - # Workaround for VK_ANDROID_native_buffer. Its <extension> element in - # vk.xml lists it as supported="disabled" and provides only a stub - # definition. Its <extension> element in Mesa's custom - # vk_android_native_buffer.xml, though, lists it as - # supported='android-vendor' and fully defines the extension. We want - # to skip the <extension> element in vk.xml. 
- if ext_elem.attrib['supported'] == 'disabled': - assert ext_name == 'VK_ANDROID_native_buffer' - continue - ext = ext_name_map[ext_name] ext.type = ext_elem.attrib['type'] -_TEMPLATE = Template(COPYRIGHT + """ +_TEMPLATE_H = Template(COPYRIGHT + """ +#ifndef RADV_EXTENSIONS_H +#define RADV_EXTENSIONS_H + +enum { + RADV_INSTANCE_EXTENSION_COUNT = ${len(instance_extensions)}, + RADV_DEVICE_EXTENSION_COUNT = ${len(device_extensions)}, +}; + +struct radv_instance_extension_table { + union { + bool extensions[RADV_INSTANCE_EXTENSION_COUNT]; + struct { +%for ext in instance_extensions: + bool ${ext.name[3:]}; +%endfor + }; + }; +}; + +struct radv_device_extension_table { + union { + bool extensions[RADV_DEVICE_EXTENSION_COUNT]; + struct { +%for ext in device_extensions: + bool ${ext.name[3:]}; +%endfor + }; + }; +}; + +extern const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT]; +extern const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT]; +extern const struct radv_instance_extension_table radv_supported_instance_extensions; + + +struct radv_physical_device; + +void radv_fill_device_extension_table(const struct radv_physical_device *device, + struct radv_device_extension_table* table); +#endif +""") + +_TEMPLATE_C = Template(COPYRIGHT + """ #include "radv_private.h" #include "vk_util.h" /* Convert the VK_USE_PLATFORM_* defines to booleans */ -%for platform in ['ANDROID', 'WAYLAND', 'XCB', 'XLIB']: -#ifdef VK_USE_PLATFORM_${platform}_KHR -# undef VK_USE_PLATFORM_${platform}_KHR -# define VK_USE_PLATFORM_${platform}_KHR true +%for platform in ['ANDROID_KHR', 'WAYLAND_KHR', 'XCB_KHR', 'XLIB_KHR', 'DISPLAY_KHR', 'XLIB_XRANDR_EXT']: +#ifdef VK_USE_PLATFORM_${platform} +# undef VK_USE_PLATFORM_${platform} +# define VK_USE_PLATFORM_${platform} true #else -# define VK_USE_PLATFORM_${platform}_KHR false +# define VK_USE_PLATFORM_${platform} false #endif %endfor @@ -177,84 +251,56 @@ _TEMPLATE = Template(COPYRIGHT + """ #define RADV_HAS_SURFACE (VK_USE_PLATFORM_WAYLAND_KHR || \\ VK_USE_PLATFORM_XCB_KHR || \\ - VK_USE_PLATFORM_XLIB_KHR) + VK_USE_PLATFORM_XLIB_KHR || \\ + VK_USE_PLATFORM_DISPLAY_KHR) -bool -radv_instance_extension_supported(const char *name) -{ + +const VkExtensionProperties radv_instance_extensions[RADV_INSTANCE_EXTENSION_COUNT] = { %for ext in instance_extensions: - if (strcmp(name, "${ext.name}") == 0) - return ${ext.enable}; + {"${ext.name}", ${ext.ext_version}}, %endfor - return false; -} +}; -VkResult radv_EnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) -{ - VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); +const VkExtensionProperties radv_device_extensions[RADV_DEVICE_EXTENSION_COUNT] = { +%for ext in device_extensions: + {"${ext.name}", ${ext.ext_version}}, +%endfor +}; +const struct radv_instance_extension_table radv_supported_instance_extensions = { %for ext in instance_extensions: - if (${ext.enable}) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "${ext.name}", - .specVersion = ${ext.ext_version}, - }; - } - } + .${ext.name[3:]} = ${ext.enable}, %endfor +}; - return vk_outarray_status(&out); -} - -uint32_t -radv_physical_device_api_version(struct radv_physical_device *dev) -{ - return ${MAX_API_VERSION.c_vk_version()}; -} - -bool -radv_physical_device_extension_supported(struct radv_physical_device *device, - const char *name) +void radv_fill_device_extension_table(const struct 
radv_physical_device *device, + struct radv_device_extension_table* table) { %for ext in device_extensions: - if (strcmp(name, "${ext.name}") == 0) - return ${ext.enable}; + table->${ext.name[3:]} = ${ext.enable}; %endfor - return false; } -VkResult radv_EnumerateDeviceExtensionProperties( - VkPhysicalDevice physicalDevice, - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) +VkResult radv_EnumerateInstanceVersion( + uint32_t* pApiVersion) { - RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice); - VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); - (void)device; - -%for ext in device_extensions: - if (${ext.enable}) { - vk_outarray_append(&out, prop) { - *prop = (VkExtensionProperties) { - .extensionName = "${ext.name}", - .specVersion = ${ext.ext_version}, - }; - } - } -%endfor + *pApiVersion = ${MAX_API_VERSION.c_vk_version()}; + return VK_SUCCESS; +} - return vk_outarray_status(&out); +uint32_t +radv_physical_device_api_version(struct radv_physical_device *dev) +{ + if (!ANDROID && dev->rad_info.has_syncobj_wait_for_submit) + return VK_MAKE_VERSION(1, 1, 70); + return VK_MAKE_VERSION(1, 0, 68); } """) if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--out', help='Output C file.', required=True) + parser.add_argument('--out-c', help='Output C file.', required=True) + parser.add_argument('--out-h', help='Output H file.', required=True) parser.add_argument('--xml', help='Vulkan API XML file.', required=True, @@ -274,5 +320,7 @@ if __name__ == '__main__': 'device_extensions': [e for e in EXTENSIONS if e.type == 'device'], } - with open(args.out, 'w') as f: - f.write(_TEMPLATE.render(**template_env)) + with open(args.out_c, 'w') as f: + f.write(_TEMPLATE_C.render(**template_env)) + with open(args.out_h, 'w') as f: + f.write(_TEMPLATE_H.render(**template_env)) diff --git a/lib/mesa/src/amd/vulkan/radv_icd.py b/lib/mesa/src/amd/vulkan/radv_icd.py new file mode 100644 index 000000000..cc86bbfa5 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_icd.py @@ -0,0 +1,47 @@ +# Copyright 2017 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
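+# For reference, a sketch of the manifest this script emits with the default +# library path and the current MAX_API_VERSION (values are illustrative): +# +# { +#     "ICD": { +#         "api_version": "1.1.70", +#         "library_path": "libvulkan_radeon.so" +#     }, +#     "file_format_version": "1.0.0" +# }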
+ +import json +import os.path + +from radv_extensions import * + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--out', help='Output json file.', required=True) + parser.add_argument('--lib-path', help='Path to libvulkan_radeon.so') + args = parser.parse_args() + + path = 'libvulkan_radeon.so' + if args.lib_path: + path = os.path.join(args.lib_path, path) + + json_data = { + 'file_format_version': '1.0.0', + 'ICD': { + 'library_path': path, + 'api_version': str(MAX_API_VERSION), + }, + } + + with open(args.out, 'w') as f: + json.dump(json_data, f, indent = 4, sort_keys=True, separators=(',', ': ')) diff --git a/lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp b/lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp new file mode 100644 index 000000000..ed05e1197 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_llvm_helper.cpp @@ -0,0 +1,140 @@ +/* + * Copyright © 2018 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include "ac_llvm_util.h" +#include "ac_llvm_build.h" +#include "radv_shader_helper.h" + +#include <list> +class radv_llvm_per_thread_info { +public: + radv_llvm_per_thread_info(enum radeon_family arg_family, + enum ac_target_machine_options arg_tm_options) + : family(arg_family), tm_options(arg_tm_options) {} + + ~radv_llvm_per_thread_info() + { + ac_destroy_llvm_passes(passes); + ac_destroy_llvm_compiler(&llvm_info); + } + + bool init(void) + { + if (!ac_init_llvm_compiler(&llvm_info, + true, + family, + tm_options)) + return false; + + passes = ac_create_llvm_passes(llvm_info.tm); + if (!passes) + return false; + + return true; + } + + bool compile_to_memory_buffer(LLVMModuleRef module, + struct ac_shader_binary *binary) + { + return ac_compile_module_to_binary(passes, module, binary); + } + + bool is_same(enum radeon_family arg_family, + enum ac_target_machine_options arg_tm_options) { + if (arg_family == family && + arg_tm_options == tm_options) + return true; + return false; + } + struct ac_llvm_compiler llvm_info; +private: + enum radeon_family family; + enum ac_target_machine_options tm_options; + struct ac_compiler_passes *passes; +}; + +/* we have to store a linked list per thread due to the possibility of multiple gpus being required */ +static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list; + +bool radv_compile_to_binary(struct ac_llvm_compiler *info, + LLVMModuleRef module, + struct ac_shader_binary *binary) +{ + radv_llvm_per_thread_info *thread_info = nullptr; + + for (auto &I : radv_llvm_per_thread_list) { + if (I.llvm_info.tm == info->tm) { + thread_info = &I; + break; + } + } + + if (!thread_info) { + struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm); + bool ret = ac_compile_module_to_binary(passes, module, binary); + ac_destroy_llvm_passes(passes); + return ret; + } + + return thread_info->compile_to_memory_buffer(module, binary); +} + +bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, + bool okay_to_leak_target_library_info, + bool thread_compiler, + enum radeon_family family, + enum ac_target_machine_options tm_options) +{ + if (thread_compiler) { + for (auto &I : radv_llvm_per_thread_list) { + if (I.is_same(family, tm_options)) { + *info = I.llvm_info; + return true; + } + } + + radv_llvm_per_thread_list.emplace_back(family, tm_options); + radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back(); + + if (!tinfo.init()) { + radv_llvm_per_thread_list.pop_back(); + return false; + } + + *info = tinfo.llvm_info; + return true; + } + + if (!ac_init_llvm_compiler(info, + okay_to_leak_target_library_info, + family, + tm_options)) + return false; + return true; +} + +void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, + bool thread_compiler) +{ + if (!thread_compiler) + ac_destroy_llvm_compiler(info); +} diff --git a/lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c b/lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c new file mode 100644 index 000000000..8c21c4235 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_nir_to_llvm.c @@ -0,0 +1,3968 @@ +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * based in part on anv driver which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_private.h" +#include "radv_shader.h" +#include "radv_shader_helper.h" +#include "nir/nir.h" + +#include <llvm-c/Core.h> +#include <llvm-c/TargetMachine.h> +#include <llvm-c/Transforms/Scalar.h> +#if HAVE_LLVM >= 0x0700 +#include <llvm-c/Transforms/Utils.h> +#endif + +#include "sid.h" +#include "gfx9d.h" +#include "ac_binary.h" +#include "ac_llvm_util.h" +#include "ac_llvm_build.h" +#include "ac_shader_abi.h" +#include "ac_shader_util.h" +#include "ac_exp_param.h" + +#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) + +struct radv_shader_context { + struct ac_llvm_context ac; + const struct radv_nir_compiler_options *options; + struct radv_shader_variant_info *shader_info; + struct ac_shader_abi abi; + + unsigned max_workgroup_size; + LLVMContextRef context; + LLVMValueRef main_function; + + LLVMValueRef descriptor_sets[RADV_UD_MAX_SETS]; + LLVMValueRef ring_offsets; + + LLVMValueRef vertex_buffers; + LLVMValueRef rel_auto_id; + LLVMValueRef vs_prim_id; + LLVMValueRef es2gs_offset; + + LLVMValueRef oc_lds; + LLVMValueRef merged_wave_info; + LLVMValueRef tess_factor_offset; + LLVMValueRef tes_rel_patch_id; + LLVMValueRef tes_u; + LLVMValueRef tes_v; + + LLVMValueRef gs2vs_offset; + LLVMValueRef gs_wave_id; + LLVMValueRef gs_vtx_offset[6]; + + LLVMValueRef esgs_ring; + LLVMValueRef gsvs_ring[4]; + LLVMValueRef hs_ring_tess_offchip; + LLVMValueRef hs_ring_tess_factor; + + LLVMValueRef persp_sample, persp_center, persp_centroid; + LLVMValueRef linear_sample, linear_center, linear_centroid; + + /* Streamout */ + LLVMValueRef streamout_buffers; + LLVMValueRef streamout_write_idx; + LLVMValueRef streamout_config; + LLVMValueRef streamout_offset[4]; + + gl_shader_stage stage; + + LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; + + uint64_t input_mask; + uint64_t output_mask; + + bool is_gs_copy_shader; + LLVMValueRef gs_next_vertex[4]; + unsigned gs_max_out_vertices; + + unsigned tes_primitive_mode; + + uint32_t tcs_patch_outputs_read; + uint64_t tcs_outputs_read; + uint32_t tcs_vertices_per_patch; + uint32_t tcs_num_inputs; + uint32_t tcs_num_patches; + uint32_t max_gsvs_emit_size; + uint32_t gsvs_vertex_size; +}; + +enum radeon_llvm_calling_convention { + RADEON_LLVM_AMDGPU_VS = 87, + RADEON_LLVM_AMDGPU_GS = 88, + RADEON_LLVM_AMDGPU_PS = 
89, + RADEON_LLVM_AMDGPU_CS = 90, + RADEON_LLVM_AMDGPU_HS = 93, +}; + +static inline struct radv_shader_context * +radv_shader_context_from_abi(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = NULL; + return container_of(abi, ctx, abi); +} + +struct ac_build_if_state +{ + struct radv_shader_context *ctx; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block; + LLVMBasicBlockRef true_block; + LLVMBasicBlockRef false_block; + LLVMBasicBlockRef merge_block; +}; + +static LLVMBasicBlockRef +ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + /* get current basic block */ + current_block = LLVMGetInsertBlock(ctx->ac.builder); + + /* check if there's another block after this one */ + next_block = LLVMGetNextBasicBlock(current_block); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); + } + else { + /* append new block after current block */ + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); + } + return new_block; +} + +static void +ac_nir_build_if(struct ac_build_if_state *ifthen, + struct radv_shader_context *ctx, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder); + + memset(ifthen, 0, sizeof *ifthen); + ifthen->ctx = ctx; + ifthen->condition = condition; + ifthen->entry_block = block; + + /* create endif/merge basic block for the phi functions */ + ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); + + /* create/insert true_block before merge_block */ + ifthen->true_block = + LLVMInsertBasicBlockInContext(ctx->context, + ifthen->merge_block, + "if-true-block"); + + /* successive code goes into the true block */ + LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block); +} + +/** + * End a conditional. + */ +static void +ac_nir_build_endif(struct ac_build_if_state *ifthen) +{ + LLVMBuilderRef builder = ifthen->ctx->ac.builder; + + /* Insert branch to the merge block from current block */ + LLVMBuildBr(builder, ifthen->merge_block); + + /* + * Now patch in the various branch instructions. 
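+ * The conditional branch out of entry_block is only emitted here because, + * at the time ac_nir_build_if ran, the optional false_block did not exist yet.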
+ */ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); + if (ifthen->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); + } + + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); +} + + +static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) +{ + switch (ctx->stage) { + case MESA_SHADER_TESS_CTRL: + return ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); + case MESA_SHADER_TESS_EVAL: + return ctx->tes_rel_patch_id; + break; + default: + unreachable("Illegal stage"); + } +} + +static unsigned +get_tcs_num_patches(struct radv_shader_context *ctx) +{ + unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices; + unsigned num_tcs_output_cp = ctx->tcs_vertices_per_patch; + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + unsigned num_patches; + unsigned hardware_lds_size; + + /* Ensure that we only need one wave per SIMD so we don't need to check + * resource usage. Also ensures that the number of tcs in and out + * vertices per threadgroup are at most 256. + */ + num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4; + /* Make sure that the data fits in LDS. This assumes the shaders only + * use LDS for the inputs and outputs. + */ + hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768; + num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); + /* Make sure the output data fits in the offchip buffer */ + num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size); + /* Not necessary for correctness, but improves performance. The + * specific value is taken from the proprietary driver. + */ + num_patches = MIN2(num_patches, 40); + + /* SI bug workaround - limit LS-HS threadgroups to only one wave. 
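+ * A wave is 64 lanes wide, so a single wave covers at most + * 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) patches.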
*/ + if (ctx->options->chip_class == SI) { + unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); + num_patches = MIN2(num_patches, one_wave); + } + return num_patches; +} + +static unsigned +calculate_tess_lds_size(struct radv_shader_context *ctx) +{ + unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices; + unsigned num_tcs_output_cp; + unsigned num_tcs_outputs, num_tcs_patch_outputs; + unsigned input_vertex_size, output_vertex_size; + unsigned input_patch_size, output_patch_size; + unsigned pervertex_output_patch_size; + unsigned output_patch0_offset; + unsigned num_patches; + unsigned lds_size; + + num_tcs_output_cp = ctx->tcs_vertices_per_patch; + num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written); + + input_vertex_size = ctx->tcs_num_inputs * 16; + output_vertex_size = num_tcs_outputs * 16; + + input_patch_size = num_tcs_input_cp * input_vertex_size; + + pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; + output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + + num_patches = ctx->tcs_num_patches; + output_patch0_offset = input_patch_size * num_patches; + + lds_size = output_patch0_offset + output_patch_size * num_patches; + return lds_size; +} + +/* Tessellation shaders pass outputs to the next shader using LDS. + * + * LS outputs = TCS inputs + * TCS outputs = TES inputs + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) + * - ... + * - TCS outputs for patch 0 = get_tcs_out_patch0_offset + * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset + * - TCS outputs for patch 1 + * - Per-patch TCS outputs for patch 1 + * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) + * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) + * - ... + * + * All three shaders VS(LS), TCS, TES share the same LDS space. 
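+ * + * A worked example with the sizes computed below: for num_patches = 2, + * input_patch_size = 64 bytes and output_patch_size = 128 bytes, patch 1's + * inputs start at byte 64, patch 0's outputs at 64 * 2 = 128 + * (get_tcs_out_patch0_offset) and patch 1's outputs at 128 + 128 = 256. + * The stride/offset helpers divide these byte values by 4, since the + * returned LDS addresses are in dwords.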
+ */ +static LLVMValueRef +get_tcs_in_patch_stride(struct radv_shader_context *ctx) +{ + assert (ctx->stage == MESA_SHADER_TESS_CTRL); + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + + input_patch_size /= 4; + return LLVMConstInt(ctx->ac.i32, input_patch_size, false); +} + +static LLVMValueRef +get_tcs_out_patch_stride(struct radv_shader_context *ctx) +{ + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + output_patch_size /= 4; + return LLVMConstInt(ctx->ac.i32, output_patch_size, false); +} + +static LLVMValueRef +get_tcs_out_vertex_stride(struct radv_shader_context *ctx) +{ + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + output_vertex_size /= 4; + return LLVMConstInt(ctx->ac.i32, output_vertex_size, false); +} + +static LLVMValueRef +get_tcs_out_patch0_offset(struct radv_shader_context *ctx) +{ + assert (ctx->stage == MESA_SHADER_TESS_CTRL); + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + uint32_t output_patch0_offset = input_patch_size; + unsigned num_patches = ctx->tcs_num_patches; + + output_patch0_offset *= num_patches; + output_patch0_offset /= 4; + return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false); +} + +static LLVMValueRef +get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx) +{ + assert (ctx->stage == MESA_SHADER_TESS_CTRL); + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + uint32_t output_patch0_offset = input_patch_size; + + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + unsigned num_patches = ctx->tcs_num_patches; + + output_patch0_offset *= num_patches; + output_patch0_offset += pervertex_output_patch_size; + output_patch0_offset /= 4; + return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false); +} + +static LLVMValueRef +get_tcs_in_current_patch_offset(struct radv_shader_context *ctx) +{ + LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, ""); +} + +static LLVMValueRef +get_tcs_out_current_patch_offset(struct radv_shader_context *ctx) +{ + LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); + LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, + patch0_offset); +} + +static LLVMValueRef +get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx) +{ + LLVMValueRef patch0_patch_data_offset = + get_tcs_out_patch0_patch_data_offset(ctx); + LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); + LLVMValueRef rel_patch_id = 
get_rel_patch_id(ctx); + + return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, + patch0_patch_data_offset); +} + +#define MAX_ARGS 64 +struct arg_info { + LLVMTypeRef types[MAX_ARGS]; + LLVMValueRef *assign[MAX_ARGS]; + unsigned array_params_mask; + uint8_t count; + uint8_t sgpr_count; + uint8_t num_sgprs_used; + uint8_t num_vgprs_used; +}; + +enum ac_arg_regfile { + ARG_SGPR, + ARG_VGPR, +}; + +static void +add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type, + LLVMValueRef *param_ptr) +{ + assert(info->count < MAX_ARGS); + + info->assign[info->count] = param_ptr; + info->types[info->count] = type; + info->count++; + + if (regfile == ARG_SGPR) { + info->num_sgprs_used += ac_get_type_size(type) / 4; + info->sgpr_count++; + } else { + assert(regfile == ARG_VGPR); + info->num_vgprs_used += ac_get_type_size(type) / 4; + } +} + +static inline void +add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) +{ + info->array_params_mask |= (1 << info->count); + add_arg(info, ARG_SGPR, type, param_ptr); +} + +static void assign_arguments(LLVMValueRef main_function, + struct arg_info *info) +{ + unsigned i; + for (i = 0; i < info->count; i++) { + if (info->assign[i]) + *info->assign[i] = LLVMGetParam(main_function, i); + } +} + +static LLVMValueRef +create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, + LLVMBuilderRef builder, LLVMTypeRef *return_types, + unsigned num_return_elems, + struct arg_info *args, + unsigned max_workgroup_size, + const struct radv_nir_compiler_options *options) +{ + LLVMTypeRef main_function_type, ret_type; + LLVMBasicBlockRef main_function_body; + + if (num_return_elems) + ret_type = LLVMStructTypeInContext(ctx, return_types, + num_return_elems, true); + else + ret_type = LLVMVoidTypeInContext(ctx); + + /* Setup the function */ + main_function_type = + LLVMFunctionType(ret_type, args->types, args->count, 0); + LLVMValueRef main_function = + LLVMAddFunction(module, "main", main_function_type); + main_function_body = + LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); + LLVMPositionBuilderAtEnd(builder, main_function_body); + + LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); + for (unsigned i = 0; i < args->sgpr_count; ++i) { + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); + + if (args->array_params_mask & (1 << i)) { + LLVMValueRef P = LLVMGetParam(main_function, i); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); + ac_add_attr_dereferenceable(P, UINT64_MAX); + } + } + + if (options->address32_hi) { + ac_llvm_add_target_dep_function_attr(main_function, + "amdgpu-32bit-address-high-bits", + options->address32_hi); + } + + if (max_workgroup_size) { + ac_llvm_add_target_dep_function_attr(main_function, + "amdgpu-max-work-group-size", + max_workgroup_size); + } + if (options->unsafe_math) { + /* These were copied from some LLVM test. 
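+		 * Together they opt the whole function into fast-math style codegen, + * e.g. allowing fused multiply-add contraction and assuming no infs, + * NaNs or signed zeros.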
*/ + LLVMAddTargetDependentFunctionAttr(main_function, + "less-precise-fpmad", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "no-infs-fp-math", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "no-nans-fp-math", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "unsafe-fp-math", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "no-signed-zeros-fp-math", + "true"); + } + return main_function; +} + + +static void +set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, + uint8_t num_sgprs, bool indirect) +{ + ud_info->sgpr_idx = *sgpr_idx; + ud_info->num_sgprs = num_sgprs; + ud_info->indirect = indirect; + *sgpr_idx += num_sgprs; +} + +static void +set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, + uint8_t num_sgprs) +{ + struct radv_userdata_info *ud_info = + &ctx->shader_info->user_sgprs_locs.shader_data[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, num_sgprs, false); +} + +static void +set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) +{ + bool use_32bit_pointers = HAVE_32BIT_POINTERS && + idx != AC_UD_SCRATCH_RING_OFFSETS; + + set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); +} + +static void +set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, + bool indirect) +{ + struct radv_userdata_locations *locs = + &ctx->shader_info->user_sgprs_locs; + struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect); + + if (!indirect) + locs->descriptor_sets_enabled |= 1 << idx; +} + +struct user_sgpr_info { + bool need_ring_offsets; + bool indirect_all_descriptor_sets; +}; + +static bool needs_view_index_sgpr(struct radv_shader_context *ctx, + gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + if (ctx->shader_info->info.needs_multiview_view_index || + (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_TESS_EVAL: + if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_TESS_CTRL: + if (ctx->shader_info->info.needs_multiview_view_index) + return true; + break; + default: + break; + } + return false; +} + +static uint8_t +count_vs_user_sgprs(struct radv_shader_context *ctx) +{ + uint8_t count = 0; + + if (ctx->shader_info->info.vs.has_vertex_buffers) + count += HAVE_32BIT_POINTERS ? 1 : 2; + count += ctx->shader_info->info.vs.needs_draw_id ? 
3 : 2; + + return count; +} + +static void allocate_user_sgprs(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + bool needs_view_index, + struct user_sgpr_info *user_sgpr_info) +{ + uint8_t user_sgpr_count = 0; + + memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); + + /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ + if (stage == MESA_SHADER_GEOMETRY || + stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_CTRL || + stage == MESA_SHADER_TESS_EVAL || + ctx->is_gs_copy_shader) + user_sgpr_info->need_ring_offsets = true; + + if (stage == MESA_SHADER_FRAGMENT && + ctx->shader_info->info.ps.needs_sample_positions) + user_sgpr_info->need_ring_offsets = true; + + /* 2 user sgprs will nearly always be allocated for scratch/rings */ + if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { + user_sgpr_count += 2; + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + if (ctx->shader_info->info.cs.uses_grid_size) + user_sgpr_count += 3; + break; + case MESA_SHADER_FRAGMENT: + user_sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; + break; + case MESA_SHADER_VERTEX: + if (!ctx->is_gs_copy_shader) + user_sgpr_count += count_vs_user_sgprs(ctx); + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) + user_sgpr_count += count_vs_user_sgprs(ctx); + } + break; + case MESA_SHADER_TESS_EVAL: + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) { + user_sgpr_count += count_vs_user_sgprs(ctx); + } + } + break; + default: + break; + } + + if (needs_view_index) + user_sgpr_count++; + + if (ctx->shader_info->info.loads_push_constants) + user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; + + uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; + uint32_t num_desc_set = + util_bitcount(ctx->shader_info->info.desc_set_used_mask); + + if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) { + user_sgpr_info->indirect_all_descriptor_sets = true; + } +} + +static void +declare_global_input_sgprs(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + const struct user_sgpr_info *user_sgpr_info, + struct arg_info *args, + LLVMValueRef *desc_sets) +{ + LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8); + unsigned num_sets = ctx->options->layout ? 
+ ctx->options->layout->num_sets : 0; + unsigned stage_mask = 1 << stage; + + if (has_previous_stage) + stage_mask |= 1 << previous_stage; + + /* 1 for each descriptor set */ + if (!user_sgpr_info->indirect_all_descriptor_sets) { + for (unsigned i = 0; i < num_sets; ++i) { + if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && + ctx->options->layout->set[i].layout->shader_stages & stage_mask) { + add_array_arg(args, type, + &ctx->descriptor_sets[i]); + } + } + } else { + add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets); + } + + if (ctx->shader_info->info.loads_push_constants) { + /* 1 for push constants and dynamic descriptors */ + add_array_arg(args, type, &ctx->abi.push_constants); + } + + if (ctx->shader_info->info.so.num_outputs) { + add_arg(args, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->ac.v4i32), + &ctx->streamout_buffers); + } +} + +static void +declare_vs_specific_input_sgprs(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + struct arg_info *args) +{ + if (!ctx->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (ctx->shader_info->info.vs.has_vertex_buffers) { + add_arg(args, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->ac.v4i32), + &ctx->vertex_buffers); + } + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); + if (ctx->shader_info->info.vs.needs_draw_id) { + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id); + } + } +} + +static void +declare_vs_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) +{ + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id); + if (!ctx->is_gs_copy_shader) { + if (ctx->options->key.vs.as_ls) { + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + } else { + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); + } + add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */ + } +} + +static void +declare_streamout_sgprs(struct radv_shader_context *ctx, gl_shader_stage stage, + struct arg_info *args) +{ + int i; + + /* Streamout SGPRs. */ + if (ctx->shader_info->info.so.num_outputs) { + assert(stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_EVAL); + + if (stage != MESA_SHADER_TESS_EVAL) { + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->streamout_config); + } else { + args->assign[args->count - 1] = &ctx->streamout_config; + args->types[args->count - 1] = ctx->ac.i32; + } + + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->streamout_write_idx); + } + + /* A streamout buffer offset is loaded if the stride is non-zero. 
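+	 * Buffers with a zero stride get no SGPR at all, so only the offsets + * that are actually declared occupy argument slots.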
*/ + for (i = 0; i < 4; i++) { + if (!ctx->shader_info->info.so.strides[i]) + continue; + + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->streamout_offset[i]); + } +} + +static void +declare_tes_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) +{ + add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u); + add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); +} + +static void +set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, + bool has_previous_stage, gl_shader_stage previous_stage, + const struct user_sgpr_info *user_sgpr_info, + LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) +{ + unsigned num_sets = ctx->options->layout ? + ctx->options->layout->num_sets : 0; + unsigned stage_mask = 1 << stage; + + if (has_previous_stage) + stage_mask |= 1 << previous_stage; + + if (!user_sgpr_info->indirect_all_descriptor_sets) { + for (unsigned i = 0; i < num_sets; ++i) { + if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && + ctx->options->layout->set[i].layout->shader_stages & stage_mask) { + set_loc_desc(ctx, i, user_sgpr_idx, false); + } else + ctx->descriptor_sets[i] = NULL; + } + } else { + set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, + user_sgpr_idx); + + for (unsigned i = 0; i < num_sets; ++i) { + if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && + ctx->options->layout->set[i].layout->shader_stages & stage_mask) { + ctx->descriptor_sets[i] = + ac_build_load_to_sgpr(&ctx->ac, + desc_sets, + LLVMConstInt(ctx->ac.i32, i, false)); + + } else + ctx->descriptor_sets[i] = NULL; + } + ctx->shader_info->need_indirect_descriptor_sets = true; + } + + if (ctx->shader_info->info.loads_push_constants) { + set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); + } + + if (ctx->streamout_buffers) { + set_loc_shader_ptr(ctx, AC_UD_STREAMOUT_BUFFERS, + user_sgpr_idx); + } +} + +static void +set_vs_specific_input_locs(struct radv_shader_context *ctx, + gl_shader_stage stage, bool has_previous_stage, + gl_shader_stage previous_stage, + uint8_t *user_sgpr_idx) +{ + if (!ctx->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (ctx->shader_info->info.vs.has_vertex_buffers) { + set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS, + user_sgpr_idx); + } + + unsigned vs_num = 2; + if (ctx->shader_info->info.vs.needs_draw_id) + vs_num++; + + set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, + user_sgpr_idx, vs_num); + } +} + +static void set_llvm_calling_convention(LLVMValueRef func, + gl_shader_stage stage) +{ + enum radeon_llvm_calling_convention calling_conv; + + switch (stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_EVAL: + calling_conv = RADEON_LLVM_AMDGPU_VS; + break; + case MESA_SHADER_GEOMETRY: + calling_conv = RADEON_LLVM_AMDGPU_GS; + break; + case MESA_SHADER_TESS_CTRL: + calling_conv = RADEON_LLVM_AMDGPU_HS; + break; + case MESA_SHADER_FRAGMENT: + calling_conv = RADEON_LLVM_AMDGPU_PS; + break; + case MESA_SHADER_COMPUTE: + calling_conv = RADEON_LLVM_AMDGPU_CS; + break; + default: + unreachable("Unhandle shader type"); + } + + LLVMSetFunctionCallConv(func, calling_conv); +} + +static void create_function(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage) +{ + uint8_t user_sgpr_idx; + struct user_sgpr_info user_sgpr_info; + struct arg_info args = {}; + 
LLVMValueRef desc_sets; + bool needs_view_index = needs_view_index_sgpr(ctx, stage); + allocate_user_sgprs(ctx, stage, has_previous_stage, + previous_stage, needs_view_index, &user_sgpr_info); + + if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { + add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), + &ctx->ring_offsets); + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + declare_global_input_sgprs(ctx, stage, has_previous_stage, + previous_stage, &user_sgpr_info, + &args, &desc_sets); + + if (ctx->shader_info->info.cs.uses_grid_size) { + add_arg(&args, ARG_SGPR, ctx->ac.v3i32, + &ctx->abi.num_work_groups); + } + + for (int i = 0; i < 3; i++) { + ctx->abi.workgroup_ids[i] = NULL; + if (ctx->shader_info->info.cs.uses_block_id[i]) { + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.workgroup_ids[i]); + } + } + + if (ctx->shader_info->info.cs.uses_local_invocation_idx) + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.tg_size); + add_arg(&args, ARG_VGPR, ctx->ac.v3i32, + &ctx->abi.local_invocation_ids); + break; + case MESA_SHADER_VERTEX: + declare_global_input_sgprs(ctx, stage, has_previous_stage, + previous_stage, &user_sgpr_info, + &args, &desc_sets); + declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, + previous_stage, &args); + + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + if (ctx->options->key.vs.as_es) { + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->es2gs_offset); + } else if (ctx->options->key.vs.as_ls) { + /* no extra parameters */ + } else { + declare_streamout_sgprs(ctx, stage, &args); + } + + declare_vs_input_vgprs(ctx, &args); + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + // First 6 system regs + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->merged_wave_info); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tess_factor_offset); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown + + declare_global_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + declare_vs_specific_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, &args); + + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.tcs_patch_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.tcs_rel_ids); + + declare_vs_input_vgprs(ctx, &args); + } else { + declare_global_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tess_factor_offset); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.tcs_patch_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.tcs_rel_ids); + } + break; + case MESA_SHADER_TESS_EVAL: + declare_global_input_sgprs(ctx, stage, has_previous_stage, + previous_stage, &user_sgpr_info, + &args, &desc_sets); + + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + if (ctx->options->key.tes.as_es) { + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + 
&ctx->es2gs_offset); + } else { + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); + declare_streamout_sgprs(ctx, stage, &args); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + } + declare_tes_input_vgprs(ctx, &args); + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + // First 6 system regs + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->gs2vs_offset); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->merged_wave_info); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown + + declare_global_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + + if (previous_stage != MESA_SHADER_TESS_EVAL) { + declare_vs_specific_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &args); + } + + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[0]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[2]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.gs_prim_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.gs_invocation_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[4]); + + if (previous_stage == MESA_SHADER_VERTEX) { + declare_vs_input_vgprs(ctx, &args); + } else { + declare_tes_input_vgprs(ctx, &args); + } + } else { + declare_global_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[0]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[1]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.gs_prim_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[2]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[3]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[4]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[5]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.gs_invocation_id); + } + break; + case MESA_SHADER_FRAGMENT: + declare_global_input_sgprs(ctx, stage, has_previous_stage, + previous_stage, &user_sgpr_info, + &args, &desc_sets); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); + add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); + add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center); + add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid); + add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */ + add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample); + add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center); + add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid); + add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */ + add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]); + add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]); + add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]); + add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face); + add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary); + 
add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage);
+		add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL);  /* fixed pt */
+		break;
+	default:
+		unreachable("Shader stage not implemented");
+	}
+
+	ctx->main_function = create_llvm_function(
+	    ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
+	    ctx->max_workgroup_size, ctx->options);
+	set_llvm_calling_convention(ctx->main_function, stage);
+
+
+	ctx->shader_info->num_input_vgprs = 0;
+	ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
+
+	ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
+
+	if (ctx->stage != MESA_SHADER_FRAGMENT)
+		ctx->shader_info->num_input_vgprs = args.num_vgprs_used;
+
+	assign_arguments(ctx->main_function, &args);
+
+	user_sgpr_idx = 0;
+
+	if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
+		set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+				   &user_sgpr_idx);
+		if (ctx->options->supports_spill) {
+			ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+							       LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
+							       NULL, 0, AC_FUNC_ATTR_READNONE);
+			ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+							     ac_array_in_const_addr_space(ctx->ac.v4i32), "");
+		}
+	}
+
+	/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front
+	 * (including the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart
+	 * the count from 0. */
+	if (has_previous_stage)
+		user_sgpr_idx = 0;
+
+	set_global_input_locs(ctx, stage, has_previous_stage, previous_stage,
+			      &user_sgpr_info, desc_sets, &user_sgpr_idx);
+
+	switch (stage) {
+	case MESA_SHADER_COMPUTE:
+		if (ctx->shader_info->info.cs.uses_grid_size) {
+			set_loc_shader(ctx, AC_UD_CS_GRID_SIZE,
+				       &user_sgpr_idx, 3);
+		}
+		break;
+	case MESA_SHADER_VERTEX:
+		set_vs_specific_input_locs(ctx, stage, has_previous_stage,
+					   previous_stage, &user_sgpr_idx);
+		if (ctx->abi.view_index)
+			set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+		break;
+	case MESA_SHADER_TESS_CTRL:
+		set_vs_specific_input_locs(ctx, stage, has_previous_stage,
+					   previous_stage, &user_sgpr_idx);
+		if (ctx->abi.view_index)
+			set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+		break;
+	case MESA_SHADER_TESS_EVAL:
+		if (ctx->abi.view_index)
+			set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+		break;
+	case MESA_SHADER_GEOMETRY:
+		if (has_previous_stage) {
+			if (previous_stage == MESA_SHADER_VERTEX)
+				set_vs_specific_input_locs(ctx, stage,
+							   has_previous_stage,
+							   previous_stage,
+							   &user_sgpr_idx);
+		}
+		if (ctx->abi.view_index)
+			set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+		break;
+	case MESA_SHADER_FRAGMENT:
+		break;
+	default:
+		unreachable("Shader stage not implemented");
+	}
+
+	if (stage == MESA_SHADER_TESS_CTRL ||
+	    (stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_ls) ||
+	    /* GFX9 has the ESGS ring buffer in LDS.
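+	     * TCS and LS->HS outputs likewise go through LDS, which is why
+	     * all three cases declare the LDS pointer.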
*/ + (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) { + ac_declare_lds_as_pointer(&ctx->ac); + } + + ctx->shader_info->num_user_sgprs = user_sgpr_idx; +} + + +static LLVMValueRef +radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, + unsigned desc_set, unsigned binding) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; + struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; + struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; + unsigned base_offset = layout->binding[binding].offset; + LLVMValueRef offset, stride; + + if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { + unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + + layout->binding[binding].dynamic_offset_offset; + desc_ptr = ctx->abi.push_constants; + base_offset = pipeline_layout->push_constant_size + 16 * idx; + stride = LLVMConstInt(ctx->ac.i32, 16, false); + } else + stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false); + + offset = ac_build_imad(&ctx->ac, index, stride, + LLVMConstInt(ctx->ac.i32, base_offset, false)); + + desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset); + desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32); + LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + return desc_ptr; +} + + +/* The offchip buffer layout for TCS->TES is + * + * - attribute 0 of patch 0 vertex 0 + * - attribute 0 of patch 0 vertex 1 + * - attribute 0 of patch 0 vertex 2 + * ... + * - attribute 0 of patch 1 vertex 0 + * - attribute 0 of patch 1 vertex 1 + * ... + * - attribute 1 of patch 0 vertex 0 + * - attribute 1 of patch 0 vertex 1 + * ... + * - per patch attribute 0 of patch 0 + * - per patch attribute 0 of patch 1 + * ... + * + * Note that every attribute has 4 components. 
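+ *
+ * As an example (hypothetical sizes), with 3 vertices per patch and
+ * 8 patches, attribute 1 of patch 0 vertex 0 starts at byte
+ * 16 * (1 * 8 * 3) = 384, because one full per-vertex attribute block
+ * (num_patches * vertices_per_patch slots of 16 bytes) precedes it.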
+ */ +static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx) +{ + uint32_t num_patches = ctx->tcs_num_patches; + uint32_t num_tcs_outputs; + if (ctx->stage == MESA_SHADER_TESS_CTRL) + num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + else + num_tcs_outputs = ctx->options->key.tes.tcs_num_outputs; + + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + + return LLVMConstInt(ctx->ac.i32, pervertex_output_patch_size * num_patches, false); +} + +static LLVMValueRef calc_param_stride(struct radv_shader_context *ctx, + LLVMValueRef vertex_index) +{ + LLVMValueRef param_stride; + if (vertex_index) + param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch * ctx->tcs_num_patches, false); + else + param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_num_patches, false); + return param_stride; +} + +static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx, + LLVMValueRef vertex_index, + LLVMValueRef param_index) +{ + LLVMValueRef base_addr; + LLVMValueRef param_stride, constant16; + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + LLVMValueRef vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false); + constant16 = LLVMConstInt(ctx->ac.i32, 16, false); + param_stride = calc_param_stride(ctx, vertex_index); + if (vertex_index) { + base_addr = ac_build_imad(&ctx->ac, rel_patch_id, + vertices_per_patch, vertex_index); + } else { + base_addr = rel_patch_id; + } + + base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, + LLVMBuildMul(ctx->ac.builder, param_index, + param_stride, ""), ""); + + base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); + + if (!vertex_index) { + LLVMValueRef patch_data_offset = get_non_vertex_index_offset(ctx); + + base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, + patch_data_offset, ""); + } + return base_addr; +} + +static LLVMValueRef get_tcs_tes_buffer_address_params(struct radv_shader_context *ctx, + unsigned param, + unsigned const_index, + bool is_compact, + LLVMValueRef vertex_index, + LLVMValueRef indir_index) +{ + LLVMValueRef param_index; + + if (indir_index) + param_index = LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, param, false), + indir_index, ""); + else { + if (const_index && !is_compact) + param += const_index; + param_index = LLVMConstInt(ctx->ac.i32, param, false); + } + return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); +} + +static LLVMValueRef +get_dw_address(struct radv_shader_context *ctx, + LLVMValueRef dw_addr, + unsigned param, + unsigned const_index, + bool compact_const_index, + LLVMValueRef vertex_index, + LLVMValueRef stride, + LLVMValueRef indir_index) + +{ + + if (vertex_index) { + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMBuildMul(ctx->ac.builder, + vertex_index, + stride, ""), ""); + } + + if (indir_index) + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMBuildMul(ctx->ac.builder, indir_index, + LLVMConstInt(ctx->ac.i32, 4, false), ""), ""); + else if (const_index && !compact_const_index) + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, const_index * 4, false), ""); + + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, param * 4, false), ""); + + if (const_index && compact_const_index) + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, const_index, false), ""); + return dw_addr; +} + 
+static LLVMValueRef +load_tcs_varyings(struct ac_shader_abi *abi, + LLVMTypeRef type, + LLVMValueRef vertex_index, + LLVMValueRef indir_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact, + bool load_input) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef dw_addr, stride; + LLVMValueRef value[4], result; + unsigned param = shader_io_get_unique_index(location); + + if (load_input) { + uint32_t input_vertex_size = (ctx->tcs_num_inputs * 16) / 4; + stride = LLVMConstInt(ctx->ac.i32, input_vertex_size, false); + dw_addr = get_tcs_in_current_patch_offset(ctx); + } else { + if (!is_patch) { + stride = get_tcs_out_vertex_stride(ctx); + dw_addr = get_tcs_out_current_patch_offset(ctx); + } else { + dw_addr = get_tcs_out_current_patch_data_offset(ctx); + stride = NULL; + } + } + + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + indir_index); + + for (unsigned i = 0; i < num_components + component; i++) { + value[i] = ac_lds_load(&ctx->ac, dw_addr); + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + ctx->ac.i32_1, ""); + } + result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); + return result; +} + +static void +store_tcs_output(struct ac_shader_abi *abi, + const nir_variable *var, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + LLVMValueRef src, + unsigned writemask) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + const unsigned location = var->data.location; + const unsigned component = var->data.location_frac; + const bool is_patch = var->data.patch; + const bool is_compact = var->data.compact; + LLVMValueRef dw_addr; + LLVMValueRef stride = NULL; + LLVMValueRef buf_addr = NULL; + unsigned param; + bool store_lds = true; + + if (is_patch) { + if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0)))) + store_lds = false; + } else { + if (!(ctx->tcs_outputs_read & (1ULL << location))) + store_lds = false; + } + + param = shader_io_get_unique_index(location); + if (location == VARYING_SLOT_CLIP_DIST0 && + is_compact && const_index > 3) { + const_index -= 3; + param++; + } + + if (!is_patch) { + stride = get_tcs_out_vertex_stride(ctx); + dw_addr = get_tcs_out_current_patch_offset(ctx); + } else { + dw_addr = get_tcs_out_current_patch_data_offset(ctx); + } + + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + param_index); + buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, + vertex_index, param_index); + + bool is_tess_factor = false; + if (location == VARYING_SLOT_TESS_LEVEL_INNER || + location == VARYING_SLOT_TESS_LEVEL_OUTER) + is_tess_factor = true; + + unsigned base = is_compact ? 
const_index : 0; + for (unsigned chan = 0; chan < 8; chan++) { + if (!(writemask & (1 << chan))) + continue; + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); + value = ac_to_integer(&ctx->ac, value); + value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, ""); + + if (store_lds || is_tess_factor) { + LLVMValueRef dw_addr_chan = + LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, chan, false), ""); + ac_lds_store(&ctx->ac, dw_addr_chan, value); + } + + if (!is_tess_factor && writemask != 0xF) + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, + buf_addr, ctx->oc_lds, + 4 * (base + chan), 1, 0, true, false); + } + + if (writemask == 0xF) { + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, + buf_addr, ctx->oc_lds, + (base * 4), 1, 0, true, false); + } +} + +static LLVMValueRef +load_tes_input(struct ac_shader_abi *abi, + LLVMTypeRef type, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact, + bool load_input) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef buf_addr; + LLVMValueRef result; + unsigned param = shader_io_get_unique_index(location); + + if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { + const_index -= 3; + param++; + } + + buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, + is_compact, vertex_index, param_index); + + LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false); + buf_addr = LLVMBuildAdd(ctx->ac.builder, buf_addr, comp_offset, ""); + + result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL, + buf_addr, ctx->oc_lds, is_compact ? 
(4 * const_index) : 0, 1, 0, true, false); + result = ac_trim_vector(&ctx->ac, result, num_components); + return result; +} + +static LLVMValueRef +load_gs_input(struct ac_shader_abi *abi, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + unsigned vertex_index, + unsigned const_index, + LLVMTypeRef type) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef vtx_offset; + unsigned param, vtx_offset_param; + LLVMValueRef value[4], result; + + vtx_offset_param = vertex_index; + assert(vtx_offset_param < 6); + vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param], + LLVMConstInt(ctx->ac.i32, 4, false), ""); + + param = shader_io_get_unique_index(location); + + for (unsigned i = component; i < num_components + component; i++) { + if (ctx->ac.chip_class >= GFX9) { + LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); + value[i] = ac_lds_load(&ctx->ac, dw_addr); + } else { + LLVMValueRef soffset = + LLVMConstInt(ctx->ac.i32, + (param * 4 + i + const_index) * 256, + false); + + value[i] = ac_build_buffer_load(&ctx->ac, + ctx->esgs_ring, 1, + ctx->ac.i32_0, + vtx_offset, soffset, + 0, 1, 0, true, false); + } + + if (ac_get_type_size(type) == 2) { + value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], ctx->ac.i32, ""); + value[i] = LLVMBuildTrunc(ctx->ac.builder, value[i], ctx->ac.i16, ""); + } + value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, ""); + } + result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); + result = ac_to_integer(&ctx->ac, result); + return result; +} + + +static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + ac_build_kill_if_false(&ctx->ac, visible); +} + +static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi, + enum glsl_interp_mode interp, unsigned location) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + switch (interp) { + case INTERP_MODE_FLAT: + default: + return NULL; + case INTERP_MODE_SMOOTH: + case INTERP_MODE_NONE: + if (location == INTERP_CENTER) + return ctx->persp_center; + else if (location == INTERP_CENTROID) + return ctx->persp_centroid; + else if (location == INTERP_SAMPLE) + return ctx->persp_sample; + break; + case INTERP_MODE_NOPERSPECTIVE: + if (location == INTERP_CENTER) + return ctx->linear_center; + else if (location == INTERP_CENTROID) + return ctx->linear_centroid; + else if (location == INTERP_SAMPLE) + return ctx->linear_sample; + break; + } + return NULL; +} + +static uint32_t +radv_get_sample_pos_offset(uint32_t num_samples) +{ + uint32_t sample_pos_offset = 0; + + switch (num_samples) { + case 2: + sample_pos_offset = 1; + break; + case 4: + sample_pos_offset = 3; + break; + case 8: + sample_pos_offset = 7; + break; + case 16: + sample_pos_offset = 15; + break; + default: + break; + } + return sample_pos_offset; +} + +static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, + LLVMValueRef sample_id) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + LLVMValueRef result; + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); + + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, + ac_array_in_const_addr_space(ctx->ac.v2f32), ""); + + 
uint32_t sample_pos_offset = + radv_get_sample_pos_offset(ctx->options->key.fs.num_samples); + + sample_id = + LLVMBuildAdd(ctx->ac.builder, sample_id, + LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), ""); + result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); + + return result; +} + + +static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + uint8_t log2_ps_iter_samples; + + if (ctx->shader_info->info.ps.force_persample) { + log2_ps_iter_samples = + util_logbase2(ctx->options->key.fs.num_samples); + } else { + log2_ps_iter_samples = ctx->options->key.fs.log2_ps_iter_samples; + } + + /* The bit pattern matches that used by fixed function fragment + * processing. */ + static const uint16_t ps_iter_masks[] = { + 0xffff, /* not used */ + 0x5555, + 0x1111, + 0x0101, + 0x0001, + }; + assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks)); + + uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples]; + + LLVMValueRef result, sample_id; + sample_id = ac_unpack_param(&ctx->ac, abi->ancillary, 8, 4); + sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, ""); + result = LLVMBuildAnd(ctx->ac.builder, sample_id, abi->sample_coverage, ""); + return result; +} + + +static void +visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) +{ + LLVMValueRef gs_next_vertex; + LLVMValueRef can_emit; + unsigned offset = 0; + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + /* Write vertex attribute values to GSVS ring */ + gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, + ctx->gs_next_vertex[stream], + ""); + + /* If this thread has already emitted the declared maximum number of + * vertices, kill it: excessive vertex emissions are not supposed to + * have any effect, and GS threads have no externally observable + * effects other than emitting vertices. 
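+	 * Killing the thread also keeps the GSVS ring stores below from
+	 * writing past the space reserved for gs_max_out_vertices vertices.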
+ */ + can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, + LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), ""); + ac_build_kill_if_false(&ctx->ac, can_emit); + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + unsigned output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[i]; + uint8_t output_stream = + ctx->shader_info->info.gs.output_streams[i]; + LLVMValueRef *out_ptr = &addrs[i * 4]; + int length = util_last_bit(output_usage_mask); + + if (!(ctx->output_mask & (1ull << i)) || + output_stream != stream) + continue; + + for (unsigned j = 0; j < length; j++) { + if (!(output_usage_mask & (1 << j))) + continue; + + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, + out_ptr[j], ""); + LLVMValueRef voffset = + LLVMConstInt(ctx->ac.i32, offset * + ctx->gs_max_out_vertices, false); + + offset++; + + voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); + voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); + + out_val = ac_to_integer(&ctx->ac, out_val); + out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); + + ac_build_buffer_store_dword(&ctx->ac, + ctx->gsvs_ring[stream], + out_val, 1, + voffset, ctx->gs2vs_offset, 0, + 1, 1, true, true); + } + } + + gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, + ctx->ac.i32_1, ""); + LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]); + + ac_build_sendmsg(&ctx->ac, + AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), + ctx->gs_wave_id); +} + +static void +visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); +} + +static LLVMValueRef +load_tess_coord(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + LLVMValueRef coord[4] = { + ctx->tes_u, + ctx->tes_v, + ctx->ac.f32_0, + ctx->ac.f32_0, + }; + + if (ctx->tes_primitive_mode == GL_TRIANGLES) + coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1, + LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), ""); + + return ac_build_gather_values(&ctx->ac, coord, 3); +} + +static LLVMValueRef +load_patch_vertices_in(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false); +} + + +static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi) +{ + return abi->base_vertex; +} + +static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi, + LLVMValueRef buffer_ptr, bool write) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef result; + + LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); + LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); + + return result; +} + +static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef result; + + LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); + LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); + + return result; +} + +static LLVMValueRef radv_get_sampler_desc(struct 
ac_shader_abi *abi,
+					   unsigned descriptor_set,
+					   unsigned base_index,
+					   unsigned constant_index,
+					   LLVMValueRef index,
+					   enum ac_descriptor_type desc_type,
+					   bool image, bool write,
+					   bool bindless)
+{
+	struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+	LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
+	struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout;
+	struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
+	unsigned offset = binding->offset;
+	unsigned stride = binding->size;
+	unsigned type_size;
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMTypeRef type;
+
+	assert(base_index < layout->binding_count);
+
+	switch (desc_type) {
+	case AC_DESC_IMAGE:
+		type = ctx->ac.v8i32;
+		type_size = 32;
+		break;
+	case AC_DESC_FMASK:
+		type = ctx->ac.v8i32;
+		offset += 32;
+		type_size = 32;
+		break;
+	case AC_DESC_SAMPLER:
+		type = ctx->ac.v4i32;
+		if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+			offset += 64;
+
+		type_size = 16;
+		break;
+	case AC_DESC_BUFFER:
+		type = ctx->ac.v4i32;
+		type_size = 16;
+		break;
+	default:
+		unreachable("invalid desc_type\n");
+	}
+
+	offset += constant_index * stride;
+
+	if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
+	    (!index || binding->immutable_samplers_equal)) {
+		if (binding->immutable_samplers_equal)
+			constant_index = 0;
+
+		const uint32_t *samplers = radv_immutable_samplers(layout, binding);
+
+		LLVMValueRef constants[] = {
+			LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
+			LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
+			LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
+			LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
+		};
+		return ac_build_gather_values(&ctx->ac, constants, 4);
+	}
+
+	assert(stride % type_size == 0);
+
+	if (!index)
+		index = ctx->ac.i32_0;
+
+	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
+
+	list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
+	list = LLVMBuildPointerCast(builder, list,
+				    ac_array_in_const32_addr_space(type), "");
+
+	return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+
+/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+ * so we may need to fix it up. */
+static LLVMValueRef
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
+			  unsigned adjustment,
+			  LLVMValueRef alpha)
+{
+	if (adjustment == RADV_ALPHA_ADJUST_NONE)
+		return alpha;
+
+	LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+	if (adjustment == RADV_ALPHA_ADJUST_SSCALED)
+		alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+	else
+		alpha = ac_to_integer(&ctx->ac, alpha);
+
+	/* For the integer-like cases, do a natural sign extension.
+	 *
+	 * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+	 * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+	 * exponent.
+	 */
+	alpha = LLVMBuildShl(ctx->ac.builder, alpha,
+			     adjustment == RADV_ALPHA_ADJUST_SNORM ?
+			     LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+	alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+	/* Convert back to the right type.
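+	 * SNORM additionally needs a clamp at -1.0, since the sign-extended
+	 * two-bit value can be -2, which is out of range.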
*/ + if (adjustment == RADV_ALPHA_ADJUST_SNORM) { + LLVMValueRef clamp; + LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0); + alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, ""); + clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, ""); + alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, ""); + } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) { + alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, ""); + } + + return alpha; +} + +static void +handle_vs_input_decl(struct radv_shader_context *ctx, + struct nir_variable *variable) +{ + LLVMValueRef t_list_ptr = ctx->vertex_buffers; + LLVMValueRef t_offset; + LLVMValueRef t_list; + LLVMValueRef input; + LLVMValueRef buffer_index; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); + uint8_t input_usage_mask = + ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; + unsigned num_channels = util_last_bit(input_usage_mask); + + variable->data.driver_location = variable->data.location * 4; + + enum glsl_base_type type = glsl_get_base_type(variable->type); + for (unsigned i = 0; i < attrib_count; ++i) { + LLVMValueRef output[4]; + unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0; + + if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) { + uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index]; + + if (divisor) { + buffer_index = ctx->abi.instance_id; + + if (divisor != 1) { + buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index, + LLVMConstInt(ctx->ac.i32, divisor, 0), ""); + } + + if (ctx->options->key.vs.as_ls) { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + } else { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + } + } else { + buffer_index = ctx->ac.i32_0; + } + + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, ""); + } else + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, + ctx->abi.base_vertex, ""); + t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false); + + t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); + + input = ac_build_buffer_load_format(&ctx->ac, t_list, + buffer_index, + ctx->ac.i32_0, + num_channels, false, true); + + input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels); + + for (unsigned chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); + output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, ""); + if (type == GLSL_TYPE_FLOAT16) { + output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, ""); + output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, ""); + } + } + + unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3; + output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]); + + for (unsigned chan = 0; chan < 4; chan++) { + output[chan] = ac_to_integer(&ctx->ac, output[chan]); + if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16) + output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, ""); + + ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan]; + } + } +} + +static void interp_fs_input(struct radv_shader_context *ctx, + unsigned attr, + LLVMValueRef interp_param, + LLVMValueRef prim_mask, + LLVMValueRef result[4]) +{ + LLVMValueRef attr_number; + unsigned chan; + LLVMValueRef i, j; + 
bool interp = !LLVMIsUndef(interp_param); + + attr_number = LLVMConstInt(ctx->ac.i32, attr, false); + + /* fs.constant returns the param from the middle vertex, so it's not + * really useful for flat shading. It's meant to be used for custom + * interpolation (but the intrinsic can't fetch from the other two + * vertices). + * + * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state + * to do the right thing. The only reason we use fs.constant is that + * fs.interp cannot be used on integers, because they can be equal + * to NaN. + */ + if (interp) { + interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param, + ctx->ac.v2f32, ""); + + i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, + ctx->ac.i32_0, ""); + j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, + ctx->ac.i32_1, ""); + } + + for (chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); + + if (interp) { + result[chan] = ac_build_fs_interp(&ctx->ac, + llvm_chan, + attr_number, + prim_mask, i, j); + } else { + result[chan] = ac_build_fs_interp_mov(&ctx->ac, + LLVMConstInt(ctx->ac.i32, 2, false), + llvm_chan, + attr_number, + prim_mask); + result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, ""); + result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), ""); + } + } +} + +static void +handle_fs_input_decl(struct radv_shader_context *ctx, + struct nir_variable *variable) +{ + int idx = variable->data.location; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + LLVMValueRef interp = NULL; + uint64_t mask; + + variable->data.driver_location = idx * 4; + mask = ((1ull << attrib_count) - 1) << variable->data.location; + + if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { + unsigned interp_type; + if (variable->data.sample) + interp_type = INTERP_SAMPLE; + else if (variable->data.centroid) + interp_type = INTERP_CENTROID; + else + interp_type = INTERP_CENTER; + + interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); + } + bool is_16bit = glsl_type_is_16bit(variable->type); + LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32; + if (interp == NULL) + interp = LLVMGetUndef(type); + + for (unsigned i = 0; i < attrib_count; ++i) + ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp; + + if (idx == VARYING_SLOT_CLIP_DIST0) { + /* Do not account for the number of components inside the array + * of clip/cull distances because this might wrongly set other + * bits like primitive ID or layer. 
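+	 * The interpolation loop in handle_fs_inputs() takes the real length
+	 * from num_input_clips_culls instead, so one mask bit is enough.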
+ */ + mask = 1ull << VARYING_SLOT_CLIP_DIST0; + } + + ctx->input_mask |= mask; +} + +static void +handle_vs_inputs(struct radv_shader_context *ctx, + struct nir_shader *nir) { + nir_foreach_variable(variable, &nir->inputs) + handle_vs_input_decl(ctx, variable); +} + +static void +prepare_interp_optimize(struct radv_shader_context *ctx, + struct nir_shader *nir) +{ + bool uses_center = false; + bool uses_centroid = false; + nir_foreach_variable(variable, &nir->inputs) { + if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT || + variable->data.sample) + continue; + + if (variable->data.centroid) + uses_centroid = true; + else + uses_center = true; + } + + if (uses_center && uses_centroid) { + LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ctx->abi.prim_mask, ctx->ac.i32_0, ""); + ctx->persp_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->persp_center, ctx->persp_centroid, ""); + ctx->linear_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->linear_center, ctx->linear_centroid, ""); + } +} + +static void +handle_fs_inputs(struct radv_shader_context *ctx, + struct nir_shader *nir) +{ + prepare_interp_optimize(ctx, nir); + + nir_foreach_variable(variable, &nir->inputs) + handle_fs_input_decl(ctx, variable); + + unsigned index = 0; + + if (ctx->shader_info->info.ps.uses_input_attachments || + ctx->shader_info->info.needs_multiview_view_index) { + ctx->input_mask |= 1ull << VARYING_SLOT_LAYER; + ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)] = LLVMGetUndef(ctx->ac.i32); + } + + for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) { + LLVMValueRef interp_param; + LLVMValueRef *inputs = ctx->inputs +ac_llvm_reg_index_soa(i, 0); + + if (!(ctx->input_mask & (1ull << i))) + continue; + + if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || + i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { + interp_param = *inputs; + interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, + inputs); + + if (LLVMIsUndef(interp_param)) + ctx->shader_info->fs.flat_shaded_mask |= 1u << index; + if (i >= VARYING_SLOT_VAR0) + ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index; + ++index; + } else if (i == VARYING_SLOT_CLIP_DIST0) { + int length = ctx->shader_info->info.ps.num_input_clips_culls; + + for (unsigned j = 0; j < length; j += 4) { + inputs = ctx->inputs + ac_llvm_reg_index_soa(i, j); + + interp_param = *inputs; + interp_fs_input(ctx, index, interp_param, + ctx->abi.prim_mask, inputs); + ++index; + } + } else if (i == VARYING_SLOT_POS) { + for(int i = 0; i < 3; ++i) + inputs[i] = ctx->abi.frag_pos[i]; + + inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, + ctx->abi.frag_pos[3]); + } + } + ctx->shader_info->fs.num_interp = index; + ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0; + + if (ctx->shader_info->info.needs_multiview_view_index) + ctx->abi.view_index = ctx->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; +} + +static void +scan_shader_output_decl(struct radv_shader_context *ctx, + struct nir_variable *variable, + struct nir_shader *shader, + gl_shader_stage stage) +{ + int idx = variable->data.location + variable->data.index; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + uint64_t mask_attribs; + + variable->data.driver_location = idx * 4; + + /* tess ctrl has it's own load/store paths for outputs */ + if (stage == MESA_SHADER_TESS_CTRL) + return; + + mask_attribs = ((1ull << attrib_count) - 1) << idx; + if (stage == MESA_SHADER_VERTEX || + stage == 
MESA_SHADER_TESS_EVAL || + stage == MESA_SHADER_GEOMETRY) { + if (idx == VARYING_SLOT_CLIP_DIST0) { + if (stage == MESA_SHADER_VERTEX) { + ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1; + ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; + ctx->shader_info->vs.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size; + } + if (stage == MESA_SHADER_TESS_EVAL) { + ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1; + ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1; + ctx->shader_info->tes.outinfo.cull_dist_mask <<= shader->info.clip_distance_array_size; + } + + mask_attribs = 1ull << idx; + } + } + + ctx->output_mask |= mask_attribs; +} + + +/* Initialize arguments for the shader export intrinsic */ +static void +si_llvm_init_export_args(struct radv_shader_context *ctx, + LLVMValueRef *values, + unsigned enabled_channels, + unsigned target, + struct ac_export_args *args) +{ + /* Specify the channels that are enabled. */ + args->enabled_channels = enabled_channels; + + /* Specify whether the EXEC mask represents the valid mask */ + args->valid_mask = 0; + + /* Specify whether this is the last export */ + args->done = 0; + + /* Specify the target we are exporting */ + args->target = target; + + args->compr = false; + args->out[0] = LLVMGetUndef(ctx->ac.f32); + args->out[1] = LLVMGetUndef(ctx->ac.f32); + args->out[2] = LLVMGetUndef(ctx->ac.f32); + args->out[3] = LLVMGetUndef(ctx->ac.f32); + + if (!values) + return; + + bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2; + if (ctx->stage == MESA_SHADER_FRAGMENT) { + unsigned index = target - V_008DFC_SQ_EXP_MRT; + unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; + bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; + bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; + unsigned chan; + + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL; + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2], + unsigned bits, bool hi) = NULL; + + switch(col_format) { + case V_028714_SPI_SHADER_ZERO: + args->enabled_channels = 0; /* writemask */ + args->target = V_008DFC_SQ_EXP_NULL; + break; + + case V_028714_SPI_SHADER_32_R: + args->enabled_channels = 1; + args->out[0] = values[0]; + break; + + case V_028714_SPI_SHADER_32_GR: + args->enabled_channels = 0x3; + args->out[0] = values[0]; + args->out[1] = values[1]; + break; + + case V_028714_SPI_SHADER_32_AR: + args->enabled_channels = 0x9; + args->out[0] = values[0]; + args->out[3] = values[3]; + break; + + case V_028714_SPI_SHADER_FP16_ABGR: + args->enabled_channels = 0x5; + packf = ac_build_cvt_pkrtz_f16; + if (is_16bit) { + for (unsigned chan = 0; chan < 4; chan++) + values[chan] = LLVMBuildFPExt(ctx->ac.builder, + values[chan], + ctx->ac.f32, ""); + } + break; + + case V_028714_SPI_SHADER_UNORM16_ABGR: + args->enabled_channels = 0x5; + packf = ac_build_cvt_pknorm_u16; + break; + + case V_028714_SPI_SHADER_SNORM16_ABGR: + args->enabled_channels = 0x5; + packf = ac_build_cvt_pknorm_i16; + break; + + case V_028714_SPI_SHADER_UINT16_ABGR: + args->enabled_channels = 0x5; + packi = ac_build_cvt_pk_u16; + if (is_16bit) { + for (unsigned chan = 0; chan < 4; chan++) + values[chan] = LLVMBuildZExt(ctx->ac.builder, + values[chan], + ctx->ac.i32, ""); + } + break; + + case V_028714_SPI_SHADER_SINT16_ABGR: + 
args->enabled_channels = 0x5; + packi = ac_build_cvt_pk_i16; + if (is_16bit) { + for (unsigned chan = 0; chan < 4; chan++) + values[chan] = LLVMBuildSExt(ctx->ac.builder, + values[chan], + ctx->ac.i32, ""); + } + break; + + default: + case V_028714_SPI_SHADER_32_ABGR: + memcpy(&args->out[0], values, sizeof(values[0]) * 4); + break; + } + + /* Pack f16 or norm_i16/u16. */ + if (packf) { + for (chan = 0; chan < 2; chan++) { + LLVMValueRef pack_args[2] = { + values[2 * chan], + values[2 * chan + 1] + }; + LLVMValueRef packed; + + packed = packf(&ctx->ac, pack_args); + args->out[chan] = ac_to_float(&ctx->ac, packed); + } + args->compr = 1; /* COMPR flag */ + } + + /* Pack i16/u16. */ + if (packi) { + for (chan = 0; chan < 2; chan++) { + LLVMValueRef pack_args[2] = { + ac_to_integer(&ctx->ac, values[2 * chan]), + ac_to_integer(&ctx->ac, values[2 * chan + 1]) + }; + LLVMValueRef packed; + + packed = packi(&ctx->ac, pack_args, + is_int8 ? 8 : is_int10 ? 10 : 16, + chan == 1); + args->out[chan] = ac_to_float(&ctx->ac, packed); + } + args->compr = 1; /* COMPR flag */ + } + return; + } + + if (is_16bit) { + for (unsigned chan = 0; chan < 4; chan++) { + values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, ""); + args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, ""); + } + } else + memcpy(&args->out[0], values, sizeof(values[0]) * 4); + + for (unsigned i = 0; i < 4; ++i) { + if (!(args->enabled_channels & (1 << i))) + continue; + + args->out[i] = ac_to_float(&ctx->ac, args->out[i]); + } +} + +static void +radv_export_param(struct radv_shader_context *ctx, unsigned index, + LLVMValueRef *values, unsigned enabled_channels) +{ + struct ac_export_args args; + + si_llvm_init_export_args(ctx, values, enabled_channels, + V_008DFC_SQ_EXP_PARAM + index, &args); + ac_build_export(&ctx->ac, &args); +} + +static LLVMValueRef +radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) +{ + LLVMValueRef output = + ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)]; + + return LLVMBuildLoad(ctx->ac.builder, output, ""); +} + +static void +radv_emit_stream_output(struct radv_shader_context *ctx, + LLVMValueRef const *so_buffers, + LLVMValueRef const *so_write_offsets, + const struct radv_stream_output *output) +{ + unsigned num_comps = util_bitcount(output->component_mask); + unsigned loc = output->location; + unsigned buf = output->buffer; + unsigned offset = output->offset; + unsigned start; + LLVMValueRef out[4]; + + assert(num_comps && num_comps <= 4); + if (!num_comps || num_comps > 4) + return; + + /* Get the first component. */ + start = ffs(output->component_mask) - 1; + + /* Load the output as int. */ + for (int i = 0; i < num_comps; i++) { + out[i] = ac_to_integer(&ctx->ac, + radv_load_output(ctx, loc, start + i)); + } + + /* Pack the output. */ + LLVMValueRef vdata = NULL; + + switch (num_comps) { + case 1: /* as i32 */ + vdata = out[0]; + break; + case 2: /* as v2i32 */ + case 3: /* as v4i32 (aligned to 4) */ + out[3] = LLVMGetUndef(ctx->ac.i32); + /* fall through */ + case 4: /* as v4i32 */ + vdata = ac_build_gather_values(&ctx->ac, out, + util_next_power_of_two(num_comps)); + break; + } + + ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], + vdata, num_comps, so_write_offsets[buf], + ctx->ac.i32_0, offset, + 1, 1, true, false); +} + +static void +radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) +{ + struct ac_build_if_state if_ctx; + int i; + + /* Get bits [22:16], i.e. 
(so_param >> 16) & 127; */ + assert(ctx->streamout_config); + LLVMValueRef so_vtx_count = + ac_build_bfe(&ctx->ac, ctx->streamout_config, + LLVMConstInt(ctx->ac.i32, 16, false), + LLVMConstInt(ctx->ac.i32, 7, false), false); + + LLVMValueRef tid = ac_get_thread_id(&ctx->ac); + + /* can_emit = tid < so_vtx_count; */ + LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, + tid, so_vtx_count, ""); + + /* Emit the streamout code conditionally. This actually avoids + * out-of-bounds buffer access. The hw tells us via the SGPR + * (so_vtx_count) which threads are allowed to emit streamout data. + */ + ac_nir_build_if(&if_ctx, ctx, can_emit); + { + /* The buffer offset is computed as follows: + * ByteOffset = streamout_offset[buffer_id]*4 + + * (streamout_write_index + thread_id)*stride[buffer_id] + + * attrib_offset + */ + LLVMValueRef so_write_index = ctx->streamout_write_idx; + + /* Compute (streamout_write_index + thread_id). */ + so_write_index = + LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, ""); + + /* Load the descriptor and compute the write offset for each + * enabled buffer. + */ + LLVMValueRef so_write_offset[4] = {}; + LLVMValueRef so_buffers[4] = {}; + LLVMValueRef buf_ptr = ctx->streamout_buffers; + + for (i = 0; i < 4; i++) { + uint16_t stride = ctx->shader_info->info.so.strides[i]; + + if (!stride) + continue; + + LLVMValueRef offset = + LLVMConstInt(ctx->ac.i32, i, false); + + so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, + buf_ptr, offset); + + LLVMValueRef so_offset = ctx->streamout_offset[i]; + + so_offset = LLVMBuildMul(ctx->ac.builder, so_offset, + LLVMConstInt(ctx->ac.i32, 4, false), ""); + + so_write_offset[i] = + ac_build_imad(&ctx->ac, so_write_index, + LLVMConstInt(ctx->ac.i32, + stride * 4, false), + so_offset); + } + + /* Write streamout data. 
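+		 * Every enabled output of the current stream is stored
+		 * through its buffer descriptor at the offset computed above.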
*/ + for (i = 0; i < ctx->shader_info->info.so.num_outputs; i++) { + struct radv_stream_output *output = + &ctx->shader_info->info.so.outputs[i]; + + if (stream != output->stream) + continue; + + radv_emit_stream_output(ctx, so_buffers, + so_write_offset, output); + } + } + ac_nir_build_endif(&if_ctx); +} + +static void +handle_vs_outputs_post(struct radv_shader_context *ctx, + bool export_prim_id, bool export_layer_id, + struct radv_vs_output_info *outinfo) +{ + uint32_t param_count = 0; + unsigned target; + unsigned pos_idx, num_pos_exports = 0; + struct ac_export_args args, pos_args[4] = {}; + LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; + int i; + + if (ctx->options->key.has_multiview_view_index) { + LLVMValueRef* tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; + if(!*tmp_out) { + for(unsigned i = 0; i < 4; ++i) + ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] = + ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); + } + + LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, ctx->abi.view_index), *tmp_out); + ctx->output_mask |= 1ull << VARYING_SLOT_LAYER; + } + + memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, + sizeof(outinfo->vs_output_param_offset)); + + if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { + unsigned output_usage_mask, length; + LLVMValueRef slots[8]; + unsigned j; + + if (ctx->stage == MESA_SHADER_VERTEX && + !ctx->is_gs_copy_shader) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; + } else { + assert(ctx->is_gs_copy_shader); + output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[VARYING_SLOT_CLIP_DIST0]; + } + + length = util_last_bit(output_usage_mask); + + i = VARYING_SLOT_CLIP_DIST0; + for (j = 0; j < length; j++) + slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + + for (i = length; i < 8; i++) + slots[i] = LLVMGetUndef(ctx->ac.f32); + + if (length > 4) { + target = V_008DFC_SQ_EXP_POS + 3; + si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); + } + + target = V_008DFC_SQ_EXP_POS + 2; + si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); + + /* Export the clip/cull distances values to the next stage. 
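+	 * They go out as regular PARAM exports, so the fragment shader can
+	 * read them like any other varying.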
*/ + radv_export_param(ctx, param_count, &slots[0], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = param_count++; + if (length > 4) { + radv_export_param(ctx, param_count, &slots[4], 0xf); + outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = param_count++; + } + } + + LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; + if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { + for (unsigned j = 0; j < 4; j++) + pos_values[j] = radv_load_output(ctx, VARYING_SLOT_POS, j); + } + si_llvm_init_export_args(ctx, pos_values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]); + + if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { + outinfo->writes_pointsize = true; + psize_value = radv_load_output(ctx, VARYING_SLOT_PSIZ, 0); + } + + if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { + outinfo->writes_layer = true; + layer_value = radv_load_output(ctx, VARYING_SLOT_LAYER, 0); + } + + if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { + outinfo->writes_viewport_index = true; + viewport_index_value = radv_load_output(ctx, VARYING_SLOT_VIEWPORT, 0); + } + + if (ctx->shader_info->info.so.num_outputs && + !ctx->is_gs_copy_shader) { + /* The GS copy shader emission already emits streamout. */ + radv_emit_streamout(ctx, 0); + } + + if (outinfo->writes_pointsize || + outinfo->writes_layer || + outinfo->writes_viewport_index) { + pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) | + (outinfo->writes_layer == true ? 4 : 0)); + pos_args[1].valid_mask = 0; + pos_args[1].done = 0; + pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; + pos_args[1].compr = 0; + pos_args[1].out[0] = ctx->ac.f32_0; /* X */ + pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ + pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ + pos_args[1].out[3] = ctx->ac.f32_0; /* W */ + + if (outinfo->writes_pointsize == true) + pos_args[1].out[0] = psize_value; + if (outinfo->writes_layer == true) + pos_args[1].out[2] = layer_value; + if (outinfo->writes_viewport_index == true) { + if (ctx->options->chip_class >= GFX9) { + /* GFX9 has the layer in out.z[10:0] and the viewport + * index in out.z[19:16]. 
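+					 * For example, layer 5 with viewport
+					 * index 3 packs to (3 << 16) | 5.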
+ */ + LLVMValueRef v = viewport_index_value; + v = ac_to_integer(&ctx->ac, v); + v = LLVMBuildShl(ctx->ac.builder, v, + LLVMConstInt(ctx->ac.i32, 16, false), + ""); + v = LLVMBuildOr(ctx->ac.builder, v, + ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); + + pos_args[1].out[2] = ac_to_float(&ctx->ac, v); + pos_args[1].enabled_channels |= 1 << 2; + } else { + pos_args[1].out[3] = viewport_index_value; + pos_args[1].enabled_channels |= 1 << 3; + } + } + } + for (i = 0; i < 4; i++) { + if (pos_args[i].out[0]) + num_pos_exports++; + } + + pos_idx = 0; + for (i = 0; i < 4; i++) { + if (!pos_args[i].out[0]) + continue; + + /* Specify the target we are exporting */ + pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; + if (pos_idx == num_pos_exports) + pos_args[i].done = 1; + ac_build_export(&ctx->ac, &pos_args[i]); + } + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef values[4]; + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i != VARYING_SLOT_LAYER && + i != VARYING_SLOT_PRIMITIVE_ID && + i < VARYING_SLOT_VAR0) + continue; + + for (unsigned j = 0; j < 4; j++) + values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + + unsigned output_usage_mask; + + if (ctx->stage == MESA_SHADER_VERTEX && + !ctx->is_gs_copy_shader) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[i]; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[i]; + } else { + assert(ctx->is_gs_copy_shader); + output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[i]; + } + + radv_export_param(ctx, param_count, values, output_usage_mask); + + outinfo->vs_output_param_offset[i] = param_count++; + } + + if (export_prim_id) { + LLVMValueRef values[4]; + + values[0] = ctx->vs_prim_id; + ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, + ctx->shader_info->vs.vgpr_comp_cnt); + for (unsigned j = 1; j < 4; j++) + values[j] = ctx->ac.f32_0; + + radv_export_param(ctx, param_count, values, 0x1); + + outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++; + outinfo->export_prim_id = true; + } + + if (export_layer_id && layer_value) { + LLVMValueRef values[4]; + + values[0] = layer_value; + for (unsigned j = 1; j < 4; j++) + values[j] = ctx->ac.f32_0; + + radv_export_param(ctx, param_count, values, 0x1); + + outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count++; + } + + outinfo->pos_exports = num_pos_exports; + outinfo->param_exports = param_count; +} + +static void +handle_es_outputs_post(struct radv_shader_context *ctx, + struct radv_es_output_info *outinfo) +{ + int j; + uint64_t max_output_written = 0; + LLVMValueRef lds_base = NULL; + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + unsigned output_usage_mask; + int param_index; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (ctx->stage == MESA_SHADER_VERTEX) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[i]; + } else { + assert(ctx->stage == MESA_SHADER_TESS_EVAL); + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[i]; + } + + if (i == VARYING_SLOT_CLIP_DIST0) + length = util_last_bit(output_usage_mask); + + param_index = shader_io_get_unique_index(i); + + max_output_written = MAX2(param_index + (length > 4), max_output_written); + } + + outinfo->esgs_itemsize = (max_output_written + 1) * 16; + + if (ctx->ac.chip_class >= GFX9) { + unsigned itemsize_dw = outinfo->esgs_itemsize / 4; + LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); 
+ LLVMValueRef wave_idx = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 24, 4); + vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, + LLVMBuildMul(ctx->ac.builder, wave_idx, + LLVMConstInt(ctx->ac.i32, 64, false), ""), ""); + lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, + LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), ""); + } + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef dw_addr = NULL; + LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; + unsigned output_usage_mask; + int param_index; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (ctx->stage == MESA_SHADER_VERTEX) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[i]; + } else { + assert(ctx->stage == MESA_SHADER_TESS_EVAL); + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[i]; + } + + if (i == VARYING_SLOT_CLIP_DIST0) + length = util_last_bit(output_usage_mask); + + param_index = shader_io_get_unique_index(i); + + if (lds_base) { + dw_addr = LLVMBuildAdd(ctx->ac.builder, lds_base, + LLVMConstInt(ctx->ac.i32, param_index * 4, false), + ""); + } + + for (j = 0; j < length; j++) { + if (!(output_usage_mask & (1 << j))) + continue; + + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); + out_val = ac_to_integer(&ctx->ac, out_val); + out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); + + if (ctx->ac.chip_class >= GFX9) { + LLVMValueRef dw_addr_offset = + LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, + j, false), ""); + + ac_lds_store(&ctx->ac, dw_addr_offset, out_val); + } else { + ac_build_buffer_store_dword(&ctx->ac, + ctx->esgs_ring, + out_val, 1, + NULL, ctx->es2gs_offset, + (4 * param_index + j) * 4, + 1, 1, true, true); + } + } + } +} + +static void +handle_ls_outputs_post(struct radv_shader_context *ctx) +{ + LLVMValueRef vertex_id = ctx->rel_auto_id; + uint32_t num_tcs_inputs = util_last_bit64(ctx->shader_info->info.vs.ls_outputs_written); + LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false); + LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, + vertex_dw_stride, ""); + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + unsigned output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[i]; + LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) + length = util_last_bit(output_usage_mask); + + int param = shader_io_get_unique_index(i); + LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, + LLVMConstInt(ctx->ac.i32, param * 4, false), + ""); + for (unsigned j = 0; j < length; j++) { + LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); + value = ac_to_integer(&ctx->ac, value); + value = LLVMBuildZExtOrBitCast(ctx->ac.builder, value, ctx->ac.i32, ""); + ac_lds_store(&ctx->ac, dw_addr, value); + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, ""); + } + } +} + +static void +write_tess_factors(struct radv_shader_context *ctx) +{ + unsigned stride, outer_comps, inner_comps; + struct ac_build_if_state if_ctx, inner_if_ctx; + LLVMValueRef invocation_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5); + LLVMValueRef rel_patch_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); + unsigned tess_inner_index = 0, tess_outer_index; + LLVMValueRef lds_base, lds_inner = NULL, lds_outer, byteoffset, buffer; + LLVMValueRef out[6], vec0, vec1, 
tf_base, inner[4], outer[4]; + int i; + ac_emit_barrier(&ctx->ac, ctx->stage); + + switch (ctx->options->key.tcs.primitive_mode) { + case GL_ISOLINES: + stride = 2; + outer_comps = 2; + inner_comps = 0; + break; + case GL_TRIANGLES: + stride = 4; + outer_comps = 3; + inner_comps = 1; + break; + case GL_QUADS: + stride = 6; + outer_comps = 4; + inner_comps = 2; + break; + default: + return; + } + + ac_nir_build_if(&if_ctx, ctx, + LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, + invocation_id, ctx->ac.i32_0, "")); + + lds_base = get_tcs_out_current_patch_data_offset(ctx); + + if (inner_comps) { + tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); + lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base, + LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), ""); + } + + tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); + lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base, + LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), ""); + + for (i = 0; i < 4; i++) { + inner[i] = LLVMGetUndef(ctx->ac.i32); + outer[i] = LLVMGetUndef(ctx->ac.i32); + } + + /* For isolines, the two outer factors are written to the TF buffer in + * reversed order (LINES reversal). */ + if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { + outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer); + lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, + ctx->ac.i32_1, ""); + outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer); + } else { + for (i = 0; i < outer_comps; i++) { + outer[i] = out[i] = + ac_lds_load(&ctx->ac, lds_outer); + lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, + ctx->ac.i32_1, ""); + } + for (i = 0; i < inner_comps; i++) { + inner[i] = out[outer_comps+i] = + ac_lds_load(&ctx->ac, lds_inner); + lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_inner, + ctx->ac.i32_1, ""); + } + } + + /* Convert the outputs to vectors for stores. */ + vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4)); + vec1 = NULL; + + if (stride > 4) + vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); + + + buffer = ctx->hs_ring_tess_factor; + tf_base = ctx->tess_factor_offset; + byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id, + LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); + unsigned tf_offset = 0; + + if (ctx->options->chip_class <= VI) { + ac_nir_build_if(&inner_if_ctx, ctx, + LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, + rel_patch_id, ctx->ac.i32_0, "")); + + /* Store the dynamic HS control word. */ + ac_build_buffer_store_dword(&ctx->ac, buffer, + LLVMConstInt(ctx->ac.i32, 0x80000000, false), + 1, ctx->ac.i32_0, tf_base, + 0, 1, 0, true, false); + tf_offset += 4; + + ac_nir_build_endif(&inner_if_ctx); + } + + /* Store the tessellation factors. 
*/ + ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, + MIN2(stride, 4), byteoffset, tf_base, + tf_offset, 1, 0, true, false); + if (vec1) + ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, + stride - 4, byteoffset, tf_base, + 16 + tf_offset, 1, 0, true, false); + + /* Store to off-chip memory for TES to read, but only if TES actually + * reads the tess factors. */ + if (ctx->options->key.tcs.tes_reads_tess_factors) { + LLVMValueRef inner_vec, outer_vec, tf_outer_offset; + LLVMValueRef tf_inner_offset; + unsigned param_outer, param_inner; + + param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); + tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL, + LLVMConstInt(ctx->ac.i32, param_outer, 0)); + + outer_vec = ac_build_gather_values(&ctx->ac, outer, + util_next_power_of_two(outer_comps)); + + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, + outer_comps, tf_outer_offset, + ctx->oc_lds, 0, 1, 0, true, false); + if (inner_comps) { + param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); + tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, + LLVMConstInt(ctx->ac.i32, param_inner, 0)); + + inner_vec = inner_comps == 1 ? inner[0] : + ac_build_gather_values(&ctx->ac, inner, inner_comps); + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, + inner_comps, tf_inner_offset, + ctx->oc_lds, 0, 1, 0, true, false); + } + } + ac_nir_build_endif(&if_ctx); +} + +static void +handle_tcs_outputs_post(struct radv_shader_context *ctx) +{ + write_tess_factors(ctx); +} + +static bool +si_export_mrt_color(struct radv_shader_context *ctx, + LLVMValueRef *color, unsigned index, + struct ac_export_args *args) +{ + /* Export */ + si_llvm_init_export_args(ctx, color, 0xf, + V_008DFC_SQ_EXP_MRT + index, args); + if (!args->enabled_channels) + return false; /* unnecessary NULL export */ + + return true; +} + +static void +radv_export_mrt_z(struct radv_shader_context *ctx, + LLVMValueRef depth, LLVMValueRef stencil, + LLVMValueRef samplemask) +{ + struct ac_export_args args; + + ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args); + + ac_build_export(&ctx->ac, &args); +} + +static void +handle_fs_outputs_post(struct radv_shader_context *ctx) +{ + unsigned index = 0; + LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; + struct ac_export_args color_args[8]; + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef values[4]; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i < FRAG_RESULT_DATA0) + continue; + + for (unsigned j = 0; j < 4; j++) + values[j] = ac_to_float(&ctx->ac, + radv_load_output(ctx, i, j)); + + bool ret = si_export_mrt_color(ctx, values, + i - FRAG_RESULT_DATA0, + &color_args[index]); + if (ret) + index++; + } + + /* Process depth, stencil, samplemask. */ + if (ctx->shader_info->info.ps.writes_z) { + depth = ac_to_float(&ctx->ac, + radv_load_output(ctx, FRAG_RESULT_DEPTH, 0)); + } + if (ctx->shader_info->info.ps.writes_stencil) { + stencil = ac_to_float(&ctx->ac, + radv_load_output(ctx, FRAG_RESULT_STENCIL, 0)); + } + if (ctx->shader_info->info.ps.writes_sample_mask) { + samplemask = ac_to_float(&ctx->ac, + radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); + } + + /* Set the DONE bit on last non-null color export only if Z isn't + * exported. 
+ */ + if (index > 0 && + !ctx->shader_info->info.ps.writes_z && + !ctx->shader_info->info.ps.writes_stencil && + !ctx->shader_info->info.ps.writes_sample_mask) { + unsigned last = index - 1; + + color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */ + color_args[last].done = 1; /* DONE bit */ + } + + /* Export PS outputs. */ + for (unsigned i = 0; i < index; i++) + ac_build_export(&ctx->ac, &color_args[i]); + + if (depth || stencil || samplemask) + radv_export_mrt_z(ctx, depth, stencil, samplemask); + else if (!index) + ac_build_export_null(&ctx->ac); +} + +static void +emit_gs_epilogue(struct radv_shader_context *ctx) +{ + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id); +} + +static void +handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, + LLVMValueRef *addrs) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + switch (ctx->stage) { + case MESA_SHADER_VERTEX: + if (ctx->options->key.vs.as_ls) + handle_ls_outputs_post(ctx); + else if (ctx->options->key.vs.as_es) + handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); + else + handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, + ctx->options->key.vs.export_layer_id, + &ctx->shader_info->vs.outinfo); + break; + case MESA_SHADER_FRAGMENT: + handle_fs_outputs_post(ctx); + break; + case MESA_SHADER_GEOMETRY: + emit_gs_epilogue(ctx); + break; + case MESA_SHADER_TESS_CTRL: + handle_tcs_outputs_post(ctx); + break; + case MESA_SHADER_TESS_EVAL: + if (ctx->options->key.tes.as_es) + handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info); + else + handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, + ctx->options->key.tes.export_layer_id, + &ctx->shader_info->tes.outinfo); + break; + default: + break; + } +} + +static void ac_llvm_finalize_module(struct radv_shader_context *ctx, + LLVMPassManagerRef passmgr, + const struct radv_nir_compiler_options *options) +{ + LLVMRunPassManager(passmgr, ctx->ac.module); + LLVMDisposeBuilder(ctx->ac.builder); + + ac_llvm_context_dispose(&ctx->ac); +} + +static void +ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) +{ + struct radv_vs_output_info *outinfo; + + switch (ctx->stage) { + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_GEOMETRY: + return; + case MESA_SHADER_VERTEX: + if (ctx->options->key.vs.as_ls || + ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->vs.outinfo; + break; + case MESA_SHADER_TESS_EVAL: + if (ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->tes.outinfo; + break; + default: + unreachable("Unhandled shader type"); + } + + ac_optimize_vs_outputs(&ctx->ac, + ctx->main_function, + outinfo->vs_output_param_offset, + VARYING_SLOT_MAX, + &outinfo->param_exports); +} + +static void +ac_setup_rings(struct radv_shader_context *ctx) +{ + if (ctx->options->chip_class <= VI && + (ctx->stage == MESA_SHADER_GEOMETRY || + ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) { + unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? 
RING_ESGS_GS + : RING_ESGS_VS; + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false); + + ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, + ctx->ring_offsets, + offset); + } + + if (ctx->is_gs_copy_shader) { + ctx->gsvs_ring[0] = + ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, + LLVMConstInt(ctx->ac.i32, + RING_GSVS_VS, false)); + } + + if (ctx->stage == MESA_SHADER_GEOMETRY) { + /* The conceptual layout of the GSVS ring is + * v0c0 .. vLc0 v0c1 .. vLc1 .. + * but the real memory layout is swizzled across + * threads: + * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL + * t16v0c0 .. + * Override the buffer descriptor accordingly. + */ + LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2); + uint64_t stream_offset = 0; + unsigned num_records = 64; + LLVMValueRef base_ring; + + base_ring = + ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, + LLVMConstInt(ctx->ac.i32, + RING_GSVS_GS, false)); + + for (unsigned stream = 0; stream < 4; stream++) { + unsigned num_components, stride; + LLVMValueRef ring, tmp; + + num_components = + ctx->shader_info->info.gs.num_stream_output_components[stream]; + + if (!num_components) + continue; + + stride = 4 * num_components * ctx->gs_max_out_vertices; + + /* Limit on the stride field for <= CIK. */ + assert(stride < (1 << 14)); + + ring = LLVMBuildBitCast(ctx->ac.builder, + base_ring, v2i64, ""); + tmp = LLVMBuildExtractElement(ctx->ac.builder, + ring, ctx->ac.i32_0, ""); + tmp = LLVMBuildAdd(ctx->ac.builder, tmp, + LLVMConstInt(ctx->ac.i64, + stream_offset, 0), ""); + ring = LLVMBuildInsertElement(ctx->ac.builder, + ring, tmp, ctx->ac.i32_0, ""); + + stream_offset += stride * 64; + + ring = LLVMBuildBitCast(ctx->ac.builder, ring, + ctx->ac.v4i32, ""); + + tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, + ctx->ac.i32_1, ""); + tmp = LLVMBuildOr(ctx->ac.builder, tmp, + LLVMConstInt(ctx->ac.i32, + S_008F04_STRIDE(stride), false), ""); + ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, + ctx->ac.i32_1, ""); + + ring = LLVMBuildInsertElement(ctx->ac.builder, ring, + LLVMConstInt(ctx->ac.i32, + num_records, false), + LLVMConstInt(ctx->ac.i32, 2, false), ""); + + ctx->gsvs_ring[stream] = ring; + } + } + + if (ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL) { + ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false)); + ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false)); + } +} + +static unsigned +ac_nir_get_max_workgroup_size(enum chip_class chip_class, + const struct nir_shader *nir) +{ + switch (nir->info.stage) { + case MESA_SHADER_TESS_CTRL: + return chip_class >= CIK ? 128 : 64; + case MESA_SHADER_GEOMETRY: + return chip_class >= GFX9 ? 128 : 64; + case MESA_SHADER_COMPUTE: + break; + default: + return 0; + } + + unsigned max_workgroup_size = nir->info.cs.local_size[0] * + nir->info.cs.local_size[1] * + nir->info.cs.local_size[2]; + return max_workgroup_size; +} + +/* Work around the HW not emitting the TCS regs if there are no HS threads. 
*/ +static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) +{ + LLVMValueRef count = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 8, 8); + LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, + ctx->ac.i32_0, ""); + ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); + ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); + ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); +} + +static void prepare_gs_input_vgprs(struct radv_shader_context *ctx) +{ + for(int i = 5; i >= 0; --i) { + ctx->gs_vtx_offset[i] = ac_unpack_param(&ctx->ac, ctx->gs_vtx_offset[i & ~1], + (i & 1) * 16, 16); + } + + ctx->gs_wave_id = ac_unpack_param(&ctx->ac, ctx->merged_wave_info, 16, 8); +} + + +static +LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, + struct nir_shader *const *shaders, + int shader_count, + struct radv_shader_variant_info *shader_info, + const struct radv_nir_compiler_options *options) +{ + struct radv_shader_context ctx = {0}; + unsigned i; + ctx.options = options; + ctx.shader_info = shader_info; + + ac_llvm_context_init(&ctx.ac, options->chip_class, options->family); + ctx.context = ctx.ac.context; + ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context); + + enum ac_float_mode float_mode = + options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : + AC_FLOAT_MODE_DEFAULT; + + ctx.ac.builder = ac_create_builder(ctx.context, float_mode); + + memset(shader_info, 0, sizeof(*shader_info)); + + for(int i = 0; i < shader_count; ++i) + radv_nir_shader_info_pass(shaders[i], options, &shader_info->info); + + for (i = 0; i < RADV_UD_MAX_SETS; i++) + shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; + for (i = 0; i < AC_UD_MAX_UD; i++) + shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; + + ctx.max_workgroup_size = 0; + for (int i = 0; i < shader_count; ++i) { + ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, + ac_nir_get_max_workgroup_size(ctx.options->chip_class, + shaders[i])); + } + + create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, + shader_count >= 2 ? 
shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); + + ctx.abi.inputs = &ctx.inputs[0]; + ctx.abi.emit_outputs = handle_shader_outputs_post; + ctx.abi.emit_vertex = visit_emit_vertex; + ctx.abi.load_ubo = radv_load_ubo; + ctx.abi.load_ssbo = radv_load_ssbo; + ctx.abi.load_sampler_desc = radv_get_sampler_desc; + ctx.abi.load_resource = radv_load_resource; + ctx.abi.clamp_shadow_reference = false; + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; + + if (shader_count >= 2) + ac_init_exec_full_mask(&ctx.ac); + + if (ctx.ac.chip_class == GFX9 && + shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) + ac_nir_fixup_ls_hs_input_vgprs(&ctx); + + for(int i = 0; i < shader_count; ++i) { + ctx.stage = shaders[i]->info.stage; + ctx.output_mask = 0; + + if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { + for (int i = 0; i < 4; i++) { + ctx.gs_next_vertex[i] = + ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); + } + ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out; + ctx.abi.load_inputs = load_gs_input; + ctx.abi.emit_primitive = visit_end_primitive; + } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { + ctx.tcs_outputs_read = shaders[i]->info.outputs_read; + ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read; + ctx.abi.load_tess_varyings = load_tcs_varyings; + ctx.abi.load_patch_vertices_in = load_patch_vertices_in; + ctx.abi.store_tcs_outputs = store_tcs_output; + ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; + if (shader_count == 1) + ctx.tcs_num_inputs = ctx.options->key.tcs.num_inputs; + else + ctx.tcs_num_inputs = util_last_bit64(shader_info->info.vs.ls_outputs_written); + ctx.tcs_num_patches = get_tcs_num_patches(&ctx); + } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { + ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; + ctx.abi.load_tess_varyings = load_tes_input; + ctx.abi.load_tess_coord = load_tess_coord; + ctx.abi.load_patch_vertices_in = load_patch_vertices_in; + ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; + ctx.tcs_num_patches = ctx.options->key.tes.num_patches; + } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { + if (shader_info->info.vs.needs_instance_id) { + if (ctx.options->key.vs.as_ls) { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt); + } else { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt); + } + } + ctx.abi.load_base_vertex = radv_load_base_vertex; + } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { + shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; + ctx.abi.lookup_interp_param = lookup_interp_param; + ctx.abi.load_sample_position = load_sample_position; + ctx.abi.load_sample_mask_in = load_sample_mask_in; + ctx.abi.emit_kill = radv_emit_kill; + } + + if (i) + ac_emit_barrier(&ctx.ac, ctx.stage); + + nir_foreach_variable(variable, &shaders[i]->outputs) + scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); + + if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { + unsigned addclip = shaders[i]->info.clip_distance_array_size + + shaders[i]->info.cull_distance_array_size > 4; + ctx.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; + ctx.max_gsvs_emit_size = ctx.gsvs_vertex_size * + shaders[i]->info.gs.vertices_out; + } + + ac_setup_rings(&ctx); + + LLVMBasicBlockRef merge_block; + if (shader_count >= 2) { + LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); + 
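+ /* A note on the gating below: one merged-shader wave can carry threads + * for two stages, and merged_wave_info packs each stage's thread count + * in an 8-bit field (stage 0 in bits [7:0], stage 1 in bits [15:8], per + * the 8 * i unpack that follows); the "thread_id < count" branch lets + * only the threads belonging to stage i execute its body. */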
LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); + merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); + + LLVMValueRef count = ac_unpack_param(&ctx.ac, ctx.merged_wave_info, 8 * i, 8); + LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); + LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, + thread_id, count, ""); + LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block); + + LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block); + } + + if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) + handle_fs_inputs(&ctx, shaders[i]); + else if(shaders[i]->info.stage == MESA_SHADER_VERTEX) + handle_vs_inputs(&ctx, shaders[i]); + else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY) + prepare_gs_input_vgprs(&ctx); + + ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]); + + if (shader_count >= 2) { + LLVMBuildBr(ctx.ac.builder, merge_block); + LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block); + } + + if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { + shader_info->gs.gsvs_vertex_size = ctx.gsvs_vertex_size; + shader_info->gs.max_gsvs_emit_size = ctx.max_gsvs_emit_size; + } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { + shader_info->tcs.num_patches = ctx.tcs_num_patches; + shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx); + } + } + + LLVMBuildRetVoid(ctx.ac.builder); + + if (options->dump_preoptir) + ac_dump_module(ctx.ac.module); + + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); + + if (shader_count == 1) + ac_nir_eliminate_const_vs_outputs(&ctx); + + if (options->dump_shader) { + ctx.shader_info->private_mem_vgprs = + ac_count_scratch_private_memory(ctx.main_function); + } + + return ctx.ac.module; +} + +static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) +{ + unsigned *retval = (unsigned *)context; + LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); + char *description = LLVMGetDiagInfoDescription(di); + + if (severity == LLVMDSError) { + *retval = 1; + fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", + description); + } + + LLVMDisposeMessage(description); +} + +static unsigned ac_llvm_compile(LLVMModuleRef M, + struct ac_shader_binary *binary, + struct ac_llvm_compiler *ac_llvm) +{ + unsigned retval = 0; + LLVMContextRef llvm_ctx; + + /* Set up the diagnostic handler. */ + llvm_ctx = LLVMGetModuleContext(M); + + LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, + &retval); + + /* Compile the IR. */ + if (!radv_compile_to_binary(ac_llvm, M, binary)) + retval = 1; + return retval; +} + +static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, + LLVMModuleRef llvm_module, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct radv_shader_variant_info *shader_info, + gl_shader_stage stage, + const struct radv_nir_compiler_options *options) +{ + if (options->dump_shader) + ac_dump_module(llvm_module); + + memset(binary, 0, sizeof(*binary)); + + if (options->record_llvm_ir) { + char *llvm_ir = LLVMPrintModuleToString(llvm_module); + binary->llvm_ir_string = strdup(llvm_ir); + LLVMDisposeMessage(llvm_ir); + } + + int v = ac_llvm_compile(llvm_module, binary, ac_llvm); + if (v) { + fprintf(stderr, "compile failed\n"); + } + + if (options->dump_shader) + fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); + + ac_shader_binary_read_config(binary, config, 0, options->supports_spill); + + LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); + LLVMDisposeModule(llvm_module); + 
LLVMContextDispose(ctx); + + if (stage == MESA_SHADER_FRAGMENT) { + shader_info->num_input_vgprs = 0; + if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 3; + if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + } + config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); + + /* +3 for scratch wave offset and VCC */ + config->num_sgprs = MAX2(config->num_sgprs, + shader_info->num_input_sgprs + 3); + + /* Enable 64-bit and 16-bit denormals, because there is no performance + * cost. + * + * If denormals are enabled, all floating-point output modifiers are + * ignored. + * + * Don't enable denormals for 32-bit floats, because: + * - Floating-point output modifiers would be ignored by the hw. + * - Some opcodes don't support denormals, such as v_mad_f32. We would + * have to stop using those. + * - SI & CI would be very slow. 
+ */ + config->float_mode |= V_00B028_FP_64_DENORMS; +} + +static void +ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_shader *nir, const struct radv_nir_compiler_options *options) +{ + switch (nir->info.stage) { + case MESA_SHADER_COMPUTE: + for (int i = 0; i < 3; ++i) + shader_info->cs.block_size[i] = nir->info.cs.local_size[i]; + break; + case MESA_SHADER_FRAGMENT: + shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests; + break; + case MESA_SHADER_GEOMETRY: + shader_info->gs.vertices_in = nir->info.gs.vertices_in; + shader_info->gs.vertices_out = nir->info.gs.vertices_out; + shader_info->gs.output_prim = nir->info.gs.output_primitive; + shader_info->gs.invocations = nir->info.gs.invocations; + break; + case MESA_SHADER_TESS_EVAL: + shader_info->tes.primitive_mode = nir->info.tess.primitive_mode; + shader_info->tes.spacing = nir->info.tess.spacing; + shader_info->tes.ccw = nir->info.tess.ccw; + shader_info->tes.point_mode = nir->info.tess.point_mode; + shader_info->tes.as_es = options->key.tes.as_es; + break; + case MESA_SHADER_TESS_CTRL: + shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; + break; + case MESA_SHADER_VERTEX: + shader_info->vs.as_es = options->key.vs.as_es; + shader_info->vs.as_ls = options->key.vs.as_ls; + /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */ + if (options->key.vs.as_ls) + shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); + break; + default: + break; + } +} + +void +radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct radv_shader_variant_info *shader_info, + struct nir_shader *const *nir, + int nir_count, + const struct radv_nir_compiler_options *options) +{ + + LLVMModuleRef llvm_module; + + llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info, + options); + + ac_compile_llvm_module(ac_llvm, llvm_module, binary, config, shader_info, + nir[0]->info.stage, options); + + for (int i = 0; i < nir_count; ++i) + ac_fill_shader_info(shader_info, nir[i], options); + + /* Determine the ES type (VS or TES) for the GS on GFX9. */ + if (options->chip_class == GFX9) { + if (nir_count == 2 && + nir[1]->info.stage == MESA_SHADER_GEOMETRY) { + shader_info->gs.es_type = nir[0]->info.stage; + } + } +} + +static void +ac_gs_copy_shader_emit(struct radv_shader_context *ctx) +{ + LLVMValueRef vtx_offset = + LLVMBuildMul(ctx->ac.builder, ctx->abi.vertex_id, + LLVMConstInt(ctx->ac.i32, 4, false), ""); + LLVMValueRef stream_id; + + /* Fetch the vertex stream ID. 
*/ + if (ctx->shader_info->info.so.num_outputs) { + stream_id = + ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2); + } else { + stream_id = ctx->ac.i32_0; + } + + LLVMBasicBlockRef end_bb; + LLVMValueRef switch_inst; + + end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, + ctx->main_function, "end"); + switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4); + + for (unsigned stream = 0; stream < 4; stream++) { + unsigned num_components = + ctx->shader_info->info.gs.num_stream_output_components[stream]; + LLVMBasicBlockRef bb; + unsigned offset; + + if (!num_components) + continue; + + if (stream > 0 && !ctx->shader_info->info.so.num_outputs) + continue; + + bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out"); + LLVMAddCase(switch_inst, LLVMConstInt(ctx->ac.i32, stream, 0), bb); + LLVMPositionBuilderAtEnd(ctx->ac.builder, bb); + + offset = 0; + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + unsigned output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[i]; + unsigned output_stream = + ctx->shader_info->info.gs.output_streams[i]; + int length = util_last_bit(output_usage_mask); + + if (!(ctx->output_mask & (1ull << i)) || + output_stream != stream) + continue; + + for (unsigned j = 0; j < length; j++) { + LLVMValueRef value, soffset; + + if (!(output_usage_mask & (1 << j))) + continue; + + soffset = LLVMConstInt(ctx->ac.i32, + offset * + ctx->gs_max_out_vertices * 16 * 4, false); + + offset++; + + value = ac_build_buffer_load(&ctx->ac, + ctx->gsvs_ring[0], + 1, ctx->ac.i32_0, + vtx_offset, soffset, + 0, 1, 1, true, false); + + LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]); + if (ac_get_type_size(type) == 2) { + value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, ""); + value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, ""); + } + + LLVMBuildStore(ctx->ac.builder, + ac_to_float(&ctx->ac, value), ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]); + } + } + + if (ctx->shader_info->info.so.num_outputs) + radv_emit_streamout(ctx, stream); + + if (stream == 0) { + handle_vs_outputs_post(ctx, false, false, + &ctx->shader_info->vs.outinfo); + } + + LLVMBuildBr(ctx->ac.builder, end_bb); + } + + LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb); +} + +void +radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, + struct nir_shader *geom_shader, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct radv_shader_variant_info *shader_info, + const struct radv_nir_compiler_options *options) +{ + struct radv_shader_context ctx = {0}; + ctx.options = options; + ctx.shader_info = shader_info; + + ac_llvm_context_init(&ctx.ac, options->chip_class, options->family); + ctx.context = ctx.ac.context; + ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context); + + ctx.is_gs_copy_shader = true; + + enum ac_float_mode float_mode = + options->unsafe_math ? 
AC_FLOAT_MODE_UNSAFE_FP_MATH : + AC_FLOAT_MODE_DEFAULT; + + ctx.ac.builder = ac_create_builder(ctx.context, float_mode); + ctx.stage = MESA_SHADER_VERTEX; + + radv_nir_shader_info_pass(geom_shader, options, &shader_info->info); + + create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); + + ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; + ac_setup_rings(&ctx); + + nir_foreach_variable(variable, &geom_shader->outputs) { + scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX); + ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, + variable, MESA_SHADER_VERTEX); + } + + ac_gs_copy_shader_emit(&ctx); + + LLVMBuildRetVoid(ctx.ac.builder); + + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); + + ac_compile_llvm_module(ac_llvm, ctx.ac.module, binary, config, shader_info, + MESA_SHADER_VERTEX, options); +} diff --git a/lib/mesa/src/amd/vulkan/radv_pass.c b/lib/mesa/src/amd/vulkan/radv_pass.c index a52dae39d..9cd1b31a0 100644 --- a/lib/mesa/src/amd/vulkan/radv_pass.c +++ b/lib/mesa/src/amd/vulkan/radv_pass.c @@ -38,7 +38,7 @@ VkResult radv_CreateRenderPass( struct radv_render_pass *pass; size_t size; size_t attachments_offset; - VkRenderPassMultiviewCreateInfoKHX *multiview_info = NULL; + VkRenderPassMultiviewCreateInfoKHR *multiview_info = NULL; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); @@ -50,7 +50,7 @@ VkResult radv_CreateRenderPass( pass = vk_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pass == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); memset(pass, 0, size); pass->attachment_count = pCreateInfo->attachmentCount; @@ -59,8 +59,8 @@ VkResult radv_CreateRenderPass( vk_foreach_struct(ext, pCreateInfo->pNext) { switch(ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX: - multiview_info = ( VkRenderPassMultiviewCreateInfoKHX*)ext; + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR: + multiview_info = ( VkRenderPassMultiviewCreateInfoKHR*)ext; break; default: break; @@ -80,25 +80,25 @@ VkResult radv_CreateRenderPass( // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; } uint32_t subpass_attachment_count = 0; - VkAttachmentReference *p; + struct radv_subpass_attachment *p; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; subpass_attachment_count += desc->inputAttachmentCount + desc->colorAttachmentCount + - /* Count colorAttachmentCount again for resolve_attachments */ - desc->colorAttachmentCount; + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + + (desc->pDepthStencilAttachment != NULL); } if (subpass_attachment_count) { pass->subpass_attachments = vk_alloc2(&device->alloc, pAllocator, - subpass_attachment_count * sizeof(VkAttachmentReference), 8, + subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (pass->subpass_attachments == NULL) { vk_free2(&device->alloc, pAllocator, pass); - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); } } else pass->subpass_attachments = NULL; @@ -106,6 +106,7 @@ VkResult radv_CreateRenderPass( p = pass->subpass_attachments; for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; + uint32_t color_sample_count = 1, depth_sample_count = 1; struct radv_subpass *subpass = &pass->subpasses[i]; subpass->input_count = desc->inputAttachmentCount; @@ -118,8 +119,10 @@ VkResult radv_CreateRenderPass( p += desc->inputAttachmentCount; for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] - = desc->pInputAttachments[j]; + subpass->input_attachments[j] = (struct radv_subpass_attachment) { + .attachment = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, + }; if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask; } @@ -130,10 +133,171 @@ VkResult radv_CreateRenderPass( p += desc->colorAttachmentCount; for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] - = desc->pColorAttachments[j]; - if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + subpass->color_attachments[j] = (struct radv_subpass_attachment) { + .attachment = desc->pColorAttachments[j].attachment, + .layout = desc->pColorAttachments[j].layout, + }; + if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) { pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask; + color_sample_count = pCreateInfo->pAttachments[desc->pColorAttachments[j].attachment].samples; + } + } + } + + subpass->has_resolve = false; + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t a = desc->pResolveAttachments[j].attachment; + subpass->resolve_attachments[j] = (struct radv_subpass_attachment) { + .attachment = desc->pResolveAttachments[j].attachment, + .layout = desc->pResolveAttachments[j].layout, + }; + if (a != VK_ATTACHMENT_UNUSED) { + subpass->has_resolve = true; + pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask; + } + } + } + + if (desc->pDepthStencilAttachment) { + subpass->depth_stencil_attachment = (struct radv_subpass_attachment) { + .attachment = desc->pDepthStencilAttachment->attachment, + .layout = desc->pDepthStencilAttachment->layout, + }; + if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { + pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask; + depth_sample_count = pCreateInfo->pAttachments[desc->pDepthStencilAttachment->attachment].samples; + } + } else { + subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; + } + + subpass->max_sample_count = MAX2(color_sample_count, + depth_sample_count); + } + + for (unsigned i = 0; i < 
pCreateInfo->dependencyCount; ++i) { + uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; + if (dst == VK_SUBPASS_EXTERNAL) { + pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; + pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; + pass->end_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask; + } else { + pass->subpasses[dst].start_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask; + pass->subpasses[dst].start_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask; + pass->subpasses[dst].start_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask; + } + } + + *pRenderPass = radv_render_pass_to_handle(pass); + + return VK_SUCCESS; +} + +VkResult radv_CreateRenderPass2KHR( + VkDevice _device, + const VkRenderPassCreateInfo2KHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkRenderPass* pRenderPass) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_render_pass *pass; + size_t size; + size_t attachments_offset; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = vk_alloc2(&device->alloc, pAllocator, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + memset(pass, 0, size); + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct radv_render_pass_attachment *att = &pass->attachments[i]; + + att->format = pCreateInfo->pAttachments[i].format; + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; + att->final_layout = pCreateInfo->pAttachments[i].finalLayout; + // att->store_op = pCreateInfo->pAttachments[i].storeOp; + // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + } + uint32_t subpass_attachment_count = 0; + struct radv_subpass_attachment *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + + desc->colorAttachmentCount + + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + + (desc->pDepthStencilAttachment != NULL); + } + + if (subpass_attachment_count) { + pass->subpass_attachments = + vk_alloc2(&device->alloc, pAllocator, + subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + vk_free2(&device->alloc, pAllocator, pass); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } else + pass->subpass_attachments = NULL; + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; + uint32_t color_sample_count = 1, depth_sample_count = 1; + struct radv_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + subpass->view_mask = desc->viewMask; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] = (struct radv_subpass_attachment) { + .attachment = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, + }; + if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + subpass->color_attachments[j] = (struct radv_subpass_attachment) { + .attachment = desc->pColorAttachments[j].attachment, + .layout = desc->pColorAttachments[j].layout, + }; + if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) { + pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask; + color_sample_count = pCreateInfo->pAttachments[desc->pColorAttachments[j].attachment].samples; + } } } @@ -144,8 +308,10 @@ VkResult radv_CreateRenderPass( for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] - = desc->pResolveAttachments[j]; + subpass->resolve_attachments[j] = (struct radv_subpass_attachment) { + .attachment = desc->pResolveAttachments[j].attachment, + .layout = desc->pResolveAttachments[j].layout, + }; if (a != VK_ATTACHMENT_UNUSED) { subpass->has_resolve = true; pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask; @@ -154,13 +320,20 @@ VkResult radv_CreateRenderPass( } if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = - *desc->pDepthStencilAttachment; - if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) + subpass->depth_stencil_attachment = (struct radv_subpass_attachment) { + .attachment = desc->pDepthStencilAttachment->attachment, + .layout = desc->pDepthStencilAttachment->layout, + }; + if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) { pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask; + depth_sample_count = pCreateInfo->pAttachments[desc->pDepthStencilAttachment->attachment].samples; + } } else { subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; } + + subpass->max_sample_count = MAX2(color_sample_count, + depth_sample_count); } for (unsigned i = 0; i < pCreateInfo->dependencyCount; 
++i) { diff --git a/lib/mesa/src/amd/vulkan/radv_shader.c b/lib/mesa/src/amd/vulkan/radv_shader.c index 83e2e675e..f98ca6b4e 100644 --- a/lib/mesa/src/amd/vulkan/radv_shader.c +++ b/lib/mesa/src/amd/vulkan/radv_shader.c @@ -30,12 +30,14 @@ #include "radv_debug.h" #include "radv_private.h" #include "radv_shader.h" +#include "radv_shader_helper.h" #include "nir/nir.h" #include "nir/nir_builder.h" #include "spirv/nir_spirv.h" #include <llvm-c/Core.h> #include <llvm-c/TargetMachine.h> +#include <llvm-c/Support.h> #include "sid.h" #include "gfx9d.h" @@ -46,10 +48,14 @@ #include "util/debug.h" #include "ac_exp_param.h" +#include "util/string_buffer.h" + static const struct nir_shader_compiler_options nir_options = { .vertex_id_zero_based = true, .lower_scmp = true, .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_device_index_to_zero = true, .lower_fsat = true, .lower_fdiv = true, .lower_sub = true, @@ -64,6 +70,7 @@ static const struct nir_shader_compiler_options nir_options = { .lower_extract_byte = true, .lower_extract_word = true, .lower_ffma = true, + .lower_fpow = true, .max_unroll_iterations = 32 }; @@ -83,7 +90,7 @@ VkResult radv_CreateShaderModule( sizeof(*module) + pCreateInfo->codeSize, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (module == NULL) - return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); module->nir = NULL; module->size = pCreateInfo->codeSize; @@ -110,55 +117,33 @@ void radv_DestroyShaderModule( vk_free2(&device->alloc, pAllocator, module); } -bool -radv_lower_indirect_derefs(struct nir_shader *nir, - struct radv_physical_device *device) -{ - /* While it would be nice not to have this flag, we are constrained - * by the reality that LLVM 5.0 doesn't have working VGPR indexing - * on GFX9. - */ - bool llvm_has_working_vgpr_indexing = - device->rad_info.chip_class <= VI; - - /* TODO: Indirect indexing of GS inputs is unimplemented. - * - * TCS and TES load inputs directly from LDS or offchip memory, so - * indirect indexing is trivial. - */ - nir_variable_mode indirect_mask = 0; - if (nir->info.stage == MESA_SHADER_GEOMETRY || - (nir->info.stage != MESA_SHADER_TESS_CTRL && - nir->info.stage != MESA_SHADER_TESS_EVAL && - !llvm_has_working_vgpr_indexing)) { - indirect_mask |= nir_var_shader_in; - } - if (!llvm_has_working_vgpr_indexing && - nir->info.stage != MESA_SHADER_TESS_CTRL) - indirect_mask |= nir_var_shader_out; - - /* TODO: We shouldn't need to do this, however LLVM isn't currently - * smart enough to handle indirects without causing excess spilling - * causing the gpu to hang. - * - * See the following thread for more details of the problem: - * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html - */ - indirect_mask |= nir_var_local; - - return nir_lower_indirect_derefs(nir, indirect_mask); -} - void -radv_optimize_nir(struct nir_shader *shader) +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, + bool allow_copies) { bool progress; do { progress = false; + NIR_PASS(progress, shader, nir_split_array_vars, nir_var_local); + NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_local); + NIR_PASS_V(shader, nir_lower_vars_to_ssa); - NIR_PASS_V(shader, nir_lower_64bit_pack); + NIR_PASS_V(shader, nir_lower_pack); + + if (allow_copies) { + /* Only run this pass in the first call to + * radv_optimize_nir. Later calls assume that we've + * lowered away any copy_deref instructions and we + * don't want to introduce any more. 
+ */ + NIR_PASS(progress, shader, nir_opt_find_array_copies); + } + + NIR_PASS(progress, shader, nir_opt_copy_prop_vars); + NIR_PASS(progress, shader, nir_opt_dead_write_vars); + NIR_PASS_V(shader, nir_lower_alu_to_scalar); NIR_PASS_V(shader, nir_lower_phis_to_scalar); @@ -182,7 +167,10 @@ radv_optimize_nir(struct nir_shader *shader) if (shader->options->max_unroll_iterations) { NIR_PASS(progress, shader, nir_opt_loop_unroll, 0); } - } while (progress); + } while (progress && !optimize_conservatively); + + NIR_PASS(progress, shader, nir_opt_shrink_load); + NIR_PASS(progress, shader, nir_opt_move_load_ubo); } nir_shader * @@ -190,12 +178,9 @@ radv_shader_compile_to_nir(struct radv_device *device, struct radv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, - const VkSpecializationInfo *spec_info) + const VkSpecializationInfo *spec_info, + const VkPipelineCreateFlags flags) { - if (strcmp(entrypoint_name, "main") != 0) { - radv_finishme("Multiple shaders per module not really supported"); - } - nir_shader *nir; nir_function *entry_point; if (module->nir) { @@ -204,7 +189,7 @@ radv_shader_compile_to_nir(struct radv_device *device, * and just use the NIR shader */ nir = module->nir; nir->options = &nir_options; - nir_validate_shader(nir); + nir_validate_shader(nir, "in internal shader"); assert(exec_list_length(&nir->functions) == 1); struct exec_node *node = exec_list_get_head(&nir->functions); @@ -233,22 +218,42 @@ radv_shader_compile_to_nir(struct radv_device *device, spec_entries[i].data32 = *(const uint32_t *)data; } } - const struct nir_spirv_supported_extensions supported_ext = { - .draw_parameters = true, - .float64 = true, - .image_read_without_format = true, - .image_write_without_format = true, - .tessellation = true, - .int64 = true, - .multiview = true, - .variable_pointers = true, + const struct spirv_to_nir_options spirv_options = { + .caps = { + .device_group = true, + .draw_parameters = true, + .float64 = true, + .image_read_without_format = true, + .image_write_without_format = true, + .tessellation = true, + .int64 = true, + .int16 = true, + .multiview = true, + .subgroup_arithmetic = true, + .subgroup_ballot = true, + .subgroup_basic = true, + .subgroup_quad = true, + .subgroup_shuffle = true, + .subgroup_vote = true, + .variable_pointers = true, + .gcn_shader = true, + .trinary_minmax = true, + .shader_viewport_index_layer = true, + .descriptor_array_dynamic_indexing = true, + .runtime_descriptor_array = true, + .stencil_export = true, + .storage_16bit = true, + .geometry_streams = true, + .transform_feedback = true, + }, }; entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, - stage, entrypoint_name, &supported_ext, &nir_options); + stage, entrypoint_name, + &spirv_options, &nir_options); nir = entry_point->shader; assert(nir->info.stage == stage); - nir_validate_shader(nir); + nir_validate_shader(nir, "after spirv_to_nir"); free(spec_entries); @@ -259,6 +264,7 @@ radv_shader_compile_to_nir(struct radv_device *device, NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); NIR_PASS_V(nir, nir_lower_returns); NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_copy_prop); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -268,13 +274,25 @@ radv_shader_compile_to_nir(struct radv_device *device, assert(exec_list_length(&nir->functions) == 1); entry_point->name = ralloc_strdup(entry_point, "main"); - NIR_PASS_V(nir, 
nir_remove_dead_variables, - nir_var_shader_in | nir_var_shader_out | nir_var_system_value); + /* Make sure we lower constant initializers on output variables so that + * nir_remove_dead_variables below sees the corresponding stores + */ + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out); /* Now that we've deleted all but the main function, we can go ahead and * lower the rest of the constant initializers. */ NIR_PASS_V(nir, nir_lower_constant_initializers, ~0); + + /* Split member structs. We do this before lower_io_to_temporaries so that + * it doesn't lower system values to temporaries by accident. + */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value); + NIR_PASS_V(nir, nir_lower_system_values); NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays); } @@ -291,11 +309,48 @@ radv_shader_compile_to_nir(struct radv_device *device, nir_lower_tex(nir, &tex_options); nir_lower_vars_to_ssa(nir); - nir_lower_var_copies(nir); + + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_GEOMETRY) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), true, true); + } else if (nir->info.stage == MESA_SHADER_TESS_EVAL|| + nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), true, false); + } + + nir_split_var_copies(nir); + nir_lower_global_vars_to_local(nir); nir_remove_dead_variables(nir, nir_var_local); - radv_lower_indirect_derefs(nir, device->physical_device); - radv_optimize_nir(nir); + nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { + .subgroup_size = 64, + .ballot_bit_size = 64, + .lower_to_scalar = 1, + .lower_subgroup_masks = 1, + .lower_shuffle = 1, + .lower_shuffle_to_32bit = 1, + .lower_vote_eq_to_ballot = 1, + }); + + nir_lower_load_const_to_scalar(nir); + + if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)) + radv_optimize_nir(nir, false, true); + + /* We call nir_lower_var_copies() after the first radv_optimize_nir() + * to remove any copies introduced by nir_opt_find_array_copies(). + */ + nir_lower_var_copies(nir); + + /* Indirect lowering must be called after the radv_optimize_nir() loop + * has been called at least once. Otherwise indirect lowering can + * bloat the instruction count of the loop and cause it to be + * considered too large for unrolling. + */ + ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class); + radv_optimize_nir(nir, flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT, false); return nir; } @@ -331,7 +386,10 @@ radv_alloc_shader_memory(struct radv_device *device, slab->size = 256 * 1024; slab->bo = device->ws->buffer_create(device->ws, slab->size, 256, - RADEON_DOMAIN_VRAM, 0); + RADEON_DOMAIN_VRAM, + RADEON_FLAG_NO_INTERPROCESS_SHARING | + (device->physical_device->cpdma_prefetch_writes_memory ? + 0 : RADEON_FLAG_READ_ONLY)); slab->ptr = (char*)device->ws->buffer_map(slab->bo); list_inithead(&slab->shaders); @@ -355,6 +413,16 @@ radv_destroy_shader_slabs(struct radv_device *device) mtx_destroy(&device->shader_slab_mutex); } +/* For the UMR disassembler. 
*/ +#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */ +#define DEBUGGER_NUM_MARKERS 5 + +static unsigned +radv_get_shader_binary_size(struct ac_shader_binary *binary) +{ + return binary->code_size + DEBUGGER_NUM_MARKERS * 4; +} + static void radv_fill_shader_variant(struct radv_device *device, struct radv_shader_variant *variant, @@ -362,16 +430,20 @@ radv_fill_shader_variant(struct radv_device *device, gl_shader_stage stage) { bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0; + struct radv_shader_info *info = &variant->info.info; unsigned vgpr_comp_cnt = 0; - if (scratch_enabled && !device->llvm_supports_spill) - radv_finishme("shader scratch support only available with LLVM 4.0"); - - variant->code_size = binary->code_size; + variant->code_size = radv_get_shader_binary_size(binary); variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) | - S_00B12C_SCRATCH_EN(scratch_enabled); - - variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) | + S_00B12C_USER_SGPR_MSB(variant->info.num_user_sgprs >> 5) | + S_00B12C_SCRATCH_EN(scratch_enabled) | + S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | + S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | + S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | + S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | + S_00B12C_SO_EN(!!info->so.num_outputs); + + variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) | S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) | S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(variant->config.float_mode); @@ -382,10 +454,11 @@ radv_fill_shader_variant(struct radv_device *device, variant->rsrc2 |= S_00B12C_OC_LDS_EN(1); break; case MESA_SHADER_TESS_CTRL: - if (device->physical_device->rad_info.chip_class >= GFX9) + if (device->physical_device->rad_info.chip_class >= GFX9) { vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt; - else + } else { variant->rsrc2 |= S_00B12C_OC_LDS_EN(1); + } break; case MESA_SHADER_VERTEX: case MESA_SHADER_GEOMETRY: @@ -395,9 +468,12 @@ radv_fill_shader_variant(struct radv_device *device, break; case MESA_SHADER_COMPUTE: variant->rsrc2 |= - S_00B84C_TGID_X_EN(1) | S_00B84C_TGID_Y_EN(1) | - S_00B84C_TGID_Z_EN(1) | S_00B84C_TIDIG_COMP_CNT(2) | - S_00B84C_TG_SIZE_EN(1) | + S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | + S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | + S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) | + S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 : + info->cs.uses_thread_id[1] ? 1 : 0) | + S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | S_00B84C_LDS_SIZE(variant->config.lds_size); break; default: @@ -407,18 +483,81 @@ radv_fill_shader_variant(struct radv_device *device, if (device->physical_device->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_GEOMETRY) { - /* TODO: Figure out how many we actually need. */ - variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(3); - variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(3) | - S_00B22C_OC_LDS_EN(1); + unsigned es_type = variant->info.gs.es_type; + unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; + + if (es_type == MESA_SHADER_VERTEX) { + es_vgpr_comp_cnt = variant->info.vs.vgpr_comp_cnt; + } else if (es_type == MESA_SHADER_TESS_EVAL) { + es_vgpr_comp_cnt = 3; + } else { + unreachable("invalid shader ES type"); + } + + /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and + * VGPR[0:4] are always loaded. + */ + if (info->uses_invocation_id) { + gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. 
*/ + } else if (info->uses_prim_id) { + gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */ + } else if (variant->info.gs.vertices_in >= 3) { + gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */ + } else { + gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ + } + + variant->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt); + variant->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | + S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL); } else if (device->physical_device->rad_info.chip_class >= GFX9 && - stage == MESA_SHADER_TESS_CTRL) + stage == MESA_SHADER_TESS_CTRL) { variant->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt); - else + } else { variant->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt); + } void *ptr = radv_alloc_shader_memory(device, variant); memcpy(ptr, binary->code, binary->code_size); + + /* Add end-of-code markers for the UMR disassembler. */ + uint32_t *ptr32 = (uint32_t *)ptr + binary->code_size / 4; + for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++) + ptr32[i] = DEBUGGER_END_OF_CODE_MARKER; + +} + +static void radv_init_llvm_target() +{ + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmPrinter(); + + /* For inline assembly. */ + LLVMInitializeAMDGPUAsmParser(); + + /* Workaround for bug in llvm 4.0 that causes image intrinsics + * to disappear. + * https://reviews.llvm.org/D26348 + * + * Workaround for bug in llvm that causes the GPU to hang in presence + * of nested loops because there is an exec mask issue. The proper + * solution is to fix LLVM but this might require a bunch of work. + * https://bugs.llvm.org/show_bug.cgi?id=37744 + * + * "mesa" is the prefix for error messages. + */ + const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", + "-amdgpu-skip-threshold=1" }; + LLVMParseCommandLineOptions(3, argv, NULL); +} + +static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT; + +static void radv_init_llvm_once(void) +{ + call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target); } static struct radv_shader_variant * @@ -427,42 +566,55 @@ shader_variant_create(struct radv_device *device, struct nir_shader * const *shaders, int shader_count, gl_shader_stage stage, - struct ac_nir_compiler_options *options, + struct radv_nir_compiler_options *options, bool gs_copy_shader, void **code_out, unsigned *code_size_out) { enum radeon_family chip_family = device->physical_device->rad_info.family; - bool dump_shaders = device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS; enum ac_target_machine_options tm_options = 0; struct radv_shader_variant *variant; struct ac_shader_binary binary; - LLVMTargetMachineRef tm; - + struct ac_llvm_compiler ac_llvm; + bool thread_compiler; variant = calloc(1, sizeof(struct radv_shader_variant)); if (!variant) return NULL; options->family = chip_family; options->chip_class = device->physical_device->rad_info.chip_class; + options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader); + options->dump_preoptir = options->dump_shader && + device->instance->debug_flags & RADV_DEBUG_PREOPTIR; + options->record_llvm_ir = device->keep_shader_info; + options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR; + options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size; + options->address32_hi = device->physical_device->rad_info.address32_hi; if (options->supports_spill) tm_options |= AC_TM_SUPPORTS_SPILL; if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED) 
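The upload path in this hunk appends DEBUGGER_NUM_MARKERS sentinel dwords after the machine code: 0xbf9f0000 decodes as an invalid instruction, which is what the UMR disassembler scans for when dumping shaders from VRAM, and radv_get_shader_binary_size() reserves the extra 20 bytes up front. A self-contained rendition of that step; upload_with_markers is an illustrative stand-in for the slab upload, not a radv function.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS 5

/* Copy the code, then write the sentinel dwords UMR scans for. */
static void *upload_with_markers(const uint32_t *code, unsigned code_size)
{
    uint32_t *ptr = malloc(code_size + DEBUGGER_NUM_MARKERS * 4);
    if (!ptr)
        return NULL;
    memcpy(ptr, code, code_size);
    uint32_t *end = ptr + code_size / 4;
    for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
        end[i] = DEBUGGER_END_OF_CODE_MARKER;
    return ptr;
}

int main(void)
{
    uint32_t code[2] = { 0xdeadbeef, 0xcafebabe };
    uint32_t *blob = upload_with_markers(code, sizeof(code));
    printf("last dword: 0x%08x\n", blob[2 + DEBUGGER_NUM_MARKERS - 1]);
    free(blob);
    return 0;
}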
tm_options |= AC_TM_SISCHED; - tm = ac_create_target_machine(chip_family, tm_options); - + if (options->check_ir) + tm_options |= AC_TM_CHECK_IR; + + thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); + radv_init_llvm_once(); + radv_init_llvm_compiler(&ac_llvm, false, + thread_compiler, + chip_family, tm_options); if (gs_copy_shader) { assert(shader_count == 1); - ac_create_gs_copy_shader(tm, *shaders, &binary, &variant->config, - &variant->info, options, dump_shaders); + radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, + &variant->config, &variant->info, + options); } else { - ac_compile_nir_shader(tm, &binary, &variant->config, - &variant->info, shaders, shader_count, options, - dump_shaders); + radv_compile_nir_shader(&ac_llvm, &binary, &variant->config, + &variant->info, shaders, shader_count, + options); } - LLVMDisposeTargetMachine(tm); + radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); radv_fill_shader_variant(device, variant, &binary, stage); @@ -477,8 +629,9 @@ shader_variant_create(struct radv_device *device, free(binary.relocs); variant->ref_count = 1; - if (device->trace_bo) { + if (device->keep_shader_info) { variant->disasm_string = binary.disasm_string; + variant->llvm_ir_string = binary.llvm_ir_string; if (!gs_copy_shader && !module->nir) { variant->nir = *shaders; variant->spirv = (uint32_t *)module->data; @@ -497,18 +650,18 @@ radv_shader_variant_create(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, struct radv_pipeline_layout *layout, - const struct ac_shader_variant_key *key, + const struct radv_shader_variant_key *key, void **code_out, unsigned *code_size_out) { - struct ac_nir_compiler_options options = {0}; + struct radv_nir_compiler_options options = {0}; options.layout = layout; if (key) options.key = *key; options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH); - options.supports_spill = device->llvm_supports_spill; + options.supports_spill = true; return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, &options, false, code_out, code_size_out); @@ -521,7 +674,7 @@ radv_create_gs_copy_shader(struct radv_device *device, unsigned *code_size_out, bool multiview) { - struct ac_nir_compiler_options options = {0}; + struct radv_nir_compiler_options options = {0}; options.key.has_multiview_view_index = multiview; @@ -542,48 +695,10 @@ radv_shader_variant_destroy(struct radv_device *device, ralloc_free(variant->nir); free(variant->disasm_string); + free(variant->llvm_ir_string); free(variant); } -uint32_t -radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class, - bool has_gs, bool has_tess) -{ - switch (stage) { - case MESA_SHADER_FRAGMENT: - return R_00B030_SPI_SHADER_USER_DATA_PS_0; - case MESA_SHADER_VERTEX: - if (chip_class >= GFX9) { - return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 : - has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : - R_00B130_SPI_SHADER_USER_DATA_VS_0; - } - if (has_tess) - return R_00B530_SPI_SHADER_USER_DATA_LS_0; - else - return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0; - case MESA_SHADER_GEOMETRY: - return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : - R_00B230_SPI_SHADER_USER_DATA_GS_0; - case MESA_SHADER_COMPUTE: - return R_00B900_COMPUTE_USER_DATA_0; - case MESA_SHADER_TESS_CTRL: - return chip_class >= GFX9 ? 
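radv_init_llvm_once(), called just above before building the compiler, funnels LLVM's process-global setup (target registration plus the -simplifycfg-sink-common and -amdgpu-skip-threshold workarounds) through C11 call_once, so concurrent pipeline compiles can race into it safely. The pattern in isolation, assuming a libc that ships <threads.h>:

#include <stdio.h>
#include <threads.h>

static once_flag init_flag = ONCE_FLAG_INIT;

/* Runs exactly once no matter how many threads enter ensure_init(). */
static void do_global_init(void)
{
    printf("one-time init (e.g. LLVMInitializeAMDGPUTarget*)\n");
}

static void ensure_init(void)
{
    call_once(&init_flag, do_global_init);
}

int main(void)
{
    ensure_init();
    ensure_init(); /* second call is a no-op */
    return 0;
}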
R_00B430_SPI_SHADER_USER_DATA_LS_0 : - R_00B430_SPI_SHADER_USER_DATA_HS_0; - case MESA_SHADER_TESS_EVAL: - if (chip_class >= GFX9) { - return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : - R_00B130_SPI_SHADER_USER_DATA_VS_0; - } - if (has_gs) - return R_00B330_SPI_SHADER_USER_DATA_ES_0; - else - return R_00B130_SPI_SHADER_USER_DATA_VS_0; - default: - unreachable("unknown shader"); - } -} - const char * radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage) { @@ -599,27 +714,18 @@ radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage) }; } -void -radv_shader_dump_stats(struct radv_device *device, - struct radv_shader_variant *variant, - gl_shader_stage stage, - FILE *file) +static void +generate_shader_stats(struct radv_device *device, + struct radv_shader_variant *variant, + gl_shader_stage stage, + struct _mesa_string_buffer *buf) { unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256; struct ac_shader_config *conf; unsigned max_simd_waves; unsigned lds_per_wave = 0; - switch (device->physical_device->rad_info.family) { - /* These always have 8 waves: */ - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - max_simd_waves = 8; - break; - default: - max_simd_waves = 10; - } + max_simd_waves = ac_get_max_simd_waves(device->physical_device->rad_info.family); conf = &variant->config; @@ -629,15 +735,15 @@ radv_shader_dump_stats(struct radv_device *device, lds_increment); } - if (conf->num_sgprs) { - if (device->physical_device->rad_info.chip_class >= VI) - max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs); - else - max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs); - } + if (conf->num_sgprs) + max_simd_waves = + MIN2(max_simd_waves, + radv_get_num_physical_sgprs(device->physical_device) / conf->num_sgprs); if (conf->num_vgprs) - max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs); + max_simd_waves = + MIN2(max_simd_waves, + RADV_NUM_PHYSICAL_VGPRS / conf->num_vgprs); /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD * that PS can use. 
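generate_shader_stats() in this hunk derives per-SIMD occupancy by clamping a family-dependent maximum (10 waves, 8 on the Polaris parts per the code being replaced) against three budgets: physical SGPRs per SIMD (800 on VI and newer, 512 before, per radv_get_num_physical_sgprs() later in this diff), the 256 physical VGPRs, and the 16 KiB of LDS a pixel-shader wave may use. A standalone worked version of the same clamping:

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

static unsigned max_waves(unsigned num_sgprs, unsigned num_vgprs,
                          unsigned lds_per_wave, unsigned physical_sgprs)
{
    unsigned waves = 10; /* family default; Polaris parts cap at 8 */
    if (num_sgprs)
        waves = MIN2(waves, physical_sgprs / num_sgprs);
    if (num_vgprs)
        waves = MIN2(waves, 256 / num_vgprs);
    if (lds_per_wave)
        waves = MIN2(waves, 16384 / lds_per_wave);
    return waves;
}

int main(void)
{
    /* 40 VGPRs limit a VI shader to 6 waves (256/40), even though
     * 48 SGPRs alone would still allow the full 10 (800/48 = 16). */
    printf("max waves: %u\n", max_waves(48, 40, 0, 800));
    return 0;
}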
@@ -645,27 +751,140 @@ radv_shader_dump_stats(struct radv_device *device, if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); + if (stage == MESA_SHADER_FRAGMENT) { + _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n" + "SPI_PS_INPUT_ADDR = 0x%04x\n" + "SPI_PS_INPUT_ENA = 0x%04x\n", + conf->spi_ps_input_addr, conf->spi_ps_input_ena); + } + + _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n" + "SGPRS: %d\n" + "VGPRS: %d\n" + "Spilled SGPRs: %d\n" + "Spilled VGPRs: %d\n" + "PrivMem VGPRS: %d\n" + "Code Size: %d bytes\n" + "LDS: %d blocks\n" + "Scratch: %d bytes per wave\n" + "Max Waves: %d\n" + "********************\n\n\n", + conf->num_sgprs, conf->num_vgprs, + conf->spilled_sgprs, conf->spilled_vgprs, + variant->info.private_mem_vgprs, variant->code_size, + conf->lds_size, conf->scratch_bytes_per_wave, + max_simd_waves); +} + +void +radv_shader_dump_stats(struct radv_device *device, + struct radv_shader_variant *variant, + gl_shader_stage stage, + FILE *file) +{ + struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256); + + generate_shader_stats(device, variant, stage, buf); + fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage)); + fprintf(file, "%s", buf->buf); - if (stage == MESA_SHADER_FRAGMENT) { - fprintf(file, "*** SHADER CONFIG ***\n" - "SPI_PS_INPUT_ADDR = 0x%04x\n" - "SPI_PS_INPUT_ENA = 0x%04x\n", - conf->spi_ps_input_addr, conf->spi_ps_input_ena); + _mesa_string_buffer_destroy(buf); +} + +VkResult +radv_GetShaderInfoAMD(VkDevice _device, + VkPipeline _pipeline, + VkShaderStageFlagBits shaderStage, + VkShaderInfoTypeAMD infoType, + size_t* pInfoSize, + void* pInfo) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); + gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage); + struct radv_shader_variant *variant = pipeline->shaders[stage]; + struct _mesa_string_buffer *buf; + VkResult result = VK_SUCCESS; + + /* Spec doesn't indicate what to do if the stage is invalid, so just + * return no info for this. */ + if (!variant) + return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT); + + switch (infoType) { + case VK_SHADER_INFO_TYPE_STATISTICS_AMD: + if (!pInfo) { + *pInfoSize = sizeof(VkShaderStatisticsInfoAMD); + } else { + unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 
512 : 256; + struct ac_shader_config *conf = &variant->config; + + VkShaderStatisticsInfoAMD statistics = {}; + statistics.shaderStageMask = shaderStage; + statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS; + statistics.numPhysicalSgprs = radv_get_num_physical_sgprs(device->physical_device); + statistics.numAvailableSgprs = statistics.numPhysicalSgprs; + + if (stage == MESA_SHADER_COMPUTE) { + unsigned *local_size = variant->nir->info.cs.local_size; + unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2]; + + statistics.numAvailableVgprs = statistics.numPhysicalVgprs / + ceil((double)workgroup_size / statistics.numPhysicalVgprs); + + statistics.computeWorkGroupSize[0] = local_size[0]; + statistics.computeWorkGroupSize[1] = local_size[1]; + statistics.computeWorkGroupSize[2] = local_size[2]; + } else { + statistics.numAvailableVgprs = statistics.numPhysicalVgprs; + } + + statistics.resourceUsage.numUsedVgprs = conf->num_vgprs; + statistics.resourceUsage.numUsedSgprs = conf->num_sgprs; + statistics.resourceUsage.ldsSizePerLocalWorkGroup = 32768; + statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size * lds_multiplier; + statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave; + + size_t size = *pInfoSize; + *pInfoSize = sizeof(statistics); + + memcpy(pInfo, &statistics, MIN2(size, *pInfoSize)); + + if (size < *pInfoSize) + result = VK_INCOMPLETE; + } + + break; + case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: + buf = _mesa_string_buffer_create(NULL, 1024); + + _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage)); + _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string); + generate_shader_stats(device, variant, stage, buf); + + /* Need to include the null terminator. */ + size_t length = buf->length + 1; + + if (!pInfo) { + *pInfoSize = length; + } else { + size_t size = *pInfoSize; + *pInfoSize = length; + + memcpy(pInfo, buf->buf, MIN2(size, length)); + + if (size < length) + result = VK_INCOMPLETE; + } + + _mesa_string_buffer_destroy(buf); + break; + default: + /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. 
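radv_GetShaderInfoAMD() implements the standard Vulkan two-call idiom: a NULL pInfo returns the required size, a second call copies the data, and VK_INCOMPLETE signals that the caller's buffer truncated the copy. A hedged usage sketch for the disassembly blob; the entry point must be fetched via vkGetDeviceProcAddr since VK_AMD_shader_info is an extension, and the device and pipeline are assumed to already exist.

#include <stdio.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>

/* Two-call pattern for the disassembly text; returns a malloc'ed,
 * NUL-terminated string (the driver includes the terminator) or NULL. */
static char *get_disassembly(VkDevice device, VkPipeline pipeline,
                             PFN_vkGetShaderInfoAMD get_info)
{
    size_t size = 0;
    if (get_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                 VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL) != VK_SUCCESS)
        return NULL;

    char *text = malloc(size);
    if (!text)
        return NULL;
    VkResult r = get_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                          VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, text);
    if (r != VK_SUCCESS) { /* VK_INCOMPLETE means a truncated copy */
        free(text);
        return NULL;
    }
    return text;
}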
*/ + result = VK_ERROR_FEATURE_NOT_PRESENT; + break; } - fprintf(file, "*** SHADER STATS ***\n" - "SGPRS: %d\n" - "VGPRS: %d\n" - "Spilled SGPRs: %d\n" - "Spilled VGPRs: %d\n" - "Code Size: %d bytes\n" - "LDS: %d blocks\n" - "Scratch: %d bytes per wave\n" - "Max Waves: %d\n" - "********************\n\n\n", - conf->num_sgprs, conf->num_vgprs, - conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size, - conf->lds_size, conf->scratch_bytes_per_wave, - max_simd_waves); + return result; } diff --git a/lib/mesa/src/amd/vulkan/radv_shader.h b/lib/mesa/src/amd/vulkan/radv_shader.h index 6e4e9966c..a1d38b3ce 100644 --- a/lib/mesa/src/amd/vulkan/radv_shader.h +++ b/lib/mesa/src/amd/vulkan/radv_shader.h @@ -28,10 +28,26 @@ #ifndef RADV_SHADER_H #define RADV_SHADER_H +#include "radv_debug.h" #include "radv_private.h" #include "nir/nir.h" +/* descriptor index into scratch ring offsets */ +#define RING_SCRATCH 0 +#define RING_ESGS_VS 1 +#define RING_ESGS_GS 2 +#define RING_GSVS_VS 3 +#define RING_GSVS_GS 4 +#define RING_HS_TESS_FACTOR 5 +#define RING_HS_TESS_OFFCHIP 6 +#define RING_PS_SAMPLE_POSITIONS 7 + +// Match MAX_SETS from radv_descriptor_set.h +#define RADV_UD_MAX_SETS MAX_SETS + +#define RADV_NUM_PHYSICAL_VGPRS 256 + struct radv_shader_module { struct nir_shader *nir; unsigned char sha1[20]; @@ -39,6 +55,241 @@ struct radv_shader_module { char data[0]; }; +enum { + RADV_ALPHA_ADJUST_NONE = 0, + RADV_ALPHA_ADJUST_SNORM = 1, + RADV_ALPHA_ADJUST_SINT = 2, + RADV_ALPHA_ADJUST_SSCALED = 3, +}; + +struct radv_vs_variant_key { + uint32_t instance_rate_inputs; + uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; + + /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. + * so we may need to fix it up. */ + uint64_t alpha_adjust; + + uint32_t as_es:1; + uint32_t as_ls:1; + uint32_t export_prim_id:1; + uint32_t export_layer_id:1; +}; + +struct radv_tes_variant_key { + uint32_t as_es:1; + uint32_t export_prim_id:1; + uint32_t export_layer_id:1; + uint8_t num_patches; + uint8_t tcs_num_outputs; +}; + +struct radv_tcs_variant_key { + struct radv_vs_variant_key vs_key; + unsigned primitive_mode; + unsigned input_vertices; + unsigned num_inputs; + uint32_t tes_reads_tess_factors:1; +}; + +struct radv_fs_variant_key { + uint32_t col_format; + uint8_t log2_ps_iter_samples; + uint8_t num_samples; + uint32_t is_int8; + uint32_t is_int10; +}; + +struct radv_shader_variant_key { + union { + struct radv_vs_variant_key vs; + struct radv_fs_variant_key fs; + struct radv_tes_variant_key tes; + struct radv_tcs_variant_key tcs; + }; + bool has_multiview_view_index; +}; + +struct radv_nir_compiler_options { + struct radv_pipeline_layout *layout; + struct radv_shader_variant_key key; + bool unsafe_math; + bool supports_spill; + bool clamp_shadow_reference; + bool dump_shader; + bool dump_preoptir; + bool record_llvm_ir; + bool check_ir; + enum radeon_family family; + enum chip_class chip_class; + uint32_t tess_offchip_block_dw_size; + uint32_t address32_hi; +}; + +enum radv_ud_index { + AC_UD_SCRATCH_RING_OFFSETS = 0, + AC_UD_PUSH_CONSTANTS = 1, + AC_UD_INDIRECT_DESCRIPTOR_SETS = 2, + AC_UD_VIEW_INDEX = 3, + AC_UD_STREAMOUT_BUFFERS = 4, + AC_UD_SHADER_START = 5, + AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, + AC_UD_VS_BASE_VERTEX_START_INSTANCE, + AC_UD_VS_MAX_UD, + AC_UD_PS_MAX_UD, + AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, + AC_UD_CS_MAX_UD, + AC_UD_GS_MAX_UD, + AC_UD_TCS_MAX_UD, + AC_UD_TES_MAX_UD, + AC_UD_MAX_UD = AC_UD_TCS_MAX_UD, +}; + +struct radv_stream_output { + uint8_t location; + 
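The radv_vs_variant_key above carries alpha_adjust for the 2_10_10_10 fixup its comment describes. The RADV_ALPHA_ADJUST_* codes fit in two bits, and the uint64_t plausibly packs one code per vertex-attribute location; that packing is an assumption here, not something this header spells out.

#include <stdint.h>
#include <stdio.h>

enum {
    RADV_ALPHA_ADJUST_NONE = 0,
    RADV_ALPHA_ADJUST_SNORM = 1,
    RADV_ALPHA_ADJUST_SINT = 2,
    RADV_ALPHA_ADJUST_SSCALED = 3,
};

/* Assumed packing: two bits per vertex-attribute location. */
static unsigned alpha_adjust_for(uint64_t alpha_adjust, unsigned location)
{
    return (alpha_adjust >> (2 * location)) & 3;
}

int main(void)
{
    uint64_t key = 0;
    key |= (uint64_t)RADV_ALPHA_ADJUST_SNORM << (2 * 3); /* attribute 3 */
    printf("attr 3 adjust: %u\n", alpha_adjust_for(key, 3)); /* 1 */
    printf("attr 0 adjust: %u\n", alpha_adjust_for(key, 0)); /* 0 */
    return 0;
}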
uint8_t buffer; + uint16_t offset; + uint8_t component_mask; + uint8_t stream; +}; + +struct radv_streamout_info { + uint16_t num_outputs; + struct radv_stream_output outputs[MAX_SO_OUTPUTS]; + uint16_t strides[MAX_SO_BUFFERS]; + uint32_t enabled_stream_buffers_mask; +}; + +struct radv_shader_info { + bool loads_push_constants; + uint32_t desc_set_used_mask; + bool needs_multiview_view_index; + bool uses_invocation_id; + bool uses_prim_id; + struct { + uint64_t ls_outputs_written; + uint8_t input_usage_mask[VERT_ATTRIB_MAX]; + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + bool has_vertex_buffers; /* needs vertex buffers and base/start */ + bool needs_draw_id; + bool needs_instance_id; + } vs; + struct { + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + uint8_t num_stream_output_components[4]; + uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; + uint8_t max_stream; + } gs; + struct { + uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; + } tes; + struct { + bool force_persample; + bool needs_sample_positions; + bool uses_input_attachments; + bool writes_memory; + bool writes_z; + bool writes_stencil; + bool writes_sample_mask; + bool has_pcoord; + bool prim_id_input; + bool layer_input; + uint8_t num_input_clips_culls; + } ps; + struct { + bool uses_grid_size; + bool uses_block_id[3]; + bool uses_thread_id[3]; + bool uses_local_invocation_idx; + } cs; + struct { + uint64_t outputs_written; + uint64_t patch_outputs_written; + } tcs; + + struct radv_streamout_info so; +}; + +struct radv_userdata_info { + int8_t sgpr_idx; + uint8_t num_sgprs; + bool indirect; +}; + +struct radv_userdata_locations { + struct radv_userdata_info descriptor_sets[RADV_UD_MAX_SETS]; + struct radv_userdata_info shader_data[AC_UD_MAX_UD]; + uint32_t descriptor_sets_enabled; +}; + +struct radv_vs_output_info { + uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; + uint8_t clip_dist_mask; + uint8_t cull_dist_mask; + uint8_t param_exports; + bool writes_pointsize; + bool writes_layer; + bool writes_viewport_index; + bool export_prim_id; + unsigned pos_exports; +}; + +struct radv_es_output_info { + uint32_t esgs_itemsize; +}; + +struct radv_shader_variant_info { + struct radv_userdata_locations user_sgprs_locs; + struct radv_shader_info info; + unsigned num_user_sgprs; + unsigned num_input_sgprs; + unsigned num_input_vgprs; + unsigned private_mem_vgprs; + bool need_indirect_descriptor_sets; + struct { + struct { + struct radv_vs_output_info outinfo; + struct radv_es_output_info es_info; + unsigned vgpr_comp_cnt; + bool as_es; + bool as_ls; + } vs; + struct { + unsigned num_interp; + uint32_t input_mask; + uint32_t flat_shaded_mask; + bool can_discard; + bool early_fragment_test; + } fs; + struct { + unsigned block_size[3]; + } cs; + struct { + unsigned vertices_in; + unsigned vertices_out; + unsigned output_prim; + unsigned invocations; + unsigned gsvs_vertex_size; + unsigned max_gsvs_emit_size; + unsigned es_type; /* GFX9: VS or TES */ + } gs; + struct { + unsigned tcs_vertices_out; + uint32_t num_patches; + uint32_t lds_size; + } tcs; + struct { + struct radv_vs_output_info outinfo; + struct radv_es_output_info es_info; + bool as_es; + unsigned primitive_mode; + enum gl_tess_spacing spacing; + bool ccw; + bool point_mode; + } tes; + }; +}; + struct radv_shader_variant { uint32_t ref_count; @@ -46,7 +297,7 @@ struct radv_shader_variant { uint64_t bo_offset; struct ac_shader_config config; uint32_t code_size; - struct ac_shader_variant_info info; + struct radv_shader_variant_info info; unsigned rsrc1; unsigned 
rsrc2; @@ -55,6 +306,7 @@ struct radv_shader_variant { uint32_t spirv_size; struct nir_shader *nir; char *disasm_string; + char *llvm_ir_string; struct list_head slab_list; }; @@ -68,14 +320,16 @@ struct radv_shader_slab { }; void -radv_optimize_nir(struct nir_shader *shader); +radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, + bool allow_copies); nir_shader * radv_shader_compile_to_nir(struct radv_device *device, struct radv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, - const VkSpecializationInfo *spec_info); + const VkSpecializationInfo *spec_info, + const VkPipelineCreateFlags flags); void * radv_alloc_shader_memory(struct radv_device *device, @@ -90,7 +344,7 @@ radv_shader_variant_create(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, struct radv_pipeline_layout *layout, - const struct ac_shader_variant_key *key, + const struct radv_shader_variant_key *key, void **code_out, unsigned *code_size_out); @@ -103,14 +357,6 @@ void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant); -bool -radv_lower_indirect_derefs(struct nir_shader *nir, - struct radv_physical_device *device); - -uint32_t -radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class, - bool has_gs, bool has_tess); - const char * radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage); @@ -120,4 +366,52 @@ radv_shader_dump_stats(struct radv_device *device, gl_shader_stage stage, FILE *file); +static inline bool +radv_can_dump_shader(struct radv_device *device, + struct radv_shader_module *module, + bool is_gs_copy_shader) +{ + if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) + return false; + + /* Only dump non-meta shaders, useful for debugging purposes. */ + return (module && !module->nir) || is_gs_copy_shader; +} + +static inline bool +radv_can_dump_shader_stats(struct radv_device *device, + struct radv_shader_module *module) +{ + /* Only dump non-meta shader stats. */ + return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS && + module && !module->nir; +} + +static inline unsigned shader_io_get_unique_index(gl_varying_slot slot) +{ + /* handle patch indices separate */ + if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) + return 0; + if (slot == VARYING_SLOT_TESS_LEVEL_INNER) + return 1; + if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) + return 2 + (slot - VARYING_SLOT_PATCH0); + if (slot == VARYING_SLOT_POS) + return 0; + if (slot == VARYING_SLOT_PSIZ) + return 1; + if (slot == VARYING_SLOT_CLIP_DIST0) + return 2; + /* 3 is reserved for clip dist as well */ + if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) + return 4 + (slot - VARYING_SLOT_VAR0); + unreachable("illegal slot in get unique index\n"); +} + +static inline uint32_t +radv_get_num_physical_sgprs(struct radv_physical_device *physical_device) +{ + return physical_device->rad_info.chip_class >= VI ? 800 : 512; +} + #endif diff --git a/lib/mesa/src/amd/vulkan/radv_shader_helper.h b/lib/mesa/src/amd/vulkan/radv_shader_helper.h new file mode 100644 index 000000000..3c81f5be5 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_shader_helper.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2018 Red Hat. 
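shader_io_get_unique_index() above compacts varying slots into a dense index space shared by TCS/TES I/O: patch slots get their own numbering, and generic varyings start at 4 because CLIP_DIST0 may occupy indices 2 and 3 when more than four clip/cull distances exist. A standalone rendition with illustrative slot numbers; the real values come from Mesa's gl_varying_slot enum.

#include <stdio.h>

/* Stand-in slot numbers for gl_varying_slot, for demonstration only. */
enum {
    SLOT_POS, SLOT_PSIZ, SLOT_CLIP_DIST0, SLOT_CLIP_DIST1,
    SLOT_TESS_LEVEL_OUTER, SLOT_TESS_LEVEL_INNER,
    SLOT_VAR0 /* ..VAR31 follow */
};

static unsigned unique_index(int slot)
{
    if (slot == SLOT_TESS_LEVEL_OUTER) return 0; /* patch slots get their own space */
    if (slot == SLOT_TESS_LEVEL_INNER) return 1;
    if (slot == SLOT_POS) return 0;
    if (slot == SLOT_PSIZ) return 1;
    if (slot == SLOT_CLIP_DIST0) return 2; /* 3 is reserved for the 2nd clip vec4 */
    if (slot >= SLOT_VAR0) return 4 + (slot - SLOT_VAR0);
    return ~0u;
}

int main(void)
{
    printf("POS -> %u, CLIP_DIST0 -> %u, VAR0 -> %u\n",
           unique_index(SLOT_POS), unique_index(SLOT_CLIP_DIST0),
           unique_index(SLOT_VAR0)); /* 0, 2, 4 */
    return 0;
}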
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef RADV_SHADER_HELPER_H +#define RADV_SHADER_HELPER_H +#ifdef __cplusplus +extern "C" { +#endif + +bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, + bool okay_to_leak_target_library_info, + bool thread_compiler, + enum radeon_family family, + enum ac_target_machine_options tm_options); +void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, + bool thread_compiler); + +bool radv_compile_to_binary(struct ac_llvm_compiler *info, + LLVMModuleRef module, + struct ac_shader_binary *binary); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/lib/mesa/src/amd/vulkan/radv_shader_info.c b/lib/mesa/src/amd/vulkan/radv_shader_info.c new file mode 100644 index 000000000..f7888ec6a --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_shader_info.c @@ -0,0 +1,532 @@ +/* + * Copyright © 2017 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include "radv_private.h" +#include "radv_shader.h" +#include "nir/nir.h" +#include "nir/nir_deref.h" +#include "nir/nir_xfb_info.h" + +static void mark_sampler_desc(const nir_variable *var, + struct radv_shader_info *info) +{ + info->desc_set_used_mask |= (1 << var->data.descriptor_set); +} + +static void mark_ls_output(struct radv_shader_info *info, + uint32_t param, int num_slots) +{ + uint64_t mask = (1ull << num_slots) - 1ull; + info->vs.ls_outputs_written |= (mask << param); +} + +static void mark_tess_output(struct radv_shader_info *info, + bool is_patch, uint32_t param, int num_slots) +{ + uint64_t mask = (1ull << num_slots) - 1ull; + if (is_patch) + info->tcs.patch_outputs_written |= (mask << param); + else + info->tcs.outputs_written |= (mask << param); +} + +static void +get_deref_offset(nir_deref_instr *instr, + unsigned *const_out) +{ + nir_variable *var = nir_deref_instr_get_variable(instr); + nir_deref_path path; + unsigned idx_lvl = 1; + + if (var->data.compact) { + assert(instr->deref_type == nir_deref_type_array); + nir_const_value *v = nir_src_as_const_value(instr->arr.index); + assert(v); + *const_out = v->u32[0]; + return; + } + + nir_deref_path_init(&path, instr, NULL); + + uint32_t const_offset = 0; + + for (; path.path[idx_lvl]; ++idx_lvl) { + const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type; + if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) { + unsigned index = path.path[idx_lvl]->strct.index; + + for (unsigned i = 0; i < index; i++) { + const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); + const_offset += glsl_count_attribute_slots(ft, false); + } + } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) { + unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, false); + nir_const_value *v = nir_src_as_const_value(path.path[idx_lvl]->arr.index); + if (v) + const_offset += v->u32[0] * size; + } else + unreachable("Uhandled deref type in get_deref_instr_offset"); + } + + *const_out = const_offset; + + nir_deref_path_finish(&path); +} + +static void +gather_intrinsic_load_deref_info(const nir_shader *nir, + const nir_intrinsic_instr *instr, + struct radv_shader_info *info) +{ + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: { + nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + + if (var->data.mode == nir_var_shader_in) { + unsigned idx = var->data.location; + uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa); + + info->vs.input_usage_mask[idx] |= + mask << var->data.location_frac; + } + break; + } + default: + break; + } +} + +static void +set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr, + uint8_t *output_usage_mask) +{ + nir_deref_instr *deref_instr = + nir_instr_as_deref(instr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref_instr); + unsigned attrib_count = glsl_count_attribute_slots(var->type, false); + unsigned idx = var->data.location; + unsigned comp = var->data.location_frac; + unsigned const_offset = 0; + + get_deref_offset(deref_instr, &const_offset); + + if (idx == VARYING_SLOT_CLIP_DIST0) { + /* Special case for clip/cull distances because there are + * combined into a single array that contains both. 
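mark_ls_output() and mark_tess_output() in this new file record which output slots a shader writes by building a contiguous bit range: num_slots set bits shifted up to the slot's unique index. The mask construction on its own:

#include <stdint.h>
#include <stdio.h>

/* Same mask construction as mark_ls_output()/mark_tess_output():
 * num_slots consecutive bits starting at 'param'. */
static uint64_t slot_mask(uint32_t param, int num_slots)
{
    uint64_t mask = (1ull << num_slots) - 1ull;
    return mask << param;
}

int main(void)
{
    /* An output at unique index 4 covering 2 slots sets bits 4 and 5. */
    printf("0x%llx\n", (unsigned long long)slot_mask(4, 2)); /* 0x30 */
    return 0;
}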
+ */ + output_usage_mask[idx] |= 1 << const_offset; + return; + } + + for (unsigned i = 0; i < attrib_count; i++) { + output_usage_mask[idx + i + const_offset] |= + instr->const_index[0] << comp; + } +} + +static void +gather_intrinsic_store_deref_info(const nir_shader *nir, + const nir_intrinsic_instr *instr, + struct radv_shader_info *info) +{ + nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + + if (var->data.mode == nir_var_shader_out) { + unsigned idx = var->data.location; + + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: + set_output_usage_mask(nir, instr, + info->vs.output_usage_mask); + break; + case MESA_SHADER_GEOMETRY: + set_output_usage_mask(nir, instr, + info->gs.output_usage_mask); + break; + case MESA_SHADER_TESS_EVAL: + set_output_usage_mask(nir, instr, + info->tes.output_usage_mask); + break; + case MESA_SHADER_TESS_CTRL: { + unsigned param = shader_io_get_unique_index(idx); + const struct glsl_type *type = var->type; + + if (!var->data.patch) + type = glsl_get_array_element(var->type); + + unsigned slots = + var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) + : glsl_count_attribute_slots(type, false); + + if (idx == VARYING_SLOT_CLIP_DIST0) + slots = (nir->info.clip_distance_array_size + + nir->info.cull_distance_array_size > 4) ? 2 : 1; + + mark_tess_output(info, var->data.patch, param, slots); + break; + } + default: + break; + } + } +} + +static void +gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, + struct radv_shader_info *info) +{ + switch (instr->intrinsic) { + case nir_intrinsic_interp_deref_at_sample: + info->ps.needs_sample_positions = true; + break; + case nir_intrinsic_load_draw_id: + info->vs.needs_draw_id = true; + break; + case nir_intrinsic_load_instance_id: + info->vs.needs_instance_id = true; + break; + case nir_intrinsic_load_num_work_groups: + info->cs.uses_grid_size = true; + break; + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_work_group_id: { + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + while (mask) { + unsigned i = u_bit_scan(&mask); + + if (instr->intrinsic == nir_intrinsic_load_work_group_id) + info->cs.uses_block_id[i] = true; + else + info->cs.uses_thread_id[i] = true; + } + break; + } + case nir_intrinsic_load_local_invocation_index: + case nir_intrinsic_load_subgroup_id: + case nir_intrinsic_load_num_subgroups: + info->cs.uses_local_invocation_idx = true; + break; + case nir_intrinsic_load_sample_id: + info->ps.force_persample = true; + break; + case nir_intrinsic_load_sample_pos: + info->ps.force_persample = true; + break; + case nir_intrinsic_load_view_index: + info->needs_multiview_view_index = true; + if (nir->info.stage == MESA_SHADER_FRAGMENT) + info->ps.layer_input = true; + break; + case nir_intrinsic_load_invocation_id: + info->uses_invocation_id = true; + break; + case nir_intrinsic_load_primitive_id: + info->uses_prim_id = true; + break; + case nir_intrinsic_load_push_constant: + info->loads_push_constants = true; + break; + case nir_intrinsic_vulkan_resource_index: + info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr)); + break; + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic_add: + case nir_intrinsic_image_deref_atomic_min: + case nir_intrinsic_image_deref_atomic_max: + case nir_intrinsic_image_deref_atomic_and: + case nir_intrinsic_image_deref_atomic_or: + case nir_intrinsic_image_deref_atomic_xor: 
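set_output_usage_mask() just above folds each store_deref's write mask (const_index[0]) into the per-slot usage array, shifted by the variable's location_frac so component writes land on the right bits. A worked example of that accumulation:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t output_usage_mask[8] = {0};

    /* A store_deref with write mask 0b0011 (x and y) to an output whose
     * location_frac is 1 marks components y and z of that slot, exactly
     * as set_output_usage_mask() does with const_index[0] << comp. */
    unsigned idx = 2, comp = 1, write_mask = 0x3;
    output_usage_mask[idx] |= write_mask << comp;

    printf("slot 2 usage: 0x%x\n", output_usage_mask[2]); /* 0x6 */
    return 0;
}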
+ case nir_intrinsic_image_deref_atomic_exchange: + case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_size: { + nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr)); + const struct glsl_type *type = glsl_without_array(var->type); + + enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); + if (dim == GLSL_SAMPLER_DIM_SUBPASS || + dim == GLSL_SAMPLER_DIM_SUBPASS_MS) { + info->ps.layer_input = true; + info->ps.uses_input_attachments = true; + } + mark_sampler_desc(var, info); + + if (nir_intrinsic_image_deref_store || + nir_intrinsic_image_deref_atomic_add || + nir_intrinsic_image_deref_atomic_min || + nir_intrinsic_image_deref_atomic_max || + nir_intrinsic_image_deref_atomic_and || + nir_intrinsic_image_deref_atomic_or || + nir_intrinsic_image_deref_atomic_xor || + nir_intrinsic_image_deref_atomic_exchange || + nir_intrinsic_image_deref_atomic_comp_swap) { + if (nir->info.stage == MESA_SHADER_FRAGMENT) + info->ps.writes_memory = true; + } + break; + } + case nir_intrinsic_store_ssbo: + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + if (nir->info.stage == MESA_SHADER_FRAGMENT) + info->ps.writes_memory = true; + break; + case nir_intrinsic_load_deref: + gather_intrinsic_load_deref_info(nir, instr, info); + break; + case nir_intrinsic_store_deref: + gather_intrinsic_store_deref_info(nir, instr, info); + break; + default: + break; + } +} + +static void +gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, + struct radv_shader_info *info) +{ + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_texture_deref: + mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info); + break; + case nir_tex_src_sampler_deref: + mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info); + break; + default: + break; + } + } +} + +static void +gather_info_block(const nir_shader *nir, const nir_block *block, + struct radv_shader_info *info) +{ + nir_foreach_instr(instr, block) { + switch (instr->type) { + case nir_instr_type_intrinsic: + gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info); + break; + case nir_instr_type_tex: + gather_tex_info(nir, nir_instr_as_tex(instr), info); + break; + default: + break; + } + } +} + +static void +gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info) +{ + int idx = var->data.location; + + if (idx >= VERT_ATTRIB_GENERIC0 && idx <= VERT_ATTRIB_GENERIC15) + info->vs.has_vertex_buffers = true; +} + +static void +gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info) +{ + unsigned attrib_count = glsl_count_attribute_slots(var->type, false); + const struct glsl_type *type = glsl_without_array(var->type); + int idx = var->data.location; + + switch (idx) { + case VARYING_SLOT_PNTC: + info->ps.has_pcoord = true; + break; + case VARYING_SLOT_PRIMITIVE_ID: + info->ps.prim_id_input = true; + break; + case VARYING_SLOT_LAYER: + info->ps.layer_input = true; + break; + case VARYING_SLOT_CLIP_DIST0: + 
info->ps.num_input_clips_culls = attrib_count; + break; + default: + break; + } + + if (glsl_get_base_type(type) == GLSL_TYPE_FLOAT) { + if (var->data.sample) + info->ps.force_persample = true; + } +} + +static void +gather_info_input_decl(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info) +{ + switch (nir->info.stage) { + case MESA_SHADER_VERTEX: + gather_info_input_decl_vs(nir, var, info); + break; + case MESA_SHADER_FRAGMENT: + gather_info_input_decl_ps(nir, var, info); + break; + default: + break; + } +} + +static void +gather_info_output_decl_ls(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info) +{ + int idx = var->data.location; + unsigned param = shader_io_get_unique_index(idx); + int num_slots = glsl_count_attribute_slots(var->type, false); + if (idx == VARYING_SLOT_CLIP_DIST0) + num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 2 : 1; + mark_ls_output(info, param, num_slots); +} + +static void +gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info) +{ + int idx = var->data.location; + + switch (idx) { + case FRAG_RESULT_DEPTH: + info->ps.writes_z = true; + break; + case FRAG_RESULT_STENCIL: + info->ps.writes_stencil = true; + break; + case FRAG_RESULT_SAMPLE_MASK: + info->ps.writes_sample_mask = true; + break; + default: + break; + } +} + +static void +gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info) +{ + unsigned num_components = glsl_get_component_slots(var->type); + unsigned stream = var->data.stream; + unsigned idx = var->data.location; + + assert(stream < 4); + + info->gs.max_stream = MAX2(info->gs.max_stream, stream); + info->gs.num_stream_output_components[stream] += num_components; + info->gs.output_streams[idx] = stream; +} + +static void +gather_info_output_decl(const nir_shader *nir, const nir_variable *var, + struct radv_shader_info *info, + const struct radv_nir_compiler_options *options) +{ + switch (nir->info.stage) { + case MESA_SHADER_FRAGMENT: + gather_info_output_decl_ps(nir, var, info); + break; + case MESA_SHADER_VERTEX: + if (options->key.vs.as_ls) + gather_info_output_decl_ls(nir, var, info); + break; + case MESA_SHADER_GEOMETRY: + gather_info_output_decl_gs(nir, var, info); + break; + default: + break; + } +} + +static void +gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info) +{ + nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL); + struct radv_streamout_info *so = &info->so; + + if (!xfb) + return; + + assert(xfb->output_count < MAX_SO_OUTPUTS); + so->num_outputs = xfb->output_count; + + for (unsigned i = 0; i < xfb->output_count; i++) { + struct radv_stream_output *output = &so->outputs[i]; + + output->buffer = xfb->outputs[i].buffer; + output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer]; + output->offset = xfb->outputs[i].offset; + output->location = xfb->outputs[i].location; + output->component_mask = xfb->outputs[i].component_mask; + + so->enabled_stream_buffers_mask |= + (1 << output->buffer) << (output->stream * 4); + + } + + for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) { + so->strides[i] = xfb->strides[i] / 4; + } + + ralloc_free(xfb); +} + +void +radv_nir_shader_info_pass(const struct nir_shader *nir, + const struct radv_nir_compiler_options *options, + struct radv_shader_info *info) +{ + struct nir_function *func = + (struct nir_function *)exec_list_get_head_const(&nir->functions); + + if 
(options->layout && options->layout->dynamic_offset_count) + info->loads_push_constants = true; + + nir_foreach_variable(variable, &nir->inputs) + gather_info_input_decl(nir, variable, info); + + nir_foreach_block(block, func->impl) { + gather_info_block(nir, block, info); + } + + nir_foreach_variable(variable, &nir->outputs) + gather_info_output_decl(nir, variable, info, options); + + if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_GEOMETRY) + gather_xfb_info(nir, info); +} diff --git a/lib/mesa/src/amd/vulkan/radv_wsi_display.c b/lib/mesa/src/amd/vulkan/radv_wsi_display.c new file mode 100644 index 000000000..d8743a06e --- /dev/null +++ b/lib/mesa/src/amd/vulkan/radv_wsi_display.c @@ -0,0 +1,354 @@ +/* + * Copyright © 2017 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
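gather_xfb_info() above gives each transform-feedback stream a four-bit window in enabled_stream_buffers_mask, so an output bound to buffer b on stream s contributes (1 << b) << (s * 4). A worked example of the mask:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t enabled_stream_buffers_mask = 0;

    /* As in gather_xfb_info(): four buffer bits per stream. */
    struct { unsigned buffer, stream; } outputs[] = {
        { 0, 0 }, /* bit 0 */
        { 2, 1 }, /* bit 6: (1 << 2) << 4 */
    };
    for (unsigned i = 0; i < 2; i++)
        enabled_stream_buffers_mask |=
            (1u << outputs[i].buffer) << (outputs[i].stream * 4);

    printf("mask: 0x%x\n", enabled_stream_buffers_mask); /* 0x41 */
    return 0;
}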
+ */ + +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include "radv_private.h" +#include "radv_cs.h" +#include "util/disk_cache.h" +#include "util/strtod.h" +#include "vk_util.h" +#include <xf86drm.h> +#include <xf86drmMode.h> +#include <amdgpu.h> +#include <amdgpu_drm.h> +#include "winsys/amdgpu/radv_amdgpu_winsys_public.h" +#include "ac_llvm_util.h" +#include "vk_format.h" +#include "sid.h" +#include "util/debug.h" +#include "wsi_common_display.h" + +#define MM_PER_PIXEL (1.0/96.0 * 25.4) + +VkResult +radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device, + uint32_t *property_count, + VkDisplayPropertiesKHR *properties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_physical_device_display_properties( + physical_device, + &pdevice->wsi_device, + property_count, + properties); +} + +VkResult +radv_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device, + uint32_t *property_count, + VkDisplayProperties2KHR *properties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_physical_device_display_properties2( + physical_device, + &pdevice->wsi_device, + property_count, + properties); +} + +VkResult +radv_GetPhysicalDeviceDisplayPlanePropertiesKHR( + VkPhysicalDevice physical_device, + uint32_t *property_count, + VkDisplayPlanePropertiesKHR *properties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_physical_device_display_plane_properties( + physical_device, + &pdevice->wsi_device, + property_count, + properties); +} + +VkResult +radv_GetPhysicalDeviceDisplayPlaneProperties2KHR( + VkPhysicalDevice physical_device, + uint32_t *property_count, + VkDisplayPlaneProperties2KHR *properties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_physical_device_display_plane_properties2( + physical_device, + &pdevice->wsi_device, + property_count, + properties); +} + +VkResult +radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device, + uint32_t plane_index, + uint32_t *display_count, + VkDisplayKHR *displays) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_display_plane_supported_displays( + physical_device, + &pdevice->wsi_device, + plane_index, + display_count, + displays); +} + + +VkResult +radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device, + VkDisplayKHR display, + uint32_t *property_count, + VkDisplayModePropertiesKHR *properties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_display_mode_properties(physical_device, + &pdevice->wsi_device, + display, + property_count, + properties); +} + +VkResult +radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device, + VkDisplayKHR display, + uint32_t *property_count, + VkDisplayModeProperties2KHR *properties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_display_get_display_mode_properties2(physical_device, + &pdevice->wsi_device, + display, + property_count, + properties); +} + +VkResult +radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device, + VkDisplayKHR display, + const VkDisplayModeCreateInfoKHR *create_info, + const VkAllocationCallbacks *allocator, + VkDisplayModeKHR *mode) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return 
wsi_display_create_display_mode(physical_device, + &pdevice->wsi_device, + display, + create_info, + allocator, + mode); +} + +VkResult +radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device, + VkDisplayModeKHR mode_khr, + uint32_t plane_index, + VkDisplayPlaneCapabilitiesKHR *capabilities) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_get_display_plane_capabilities(physical_device, + &pdevice->wsi_device, + mode_khr, + plane_index, + capabilities); +} + +VkResult +radv_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device, + const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo, + VkDisplayPlaneCapabilities2KHR *capabilities) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_get_display_plane_capabilities2(physical_device, + &pdevice->wsi_device, + pDisplayPlaneInfo, + capabilities); +} + +VkResult +radv_CreateDisplayPlaneSurfaceKHR( + VkInstance _instance, + const VkDisplaySurfaceCreateInfoKHR *create_info, + const VkAllocationCallbacks *allocator, + VkSurfaceKHR *surface) +{ + RADV_FROM_HANDLE(radv_instance, instance, _instance); + const VkAllocationCallbacks *alloc; + + if (allocator) + alloc = allocator; + else + alloc = &instance->alloc; + + return wsi_create_display_surface(_instance, alloc, + create_info, surface); +} + +VkResult +radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device, + VkDisplayKHR display) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_release_display(physical_device, + &pdevice->wsi_device, + display); +} + +#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT +VkResult +radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device, + Display *dpy, + VkDisplayKHR display) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_acquire_xlib_display(physical_device, + &pdevice->wsi_device, + dpy, + display); +} + +VkResult +radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device, + Display *dpy, + RROutput output, + VkDisplayKHR *display) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device); + + return wsi_get_randr_output_display(physical_device, + &pdevice->wsi_device, + dpy, + output, + display); +} +#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */ + +/* VK_EXT_display_control */ + +VkResult +radv_DisplayPowerControlEXT(VkDevice _device, + VkDisplayKHR display, + const VkDisplayPowerInfoEXT *display_power_info) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + + return wsi_display_power_control(_device, + &device->physical_device->wsi_device, + display, + display_power_info); +} + +VkResult +radv_RegisterDeviceEventEXT(VkDevice _device, + const VkDeviceEventInfoEXT *device_event_info, + const VkAllocationCallbacks *allocator, + VkFence *_fence) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_fence *fence; + VkResult ret; + + fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!fence) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + fence->fence = NULL; + fence->submitted = true; + fence->signalled = false; + fence->syncobj = 0; + fence->temp_syncobj = 0; + + ret = wsi_register_device_event(_device, + &device->physical_device->wsi_device, + device_event_info, + allocator, + &fence->fence_wsi); + if (ret == VK_SUCCESS) + *_fence = radv_fence_to_handle(fence); + else + vk_free2(&device->instance->alloc, allocator, fence); + return ret; +} + +VkResult +radv_RegisterDisplayEventEXT(VkDevice _device, + VkDisplayKHR 
display, + const VkDisplayEventInfoEXT *display_event_info, + const VkAllocationCallbacks *allocator, + VkFence *_fence) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + + struct radv_fence *fence; + VkResult ret; + + fence = vk_alloc2(&device->instance->alloc, allocator, sizeof (*fence), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!fence) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + fence->fence = NULL; + fence->submitted = true; + fence->signalled = false; + fence->syncobj = 0; + fence->temp_syncobj = 0; + + ret = wsi_register_display_event(_device, + &device->physical_device->wsi_device, + display, + display_event_info, + allocator, + &(fence->fence_wsi)); + + if (ret == VK_SUCCESS) + *_fence = radv_fence_to_handle(fence); + else + vk_free2(&device->instance->alloc, allocator, fence); + return ret; +} + +VkResult +radv_GetSwapchainCounterEXT(VkDevice _device, + VkSwapchainKHR swapchain, + VkSurfaceCounterFlagBitsEXT flag_bits, + uint64_t *value) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + + return wsi_get_swapchain_counter(_device, + &device->physical_device->wsi_device, + swapchain, + flag_bits, + value); +} + diff --git a/lib/mesa/src/amd/vulkan/vk_format_layout.csv b/lib/mesa/src/amd/vulkan/vk_format_layout.csv index ae9ceda08..f9c2e6f7c 100644 --- a/lib/mesa/src/amd/vulkan/vk_format_layout.csv +++ b/lib/mesa/src/amd/vulkan/vk_format_layout.csv @@ -148,16 +148,16 @@ VK_FORMAT_BC6H_UFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1 VK_FORMAT_BC6H_SFLOAT_BLOCK , bptc, 4, 4, x128, , , , xyz1, rgb VK_FORMAT_BC7_UNORM_BLOCK , bptc, 4, 4, x128, , , , xyzw, rgb VK_FORMAT_BC7_SRGB_BLOCK , bptc, 4, 4, x128, , , , xyzw, srgb -VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, -VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, -VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, -VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, -VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, -VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, -VK_FORMAT_EAC_R11_UNORM_BLOCK, -VK_FORMAT_EAC_R11_SNORM_BLOCK, -VK_FORMAT_EAC_R11G11_UNORM_BLOCK, -VK_FORMAT_EAC_R11G11_SNORM_BLOCK, +VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK , etc, 4, 4, x64, , , , xyz1, rgb +VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK , etc, 4, 4, x64, , , , xyz1, srgb +VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK , etc, 4, 4, x64, , , , xyzw, rgb +VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK , etc, 4, 4, x64, , , , xyzw, srgb +VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK , etc, 4, 4, x128, , , , xyzw, rgb +VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK , etc, 4, 4, x128, , , , xyzw, srgb +VK_FORMAT_EAC_R11_UNORM_BLOCK , etc, 4, 4, x64, , , , x001, rgb +VK_FORMAT_EAC_R11_SNORM_BLOCK , etc, 4, 4, x64, , , , x001, rgb +VK_FORMAT_EAC_R11G11_UNORM_BLOCK , etc, 4, 4, x128, , , , xy01, rgb +VK_FORMAT_EAC_R11G11_SNORM_BLOCK , etc, 4, 4, x128, , , , xy01, rgb VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, VK_FORMAT_ASTC_5x4_UNORM_BLOCK, diff --git a/lib/mesa/src/amd/vulkan/vk_format_table.c b/lib/mesa/src/amd/vulkan/vk_format_table.c index de0808dc2..0b04ce97b 100644 --- a/lib/mesa/src/amd/vulkan/vk_format_table.c +++ b/lib/mesa/src/amd/vulkan/vk_format_table.c @@ -1,4 +1,4 @@ -/* This file is autogenerated by u_format_table.py from u_format.csv. Do not edit directly. */ +/* This file is autogenerated by vk_format_table.py from vk_format_layout.csv. Do not edit directly. 
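The vk_format_layout.csv rows added above describe each ETC2/EAC format as a block layout: 4x4 texels per block, 64 bits (x64) for the RGB and single-channel variants, 128 bits (x128) for the A8 and two-channel ones, with the swizzle column (xyz1, xyzw, x001, xy01) giving the channel mapping. That is enough to size a compressed image:

#include <stdio.h>

/* From the CSV rows above: ETC2 RGB blocks are 4x4 texels in 64 bits
 * (8 bytes); the A8 and R11G11 variants use 128 bits per block. */
static unsigned etc2_size(unsigned width, unsigned height, unsigned block_bits)
{
    unsigned bw = (width + 3) / 4, bh = (height + 3) / 4;
    return bw * bh * block_bits / 8;
}

int main(void)
{
    /* A 1920x1080 ETC2_R8G8B8 image: 480*270 blocks * 8 bytes. */
    printf("%u bytes\n", etc2_size(1920, 1080, 64)); /* 1036800 */
    return 0;
}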
*/ /************************************************************************** * @@ -30,7 +30,7 @@ #include "stdbool.h" #include "vk_format.h" -const struct vk_format_description +static const struct vk_format_description vk_format_undefined_description = { VK_FORMAT_UNDEFINED, "VK_FORMAT_UNDEFINED", @@ -56,7 +56,7 @@ vk_format_undefined_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r4g4_unorm_pack8_description = { VK_FORMAT_R4G4_UNORM_PACK8, "VK_FORMAT_R4G4_UNORM_PACK8", @@ -100,7 +100,7 @@ vk_format_r4g4_unorm_pack8_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r4g4b4a4_unorm_pack16_description = { VK_FORMAT_R4G4B4A4_UNORM_PACK16, "VK_FORMAT_R4G4B4A4_UNORM_PACK16", @@ -144,7 +144,7 @@ vk_format_r4g4b4a4_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b4g4r4a4_unorm_pack16_description = { VK_FORMAT_B4G4R4A4_UNORM_PACK16, "VK_FORMAT_B4G4R4A4_UNORM_PACK16", @@ -188,7 +188,7 @@ vk_format_b4g4r4a4_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r5g6b5_unorm_pack16_description = { VK_FORMAT_R5G6B5_UNORM_PACK16, "VK_FORMAT_R5G6B5_UNORM_PACK16", @@ -232,7 +232,7 @@ vk_format_r5g6b5_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b5g6r5_unorm_pack16_description = { VK_FORMAT_B5G6R5_UNORM_PACK16, "VK_FORMAT_B5G6R5_UNORM_PACK16", @@ -276,7 +276,7 @@ vk_format_b5g6r5_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r5g5b5a1_unorm_pack16_description = { VK_FORMAT_R5G5B5A1_UNORM_PACK16, "VK_FORMAT_R5G5B5A1_UNORM_PACK16", @@ -320,7 +320,7 @@ vk_format_r5g5b5a1_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b5g5r5a1_unorm_pack16_description = { VK_FORMAT_B5G5R5A1_UNORM_PACK16, "VK_FORMAT_B5G5R5A1_UNORM_PACK16", @@ -364,7 +364,7 @@ vk_format_b5g5r5a1_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a1r5g5b5_unorm_pack16_description = { VK_FORMAT_A1R5G5B5_UNORM_PACK16, "VK_FORMAT_A1R5G5B5_UNORM_PACK16", @@ -408,7 +408,7 @@ vk_format_a1r5g5b5_unorm_pack16_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_unorm_description = { VK_FORMAT_R8_UNORM, "VK_FORMAT_R8_UNORM", @@ -434,7 +434,7 @@ vk_format_r8_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_snorm_description = { VK_FORMAT_R8_SNORM, "VK_FORMAT_R8_SNORM", @@ -460,7 +460,7 @@ vk_format_r8_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_uscaled_description = { VK_FORMAT_R8_USCALED, "VK_FORMAT_R8_USCALED", @@ -486,7 +486,7 @@ vk_format_r8_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_sscaled_description = { VK_FORMAT_R8_SSCALED, 
"VK_FORMAT_R8_SSCALED", @@ -512,7 +512,7 @@ vk_format_r8_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_uint_description = { VK_FORMAT_R8_UINT, "VK_FORMAT_R8_UINT", @@ -538,7 +538,7 @@ vk_format_r8_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_sint_description = { VK_FORMAT_R8_SINT, "VK_FORMAT_R8_SINT", @@ -564,7 +564,7 @@ vk_format_r8_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8_srgb_description = { VK_FORMAT_R8_SRGB, "VK_FORMAT_R8_SRGB", @@ -590,7 +590,7 @@ vk_format_r8_srgb_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_unorm_description = { VK_FORMAT_R8G8_UNORM, "VK_FORMAT_R8G8_UNORM", @@ -634,7 +634,7 @@ vk_format_r8g8_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_snorm_description = { VK_FORMAT_R8G8_SNORM, "VK_FORMAT_R8G8_SNORM", @@ -678,7 +678,7 @@ vk_format_r8g8_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_uscaled_description = { VK_FORMAT_R8G8_USCALED, "VK_FORMAT_R8G8_USCALED", @@ -722,7 +722,7 @@ vk_format_r8g8_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_sscaled_description = { VK_FORMAT_R8G8_SSCALED, "VK_FORMAT_R8G8_SSCALED", @@ -766,7 +766,7 @@ vk_format_r8g8_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_uint_description = { VK_FORMAT_R8G8_UINT, "VK_FORMAT_R8G8_UINT", @@ -810,7 +810,7 @@ vk_format_r8g8_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_sint_description = { VK_FORMAT_R8G8_SINT, "VK_FORMAT_R8G8_SINT", @@ -854,7 +854,7 @@ vk_format_r8g8_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8_srgb_description = { VK_FORMAT_R8G8_SRGB, "VK_FORMAT_R8G8_SRGB", @@ -898,7 +898,7 @@ vk_format_r8g8_srgb_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_unorm_description = { VK_FORMAT_R8G8B8_UNORM, "VK_FORMAT_R8G8B8_UNORM", @@ -942,7 +942,7 @@ vk_format_r8g8b8_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_snorm_description = { VK_FORMAT_R8G8B8_SNORM, "VK_FORMAT_R8G8B8_SNORM", @@ -986,7 +986,7 @@ vk_format_r8g8b8_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_uscaled_description = { VK_FORMAT_R8G8B8_USCALED, "VK_FORMAT_R8G8B8_USCALED", @@ -1030,7 +1030,7 @@ vk_format_r8g8b8_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_sscaled_description = { VK_FORMAT_R8G8B8_SSCALED, "VK_FORMAT_R8G8B8_SSCALED", @@ -1074,7 +1074,7 @@ 
vk_format_r8g8b8_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_uint_description = { VK_FORMAT_R8G8B8_UINT, "VK_FORMAT_R8G8B8_UINT", @@ -1118,7 +1118,7 @@ vk_format_r8g8b8_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_sint_description = { VK_FORMAT_R8G8B8_SINT, "VK_FORMAT_R8G8B8_SINT", @@ -1162,7 +1162,7 @@ vk_format_r8g8b8_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8_srgb_description = { VK_FORMAT_R8G8B8_SRGB, "VK_FORMAT_R8G8B8_SRGB", @@ -1206,7 +1206,7 @@ vk_format_r8g8b8_srgb_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_unorm_description = { VK_FORMAT_B8G8R8_UNORM, "VK_FORMAT_B8G8R8_UNORM", @@ -1250,7 +1250,7 @@ vk_format_b8g8r8_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_snorm_description = { VK_FORMAT_B8G8R8_SNORM, "VK_FORMAT_B8G8R8_SNORM", @@ -1294,7 +1294,7 @@ vk_format_b8g8r8_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_uscaled_description = { VK_FORMAT_B8G8R8_USCALED, "VK_FORMAT_B8G8R8_USCALED", @@ -1338,7 +1338,7 @@ vk_format_b8g8r8_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_sscaled_description = { VK_FORMAT_B8G8R8_SSCALED, "VK_FORMAT_B8G8R8_SSCALED", @@ -1382,7 +1382,7 @@ vk_format_b8g8r8_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_uint_description = { VK_FORMAT_B8G8R8_UINT, "VK_FORMAT_B8G8R8_UINT", @@ -1426,7 +1426,7 @@ vk_format_b8g8r8_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_sint_description = { VK_FORMAT_B8G8R8_SINT, "VK_FORMAT_B8G8R8_SINT", @@ -1470,7 +1470,7 @@ vk_format_b8g8r8_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8_srgb_description = { VK_FORMAT_B8G8R8_SRGB, "VK_FORMAT_B8G8R8_SRGB", @@ -1514,7 +1514,7 @@ vk_format_b8g8r8_srgb_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8a8_unorm_description = { VK_FORMAT_R8G8B8A8_UNORM, "VK_FORMAT_R8G8B8A8_UNORM", @@ -1558,7 +1558,7 @@ vk_format_r8g8b8a8_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8a8_snorm_description = { VK_FORMAT_R8G8B8A8_SNORM, "VK_FORMAT_R8G8B8A8_SNORM", @@ -1602,7 +1602,7 @@ vk_format_r8g8b8a8_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8a8_uscaled_description = { VK_FORMAT_R8G8B8A8_USCALED, "VK_FORMAT_R8G8B8A8_USCALED", @@ -1646,7 +1646,7 @@ vk_format_r8g8b8a8_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description 
vk_format_r8g8b8a8_sscaled_description = { VK_FORMAT_R8G8B8A8_SSCALED, "VK_FORMAT_R8G8B8A8_SSCALED", @@ -1690,7 +1690,7 @@ vk_format_r8g8b8a8_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8a8_uint_description = { VK_FORMAT_R8G8B8A8_UINT, "VK_FORMAT_R8G8B8A8_UINT", @@ -1734,7 +1734,7 @@ vk_format_r8g8b8a8_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8a8_sint_description = { VK_FORMAT_R8G8B8A8_SINT, "VK_FORMAT_R8G8B8A8_SINT", @@ -1778,7 +1778,7 @@ vk_format_r8g8b8a8_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r8g8b8a8_srgb_description = { VK_FORMAT_R8G8B8A8_SRGB, "VK_FORMAT_R8G8B8A8_SRGB", @@ -1822,7 +1822,7 @@ vk_format_r8g8b8a8_srgb_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_unorm_description = { VK_FORMAT_B8G8R8A8_UNORM, "VK_FORMAT_B8G8R8A8_UNORM", @@ -1866,7 +1866,7 @@ vk_format_b8g8r8a8_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_snorm_description = { VK_FORMAT_B8G8R8A8_SNORM, "VK_FORMAT_B8G8R8A8_SNORM", @@ -1910,7 +1910,7 @@ vk_format_b8g8r8a8_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_uscaled_description = { VK_FORMAT_B8G8R8A8_USCALED, "VK_FORMAT_B8G8R8A8_USCALED", @@ -1954,7 +1954,7 @@ vk_format_b8g8r8a8_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_sscaled_description = { VK_FORMAT_B8G8R8A8_SSCALED, "VK_FORMAT_B8G8R8A8_SSCALED", @@ -1998,7 +1998,7 @@ vk_format_b8g8r8a8_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_uint_description = { VK_FORMAT_B8G8R8A8_UINT, "VK_FORMAT_B8G8R8A8_UINT", @@ -2042,7 +2042,7 @@ vk_format_b8g8r8a8_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_sint_description = { VK_FORMAT_B8G8R8A8_SINT, "VK_FORMAT_B8G8R8A8_SINT", @@ -2086,7 +2086,7 @@ vk_format_b8g8r8a8_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b8g8r8a8_srgb_description = { VK_FORMAT_B8G8R8A8_SRGB, "VK_FORMAT_B8G8R8A8_SRGB", @@ -2130,7 +2130,7 @@ vk_format_b8g8r8a8_srgb_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a8b8g8r8_unorm_pack32_description = { VK_FORMAT_A8B8G8R8_UNORM_PACK32, "VK_FORMAT_A8B8G8R8_UNORM_PACK32", @@ -2174,7 +2174,7 @@ vk_format_a8b8g8r8_unorm_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a8b8g8r8_snorm_pack32_description = { VK_FORMAT_A8B8G8R8_SNORM_PACK32, "VK_FORMAT_A8B8G8R8_SNORM_PACK32", @@ -2218,7 +2218,7 @@ vk_format_a8b8g8r8_snorm_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description 
vk_format_a8b8g8r8_uscaled_pack32_description = { VK_FORMAT_A8B8G8R8_USCALED_PACK32, "VK_FORMAT_A8B8G8R8_USCALED_PACK32", @@ -2262,7 +2262,7 @@ vk_format_a8b8g8r8_uscaled_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a8b8g8r8_sscaled_pack32_description = { VK_FORMAT_A8B8G8R8_SSCALED_PACK32, "VK_FORMAT_A8B8G8R8_SSCALED_PACK32", @@ -2306,7 +2306,7 @@ vk_format_a8b8g8r8_sscaled_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a8b8g8r8_uint_pack32_description = { VK_FORMAT_A8B8G8R8_UINT_PACK32, "VK_FORMAT_A8B8G8R8_UINT_PACK32", @@ -2350,7 +2350,7 @@ vk_format_a8b8g8r8_uint_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a8b8g8r8_sint_pack32_description = { VK_FORMAT_A8B8G8R8_SINT_PACK32, "VK_FORMAT_A8B8G8R8_SINT_PACK32", @@ -2394,7 +2394,7 @@ vk_format_a8b8g8r8_sint_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a8b8g8r8_srgb_pack32_description = { VK_FORMAT_A8B8G8R8_SRGB_PACK32, "VK_FORMAT_A8B8G8R8_SRGB_PACK32", @@ -2438,7 +2438,7 @@ vk_format_a8b8g8r8_srgb_pack32_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2r10g10b10_unorm_pack32_description = { VK_FORMAT_A2R10G10B10_UNORM_PACK32, "VK_FORMAT_A2R10G10B10_UNORM_PACK32", @@ -2482,7 +2482,7 @@ vk_format_a2r10g10b10_unorm_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2r10g10b10_snorm_pack32_description = { VK_FORMAT_A2R10G10B10_SNORM_PACK32, "VK_FORMAT_A2R10G10B10_SNORM_PACK32", @@ -2526,7 +2526,7 @@ vk_format_a2r10g10b10_snorm_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2r10g10b10_uscaled_pack32_description = { VK_FORMAT_A2R10G10B10_USCALED_PACK32, "VK_FORMAT_A2R10G10B10_USCALED_PACK32", @@ -2570,7 +2570,7 @@ vk_format_a2r10g10b10_uscaled_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2r10g10b10_sscaled_pack32_description = { VK_FORMAT_A2R10G10B10_SSCALED_PACK32, "VK_FORMAT_A2R10G10B10_SSCALED_PACK32", @@ -2614,7 +2614,7 @@ vk_format_a2r10g10b10_sscaled_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2r10g10b10_uint_pack32_description = { VK_FORMAT_A2R10G10B10_UINT_PACK32, "VK_FORMAT_A2R10G10B10_UINT_PACK32", @@ -2658,7 +2658,7 @@ vk_format_a2r10g10b10_uint_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2r10g10b10_sint_pack32_description = { VK_FORMAT_A2R10G10B10_SINT_PACK32, "VK_FORMAT_A2R10G10B10_SINT_PACK32", @@ -2702,7 +2702,7 @@ vk_format_a2r10g10b10_sint_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2b10g10r10_unorm_pack32_description = { VK_FORMAT_A2B10G10R10_UNORM_PACK32, "VK_FORMAT_A2B10G10R10_UNORM_PACK32", @@ -2746,7 +2746,7 @@ vk_format_a2b10g10r10_unorm_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const 
struct vk_format_description +static const struct vk_format_description vk_format_a2b10g10r10_snorm_pack32_description = { VK_FORMAT_A2B10G10R10_SNORM_PACK32, "VK_FORMAT_A2B10G10R10_SNORM_PACK32", @@ -2790,7 +2790,7 @@ vk_format_a2b10g10r10_snorm_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2b10g10r10_uscaled_pack32_description = { VK_FORMAT_A2B10G10R10_USCALED_PACK32, "VK_FORMAT_A2B10G10R10_USCALED_PACK32", @@ -2834,7 +2834,7 @@ vk_format_a2b10g10r10_uscaled_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2b10g10r10_sscaled_pack32_description = { VK_FORMAT_A2B10G10R10_SSCALED_PACK32, "VK_FORMAT_A2B10G10R10_SSCALED_PACK32", @@ -2878,7 +2878,7 @@ vk_format_a2b10g10r10_sscaled_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2b10g10r10_uint_pack32_description = { VK_FORMAT_A2B10G10R10_UINT_PACK32, "VK_FORMAT_A2B10G10R10_UINT_PACK32", @@ -2922,7 +2922,7 @@ vk_format_a2b10g10r10_uint_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_a2b10g10r10_sint_pack32_description = { VK_FORMAT_A2B10G10R10_SINT_PACK32, "VK_FORMAT_A2B10G10R10_SINT_PACK32", @@ -2966,7 +2966,7 @@ vk_format_a2b10g10r10_sint_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_unorm_description = { VK_FORMAT_R16_UNORM, "VK_FORMAT_R16_UNORM", @@ -2992,7 +2992,7 @@ vk_format_r16_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_snorm_description = { VK_FORMAT_R16_SNORM, "VK_FORMAT_R16_SNORM", @@ -3018,7 +3018,7 @@ vk_format_r16_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_uscaled_description = { VK_FORMAT_R16_USCALED, "VK_FORMAT_R16_USCALED", @@ -3044,7 +3044,7 @@ vk_format_r16_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_sscaled_description = { VK_FORMAT_R16_SSCALED, "VK_FORMAT_R16_SSCALED", @@ -3070,7 +3070,7 @@ vk_format_r16_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_uint_description = { VK_FORMAT_R16_UINT, "VK_FORMAT_R16_UINT", @@ -3096,7 +3096,7 @@ vk_format_r16_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_sint_description = { VK_FORMAT_R16_SINT, "VK_FORMAT_R16_SINT", @@ -3122,7 +3122,7 @@ vk_format_r16_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16_sfloat_description = { VK_FORMAT_R16_SFLOAT, "VK_FORMAT_R16_SFLOAT", @@ -3148,7 +3148,7 @@ vk_format_r16_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16_unorm_description = { VK_FORMAT_R16G16_UNORM, "VK_FORMAT_R16G16_UNORM", @@ -3192,7 +3192,7 @@ vk_format_r16g16_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct 
vk_format_description +static const struct vk_format_description vk_format_r16g16_snorm_description = { VK_FORMAT_R16G16_SNORM, "VK_FORMAT_R16G16_SNORM", @@ -3236,7 +3236,7 @@ vk_format_r16g16_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16_uscaled_description = { VK_FORMAT_R16G16_USCALED, "VK_FORMAT_R16G16_USCALED", @@ -3280,7 +3280,7 @@ vk_format_r16g16_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16_sscaled_description = { VK_FORMAT_R16G16_SSCALED, "VK_FORMAT_R16G16_SSCALED", @@ -3324,7 +3324,7 @@ vk_format_r16g16_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16_uint_description = { VK_FORMAT_R16G16_UINT, "VK_FORMAT_R16G16_UINT", @@ -3368,7 +3368,7 @@ vk_format_r16g16_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16_sint_description = { VK_FORMAT_R16G16_SINT, "VK_FORMAT_R16G16_SINT", @@ -3412,7 +3412,7 @@ vk_format_r16g16_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16_sfloat_description = { VK_FORMAT_R16G16_SFLOAT, "VK_FORMAT_R16G16_SFLOAT", @@ -3456,7 +3456,7 @@ vk_format_r16g16_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_unorm_description = { VK_FORMAT_R16G16B16_UNORM, "VK_FORMAT_R16G16B16_UNORM", @@ -3500,7 +3500,7 @@ vk_format_r16g16b16_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_snorm_description = { VK_FORMAT_R16G16B16_SNORM, "VK_FORMAT_R16G16B16_SNORM", @@ -3544,7 +3544,7 @@ vk_format_r16g16b16_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_uscaled_description = { VK_FORMAT_R16G16B16_USCALED, "VK_FORMAT_R16G16B16_USCALED", @@ -3588,7 +3588,7 @@ vk_format_r16g16b16_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_sscaled_description = { VK_FORMAT_R16G16B16_SSCALED, "VK_FORMAT_R16G16B16_SSCALED", @@ -3632,7 +3632,7 @@ vk_format_r16g16b16_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_uint_description = { VK_FORMAT_R16G16B16_UINT, "VK_FORMAT_R16G16B16_UINT", @@ -3676,7 +3676,7 @@ vk_format_r16g16b16_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_sint_description = { VK_FORMAT_R16G16B16_SINT, "VK_FORMAT_R16G16B16_SINT", @@ -3720,7 +3720,7 @@ vk_format_r16g16b16_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16_sfloat_description = { VK_FORMAT_R16G16B16_SFLOAT, "VK_FORMAT_R16G16B16_SFLOAT", @@ -3764,7 +3764,7 @@ vk_format_r16g16b16_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description 
vk_format_r16g16b16a16_unorm_description = { VK_FORMAT_R16G16B16A16_UNORM, "VK_FORMAT_R16G16B16A16_UNORM", @@ -3808,7 +3808,7 @@ vk_format_r16g16b16a16_unorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16a16_snorm_description = { VK_FORMAT_R16G16B16A16_SNORM, "VK_FORMAT_R16G16B16A16_SNORM", @@ -3852,7 +3852,7 @@ vk_format_r16g16b16a16_snorm_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16a16_uscaled_description = { VK_FORMAT_R16G16B16A16_USCALED, "VK_FORMAT_R16G16B16A16_USCALED", @@ -3896,7 +3896,7 @@ vk_format_r16g16b16a16_uscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16a16_sscaled_description = { VK_FORMAT_R16G16B16A16_SSCALED, "VK_FORMAT_R16G16B16A16_SSCALED", @@ -3940,7 +3940,7 @@ vk_format_r16g16b16a16_sscaled_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16a16_uint_description = { VK_FORMAT_R16G16B16A16_UINT, "VK_FORMAT_R16G16B16A16_UINT", @@ -3984,7 +3984,7 @@ vk_format_r16g16b16a16_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16a16_sint_description = { VK_FORMAT_R16G16B16A16_SINT, "VK_FORMAT_R16G16B16A16_SINT", @@ -4028,7 +4028,7 @@ vk_format_r16g16b16a16_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r16g16b16a16_sfloat_description = { VK_FORMAT_R16G16B16A16_SFLOAT, "VK_FORMAT_R16G16B16A16_SFLOAT", @@ -4072,7 +4072,7 @@ vk_format_r16g16b16a16_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32_uint_description = { VK_FORMAT_R32_UINT, "VK_FORMAT_R32_UINT", @@ -4098,7 +4098,7 @@ vk_format_r32_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32_sint_description = { VK_FORMAT_R32_SINT, "VK_FORMAT_R32_SINT", @@ -4124,7 +4124,7 @@ vk_format_r32_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32_sfloat_description = { VK_FORMAT_R32_SFLOAT, "VK_FORMAT_R32_SFLOAT", @@ -4150,7 +4150,7 @@ vk_format_r32_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32_uint_description = { VK_FORMAT_R32G32_UINT, "VK_FORMAT_R32G32_UINT", @@ -4194,7 +4194,7 @@ vk_format_r32g32_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32_sint_description = { VK_FORMAT_R32G32_SINT, "VK_FORMAT_R32G32_SINT", @@ -4238,7 +4238,7 @@ vk_format_r32g32_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32_sfloat_description = { VK_FORMAT_R32G32_SFLOAT, "VK_FORMAT_R32G32_SFLOAT", @@ -4282,7 +4282,7 @@ vk_format_r32g32_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32b32_uint_description = { 
VK_FORMAT_R32G32B32_UINT, "VK_FORMAT_R32G32B32_UINT", @@ -4326,7 +4326,7 @@ vk_format_r32g32b32_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32b32_sint_description = { VK_FORMAT_R32G32B32_SINT, "VK_FORMAT_R32G32B32_SINT", @@ -4370,7 +4370,7 @@ vk_format_r32g32b32_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32b32_sfloat_description = { VK_FORMAT_R32G32B32_SFLOAT, "VK_FORMAT_R32G32B32_SFLOAT", @@ -4414,7 +4414,7 @@ vk_format_r32g32b32_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32b32a32_uint_description = { VK_FORMAT_R32G32B32A32_UINT, "VK_FORMAT_R32G32B32A32_UINT", @@ -4458,7 +4458,7 @@ vk_format_r32g32b32a32_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32b32a32_sint_description = { VK_FORMAT_R32G32B32A32_SINT, "VK_FORMAT_R32G32B32A32_SINT", @@ -4502,7 +4502,7 @@ vk_format_r32g32b32a32_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r32g32b32a32_sfloat_description = { VK_FORMAT_R32G32B32A32_SFLOAT, "VK_FORMAT_R32G32B32A32_SFLOAT", @@ -4546,7 +4546,7 @@ vk_format_r32g32b32a32_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64_uint_description = { VK_FORMAT_R64_UINT, "VK_FORMAT_R64_UINT", @@ -4572,7 +4572,7 @@ vk_format_r64_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64_sint_description = { VK_FORMAT_R64_SINT, "VK_FORMAT_R64_SINT", @@ -4598,7 +4598,7 @@ vk_format_r64_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64_sfloat_description = { VK_FORMAT_R64_SFLOAT, "VK_FORMAT_R64_SFLOAT", @@ -4624,7 +4624,7 @@ vk_format_r64_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64_uint_description = { VK_FORMAT_R64G64_UINT, "VK_FORMAT_R64G64_UINT", @@ -4668,7 +4668,7 @@ vk_format_r64g64_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64_sint_description = { VK_FORMAT_R64G64_SINT, "VK_FORMAT_R64G64_SINT", @@ -4712,7 +4712,7 @@ vk_format_r64g64_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64_sfloat_description = { VK_FORMAT_R64G64_SFLOAT, "VK_FORMAT_R64G64_SFLOAT", @@ -4756,7 +4756,7 @@ vk_format_r64g64_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64b64_uint_description = { VK_FORMAT_R64G64B64_UINT, "VK_FORMAT_R64G64B64_UINT", @@ -4800,7 +4800,7 @@ vk_format_r64g64b64_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64b64_sint_description = { VK_FORMAT_R64G64B64_SINT, "VK_FORMAT_R64G64B64_SINT", @@ -4844,7 +4844,7 @@ vk_format_r64g64b64_sint_description = { 
VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64b64_sfloat_description = { VK_FORMAT_R64G64B64_SFLOAT, "VK_FORMAT_R64G64B64_SFLOAT", @@ -4888,7 +4888,7 @@ vk_format_r64g64b64_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64b64a64_uint_description = { VK_FORMAT_R64G64B64A64_UINT, "VK_FORMAT_R64G64B64A64_UINT", @@ -4932,7 +4932,7 @@ vk_format_r64g64b64a64_uint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64b64a64_sint_description = { VK_FORMAT_R64G64B64A64_SINT, "VK_FORMAT_R64G64B64A64_SINT", @@ -4976,7 +4976,7 @@ vk_format_r64g64b64a64_sint_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_r64g64b64a64_sfloat_description = { VK_FORMAT_R64G64B64A64_SFLOAT, "VK_FORMAT_R64G64B64A64_SFLOAT", @@ -5020,7 +5020,7 @@ vk_format_r64g64b64a64_sfloat_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_b10g11r11_ufloat_pack32_description = { VK_FORMAT_B10G11R11_UFLOAT_PACK32, "VK_FORMAT_B10G11R11_UFLOAT_PACK32", @@ -5046,7 +5046,7 @@ vk_format_b10g11r11_ufloat_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_e5b9g9r9_ufloat_pack32_description = { VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, "VK_FORMAT_E5B9G9R9_UFLOAT_PACK32", @@ -5072,7 +5072,7 @@ vk_format_e5b9g9r9_ufloat_pack32_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_d16_unorm_description = { VK_FORMAT_D16_UNORM, "VK_FORMAT_D16_UNORM", @@ -5098,7 +5098,7 @@ vk_format_d16_unorm_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_x8_d24_unorm_pack32_description = { VK_FORMAT_X8_D24_UNORM_PACK32, "VK_FORMAT_X8_D24_UNORM_PACK32", @@ -5142,7 +5142,7 @@ vk_format_x8_d24_unorm_pack32_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_d32_sfloat_description = { VK_FORMAT_D32_SFLOAT, "VK_FORMAT_D32_SFLOAT", @@ -5168,7 +5168,7 @@ vk_format_d32_sfloat_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_s8_uint_description = { VK_FORMAT_S8_UINT, "VK_FORMAT_S8_UINT", @@ -5194,7 +5194,7 @@ vk_format_s8_uint_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_d16_unorm_s8_uint_description = { VK_FORMAT_D16_UNORM_S8_UINT, "VK_FORMAT_D16_UNORM_S8_UINT", @@ -5238,7 +5238,7 @@ vk_format_d16_unorm_s8_uint_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_d24_unorm_s8_uint_description = { VK_FORMAT_D24_UNORM_S8_UINT, "VK_FORMAT_D24_UNORM_S8_UINT", @@ -5282,7 +5282,7 @@ vk_format_d24_unorm_s8_uint_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_d32_sfloat_s8_uint_description = { VK_FORMAT_D32_SFLOAT_S8_UINT, "VK_FORMAT_D32_SFLOAT_S8_UINT", @@ -5326,7 +5326,7 @@ 
vk_format_d32_sfloat_s8_uint_description = { VK_FORMAT_COLORSPACE_ZS, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc1_rgb_unorm_block_description = { VK_FORMAT_BC1_RGB_UNORM_BLOCK, "VK_FORMAT_BC1_RGB_UNORM_BLOCK", @@ -5352,7 +5352,7 @@ vk_format_bc1_rgb_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc1_rgb_srgb_block_description = { VK_FORMAT_BC1_RGB_SRGB_BLOCK, "VK_FORMAT_BC1_RGB_SRGB_BLOCK", @@ -5378,7 +5378,7 @@ vk_format_bc1_rgb_srgb_block_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc1_rgba_unorm_block_description = { VK_FORMAT_BC1_RGBA_UNORM_BLOCK, "VK_FORMAT_BC1_RGBA_UNORM_BLOCK", @@ -5404,7 +5404,7 @@ vk_format_bc1_rgba_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc1_rgba_srgb_block_description = { VK_FORMAT_BC1_RGBA_SRGB_BLOCK, "VK_FORMAT_BC1_RGBA_SRGB_BLOCK", @@ -5430,7 +5430,7 @@ vk_format_bc1_rgba_srgb_block_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc2_unorm_block_description = { VK_FORMAT_BC2_UNORM_BLOCK, "VK_FORMAT_BC2_UNORM_BLOCK", @@ -5456,7 +5456,7 @@ vk_format_bc2_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc2_srgb_block_description = { VK_FORMAT_BC2_SRGB_BLOCK, "VK_FORMAT_BC2_SRGB_BLOCK", @@ -5482,7 +5482,7 @@ vk_format_bc2_srgb_block_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc3_unorm_block_description = { VK_FORMAT_BC3_UNORM_BLOCK, "VK_FORMAT_BC3_UNORM_BLOCK", @@ -5508,7 +5508,7 @@ vk_format_bc3_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc3_srgb_block_description = { VK_FORMAT_BC3_SRGB_BLOCK, "VK_FORMAT_BC3_SRGB_BLOCK", @@ -5534,7 +5534,7 @@ vk_format_bc3_srgb_block_description = { VK_FORMAT_COLORSPACE_SRGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc4_unorm_block_description = { VK_FORMAT_BC4_UNORM_BLOCK, "VK_FORMAT_BC4_UNORM_BLOCK", @@ -5560,7 +5560,7 @@ vk_format_bc4_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc4_snorm_block_description = { VK_FORMAT_BC4_SNORM_BLOCK, "VK_FORMAT_BC4_SNORM_BLOCK", @@ -5586,7 +5586,7 @@ vk_format_bc4_snorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc5_unorm_block_description = { VK_FORMAT_BC5_UNORM_BLOCK, "VK_FORMAT_BC5_UNORM_BLOCK", @@ -5612,7 +5612,7 @@ vk_format_bc5_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc5_snorm_block_description = { VK_FORMAT_BC5_SNORM_BLOCK, "VK_FORMAT_BC5_SNORM_BLOCK", @@ -5638,7 +5638,7 @@ vk_format_bc5_snorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc6h_ufloat_block_description = { VK_FORMAT_BC6H_UFLOAT_BLOCK, 
"VK_FORMAT_BC6H_UFLOAT_BLOCK", @@ -5664,7 +5664,7 @@ vk_format_bc6h_ufloat_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc6h_sfloat_block_description = { VK_FORMAT_BC6H_SFLOAT_BLOCK, "VK_FORMAT_BC6H_SFLOAT_BLOCK", @@ -5690,7 +5690,7 @@ vk_format_bc6h_sfloat_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc7_unorm_block_description = { VK_FORMAT_BC7_UNORM_BLOCK, "VK_FORMAT_BC7_UNORM_BLOCK", @@ -5716,7 +5716,7 @@ vk_format_bc7_unorm_block_description = { VK_FORMAT_COLORSPACE_RGB, }; -const struct vk_format_description +static const struct vk_format_description vk_format_bc7_srgb_block_description = { VK_FORMAT_BC7_SRGB_BLOCK, "VK_FORMAT_BC7_SRGB_BLOCK", @@ -5742,6 +5742,266 @@ vk_format_bc7_srgb_block_description = { VK_FORMAT_COLORSPACE_SRGB, }; +static const struct vk_format_description +vk_format_etc2_r8g8b8_unorm_block_description = { + VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, + "VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK", + "etc2_r8g8b8_unorm_block", + {4, 4, 64}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_Y, /* g */ + VK_SWIZZLE_Z, /* b */ + VK_SWIZZLE_1 /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + +static const struct vk_format_description +vk_format_etc2_r8g8b8_srgb_block_description = { + VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, + "VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK", + "etc2_r8g8b8_srgb_block", + {4, 4, 64}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* sr */ + VK_SWIZZLE_Y, /* sg */ + VK_SWIZZLE_Z, /* sb */ + VK_SWIZZLE_1 /* a */ + }, + VK_FORMAT_COLORSPACE_SRGB, +}; + +static const struct vk_format_description +vk_format_etc2_r8g8b8a1_unorm_block_description = { + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, + "VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK", + "etc2_r8g8b8a1_unorm_block", + {4, 4, 64}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_Y, /* g */ + VK_SWIZZLE_Z, /* b */ + VK_SWIZZLE_W /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + +static const struct vk_format_description +vk_format_etc2_r8g8b8a1_srgb_block_description = { + VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, + "VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK", + "etc2_r8g8b8a1_srgb_block", + {4, 4, 64}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* sr */ + VK_SWIZZLE_Y, /* sg */ + VK_SWIZZLE_Z, /* sb */ + VK_SWIZZLE_W /* a */ + }, + VK_FORMAT_COLORSPACE_SRGB, +}; + +static const struct vk_format_description +vk_format_etc2_r8g8b8a8_unorm_block_description = { + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, + 
"VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK", + "etc2_r8g8b8a8_unorm_block", + {4, 4, 128}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_Y, /* g */ + VK_SWIZZLE_Z, /* b */ + VK_SWIZZLE_W /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + +static const struct vk_format_description +vk_format_etc2_r8g8b8a8_srgb_block_description = { + VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, + "VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK", + "etc2_r8g8b8a8_srgb_block", + {4, 4, 128}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* sr */ + VK_SWIZZLE_Y, /* sg */ + VK_SWIZZLE_Z, /* sb */ + VK_SWIZZLE_W /* a */ + }, + VK_FORMAT_COLORSPACE_SRGB, +}; + +static const struct vk_format_description +vk_format_eac_r11_unorm_block_description = { + VK_FORMAT_EAC_R11_UNORM_BLOCK, + "VK_FORMAT_EAC_R11_UNORM_BLOCK", + "eac_r11_unorm_block", + {4, 4, 64}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_0, /* g */ + VK_SWIZZLE_0, /* b */ + VK_SWIZZLE_1 /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + +static const struct vk_format_description +vk_format_eac_r11_snorm_block_description = { + VK_FORMAT_EAC_R11_SNORM_BLOCK, + "VK_FORMAT_EAC_R11_SNORM_BLOCK", + "eac_r11_snorm_block", + {4, 4, 64}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 64, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_0, /* g */ + VK_SWIZZLE_0, /* b */ + VK_SWIZZLE_1 /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + +static const struct vk_format_description +vk_format_eac_r11g11_unorm_block_description = { + VK_FORMAT_EAC_R11G11_UNORM_BLOCK, + "VK_FORMAT_EAC_R11G11_UNORM_BLOCK", + "eac_r11g11_unorm_block", + {4, 4, 128}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_Y, /* g */ + VK_SWIZZLE_0, /* b */ + VK_SWIZZLE_1 /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + +static const struct vk_format_description +vk_format_eac_r11g11_snorm_block_description = { + VK_FORMAT_EAC_R11G11_SNORM_BLOCK, + "VK_FORMAT_EAC_R11G11_SNORM_BLOCK", + "eac_r11g11_snorm_block", + {4, 4, 128}, /* block */ + VK_FORMAT_LAYOUT_ETC, + 1, /* nr_channels */ + false, /* is_array */ + false, /* is_bitmask */ + false, /* is_mixed */ + { + {VK_FORMAT_TYPE_VOID, false, false, false, 128, 0}, /* x = x */ + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0} + }, + { + VK_SWIZZLE_X, /* r */ + VK_SWIZZLE_Y, /* g */ + VK_SWIZZLE_0, /* b */ + VK_SWIZZLE_1 /* a */ + }, + VK_FORMAT_COLORSPACE_RGB, +}; + const struct 
vk_format_description *
 vk_format_description(VkFormat format)
 {
@@ -6044,6 +6304,26 @@ vk_format_description(VkFormat format)
       return &vk_format_bc7_unorm_block_description;
    case VK_FORMAT_BC7_SRGB_BLOCK:
       return &vk_format_bc7_srgb_block_description;
+   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
+      return &vk_format_etc2_r8g8b8_unorm_block_description;
+   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+      return &vk_format_etc2_r8g8b8_srgb_block_description;
+   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
+      return &vk_format_etc2_r8g8b8a1_unorm_block_description;
+   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+      return &vk_format_etc2_r8g8b8a1_srgb_block_description;
+   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
+      return &vk_format_etc2_r8g8b8a8_unorm_block_description;
+   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+      return &vk_format_etc2_r8g8b8a8_srgb_block_description;
+   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+      return &vk_format_eac_r11_unorm_block_description;
+   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+      return &vk_format_eac_r11_snorm_block_description;
+   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+      return &vk_format_eac_r11g11_unorm_block_description;
+   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+      return &vk_format_eac_r11g11_snorm_block_description;
    default:
       return NULL;
    }
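
The radv_RegisterDisplayEventEXT and radv_GetSwapchainCounterEXT entry points earlier in this diff back the VK_EXT_display_control extension. A minimal application-side sketch of how they are driven follows; the extern handles and the helper name wait_for_first_pixel_out are assumptions for illustration, not part of this import.

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Hypothetical handles, created elsewhere by the application. */
extern VkDevice device;
extern VkDisplayKHR display;
extern VkSwapchainKHR swapchain;

static VkResult
wait_for_first_pixel_out(void)
{
	/* Extension entry points are fetched through the loader. */
	PFN_vkRegisterDisplayEventEXT register_display_event =
		(PFN_vkRegisterDisplayEventEXT)
		vkGetDeviceProcAddr(device, "vkRegisterDisplayEventEXT");
	PFN_vkGetSwapchainCounterEXT get_swapchain_counter =
		(PFN_vkGetSwapchainCounterEXT)
		vkGetDeviceProcAddr(device, "vkGetSwapchainCounterEXT");

	const VkDisplayEventInfoEXT event_info = {
		.sType = VK_STRUCTURE_TYPE_DISPLAY_EVENT_INFO_EXT,
		.displayEvent = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT,
	};
	VkFence fence;
	VkResult result = register_display_event(device, display, &event_info,
						 NULL /* allocator */, &fence);
	if (result != VK_SUCCESS)
		return result;

	/* The fence signals when the first pixel of the next frame leaves the
	 * display engine; in the driver code above it is backed by the
	 * wsi_register_display_event() call storing into fence->fence_wsi. */
	result = vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
	vkDestroyFence(device, fence, NULL);

	/* The same extension exposes a per-swapchain vblank counter, served
	 * here by radv_GetSwapchainCounterEXT. */
	uint64_t vblanks;
	(void)get_swapchain_counter(device, swapchain,
				    VK_SURFACE_COUNTER_VBLANK_EXT, &vblanks);
	return result;
}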
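
The bulk of the vk_format_table.c diff narrows every per-format description from const to static const: the objects are only reachable through the switch in vk_format_description(), so nothing needs external linkage. A minimal sketch of how the generated table is typically consumed follows, assuming the block layouts shown in the new ETC2/EAC entries ({4, 4, 64} and {4, 4, 128}); the helper name block_size_in_bytes is illustrative, not from the import.

#include "vk_format.h"

/* Illustrative helper: byte size of one compressed block, derived from the
 * generated description.  For the ETC2/EAC entries added above, desc->block
 * is {4, 4, 64} or {4, 4, 128}, i.e. 8 or 16 bytes per 4x4 block. */
static unsigned
block_size_in_bytes(VkFormat format)
{
	const struct vk_format_description *desc = vk_format_description(format);

	/* vk_format_description() returns NULL for formats still missing from
	 * the table, such as the ASTC rows left empty in the csv. */
	if (!desc)
		return 0;

	return desc->block.bits / 8;
}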