diff options
Diffstat (limited to 'lib/mesa/src')
98 files changed, 951 insertions, 506 deletions
diff --git a/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h b/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h index 36d39e5e4..34c16e2d5 100644 --- a/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h +++ b/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h @@ -116,7 +116,8 @@ #define AMDGPU_GFX1101_RANGE 0x20, 0xFF //# 32 <= x < 255 #define AMDGPU_GFX1102_RANGE 0x10, 0x20 //# 16 <= x < 32 -#define AMDGPU_GFX1103_RANGE 0x01, 0xFF //# 1 <= x < max +#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x10 //# 1 <= x < 16 +#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xFF //# 128 <= x < max #define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255 @@ -187,7 +188,8 @@ #define ASICREV_IS_GFX1100(r) ASICREV_IS(r, GFX1100) #define ASICREV_IS_GFX1101(r) ASICREV_IS(r, GFX1101) #define ASICREV_IS_GFX1102(r) ASICREV_IS(r, GFX1102) -#define ASICREV_IS_GFX1103(r) ASICREV_IS(r, GFX1103) +#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1) +#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2) #define ASICREV_IS_REMBRANDT(r) ASICREV_IS(r, REMBRANDT) diff --git a/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp b/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp index 9adc28a63..af48e7716 100644 --- a/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp +++ b/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp @@ -752,9 +752,6 @@ ChipFamily Gfx11Lib::HwlConvertChipFamily( } break; case FAMILY_GFX1103: - if (ASICREV_IS_GFX1103(chipRevision)) - { - } break; default: ADDR_ASSERT(!"Unknown chip family"); diff --git a/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt b/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt index a66c79b96..59844a339 100644 --- a/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt +++ b/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt @@ -1,6 +1,5 @@ glx@glx-make-current,Crash glx@glx-multi-window-single-context,Fail -glx@glx-swap-event_async,Fail glx@glx-swap-pixmap-bad,Fail glx@glx-visuals-depth -pixmap,Crash glx@glx-visuals-stencil -pixmap,Crash diff --git 
a/lib/mesa/src/amd/common/ac_nir_lower_ngg.c b/lib/mesa/src/amd/common/ac_nir_lower_ngg.c index cdc7ad05b..901245b35 100644 --- a/lib/mesa/src/amd/common/ac_nir_lower_ngg.c +++ b/lib/mesa/src/amd/common/ac_nir_lower_ngg.c @@ -468,6 +468,29 @@ has_input_primitive(nir_builder *b) } static void +nogs_prim_gen_query(nir_builder *b, lower_ngg_nogs_state *s) +{ + if (!s->options->has_gen_prim_query) + return; + + nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b)); + { + /* Activate only 1 lane and add the number of primitives to query result. */ + nir_if *if_elected = nir_push_if(b, nir_elect(b, 1)); + { + /* Number of input primitives in the current wave. */ + nir_ssa_def *num_input_prims = nir_ubfe(b, nir_load_merged_wave_info_amd(b), + nir_imm_int(b, 8), nir_imm_int(b, 8)); + + /* Add to stream 0 primitive generated counter. */ + nir_atomic_add_gen_prim_count_amd(b, num_input_prims, .stream_id = 0); + } + nir_pop_if(b, if_elected); + } + nir_pop_if(b, if_shader_query); +} + +static void emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg) { nir_ssa_def *gs_thread = @@ -506,23 +529,6 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def arg = nir_iand(b, arg, mask); } - if (st->options->has_gen_prim_query) { - nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b)); - { - /* Number of active GS threads. Each has 1 output primitive. */ - nir_ssa_def *num_gs_threads = - nir_bit_count(b, nir_ballot(b, 1, st->options->wave_size, nir_imm_bool(b, true))); - /* Activate only 1 lane and add the number of primitives to query result. */ - nir_if *if_elected = nir_push_if(b, nir_elect(b, 1)); - { - /* Add to stream 0 primitive generated counter. 
*/ - nir_atomic_add_gen_prim_count_amd(b, num_gs_threads, .stream_id = 0); - } - nir_pop_if(b, if_elected); - } - nir_pop_if(b, if_shader_query); - } - nir_export_primitive_amd(b, arg); } nir_pop_if(b, if_gs_thread); @@ -1373,6 +1379,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c nir_local_variable_create(impl, glsl_vec4_type(), "clip_vertex"); nogs_state->clipdist_neg_mask_var = nir_local_variable_create(impl, glsl_uint8_t_type(), "clipdist_neg_mask"); + + /* init mask to 0 */ + nir_store_var(b, nogs_state->clipdist_neg_mask_var, nir_imm_intN_t(b, 0, 8), 1); } /* Top part of the culling shader (aka. position shader part) @@ -1382,8 +1391,6 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c * The position output is stored into a temporary variable, and reloaded later. */ - b->cursor = nir_before_cf_list(&impl->body); - nir_ssa_def *es_thread = has_input_vertex(b); nir_if *if_es_thread = nir_push_if(b, es_thread); { @@ -2150,6 +2157,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option ngg_nogs_init_vertex_indices_vars(b, impl, &state); + /* Emit primitives generated query code here, so that + * it executes before culling and isn't in the extracted CF. + */ + nogs_prim_gen_query(b, &state); + if (!options->can_cull) { /* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. 
*/ if (!(options->passthrough && options->family >= CHIP_NAVI23)) { diff --git a/lib/mesa/src/amd/common/ac_rgp.c b/lib/mesa/src/amd/common/ac_rgp.c index d323b4b84..bbbf985c2 100644 --- a/lib/mesa/src/amd/common/ac_rgp.c +++ b/lib/mesa/src/amd/common/ac_rgp.c @@ -286,6 +286,7 @@ enum sqtt_memory_type SQTT_MEMORY_TYPE_DDR2 = 0x2, SQTT_MEMORY_TYPE_DDR3 = 0x3, SQTT_MEMORY_TYPE_DDR4 = 0x4, + SQTT_MEMORY_TYPE_DDR5 = 0x5, SQTT_MEMORY_TYPE_GDDR3 = 0x10, SQTT_MEMORY_TYPE_GDDR4 = 0x11, SQTT_MEMORY_TYPE_GDDR5 = 0x12, @@ -375,17 +376,22 @@ static enum sqtt_memory_type ac_vram_type_to_sqtt_memory_type(uint32_t vram_type return SQTT_MEMORY_TYPE_DDR3; case AMD_VRAM_TYPE_DDR4: return SQTT_MEMORY_TYPE_DDR4; + case AMD_VRAM_TYPE_DDR5: + return SQTT_MEMORY_TYPE_DDR5; + case AMD_VRAM_TYPE_GDDR3: + return SQTT_MEMORY_TYPE_GDDR3; + case AMD_VRAM_TYPE_GDDR4: + return SQTT_MEMORY_TYPE_GDDR4; case AMD_VRAM_TYPE_GDDR5: return SQTT_MEMORY_TYPE_GDDR5; - case AMD_VRAM_TYPE_HBM: - return SQTT_MEMORY_TYPE_HBM; case AMD_VRAM_TYPE_GDDR6: return SQTT_MEMORY_TYPE_GDDR6; - case AMD_VRAM_TYPE_DDR5: + case AMD_VRAM_TYPE_HBM: + return SQTT_MEMORY_TYPE_HBM; + case AMD_VRAM_TYPE_LPDDR4: + return SQTT_MEMORY_TYPE_LPDDR4; + case AMD_VRAM_TYPE_LPDDR5: return SQTT_MEMORY_TYPE_LPDDR5; - case AMD_VRAM_TYPE_GDDR1: - case AMD_VRAM_TYPE_GDDR3: - case AMD_VRAM_TYPE_GDDR4: default: unreachable("Invalid vram type"); } diff --git a/lib/mesa/src/amd/common/ac_shadowed_regs.c b/lib/mesa/src/amd/common/ac_shadowed_regs.c index 65a49bd9c..a532d2d99 100644 --- a/lib/mesa/src/amd/common/ac_shadowed_regs.c +++ b/lib/mesa/src/amd/common/ac_shadowed_regs.c @@ -1120,8 +1120,8 @@ static const struct ac_reg_range Gfx11UserConfigShadowRange[] = R_03092C_GE_MULTI_PRIM_IB_RESET_EN - R_030924_GE_MIN_VTX_INDX + 4, }, { - R_008974_VGT_NUM_INSTANCES, - R_030940_VGT_TF_MEMORY_BASE - R_008974_VGT_NUM_INSTANCES + 4, + R_030934_VGT_NUM_INSTANCES, + R_030940_VGT_TF_MEMORY_BASE - R_030934_VGT_NUM_INSTANCES + 4, }, { R_03097C_GE_STEREO_CNTL, 
diff --git a/lib/mesa/src/amd/common/amd_family.c b/lib/mesa/src/amd/common/amd_family.c index be6575791..90ec21cb9 100644 --- a/lib/mesa/src/amd/common/amd_family.c +++ b/lib/mesa/src/amd/common/amd_family.c @@ -108,8 +108,10 @@ const char *ac_get_family_name(enum radeon_family family) return "GFX1101"; case CHIP_GFX1102: return "GFX1102"; - case CHIP_GFX1103: - return "GFX1103"; + case CHIP_GFX1103_R1: + return "GFX1103_R1"; + case CHIP_GFX1103_R2: + return "GFX1103_R2"; default: unreachable("Unknown GPU family"); } diff --git a/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp b/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp index 2269324e7..068ff4f4b 100644 --- a/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp +++ b/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp @@ -1044,6 +1044,9 @@ struct LdsDirectVALUHazardGlobalState { struct LdsDirectVALUHazardBlockState { unsigned num_valu = 0; bool has_trans = false; + + unsigned num_instrs = 0; + unsigned num_blocks = 0; }; bool @@ -1076,6 +1079,14 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state if (parse_vdst_wait(instr) == 0) return true; + block_state.num_instrs++; + if (block_state.num_instrs > 256 || block_state.num_blocks > 32) { + /* Exit to limit compile times and set wait_vdst to be safe. */ + global_state.wait_vdst = + MIN2(global_state.wait_vdst, block_state.has_trans ? 
0 : block_state.num_valu); + return true; + } + return block_state.num_valu >= global_state.wait_vdst; } @@ -1089,6 +1100,8 @@ handle_lds_direct_valu_hazard_block(LdsDirectVALUHazardGlobalState& global_state global_state.loop_headers_visited.insert(block->index); } + block_state.num_blocks++; + return true; } @@ -1129,6 +1142,9 @@ struct VALUPartialForwardingHazardBlockState { enum VALUPartialForwardingHazardState state = nothing_written; unsigned num_valu_since_read = 0; unsigned num_valu_since_write = 0; + + unsigned num_instrs = 0; + unsigned num_blocks = 0; }; bool @@ -1191,6 +1207,13 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta if (block_state.num_vgprs_read == 0) return true; /* All VGPRs have been written and a hazard was never found. */ + block_state.num_instrs++; + if (block_state.num_instrs > 256 || block_state.num_blocks > 32) { + /* Exit to limit compile times and set hazard_found=true to be safe. */ + global_state.hazard_found = true; + return true; + } + return false; } @@ -1205,6 +1228,8 @@ handle_valu_partial_forwarding_hazard_block(VALUPartialForwardingHazardGlobalSta global_state.loop_headers_visited.insert(block->index); } + block_state.num_blocks++; + return true; } diff --git a/lib/mesa/src/amd/compiler/aco_opcodes.py b/lib/mesa/src/amd/compiler/aco_opcodes.py index 2c11cf255..862696b11 100644 --- a/lib/mesa/src/amd/compiler/aco_opcodes.py +++ b/lib/mesa/src/amd/compiler/aco_opcodes.py @@ -698,85 +698,85 @@ for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SMEM: # VOP2 instructions: 2 inputs, 1 output (+ optional vcc) # TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8 VOP2 = { - # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input/output modifiers - (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False), - (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False), - (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True), - (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True), - (0x05, 
0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True), - (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True), #GFX6,7,10 - ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11 - (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True), #v_mul_dx9_zero_f32 in GFX11 - (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True), - (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False), - (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False), - (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False), - (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False), - ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False), - (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True), - (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True), - (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True), - (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True), - (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False), - (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", False), - (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False), - (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False), - (0x15, 0x15, -1, -1, -1, -1, "v_lshr_b32", False), - (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False), - (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False), - (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False), - (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False), - (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False), - (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False), - (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False), - (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False), - ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False), - (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True), - (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False), - (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False), - (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False), - 
(0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False), # VOP3B only in RDNA - (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False), # VOP3B only in RDNA - (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA - (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA - (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA - (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA - ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True), - ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", True), - ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", True), - (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True), #v_cvt_pk_rtz_f16_f32 in GFX11 - ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True), - ( -1, -1, 0x20, 0x20, 0x33, 0x33, "v_sub_f16", True), - ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True), - ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True), - ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True), - ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False), - ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False), - ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False), - ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False), - ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False), - ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False), - ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False), - ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False), - ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False), - ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True), - ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True), - ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False), - ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False), - ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False), - ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False), - ( -1, -1, 0x33, 0x33, 0x3b, 0x3b, "v_ldexp_f16", False), - ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False), # called 
v_add_nc_u32 in RDNA - ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False), # called v_sub_nc_u32 in RDNA - ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False), # called v_subrev_nc_u32 in RDNA - ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", False), - ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False), - ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False), - ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False), - ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False), #v_dot2acc_f32_f16 in GFX11 + # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input modifiers, output modifiers + (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False, False), + (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False, False), + (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True, True), + (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True, True), + (0x05, 0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True, True), + (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True, True), #GFX6,7,10 + ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True, True), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11 + (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True, True), #v_mul_dx9_zero_f32 in GFX11 + (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True, True), + (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False, False), + (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False, False), + (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False, False), + (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False, False), + ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False, False), + (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True, True), + (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True, True), + (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True, True), + (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True, True), + (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False, False), + (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", 
False, False), + (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False, False), + (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False, False), + (0x15, 0x15, -1, -1, -1, -1, "v_lshr_b32", False, False), + (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False, False), + (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False, False), + (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False, False), + (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False, False), + (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False, False), + (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False, False), + (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False, False), + (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False, False), + ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False, False), + (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True, True), + (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False, False), + (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False, False), + (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False, False), + (0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False, False), # VOP3B only in RDNA + (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False, False), # VOP3B only in RDNA + (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False, False), # VOP3B only in RDNA + (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False, False), # v_add_co_ci_u32 in RDNA + (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False, False), # v_sub_co_ci_u32 in RDNA + (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False, False), # v_subrev_co_ci_u32 in RDNA + ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True, True), + ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", True, True), + ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", True, True), + (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True, False), #v_cvt_pk_rtz_f16_f32 in GFX11 + ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True, True), + ( -1, -1, 0x20, 0x20, 
0x33, 0x33, "v_sub_f16", True, True), + ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True, True), + ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True, True), + ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True, True), + ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False, False), + ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False, False), + ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False, False), + ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False, False), + ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False, False), + ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False, False), + ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False, False), + ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False, False), + ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False, False), + ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True, True), + ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True, True), + ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False, False), + ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False, False), + ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False, False), + ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False, False), + ( -1, -1, 0x33, 0x33, 0x3b, 0x3b, "v_ldexp_f16", False, False), + ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False, False), # called v_add_nc_u32 in RDNA + ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False, False), # called v_sub_nc_u32 in RDNA + ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False, False), # called v_subrev_nc_u32 in RDNA + ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", False, False), + ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False, False), + ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False, False), + ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False, False), + ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False, False), #v_dot2acc_f32_f16 in GFX11 } -for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, modifiers) in VOP2: - opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, modifiers, 
modifiers) +for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod) in VOP2: + opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, in_mod, out_mod) if True: # v_cndmask_b32 can use input modifiers but not output modifiers @@ -1173,18 +1173,18 @@ VOP3 = { ( -1, -1, -1, -1, -1, 0x25f, "v_minmax_f32", True, True), ( -1, -1, -1, -1, -1, 0x260, "v_maxmin_f16", True, True), ( -1, -1, -1, -1, -1, 0x261, "v_minmax_f16", True, True), - ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", True, True), - ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", True, True), - ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", True, True), - ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", True, True), - ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", True, True), - ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", True, True), - ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, True), - ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, True), - ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", True, True), - ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", True, True), - ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", True, True), - ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, True), + ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", False, False), + ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", False, False), + ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", False, False), + ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", False, False), + ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", False, False), + ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", False, False), + ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, False), + ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, False), + ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", False, False), + ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", False, False), + ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", False, False), + ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, False), } for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, 
cls) in default_class(VOP3, InstrClass.Valu32): opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod) diff --git a/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp b/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp index 3c31b468f..535e0315e 100644 --- a/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp +++ b/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp @@ -63,31 +63,36 @@ setup_reduce_temp(Program* program) Temp vtmp(0, RegClass(RegType::vgpr, maxSize).as_linear()); int inserted_at = -1; int vtmp_inserted_at = -1; - bool reduceTmp_in_loop = false; bool vtmp_in_loop = false; for (Block& block : program->blocks) { - /* insert p_end_linear_vgpr after the outermost loop */ - if (reduceTmp_in_loop && block.loop_nest_depth == 0) { - assert(inserted_at == (int)last_top_level_block_idx); - - aco_ptr<Instruction> end{create_instruction<Instruction>( - aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_in_loop ? 2 : 1, 0)}; - end->operands[0] = Operand(reduceTmp); - if (vtmp_in_loop) - end->operands[1] = Operand(vtmp); - /* insert after the phis of the loop exit block */ - std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); - while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) - ++it; - block.instructions.insert(it, std::move(end)); - reduceTmp_in_loop = false; - } - - if (block.kind & block_kind_top_level) + if (block.kind & block_kind_top_level) { last_top_level_block_idx = block.index; + /* TODO: this could be improved in this case: + * start_linear_vgpr + * if (...) { + * use_linear_vgpr + * } + * end_linear_vgpr + * Here, the linear vgpr is used before any phi copies, so this isn't necessary. + */ + if (inserted_at >= 0) { + aco_ptr<Instruction> end{create_instruction<Instruction>( + aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_inserted_at >= 0 ? 
2 : 1, 0)}; + end->operands[0] = Operand(reduceTmp); + if (vtmp_inserted_at >= 0) + end->operands[1] = Operand(vtmp); + /* insert after the phis of the block */ + std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); + while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) + ++it; + block.instructions.insert(it, std::move(end)); + inserted_at = vtmp_inserted_at = -1; + } + } + if (!hasReductions[block.index]) continue; @@ -98,8 +103,6 @@ setup_reduce_temp(Program* program) instr->opcode != aco_opcode::p_interp_gfx11) continue; - reduceTmp_in_loop |= block.loop_nest_depth > 0; - if ((int)last_top_level_block_idx != inserted_at) { reduceTmp = program->allocateTmp(reduceTmp.regClass()); aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>( diff --git a/lib/mesa/src/amd/compiler/aco_register_allocation.cpp b/lib/mesa/src/amd/compiler/aco_register_allocation.cpp index c59c63538..83d19656d 100644 --- a/lib/mesa/src/amd/compiler/aco_register_allocation.cpp +++ b/lib/mesa/src/amd/compiler/aco_register_allocation.cpp @@ -2078,11 +2078,8 @@ get_reg_phi(ra_ctx& ctx, IDSet& live_in, RegisterFile& register_file, /* rename */ std::unordered_map<unsigned, Temp>::iterator orig_it = ctx.orig_names.find(pc.first.tempId()); - Temp orig = pc.first.getTemp(); - if (orig_it != ctx.orig_names.end()) - orig = orig_it->second; - else - ctx.orig_names[pc.second.tempId()] = orig; + Temp orig = orig_it != ctx.orig_names.end() ? 
orig_it->second : pc.first.getTemp(); + ctx.orig_names[pc.second.tempId()] = orig; ctx.renames[block.index][orig.id()] = pc.second.getTemp(); /* otherwise, this is a live-in and we need to create a new phi diff --git a/lib/mesa/src/amd/compiler/aco_scheduler.cpp b/lib/mesa/src/amd/compiler/aco_scheduler.cpp index f09781435..4ab13fe7c 100644 --- a/lib/mesa/src/amd/compiler/aco_scheduler.cpp +++ b/lib/mesa/src/amd/compiler/aco_scheduler.cpp @@ -679,7 +679,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe current->operands[0].size() == 4)) break; /* don't move descriptor loads below buffer loads */ - if (candidate->format == Format::SMEM && current->operands[0].size() == 4 && + if (candidate->isSMEM() && !candidate->operands.empty() && current->operands[0].size() == 4 && candidate->operands[0].size() == 2) break; diff --git a/lib/mesa/src/amd/compiler/aco_spill.cpp b/lib/mesa/src/amd/compiler/aco_spill.cpp index 38a5cf8d4..12cb33325 100644 --- a/lib/mesa/src/amd/compiler/aco_spill.cpp +++ b/lib/mesa/src/amd/compiler/aco_spill.cpp @@ -1662,6 +1662,38 @@ assign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector<bool>& is_as } void +end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spill_temps, + const std::vector<uint32_t>& slots, + const std::unordered_map<Temp, uint32_t>& spills) +{ + std::vector<bool> is_used(vgpr_spill_temps.size()); + for (std::pair<Temp, uint32_t> pair : spills) { + if (pair.first.type() == RegType::sgpr && ctx.is_reloaded[pair.second]) + is_used[slots[pair.second] / ctx.wave_size] = true; + } + + std::vector<Temp> temps; + for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { + if (vgpr_spill_temps[i].id() && !is_used[i]) { + temps.push_back(vgpr_spill_temps[i]); + vgpr_spill_temps[i] = Temp(); + } + } + if (temps.empty()) + return; + + aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>( + aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)}; + 
for (unsigned i = 0; i < temps.size(); i++) + destr->operands[i] = Operand(temps[i]); + + std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); + while (is_phi(*it)) + ++it; + block.instructions.insert(it, std::move(destr)); +} + +void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { std::vector<uint32_t> slots(ctx.interferences.size()); @@ -1709,54 +1741,12 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) /* replace pseudo instructions with actual hardware instructions */ unsigned last_top_level_block_idx = 0; - std::vector<bool> reload_in_loop(vgpr_spill_temps.size()); for (Block& block : ctx.program->blocks) { - /* after loops, we insert a user if there was a reload inside the loop */ - if (block.loop_nest_depth == 0) { - int end_vgprs = 0; - for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { - if (reload_in_loop[i]) - end_vgprs++; - } - - if (end_vgprs > 0) { - aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>( - aco_opcode::p_end_linear_vgpr, Format::PSEUDO, end_vgprs, 0)}; - int k = 0; - for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { - if (reload_in_loop[i]) - destr->operands[k++] = Operand(vgpr_spill_temps[i]); - reload_in_loop[i] = false; - } - /* find insertion point */ - std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); - while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) - ++it; - block.instructions.insert(it, std::move(destr)); - } - } - if (block.kind & block_kind_top_level && !block.linear_preds.empty()) { last_top_level_block_idx = block.index; - /* check if any spilled variables use a created linear vgpr, otherwise destroy them */ - for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) { - if (vgpr_spill_temps[i] == Temp()) - continue; - - bool can_destroy = true; - for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block.index]) { - - if (ctx.interferences[pair.second].first.type() == RegType::sgpr 
&& - slots[pair.second] / ctx.wave_size == i) { - can_destroy = false; - break; - } - } - if (can_destroy) - vgpr_spill_temps[i] = Temp(); - } + end_unused_spill_vgprs(ctx, block, vgpr_spill_temps, slots, ctx.spills_entry[block.index]); } std::vector<aco_ptr<Instruction>>::iterator it; @@ -1818,7 +1808,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) reload_vgpr(ctx, block, instructions, *it, slots); } else { uint32_t spill_slot = slots[spill_id]; - reload_in_loop[spill_slot / ctx.wave_size] = block.loop_nest_depth > 0; /* check if the linear vgpr already exists */ if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) { @@ -1858,58 +1847,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) /* update required scratch memory */ ctx.program->config->scratch_bytes_per_wave += align(ctx.vgpr_spill_slots * 4 * ctx.program->wave_size, 1024); - - /* SSA elimination inserts copies for logical phis right before p_logical_end - * So if a linear vgpr is used between that p_logical_end and the branch, - * we need to ensure logical phis don't choose a definition which aliases - * the linear vgpr. - * TODO: Moving the spills and reloads to before p_logical_end might produce - * slightly better code. 
*/ - for (Block& block : ctx.program->blocks) { - /* loops exits are already handled */ - if (block.logical_preds.size() <= 1) - continue; - - bool has_logical_phis = false; - for (aco_ptr<Instruction>& instr : block.instructions) { - if (instr->opcode == aco_opcode::p_phi) { - has_logical_phis = true; - break; - } else if (instr->opcode != aco_opcode::p_linear_phi) { - break; - } - } - if (!has_logical_phis) - continue; - - std::set<Temp> vgprs; - for (unsigned pred_idx : block.logical_preds) { - Block& pred = ctx.program->blocks[pred_idx]; - for (int i = pred.instructions.size() - 1; i >= 0; i--) { - aco_ptr<Instruction>& pred_instr = pred.instructions[i]; - if (pred_instr->opcode == aco_opcode::p_logical_end) { - break; - } else if (pred_instr->opcode == aco_opcode::p_spill || - pred_instr->opcode == aco_opcode::p_reload) { - vgprs.insert(pred_instr->operands[0].getTemp()); - } - } - } - if (!vgprs.size()) - continue; - - aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>( - aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vgprs.size(), 0)}; - int k = 0; - for (Temp tmp : vgprs) { - destr->operands[k++] = Operand(tmp); - } - /* find insertion point */ - std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin(); - while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi) - ++it; - block.instructions.insert(it, std::move(destr)); - } } } /* end namespace */ diff --git a/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp b/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp index b6b51fe4f..42a860d10 100644 --- a/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp +++ b/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp @@ -361,6 +361,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in exec_val->isVOPC() ? 
get_vcmpx(exec_val->opcode) : aco_opcode::num_opcodes; const bool vopc = v_cmpx_op != aco_opcode::num_opcodes; + /* V_CMPX+DPP returns 0 with reads from disabled lanes, unlike V_CMP+DPP (RDNA3 ISA doc, 7.7) */ + if (vopc && exec_val->isDPP()) + return; + /* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */ const bool save_original_exec = exec_copy->opcode == and_saveexec; /* Position where the original exec mask copy should be inserted. */ @@ -427,11 +431,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in if (vopc) { /* Add one extra definition for exec and copy the VOP3-specific fields if present. */ if (ctx.program->gfx_level < GFX10) { - if (exec_val->isSDWA() || exec_val->isDPP()) { + if (exec_val->isSDWA()) { /* This might work but it needs testing and more code to copy the instruction. */ return; - } - else if (!exec_val->isVOP3()) { + } else if (!exec_val->isVOP3()) { aco_ptr<Instruction> tmp = std::move(exec_val); exec_val.reset(create_instruction<VOPC_instruction>( tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1)); diff --git a/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c b/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c index feda6aa0c..98c99d002 100644 --- a/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c +++ b/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c @@ -2163,7 +2163,7 @@ static LLVMValueRef get_global_address(struct ac_nir_context *ctx, LLVMTypeRef i8_ptr_type = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_GLOBAL); addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, i8_ptr_type, ""); addr = LLVMBuildGEP2(ctx->ac.builder, ctx->ac.i8, addr, &offset, 1, ""); - return addr; + return LLVMBuildPointerCast(ctx->ac.builder, addr, ptr_type, ""); } else { return LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, ""); } diff --git a/lib/mesa/src/amd/registers/gfx11.json b/lib/mesa/src/amd/registers/gfx11.json index 38308f066..39de995af 100644 --- a/lib/mesa/src/amd/registers/gfx11.json +++ 
b/lib/mesa/src/amd/registers/gfx11.json @@ -10911,7 +10911,10 @@ {"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO"}, {"bits": [22, 22], "name": "FDCC_ENABLE"}, {"bits": [23, 23], "name": "DCC_COMPRESS_DISABLE"}, - {"bits": [24, 24], "name": "FRAGMENT_COMPRESS_DISABLE"} + {"bits": [24, 24], "name": "FRAGMENT_COMPRESS_DISABLE"}, + {"bits": [25, 25], "name": "DISABLE_OVERRIDE_INCONSISTENT_KEYS"}, + {"bits": [26, 26], "name": "ENABLE_MAX_COMP_FRAG_OVERRIDE"}, + {"bits": [27, 29], "name": "MAX_COMP_FRAGS"} ] }, "CB_COLOR0_INFO": { @@ -14558,7 +14561,8 @@ "fields": [ {"bits": [1, 1], "name": "EN_REG_RT_INDEX"}, {"bits": [3, 3], "name": "EN_PRIM_PAYLOAD"}, - {"bits": [4, 4], "name": "EN_DRAW_VP"} + {"bits": [4, 4], "name": "EN_DRAW_VP"}, + {"bits": [6, 6], "name": "EN_VRS_RATE"} ] }, "VGT_ESGS_RING_ITEMSIZE": { diff --git a/lib/mesa/src/amd/registers/parse_kernel_headers.py b/lib/mesa/src/amd/registers/parse_kernel_headers.py index 67883f409..b79c0cc7d 100644 --- a/lib/mesa/src/amd/registers/parse_kernel_headers.py +++ b/lib/mesa/src/amd/registers/parse_kernel_headers.py @@ -683,6 +683,34 @@ fields_missing = { "VGT_DRAW_PAYLOAD_CNTL": [["EN_VRS_RATE", 6, 6]], "VGT_SHADER_STAGES_EN": [["PRIMGEN_PASSTHRU_NO_MSG", 26, 26]], }, + 'gfx11': { + "VGT_DRAW_PAYLOAD_CNTL": [["EN_VRS_RATE", 6, 6]], + # Only GFX1103_R2: + "CB_COLOR0_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR1_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR2_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR3_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR4_FDCC_CONTROL": 
[["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR5_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR6_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + "CB_COLOR7_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25], + ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26], + ["MAX_COMP_FRAGS", 27, 29]], + }, } ######### END HARDCODED CONFIGURATION diff --git a/lib/mesa/src/amd/vulkan/layers/radv_rage2.c b/lib/mesa/src/amd/vulkan/layers/radv_rage2.c new file mode 100644 index 000000000..52438db6c --- /dev/null +++ b/lib/mesa/src/amd/vulkan/layers/radv_rage2.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2023 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "radv_private.h" +#include "vk_framebuffer.h" +#include "vk_common_entrypoints.h" + +VKAPI_ATTR void VKAPI_CALL +rage2_CmdBeginRenderPass(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo* pRenderPassBegin, + VkSubpassContents contents) +{ + VK_FROM_HANDLE(vk_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + + VkRenderPassBeginInfo render_pass_begin = { + .sType = pRenderPassBegin->sType, + .pNext = pRenderPassBegin->pNext, + .renderPass = pRenderPassBegin->renderPass, + .framebuffer = pRenderPassBegin->framebuffer, + .clearValueCount = pRenderPassBegin->clearValueCount, + .pClearValues = pRenderPassBegin->pClearValues, + }; + + /* RAGE2 seems to incorrectly set the render area and with dynamic rendering the concept of + * framebuffer dimensions goes away. Forcing the render area to be the framebuffer dimensions + * restores previous logic and it fixes rendering issues. 
+ */ + render_pass_begin.renderArea.offset.x = 0; + render_pass_begin.renderArea.offset.y = 0; + render_pass_begin.renderArea.extent.width = framebuffer->width; + render_pass_begin.renderArea.extent.height = framebuffer->height; + + vk_common_CmdBeginRenderPass(commandBuffer, &render_pass_begin, contents); +} diff --git a/lib/mesa/src/amd/vulkan/meson.build b/lib/mesa/src/amd/vulkan/meson.build index ad75d68b1..b0b8e2ca9 100644 --- a/lib/mesa/src/amd/vulkan/meson.build +++ b/lib/mesa/src/amd/vulkan/meson.build @@ -26,7 +26,7 @@ radv_entrypoints = custom_target( prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'radv', '--device-prefix', 'sqtt', '--device-prefix', 'metro_exodus', - '--device-prefix', 'rra', + '--device-prefix', 'rra', '--device-prefix', 'rage2', ], depend_files : vk_entrypoints_gen_depend_files, ) @@ -34,6 +34,7 @@ radv_entrypoints = custom_target( libradv_files = files( 'bvh/bvh.h', 'layers/radv_metro_exodus.c', + 'layers/radv_rage2.c', 'layers/radv_rra_layer.c', 'layers/radv_sqtt_layer.c', 'winsys/null/radv_null_bo.c', @@ -155,8 +156,13 @@ libvulkan_radeon_ld_args = [] libvulkan_radeon_link_depends = [] if with_ld_version_script - libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan.sym')] - libvulkan_radeon_link_depends += files('vulkan.sym') + if with_platform_android + libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan-android.sym')] + libvulkan_radeon_link_depends += files('vulkan-android.sym') + else + libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan.sym')] + libvulkan_radeon_link_depends += files('vulkan.sym') + endif endif radv_build_id = get_option('radv-build-id') diff --git a/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c b/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c index 93044c6e6..1bee2b0d0 100644 --- 
a/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c +++ b/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c @@ -99,7 +99,7 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); - radeon_emit(cmd_buffer->cs, bytes); + radeon_emit(cmd_buffer->cs, bytes - 1); radeon_emit(cmd_buffer->cs, 0); radeon_emit(cmd_buffer->cs, src_address); radeon_emit(cmd_buffer->cs, src_address >> 32); @@ -139,8 +139,9 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct (tmz ? 4 : 0)) | dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 | 1u << 31); - radeon_emit(cmd_buffer->cs, - (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8)); + radeon_emit( + cmd_buffer->cs, + (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8)); radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32)); radeon_emit(cmd_buffer->cs, 0); radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16)); diff --git a/lib/mesa/src/amd/vulkan/vulkan-android.sym b/lib/mesa/src/amd/vulkan/vulkan-android.sym new file mode 100644 index 000000000..2ca40faa0 --- /dev/null +++ b/lib/mesa/src/amd/vulkan/vulkan-android.sym @@ -0,0 +1,16 @@ +{ + global: + vk_icdGetInstanceProcAddr; + vk_icdGetPhysicalDeviceProcAddr; + vk_icdNegotiateLoaderICDInterfaceVersion; + + # Android looks for this global in HAL modules. In the source it occurs + # as HAL_MODULE_INFO_SYM (which is just a #define for HMI) and it's an + # instance of struct hwvulkan_module_t. + HMI; + + local: + # When static linking LLVM, all its symbols are public API. + # That may cause symbol collision, so explicitly demote everything. 
+ *; +}; diff --git a/lib/mesa/src/broadcom/common/v3d_limits.h b/lib/mesa/src/broadcom/common/v3d_limits.h index 755aedd78..492740c44 100644 --- a/lib/mesa/src/broadcom/common/v3d_limits.h +++ b/lib/mesa/src/broadcom/common/v3d_limits.h @@ -24,6 +24,8 @@ #ifndef V3D_LIMITS_H #define V3D_LIMITS_H +#define V3D_CL_MAX_INSTR_SIZE 25 + /* Number of channels a QPU thread executes in parallel. Also known as * gl_SubGroupSizeARB. */ diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c index 55cf122cc..d1181bac4 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c @@ -114,14 +114,18 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space) * end with a 'return from sub list' command. */ bool needs_return_from_sub_list = false; - if (cl->job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) { - if (cl->size > 0) { + if (cl->job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY && cl->size > 0) needs_return_from_sub_list = true; - space += cl_packet_length(RETURN_FROM_SUB_LIST); - } - } else { - space += cl_packet_length(BRANCH); - } + + /* + * The CLE processor in the simulator tries to read V3D_CL_MAX_INSTR_SIZE + * bytes from the CL for each new instruction. If the last instruction in our + * CL is smaller than that, and there are not at least V3D_CL_MAX_INSTR_SIZE + * bytes until the end of the BO, it will read out of bounds and possibly + * cause a GMP violation interrupt to trigger. Ensure we always have at + * least that many bytes available to read with the last instruction. 
+ */ + space += V3D_CL_MAX_INSTR_SIZE; if (v3dv_cl_offset(cl) + space <= cl->size) return; diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c index c92794bd9..dba43223d 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -2789,8 +2789,9 @@ cmd_buffer_binning_sync_required(struct v3dv_cmd_buffer *cmd_buffer, return false; } -static void -consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job) +void +v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_job *job) { job->needs_bcl_sync = true; cmd_buffer->state.barrier.bcl_buffer_access = 0; @@ -2890,7 +2891,7 @@ v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer, assert(!job->needs_bcl_sync); if (cmd_buffer_binning_sync_required(cmd_buffer, pipeline, indexed, indirect)) { - consume_bcl_sync(cmd_buffer, job); + v3dv_cmd_buffer_consume_bcl_sync(cmd_buffer, job); } } diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_private.h b/lib/mesa/src/broadcom/vulkan/v3dv_private.h index 27b0646d3..bbdb1ef57 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dv_private.h +++ b/lib/mesa/src/broadcom/vulkan/v3dv_private.h @@ -1660,6 +1660,9 @@ void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer, void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst, struct v3dv_barrier_state *src); +void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, + struct v3dv_job *job); + bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state, VkImageAspectFlags aspect, uint32_t first_subpass_idx, diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c index 8f78dfb09..5219f4801 100644 --- a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c +++ b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c @@ -1623,6 +1623,20 @@ 
v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary, { assert(primary->state.job); + /* Typically we postpone applying binning syncs until we see a draw call + * that may actually access protected resources in the binning stage. However, + * if the draw calls are recorded in a secondary command buffer and the + * barriers were recorded in a primary command buffer, that won't work + * and we will have to check if we need a binning sync when executing the + * secondary. + */ + struct v3dv_job *primary_job = primary->state.job; + if (primary_job->serialize && + (primary->state.barrier.bcl_buffer_access || + primary->state.barrier.bcl_image_access)) { + v3dv_cmd_buffer_consume_bcl_sync(primary, primary_job); + } + /* Emit occlusion query state if needed so the draw calls inside our * secondaries update the counters. */ @@ -1668,7 +1682,7 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary, * the RETURN_FROM_SUB_LIST into the primary job to skip the * branch? 
*/ - struct v3dv_job *primary_job = primary->state.job; + primary_job = primary->state.job; if (!primary_job || secondary_job->serialize || pending_barrier.dst_mask) { const bool needs_bcl_barrier = diff --git a/lib/mesa/src/compiler/nir/nir_deref.c b/lib/mesa/src/compiler/nir/nir_deref.c index e03be3ac6..c883d2229 100644 --- a/lib/mesa/src/compiler/nir/nir_deref.c +++ b/lib/mesa/src/compiler/nir/nir_deref.c @@ -1134,7 +1134,12 @@ opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast) if (glsl_get_struct_field_offset(parent->type, 0) != 0) return false; - if (cast->type != glsl_get_struct_field(parent->type, 0)) + const struct glsl_type *field_type = glsl_get_struct_field(parent->type, 0); + if (cast->type != field_type) + return false; + + /* we can't drop the stride information */ + if (cast->cast.ptr_stride != glsl_get_explicit_stride(field_type)) return false; nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0); diff --git a/lib/mesa/src/compiler/nir/nir_range_analysis.c b/lib/mesa/src/compiler/nir/nir_range_analysis.c index 56fd3f092..06dd3eea5 100644 --- a/lib/mesa/src/compiler/nir/nir_range_analysis.c +++ b/lib/mesa/src/compiler/nir/nir_range_analysis.c @@ -1315,10 +1315,11 @@ static const nir_unsigned_upper_bound_config default_ub_config = { }, }; -uint32_t -nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, - nir_ssa_scalar scalar, - const nir_unsigned_upper_bound_config *config) +static uint32_t +nir_unsigned_upper_bound_impl(nir_shader *shader, struct hash_table *range_ht, + nir_ssa_scalar scalar, + const nir_unsigned_upper_bound_config *config, + unsigned stack_depth) { assert(scalar.def->bit_size <= 32); @@ -1335,6 +1336,11 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, uint32_t max = bitmask(scalar.def->bit_size); + /* Avoid stack overflows. 200 is just a random setting, that happened to work with wine stacks + * which tend to be smaller than normal Linux ones. 
*/ + if (stack_depth >= 200) + return max; + if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) { uint32_t res = max; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr); @@ -1389,7 +1395,8 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, break; case nir_intrinsic_mbcnt_amd: { uint32_t src0 = config->max_subgroup_size - 1; - uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config); + uint32_t src1 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), + config, stack_depth + 1); if (src0 + src1 < src0) res = max; /* overflow */ @@ -1430,7 +1437,8 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, case nir_intrinsic_exclusive_scan: { nir_op op = nir_intrinsic_reduction_op(intrin); if (op == nir_op_umin || op == nir_op_umax || op == nir_op_imin || op == nir_op_imax) - res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config); + res = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), + config, stack_depth + 1); break; } case nir_intrinsic_read_first_invocation: @@ -1445,11 +1453,14 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, case nir_intrinsic_quad_swap_diagonal: case nir_intrinsic_quad_swizzle_amd: case nir_intrinsic_masked_swizzle_amd: - res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config); + res = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), + config, stack_depth + 1); break; case nir_intrinsic_write_invocation_amd: { - uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config); - uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config); + uint32_t src0 = 
nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), + config, stack_depth + 1); + uint32_t src1 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), + config, stack_depth + 1); res = MAX2(src0, src1); break; } @@ -1486,11 +1497,11 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, _mesa_set_destroy(visited, NULL); for (unsigned i = 0; i < def_count; i++) - res = MAX2(res, nir_unsigned_upper_bound(shader, range_ht, defs[i], config)); + res = MAX2(res, nir_unsigned_upper_bound_impl(shader, range_ht, defs[i], config, stack_depth + 1)); } else { nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) { - res = MAX2(res, nir_unsigned_upper_bound( - shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config)); + res = MAX2(res, nir_unsigned_upper_bound_impl( + shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config, stack_depth + 1)); } } @@ -1541,12 +1552,15 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, return max; } - uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0), config); + uint32_t src0 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0), + config, stack_depth + 1); uint32_t src1 = max, src2 = max; if (nir_op_infos[op].num_inputs > 1) - src1 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1), config); + src1 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1), + config, stack_depth + 1); if (nir_op_infos[op].num_inputs > 2) - src2 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2), config); + src2 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2), + config, stack_depth + 1); uint32_t res = max; switch (op) { @@ -1683,6 +1697,14 @@ nir_unsigned_upper_bound(nir_shader *shader, 
struct hash_table *range_ht, return max; } +uint32_t +nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht, + nir_ssa_scalar scalar, + const nir_unsigned_upper_bound_config *config) +{ + return nir_unsigned_upper_bound_impl(shader, range_ht, scalar, config, 0); +} + bool nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, nir_ssa_scalar ssa, unsigned const_val, diff --git a/lib/mesa/src/etnaviv/drm/etnaviv_bo.c b/lib/mesa/src/etnaviv/drm/etnaviv_bo.c index d790823bc..aae81a015 100644 --- a/lib/mesa/src/etnaviv/drm/etnaviv_bo.c +++ b/lib/mesa/src/etnaviv/drm/etnaviv_bo.c @@ -138,8 +138,8 @@ static struct etna_bo *lookup_bo(void *tbl, uint32_t handle) /* found, incr refcnt and return: */ bo = etna_bo_ref(entry->data); - /* don't break the bucket if this bo was found in one */ - if (list_is_linked(&bo->list)) { + /* don't break the bucket/zombie list if this bo was found in one */ + if (!list_is_empty(&bo->list)) { VG_BO_OBTAIN(bo); etna_device_ref(bo->dev); list_delinit(&bo->list); diff --git a/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c b/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c index 3630d155d..657ef0c05 100644 --- a/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c +++ b/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c @@ -135,6 +135,8 @@ void etna_cmd_stream_del(struct etna_cmd_stream *stream) _mesa_hash_table_destroy(priv->bo_table, NULL); free(stream->buffer); + free(priv->bos); + free(priv->submit.bos); free(priv->submit.relocs); free(priv->submit.pmrs); free(priv); diff --git a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c index 6f304634c..0acb45d71 100644 --- a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c @@ -5387,6 +5387,11 @@ tu_CmdEndRendering(VkCommandBuffer commandBuffer) if (cmd_buffer->state.suspend_resume == SR_IN_PRE_CHAIN) { cmd_buffer->trace_renderpass_end = u_trace_end_iterator(&cmd_buffer->trace); 
tu_save_pre_chain(cmd_buffer); + + /* Even though we don't call tu_cmd_render here, the renderpass is finished + * and draw states should be disabled. + */ + tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs); } else { tu_cmd_render(cmd_buffer); } diff --git a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c index 0841d56f1..d4d3c9735 100644 --- a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c +++ b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c @@ -3158,7 +3158,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, for (unsigned j = 0; j < ARRAY_SIZE(library->shaders); j++) { if (library->shaders[j].nir) { assert(!nir[j]); - nir[j] = nir_shader_clone(NULL, library->shaders[j].nir); + nir[j] = nir_shader_clone(builder->mem_ctx, + library->shaders[j].nir); keys[j] = library->shaders[j].key; must_compile = true; } diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 78a3c7c87..aa18851d7 100644 --- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -2152,9 +2152,12 @@ visit_intrinsic(struct lp_build_nir_context *bld_base, visit_shared_atomic(bld_base, instr, result); break; case nir_intrinsic_control_barrier: - case nir_intrinsic_scoped_barrier: visit_barrier(bld_base); break; + case nir_intrinsic_scoped_barrier: + if (nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE) + visit_barrier(bld_base); + break; case nir_intrinsic_group_memory_barrier: case nir_intrinsic_memory_barrier: case nir_intrinsic_memory_barrier_shared: diff --git a/lib/mesa/src/gallium/drivers/crocus/crocus_context.c b/lib/mesa/src/gallium/drivers/crocus/crocus_context.c index 903be3585..77ddb2e52 100644 --- a/lib/mesa/src/gallium/drivers/crocus/crocus_context.c +++ b/lib/mesa/src/gallium/drivers/crocus/crocus_context.c @@ -61,7 +61,7 @@ crocus_init_identifier_bo(struct crocus_context *ice) 
ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE; ice->workaround_offset = ALIGN( - intel_debug_write_identifiers(bo_map, 4096, "Crocus") + 8, 8); + intel_debug_write_identifiers(bo_map, 4096, "Crocus"), 32); crocus_bo_unmap(ice->workaround_bo); diff --git a/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c b/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c index 7b4d50a66..789a04db1 100644 --- a/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c +++ b/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c @@ -189,11 +189,8 @@ crocus_resource_configure_main(const struct crocus_screen *screen, tiling_flags = 1 << res->mod_info->tiling; } else { - if (templ->bind & PIPE_BIND_RENDER_TARGET && devinfo->ver < 6) { - modifier = I915_FORMAT_MOD_X_TILED; - res->mod_info = isl_drm_modifier_get_info(modifier); - tiling_flags = 1 << res->mod_info->tiling; - } + if (templ->bind & PIPE_BIND_RENDER_TARGET && devinfo->ver < 6) + tiling_flags &= ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT; /* Use linear for staging buffers */ if (templ->usage == PIPE_USAGE_STAGING || templ->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR) ) diff --git a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp index 26638d429..0e7c5b45c 100644 --- a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp +++ b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp @@ -310,7 +310,7 @@ d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9( dxvaStructure.ref_frame_coded_width[i] = pipe_vp9->ref[i]->width; dxvaStructure.ref_frame_coded_height[i] = pipe_vp9->ref[i]->height; } else - dxvaStructure.ref_frame_map[i].bPicEntry = DXVA_VP9_INVALID_PICTURE_INDEX; + dxvaStructure.ref_frame_map[i].bPicEntry = DXVA_VP9_INVALID_PICTURE_ENTRY; } /* DXVA spec The enums and indices for ref_frame_sign_bias[] are defined */ @@ -319,7 +319,7 @@ d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9( const uint8_t signbias_alt_index = 3; 
/* AssociatedFlag When Index7Bits does not contain an index to a valid uncompressed surface, the value shall be set to 127, to indicate that the index is invalid. */ - memset(&dxvaStructure.frame_refs[0], DXVA_VP9_INVALID_PICTURE_INDEX, sizeof(dxvaStructure.frame_refs)); + memset(&dxvaStructure.frame_refs[0], DXVA_VP9_INVALID_PICTURE_ENTRY, sizeof(dxvaStructure.frame_refs)); if (pipe_vp9->ref[pipe_vp9->picture_parameter.pic_fields.last_ref_frame]) { /* AssociatedFlag When Index7Bits does not contain an index to a valid uncompressed surface, the value shall be set to 127, to indicate that the index is invalid. */ @@ -348,10 +348,16 @@ d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9( dxvaStructure.filter_level = pipe_vp9->picture_parameter.filter_level; dxvaStructure.sharpness_level = pipe_vp9->picture_parameter.sharpness_level; - bool use_last_frame_mvs = !pipe_vp9->picture_parameter.pic_fields.error_resilient_mode && pipe_vp9->picture_parameter.pic_fields.show_frame; + bool use_prev_in_find_mv_refs = + !pipe_vp9->picture_parameter.pic_fields.error_resilient_mode && + !(pipe_vp9->picture_parameter.pic_fields.frame_type == 0 /*KEY_FRAME*/ || pipe_vp9->picture_parameter.pic_fields.intra_only) && + pipe_vp9->picture_parameter.pic_fields.prev_show_frame && + pipe_vp9->picture_parameter.frame_width == pipe_vp9->picture_parameter.prev_frame_width && + pipe_vp9->picture_parameter.frame_height == pipe_vp9->picture_parameter.prev_frame_height; + dxvaStructure.wControlInfoFlags = (pipe_vp9->picture_parameter.mode_ref_delta_enabled << 0) | (pipe_vp9->picture_parameter.mode_ref_delta_update << 1) | - (use_last_frame_mvs << 2) | + (use_prev_in_find_mv_refs << 2) | (0 << 3); for (uint32_t i = 0; i < 4; i++) diff --git a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h index 322daf7bd..24aa032ef 100644 --- a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h +++ 
b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h @@ -27,7 +27,8 @@ #include "d3d12_video_types.h" -constexpr uint16_t DXVA_VP9_INVALID_PICTURE_INDEX = 0xFF; +constexpr uint16_t DXVA_VP9_INVALID_PICTURE_INDEX = 0x7F; +constexpr uint16_t DXVA_VP9_INVALID_PICTURE_ENTRY = 0xFF; #pragma pack(push, BeforeDXVApacking, 1) diff --git a/lib/mesa/src/gallium/drivers/iris/iris_batch.c b/lib/mesa/src/gallium/drivers/iris/iris_batch.c index ec32d88cc..d598fd701 100644 --- a/lib/mesa/src/gallium/drivers/iris/iris_batch.c +++ b/lib/mesa/src/gallium/drivers/iris/iris_batch.c @@ -1053,10 +1053,10 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line) } - uint64_t start_ts = intel_ds_begin_submit(batch->ds); - uint64_t submission_id = batch->ds->submission_id; + uint64_t start_ts = intel_ds_begin_submit(&batch->ds); + uint64_t submission_id = batch->ds.submission_id; int ret = submit_batch(batch); - intel_ds_end_submit(batch->ds, start_ts); + intel_ds_end_submit(&batch->ds, start_ts); /* When batch submission fails, our end-of-batch syncobj remains * unsignalled, and in fact is not even considered submitted. 
diff --git a/lib/mesa/src/gallium/drivers/iris/iris_batch.h b/lib/mesa/src/gallium/drivers/iris/iris_batch.h index a1dfa6e63..437352a5a 100644 --- a/lib/mesa/src/gallium/drivers/iris/iris_batch.h +++ b/lib/mesa/src/gallium/drivers/iris/iris_batch.h @@ -197,7 +197,7 @@ struct iris_batch { struct u_trace trace; /** Batch wrapper structure for perfetto */ - struct intel_ds_queue *ds; + struct intel_ds_queue ds; }; void iris_init_batches(struct iris_context *ice, int priority); diff --git a/lib/mesa/src/gallium/drivers/iris/iris_state.c b/lib/mesa/src/gallium/drivers/iris/iris_state.c index b281393dc..30a0ba17e 100644 --- a/lib/mesa/src/gallium/drivers/iris/iris_state.c +++ b/lib/mesa/src/gallium/drivers/iris/iris_state.c @@ -2799,6 +2799,21 @@ iris_create_surface(struct pipe_context *ctx, &res->surf, view, &isl_surf, view, &offset_B, &tile_x_el, &tile_y_el); + + /* On Broadwell, HALIGN and VALIGN are specified in pixels and are + * hard-coded to align to exactly the block size of the compressed + * texture. This means that, when reinterpreted as a non-compressed + * texture, the tile offsets may be anything. + * + * We need them to be multiples of 4 to be usable in RENDER_SURFACE_STATE, + * so force the state tracker to take fallback paths if they're not. 
+ */ +#if GFX_VER == 8 + if (tile_x_el % 4 != 0 || tile_y_el % 4 != 0) { + ok = false; + } +#endif + if (!ok) { free(surf); return NULL; diff --git a/lib/mesa/src/gallium/drivers/iris/iris_utrace.c b/lib/mesa/src/gallium/drivers/iris/iris_utrace.c index 7f49826d7..e66a56092 100644 --- a/lib/mesa/src/gallium/drivers/iris/iris_utrace.c +++ b/lib/mesa/src/gallium/drivers/iris/iris_utrace.c @@ -95,7 +95,7 @@ iris_utrace_delete_flush_data(struct u_trace_context *utctx, void iris_utrace_flush(struct iris_batch *batch, uint64_t submission_id) { struct intel_ds_flush_data *flush_data = malloc(sizeof(*flush_data)); - intel_ds_flush_data_init(flush_data, batch->ds, submission_id); + intel_ds_flush_data_init(flush_data, &batch->ds, submission_id); u_trace_flush(&batch->trace, flush_data, false); } @@ -122,9 +122,8 @@ void iris_utrace_init(struct iris_context *ice) iris_utrace_delete_flush_data); for (int i = 0; i < IRIS_BATCH_COUNT; i++) { - ice->batches[i].ds = - intel_ds_device_add_queue(&ice->ds, "%s", - iris_batch_name_to_string(i)); + intel_ds_device_init_queue(&ice->ds, &ice->batches[i].ds, "%s", + iris_batch_name_to_string(i)); } } diff --git a/lib/mesa/src/gallium/drivers/lima/lima_resource.c b/lib/mesa/src/gallium/drivers/lima/lima_resource.c index 260212178..ad55fa5c8 100644 --- a/lib/mesa/src/gallium/drivers/lima/lima_resource.c +++ b/lib/mesa/src/gallium/drivers/lima/lima_resource.c @@ -59,7 +59,10 @@ lima_resource_create_scanout(struct pipe_screen *pscreen, struct lima_screen *screen = lima_screen(pscreen); struct renderonly_scanout *scanout; struct winsys_handle handle; - struct pipe_resource *pres; + + struct lima_resource *res = CALLOC_STRUCT(lima_resource); + if (!res) + return NULL; struct pipe_resource scanout_templat = *templat; scanout_templat.width0 = width; @@ -71,20 +74,31 @@ lima_resource_create_scanout(struct pipe_screen *pscreen, if (!scanout) return NULL; + res->base = *templat; + res->base.screen = pscreen; + 
pipe_reference_init(&res->base.reference, 1); + res->levels[0].offset = handle.offset; + res->levels[0].stride = handle.stride; + assert(handle.type == WINSYS_HANDLE_TYPE_FD); - pres = pscreen->resource_from_handle(pscreen, templat, &handle, - PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE); + res->bo = lima_bo_import(screen, &handle); + if (!res->bo) { + FREE(res); + return NULL; + } + + res->modifier_constant = true; close(handle.handle); - if (!pres) { + if (!res->bo) { renderonly_scanout_destroy(scanout, screen->ro); + FREE(res); return NULL; } - struct lima_resource *res = lima_resource(pres); res->scanout = scanout; - return pres; + return &res->base; } static uint32_t diff --git a/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h index e8a2b4674..7cfe86175 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h +++ b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h @@ -1,7 +1,6 @@ // DriConf options specific to radeonsi DRI_CONF_SECTION_PERFORMANCE DRI_CONF_ADAPTIVE_SYNC(true) -DRI_CONF_MESA_GLTHREAD(true) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG diff --git a/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c b/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c index 414a8d699..1c8038325 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c @@ -278,6 +278,20 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec, } } + /* if reference picture exists, however no reference picture found at the end + curr_pic_ref_frame_num == 0, which is not reasonable, should be corrected. */ + if (result.used_for_reference_flags && (result.curr_pic_ref_frame_num == 0)) { + for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) { + result.ref_frame_list[i] = pic->ref[i] ? 
+ (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff; + if (result.ref_frame_list[i] != 0xff) { + result.curr_pic_ref_frame_num++; + result.non_existing_frame_flags &= ~(1 << i); + break; + } + } + } + for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) { if (result.ref_frame_list[i] != 0xff) { dec->h264_valid_ref_num[i] = result.frame_num_list[i]; @@ -3160,7 +3174,8 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, case CHIP_GFX1100: case CHIP_GFX1101: case CHIP_GFX1102: - case CHIP_GFX1103: + case CHIP_GFX1103_R1: + case CHIP_GFX1103_R2: dec->jpg.direct_reg = true; dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11; dec->av1_version = RDECODE_AV1_VER_1; diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c b/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c index b1b408b8f..c5e8b9eed 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c @@ -139,7 +139,7 @@ bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_texture *sdst radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); - radeon_emit(bytes); + radeon_emit(bytes - 1); radeon_emit(0); radeon_emit(src_address); radeon_emit(src_address >> 32); diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index 218a3c2a3..9e717b926 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -570,7 +570,8 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, * overriden by other states. (e.g. per-sample interpolation) * Interpolated colors are stored after the preloaded VGPRs. 
*/ -void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key) +void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key, + bool separate_prolog) { LLVMValueRef ret, func; int num_returns, i, num_color_channels; @@ -694,13 +695,13 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part /* Read LINEAR_SAMPLE. */ for (i = 0; i < 2; i++) - linear_sample[i] = LLVMGetParam(func, base + 6 + i); + linear_sample[i] = LLVMGetParam(func, base + (separate_prolog ? 6 : 9) + i); /* Overwrite LINEAR_CENTER. */ for (i = 0; i < 2; i++) - ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 8 + i, ""); + ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + (separate_prolog ? 8 : 11) + i, ""); /* Overwrite LINEAR_CENTROID. */ for (i = 0; i < 2; i++) - ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 10 + i, ""); + ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + (separate_prolog ? 10 : 13) + i, ""); } /* Force center interpolation. */ @@ -825,7 +826,8 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part * Build the pixel shader epilog function. This handles everything that must be * emulated for pixel shader exports. 
(alpha-test, format conversions, etc) */ -void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key) +void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key, + UNUSED bool separate_epilog) { int i; struct si_ps_exports exp = {}; @@ -947,7 +949,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader si_get_ps_prolog_key(shader, &prolog_key, false); if (si_need_ps_prolog(&prolog_key)) { - si_llvm_build_ps_prolog(ctx, &prolog_key); + si_llvm_build_ps_prolog(ctx, &prolog_key, false); parts[num_parts++] = ctx->main_fn; } @@ -956,7 +958,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader union si_shader_part_key epilog_key; si_get_ps_epilog_key(shader, &epilog_key); - si_llvm_build_ps_epilog(ctx, &epilog_key); + si_llvm_build_ps_epilog(ctx, &epilog_key, false); parts[num_parts++] = ctx->main_fn; si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false); diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 5a5665a51..03da1e3e7 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -602,7 +602,8 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx) * Compile the TCS epilog function. This writes tesselation factors to memory * based on the output primitive type of the tesselator (determined by TES). 
*/ -void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key) +void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key, + UNUSED bool separate_epilog) { memset(&ctx->args, 0, sizeof(ctx->args)); diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 950daf49f..ca6c4c6f8 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -923,7 +923,8 @@ void si_llvm_vs_build_end(struct si_shader_context *ctx) * (InstanceID + StartInstance), * (InstanceID / 2 + StartInstance) */ -void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key) +void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key, + UNUSED bool separate_prolog) { LLVMTypeRef *returns; LLVMValueRef ret, func; diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 16012344a..b3c0f85a3 100644 --- a/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -673,7 +673,7 @@ unsigned si_get_shader_prefetch_size(struct si_shader *shader) /* Return 0 for some A0 chips only. Other chips don't need it. */ if ((shader->selector->screen->info.family == CHIP_GFX1100 || shader->selector->screen->info.family == CHIP_GFX1102 || - shader->selector->screen->info.family == CHIP_GFX1103) && + shader->selector->screen->info.family == CHIP_GFX1103_R1) && shader->selector->screen->info.chip_rev == 0) return 0; @@ -1234,7 +1234,8 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader (sctx->gfx_level >= GFX10 ? 
radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func)); ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs, - C_00B204_CU_EN_GFX10, 16, &sctx->screen->info, + sctx->gfx_level >= GFX11 ? C_00B204_CU_EN_GFX11 : C_00B204_CU_EN_GFX10, 16, + &sctx->screen->info, (void (*)(void*, unsigned, uint32_t)) (sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func)); sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) & diff --git a/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json b/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json index f42f4c8d7..83d972b83 100644 --- a/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json +++ b/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json @@ -31,9 +31,15 @@ "VkPhysicalDeviceCustomBorderColorFeaturesEXT": { "customBorderColorWithoutFormat": true }, + "VkPhysicalDeviceBorderColorSwizzleFeaturesEXT": { + "borderColorSwizzleFromImage": true + }, "VkPhysicalDeviceLineRasterizationFeaturesEXT": { "rectangularLines": true, "bresenhamLines": true + }, + "VkPhysicalDeviceProvokingVertexFeaturesEXT": { + "provokingVertexLast": true } }, "properties": { @@ -49,13 +55,22 @@ "features": { "VkPhysicalDeviceScalarBlockLayoutFeaturesEXT": { "scalarBlockLayout": true + }, + "VkPhysicalDeviceTimelineSemaphoreFeaturesKHR": { + "timelineSemaphore": true + }, + "VkPhysicalDeviceImagelessFramebufferFeatures": { + "imagelessFramebuffer": true } } }, "gl21_baseline_vk12": { "features": { "VkPhysicalDeviceVulkan12Features": { - "scalarBlockLayout": true + "scalarBlockLayout": true, + "drawIndirectCount": true, + "imagelessFramebuffer": true, + "timelineSemaphore": true } } }, @@ -72,6 +87,12 @@ "features": { "VkPhysicalDeviceFeatures": { "independentBlend": true + }, + "VkPhysicalDeviceTransformFeedbackFeaturesEXT": { + "transformFeedback": true + }, + "VkPhysicalDeviceConditionalRenderingFeaturesEXT": { + 
"conditionalRendering": true } } }, @@ -107,6 +128,9 @@ "VkPhysicalDeviceFeatures": { "occlusionQueryPrecise": true, "dualSrcBlend": true + }, + "VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT": { + "vertexAttributeInstanceRateDivisor": true } } }, @@ -170,6 +194,9 @@ "shaderStorageImageWriteWithoutFormat": true, "vertexPipelineStoresAndAtomics": true, "fragmentStoresAndAtomics": true + }, + "VkPhysicalDeviceImage2DViewOf3DFeaturesEXT": { + "image2DViewOf3D": true } } }, @@ -271,9 +298,6 @@ } }, "gl44_baseline": { - "extensions": { - "VK_KHR_sampler_mirror_clamp_to_edge": 1 - }, "formats": { "VK_FORMAT_B10G11R11_UFLOAT_PACK32": { "VkFormatProperties": { @@ -284,6 +308,18 @@ } } }, + "gl44_baseline_ext": { + "extensions": { + "VK_KHR_sampler_mirror_clamp_to_edge": 1 + } + }, + "gl44_baseline_vk12": { + "features": { + "VkPhysicalDeviceVulkan12Features": { + "samplerMirrorClampToEdge": true + } + } + }, "gl45_baseline": { "features": { "VkPhysicalDeviceFeatures": { @@ -698,7 +734,8 @@ "gl42_baseline", [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], "gl43_baseline", - "gl44_baseline" + "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ] ] }, "VP_ZINK_gl45_baseline": { @@ -720,6 +757,7 @@ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], "gl43_baseline", "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ], "gl45_baseline" ] }, @@ -742,6 +780,7 @@ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], "gl43_baseline", "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ], "gl45_baseline", "gl46_baseline" ] @@ -765,6 +804,7 @@ [ "gl42_baseline_vk10", "gl42_baseline_vk12" ], "gl43_baseline", "gl44_baseline", + [ "gl44_baseline_ext", "gl44_baseline_vk12" ], "gl45_baseline", "gl46_baseline", "gl46_optimal", diff --git a/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt b/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt index e61edddab..54d5bbc7e 100644 --- a/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt +++ 
b/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt @@ -53,7 +53,6 @@ glx@glx_ext_import_context@imported context has same context id,Fail glx@glx_ext_import_context@make current- multi process,Fail glx@glx_ext_import_context@make current- single process,Fail glx@glx_ext_import_context@query context info,Fail -shaders@glsl-fs-pointcoord,Fail shaders@point-vertex-id divisor,Fail shaders@point-vertex-id gl_instanceid,Fail shaders@point-vertex-id gl_instanceid divisor,Fail @@ -92,9 +91,7 @@ spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail spec@!opengl 2.1@pbo,Fail spec@!opengl 2.1@pbo@test_polygon_stip,Fail spec@!opengl 2.1@polygon-stipple-fs,Fail -spec@!opengl es 2.0@glsl-fs-pointcoord,Fail spec@arb_depth_texture@depth-tex-modes,Fail -spec@arb_framebuffer_object@fbo-gl_pointcoord,Fail spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail @@ -156,33 +153,6 @@ spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-intel_external_sampler_only,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_attributes,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_hints,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-missing_attributes,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-reimport-bug,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888,Fail 
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail -spec@ext_image_dma_buf_import@ext_image_dma_buf_import-unsupported_format,Fail spec@ext_packed_float@query-rgba-signed-components,Fail spec@ext_texture_swizzle@depth_texture_mode_and_swizzle,Fail spec@intel_performance_query@intel_performance_query-issue_2235,Fail diff --git a/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt b/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt index 3fa433d8f..02c86646a 100644 --- a/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt +++ b/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt @@ -38,7 +38,6 @@ glx@glx-swap-pixmap-bad,Fail spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail 
-shaders@glsl-fs-pointcoord,Fail shaders@point-vertex-id divisor,Fail shaders@point-vertex-id gl_instanceid divisor,Fail shaders@point-vertex-id gl_instanceid,Fail @@ -59,7 +58,6 @@ spec@arb_fragment_program_shadow@txp-shadow2d,Fail spec@arb_fragment_program_shadow@txp-shadow2drect,Fail spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@MS8,Fail -spec@arb_framebuffer_object@fbo-gl_pointcoord,Fail spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail @@ -388,7 +386,6 @@ spec@!opengl 2.1@pbo,Fail spec@!opengl 2.1@pbo@test_polygon_stip,Fail spec@!opengl 2.1@polygon-stipple-fs,Fail spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail -spec@!opengl es 2.0@glsl-fs-pointcoord,Fail spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail diff --git a/lib/mesa/src/gallium/drivers/zink/zink_blit.c b/lib/mesa/src/gallium/drivers/zink/zink_blit.c index 135378f4d..df718afc7 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_blit.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_blit.c @@ -128,7 +128,7 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *n return false; if (util_format_is_depth_or_stencil(info->dst.format) && - info->dst.format != info->src.format) + (info->dst.format != info->src.format || info->filter == PIPE_TEX_FILTER_LINEAR)) return false; /* vkCmdBlitImage must not be used for multisampled source or destination images. 
*/ @@ -252,7 +252,8 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *n VKCTX(CmdBlitImage)(cmdbuf, src->obj->image, src->layout, dst->obj->image, dst->layout, 1, &region, - zink_filter(info->filter)); + /* VUID-vkCmdBlitImage-srcImage-00232: zs formats must use NEAREST filtering */ + util_format_is_depth_or_stencil(info->src.format) ? VK_FILTER_NEAREST : zink_filter(info->filter)); return true; } @@ -355,7 +356,7 @@ zink_blit(struct pipe_context *pctx, util_blitter_clear_depth_stencil(ctx->blitter, dst_view, PIPE_CLEAR_STENCIL, 0, 0, info->dst.box.x, info->dst.box.y, info->dst.box.width, info->dst.box.height); - zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES); + zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES | ZINK_BLIT_SAVE_FS_CONST_BUF); util_blitter_stencil_fallback(ctx->blitter, info->dst.resource, info->dst.level, @@ -390,8 +391,10 @@ zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags) util_blitter_save_rasterizer(ctx->blitter, ctx->rast_state); util_blitter_save_so_targets(ctx->blitter, ctx->num_so_targets, ctx->so_targets); - if (flags & ZINK_BLIT_SAVE_FS) { + if (flags & ZINK_BLIT_SAVE_FS_CONST_BUF) util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[MESA_SHADER_FRAGMENT]); + + if (flags & ZINK_BLIT_SAVE_FS) { util_blitter_save_blend(ctx->blitter, ctx->gfx_pipeline_state.blend_state); util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->dsa_state); util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); diff --git a/lib/mesa/src/gallium/drivers/zink/zink_bo.h b/lib/mesa/src/gallium/drivers/zink/zink_bo.h index 42b1fc643..42e5ec225 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_bo.h +++ b/lib/mesa/src/gallium/drivers/zink/zink_bo.h @@ -30,7 +30,6 @@ #include "zink_batch.h" #define VK_VIS_VRAM (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) -#define VK_STAGING_RAM 
(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT) #define VK_LAZY_VRAM (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) diff --git a/lib/mesa/src/gallium/drivers/zink/zink_clear.c b/lib/mesa/src/gallium/drivers/zink/zink_clear.c index cfb66df3f..1db046757 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_clear.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_clear.c @@ -455,8 +455,10 @@ zink_clear_texture(struct pipe_context *pctx, util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state); set_clear_fb(pctx, surf, NULL); ctx->blitting = true; + ctx->queries_disabled = true; pctx->clear(pctx, PIPE_CLEAR_COLOR0, &scissor, &color, 0, 0); util_blitter_restore_fb_state(ctx->blitter); + ctx->queries_disabled = false; ctx->blitting = false; } else { float depth = 0.0; @@ -477,8 +479,10 @@ zink_clear_texture(struct pipe_context *pctx, util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state); ctx->blitting = true; set_clear_fb(pctx, NULL, surf); + ctx->queries_disabled = true; pctx->clear(pctx, flags, &scissor, NULL, depth, stencil); util_blitter_restore_fb_state(ctx->blitter); + ctx->queries_disabled = false; ctx->blitting = false; } /* this will never destroy the surface */ diff --git a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c index c59c4d5ae..a23212aa8 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c @@ -2207,6 +2207,32 @@ prune_io(nir_shader *nir) } } +static bool +invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_deref) + return false; + nir_variable *deref_var = nir_intrinsic_get_var(intr, 0); + if (deref_var->data.location != VARYING_SLOT_PNTC) + return false; + b->cursor = 
nir_after_instr(instr); + nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0), + nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1))); + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr); + return true; +} + +static bool +invert_point_coord(nir_shader *nir) +{ + if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))) + return false; + return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL); +} + VkShaderModule zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key) { @@ -2283,10 +2309,10 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) { NIR_PASS_V(nir, lower_dual_blend); } - if (zink_fs_key(key)->coord_replace_bits) { - NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits, - false, zink_fs_key(key)->coord_replace_yinvert); - } + if (zink_fs_key(key)->coord_replace_bits) + NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits, false, false); + if (zink_fs_key(key)->point_coord_yinvert) + NIR_PASS_V(nir, invert_point_coord); if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) { nir_foreach_shader_in_variable(var, nir) var->data.sample = true; @@ -3368,7 +3394,7 @@ struct zink_shader * zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, const struct pipe_stream_output_info *so_info) { - struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + struct zink_shader *ret = rzalloc(NULL, struct zink_shader); bool have_psiz = false; ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model; @@ -3482,6 +3508,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, ret->sinfo.sampler_mask = sampler_mask; } + unsigned 
ubo_binding_mask = 0; + unsigned ssbo_binding_mask = 0; foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) { if (_nir_shader_variable_has_mode(var, nir_var_uniform | nir_var_image | @@ -3504,13 +3532,14 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, if (!var->data.driver_location) { ret->has_uniforms = true; - } else { + } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) { ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding; ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); ret->num_bindings[ztype]++; + ubo_binding_mask |= BITFIELD_BIT(binding); } } else if (var->data.mode == nir_var_mem_ssbo) { ztype = ZINK_DESCRIPTOR_TYPE_SSBO; @@ -3519,12 +3548,15 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, var->data.driver_location, screen->compact_descriptors); - ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; - ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; - ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); - assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); - ret->num_bindings[ztype]++; + if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) { + ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location; + ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding; + ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type); + 
assert(ret->bindings[ztype][ret->num_bindings[ztype]].size); + ret->num_bindings[ztype]++; + ssbo_binding_mask |= BITFIELD_BIT(var->data.binding); + } } else { assert(var->data.mode == nir_var_uniform || var->data.mode == nir_var_image); @@ -3644,6 +3676,16 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) prog->base.removed = true; simple_mtx_unlock(&prog->ctx->program_lock[idx]); util_queue_fence_wait(&prog->base.cache_fence); + + for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) { + for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { + hash_table_foreach(&prog->pipelines[r][i], entry) { + struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; + + util_queue_fence_wait(&pc_entry->fence); + } + } + } } if (stage != MESA_SHADER_TESS_CTRL || !shader->tcs.is_generated) { prog->shaders[stage] = NULL; @@ -3663,7 +3705,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) _mesa_set_destroy(shader->programs, NULL); ralloc_free(shader->nir); ralloc_free(shader->spirv); - FREE(shader); + ralloc_free(shader); } @@ -3700,7 +3742,7 @@ void main() struct zink_shader * zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch) { - struct zink_shader *ret = CALLOC_STRUCT(zink_shader); + struct zink_shader *ret = rzalloc(NULL, struct zink_shader); ret->hash = _mesa_hash_pointer(ret); ret->programs = _mesa_pointer_set_create(NULL); simple_mtx_init(&ret->lock, mtx_plain); diff --git a/lib/mesa/src/gallium/drivers/zink/zink_context.c b/lib/mesa/src/gallium/drivers/zink/zink_context.c index 0cbb0ccad..18bd412f1 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_context.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_context.c @@ -99,6 +99,9 @@ zink_context_destroy(struct pipe_context *pctx) struct zink_context *ctx = zink_context(pctx); struct zink_screen *screen = zink_screen(pctx->screen); + struct pipe_framebuffer_state fb = {0}; + pctx->set_framebuffer_state(pctx, 
&fb); + if (util_queue_is_initialized(&screen->flush_queue)) util_queue_finish(&screen->flush_queue); if (ctx->batch.state && !screen->device_lost) { @@ -1442,8 +1445,9 @@ zink_set_constant_buffer(struct pipe_context *pctx, ALWAYS_INLINE static void unbind_descriptor_reads(struct zink_resource *res, gl_shader_stage pstage) { - if (!res->sampler_binds[pstage] && !res->image_binds[pstage]) - res->barrier_access[pstage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_SHADER_READ_BIT; + bool is_compute = pstage == MESA_SHADER_COMPUTE; + if (!res->sampler_bind_count[is_compute] && !res->image_bind_count[is_compute]) + res->barrier_access[is_compute] &= ~VK_ACCESS_SHADER_READ_BIT; } ALWAYS_INLINE static void @@ -1520,7 +1524,8 @@ zink_set_shader_buffers(struct pipe_context *pctx, else new_res->obj->unordered_read = false; } else { - update = !!res; + if (res) + update = true; ssbo->buffer_offset = 0; ssbo->buffer_size = 0; if (res) { @@ -1609,6 +1614,7 @@ unbind_shader_image(struct zink_context *ctx, gl_shader_stage stage, unsigned sl zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL); } else { unbind_descriptor_stage(res, stage); + unbind_descriptor_reads(res, stage); if (!res->image_bind_count[is_compute]) check_for_layout_update(ctx, res, is_compute); zink_surface_reference(zink_screen(ctx->base.screen), &image_view->surface, NULL); @@ -1737,8 +1743,11 @@ zink_set_shader_images(struct pipe_context *pctx, res->image_bind_count[p_stage == MESA_SHADER_COMPUTE]++; update_res_bind_count(ctx, res, p_stage == MESA_SHADER_COMPUTE, false); unbind_shader_image(ctx, p_stage, start_slot + i); + image_view->surface = surface; + } else { + /* create_image_surface will always increment ref */ + zink_surface_reference(zink_screen(ctx->base.screen), &surface, NULL); } - image_view->surface = surface; finalize_image_bind(ctx, res, p_stage == MESA_SHADER_COMPUTE); zink_batch_resource_usage_set(&ctx->batch, res, zink_resource_access_is_write(access), false); 
@@ -2890,7 +2899,8 @@ unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, unsigned check_resource_for_batch_ref(ctx, res); if (res->sampler_bind_count[0]) { update_res_sampler_layouts(ctx, res); - _mesa_set_add(ctx->need_barriers[0], res); + if (res->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + _mesa_set_add(ctx->need_barriers[0], res); } } } @@ -3258,7 +3268,7 @@ pipeline_dst_stage(VkImageLayout layout) bool zink_resource_access_is_write(VkAccessFlags flags) { - return (flags & ALL_READ_ACCESS_FLAGS) != flags; + return (flags & ~ALL_READ_ACCESS_FLAGS) > 0; } bool @@ -4632,6 +4642,7 @@ zink_context_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resou zink_batch_reference_resource(&ctx->batch, d); /* don't be too creative */ zink_resource_object_reference(screen, &d->obj, s->obj); + d->valid_buffer_range = s->valid_buffer_range; /* force counter buffer reset */ d->so_valid = false; if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) < num_rebinds) @@ -4682,6 +4693,13 @@ zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index) { if (!ctx->dummy_surface[samples_index]) { ctx->dummy_surface[samples_index] = zink_surface_create_null(ctx, PIPE_TEXTURE_2D, 1024, 1024, BITFIELD_BIT(samples_index)); + /* This is possibly used with imageLoad which according to GL spec must return 0 */ + if (!samples_index) { + union pipe_color_union color = {0}; + struct pipe_box box; + u_box_2d(0, 0, 1024, 1024, &box); + ctx->base.clear_texture(&ctx->base, ctx->dummy_surface[samples_index]->texture, 0, &box, &color); + } } return ctx->dummy_surface[samples_index]; } @@ -4850,6 +4868,8 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) } ctx->gfx_pipeline_state.rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO; ctx->gfx_pipeline_state.rendering_info.pColorAttachmentFormats = ctx->gfx_pipeline_state.rendering_formats; + ctx->gfx_pipeline_state.feedback_loop = 
screen->driver_workarounds.always_feedback_loop; + ctx->gfx_pipeline_state.feedback_loop_zs = screen->driver_workarounds.always_feedback_loop_zs; const uint32_t data[] = {0}; if (!is_copy_only) { diff --git a/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c b/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c index 6f5762b62..c19af2fe7 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c @@ -586,6 +586,8 @@ zink_descriptor_program_deinit(struct zink_screen *screen, struct zink_program * pg->dd.pool_key[i]->use_count--; pg->dd.pool_key[i] = NULL; } + } + for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_NON_BINDLESS_TYPES; i++) { if (pg->dd.templates[i]) { VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, pg->dd.templates[i], NULL); pg->dd.templates[i] = VK_NULL_HANDLE; @@ -972,7 +974,7 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute) /* bindless descriptors are context-based and get updated elsewhere */ if (pg->dd.bindless && unlikely(!ctx->dd.bindless_bound)) { VKCTX(CmdBindDescriptorSets)(ctx->batch.state->cmdbuf, is_compute ? 
VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS, - pg->layout, ZINK_DESCRIPTOR_BINDLESS, 1, &ctx->dd.bindless_set, + pg->layout, screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS], 1, &ctx->dd.bindless_set, 0, NULL); ctx->dd.bindless_bound = true; } @@ -1009,11 +1011,11 @@ void zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs) { for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) { - while (util_dynarray_contains(&bs->dd.pools[i], struct zink_descriptor_pool_multi *)) { - struct zink_descriptor_pool_multi *mpool = util_dynarray_pop(&bs->dd.pools[i], struct zink_descriptor_pool_multi *); - if (mpool) { - deinit_multi_pool_overflow(screen, mpool); - multi_pool_destroy(screen, mpool); + for (unsigned j = 0; j < bs->dd.pools[i].capacity / sizeof(struct zink_descriptor_pool_multi *); j++) { + struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[i], struct zink_descriptor_pool_multi *, j); + if (mppool && *mppool) { + deinit_multi_pool_overflow(screen, *mppool); + multi_pool_destroy(screen, *mppool); } } util_dynarray_fini(&bs->dd.pools[i]); diff --git a/lib/mesa/src/gallium/drivers/zink/zink_kopper.c b/lib/mesa/src/gallium/drivers/zink/zink_kopper.c index 6070abb39..a67b7566a 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_kopper.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_kopper.c @@ -297,7 +297,6 @@ kopper_CreateSwapchain(struct zink_screen *screen, struct kopper_displaytarget * *result = error; return NULL; } - cswap->max_acquires = cswap->scci.minImageCount - cdt->caps.minImageCount; cswap->last_present = UINT32_MAX; *result = VK_SUCCESS; @@ -320,6 +319,7 @@ kopper_GetSwapchainImages(struct zink_screen *screen, struct kopper_swapchain *c for (unsigned i = 0; i < cswap->num_images; i++) cswap->images[i].image = images[i]; } + cswap->max_acquires = cswap->num_images - cswap->scci.minImageCount + 1; return error; } @@ -490,7 +490,7 @@ kopper_acquire(struct zink_screen *screen, 
struct zink_resource *res, uint64_t t res->obj->access_stage = 0; } if (timeout == UINT64_MAX && util_queue_is_initialized(&screen->flush_queue) && - p_atomic_read_relaxed(&cdt->swapchain->num_acquires) > cdt->swapchain->max_acquires) { + p_atomic_read_relaxed(&cdt->swapchain->num_acquires) >= cdt->swapchain->max_acquires) { util_queue_fence_wait(&cdt->present_fence); } VkSemaphoreCreateInfo sci = { diff --git a/lib/mesa/src/gallium/drivers/zink/zink_program.c b/lib/mesa/src/gallium/drivers/zink/zink_program.c index 66e2161b0..1e742a31c 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_program.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_program.c @@ -451,17 +451,37 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree } static uint32_t -hash_pipeline_lib(const void *key) +hash_pipeline_lib_generated_tcs(const void *key) { return 1; } + static bool -equals_pipeline_lib_optimal(const void *a, const void *b) +equals_pipeline_lib_generated_tcs(const void *a, const void *b) { return !memcmp(a, b, sizeof(uint32_t)); } +static uint32_t +hash_pipeline_lib(const void *key) +{ + const struct zink_gfx_library_key *gkey = key; + /* remove generated tcs bits */ + return zink_shader_key_optimal_no_tcs(gkey->optimal_key); +} + +static bool +equals_pipeline_lib(const void *a, const void *b) +{ + const struct zink_gfx_library_key *ak = a; + const struct zink_gfx_library_key *bk = b; + /* remove generated tcs bits */ + uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key); + uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key); + return val_a == val_b; +} + uint32_t hash_gfx_input_dynamic(const void *key) { @@ -866,20 +886,22 @@ zink_create_gfx_program(struct zink_context *ctx, prog->ctx = ctx; for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { - util_dynarray_init(&prog->shader_cache[i][0][0], NULL); - util_dynarray_init(&prog->shader_cache[i][0][1], NULL); - util_dynarray_init(&prog->shader_cache[i][1][0], NULL); - 
util_dynarray_init(&prog->shader_cache[i][1][1], NULL); + util_dynarray_init(&prog->shader_cache[i][0][0], prog); + util_dynarray_init(&prog->shader_cache[i][0][1], prog); + util_dynarray_init(&prog->shader_cache[i][1][0], prog); + util_dynarray_init(&prog->shader_cache[i][1][1], prog); if (stages[i]) { prog->shaders[i] = stages[i]; prog->stages_present |= BITFIELD_BIT(i); } } + bool generated_tcs = false; if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { prog->shaders[MESA_SHADER_TESS_EVAL]->tes.generated = prog->shaders[MESA_SHADER_TESS_CTRL] = zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch); prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + generated_tcs = true; } prog->stages_remaining = prog->stages_present; @@ -902,7 +924,10 @@ zink_create_gfx_program(struct zink_context *ctx, } } - _mesa_set_init(&prog->libs, prog, hash_pipeline_lib, equals_pipeline_lib_optimal); + if (generated_tcs) + _mesa_set_init(&prog->libs, prog, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs); + else + _mesa_set_init(&prog->libs, prog, hash_pipeline_lib, equals_pipeline_lib); struct mesa_sha1 sctx; _mesa_sha1_init(&sctx); @@ -986,8 +1011,8 @@ precompile_compute_job(void *data, void *gdata, int thread_index) assert(comp->module); comp->module->shader = zink_shader_compile(screen, comp->shader, comp->shader->nir, NULL); assert(comp->module->shader); - util_dynarray_init(&comp->shader_cache[0], NULL); - util_dynarray_init(&comp->shader_cache[1], NULL); + util_dynarray_init(&comp->shader_cache[0], comp); + util_dynarray_init(&comp->shader_cache[1], comp); struct blob blob = {0}; blob_init(&blob); @@ -1201,8 +1226,13 @@ zink_destroy_compute_program(struct zink_screen *screen, { deinit_program(screen, &comp->base); - if (comp->shader) - _mesa_set_remove_key(comp->shader->programs, comp); + assert(comp->shader); + assert(!comp->shader->spirv); + + _mesa_set_destroy(comp->shader->programs, NULL); + 
ralloc_free(comp->shader->nir); + ralloc_free(comp->shader); + destroy_shader_cache(screen, &comp->shader_cache[0]); destroy_shader_cache(screen, &comp->shader_cache[1]); @@ -1600,6 +1630,7 @@ zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *pr { struct zink_gfx_library_key *gkey = rzalloc(prog, struct zink_gfx_library_key); gkey->optimal_key = state->optimal_key; + assert(gkey->optimal_key); memcpy(gkey->modules, prog->modules, sizeof(gkey->modules)); gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog); _mesa_set_add(&prog->libs, gkey); @@ -1695,6 +1726,8 @@ precompile_job(void *data, void *gdata, int thread_index) struct zink_gfx_pipeline_state state = {0}; state.shader_keys_optimal.key.vs_base.last_vertex_stage = true; + state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard + state.optimal_key = state.shader_keys_optimal.key.val; generate_gfx_program_modules_optimal(NULL, screen, prog, &state); zink_screen_get_pipeline_cache(screen, &prog->base, true); zink_create_pipeline_lib(screen, prog, &state); diff --git a/lib/mesa/src/gallium/drivers/zink/zink_program.h b/lib/mesa/src/gallium/drivers/zink/zink_program.h index caa9c573d..12658458e 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_program.h +++ b/lib/mesa/src/gallium/drivers/zink/zink_program.h @@ -317,12 +317,12 @@ static inline void zink_set_fs_point_coord_key(struct zink_context *ctx) { const struct zink_fs_key *fs = zink_get_fs_key(ctx); - bool disable = ctx->gfx_pipeline_state.rast_prim != PIPE_PRIM_POINTS || !ctx->rast_state->base.sprite_coord_enable; + bool disable = ctx->gfx_pipeline_state.rast_prim != PIPE_PRIM_POINTS; uint8_t coord_replace_bits = disable ? 0 : ctx->rast_state->base.sprite_coord_enable; - bool coord_replace_yinvert = disable ? 
false : !!ctx->rast_state->base.sprite_coord_mode; - if (fs->coord_replace_bits != coord_replace_bits || fs->coord_replace_yinvert != coord_replace_yinvert) { + bool point_coord_yinvert = disable ? false : !!ctx->rast_state->base.sprite_coord_mode; + if (fs->coord_replace_bits != coord_replace_bits || fs->point_coord_yinvert != point_coord_yinvert) { zink_set_fs_key(ctx)->coord_replace_bits = coord_replace_bits; - zink_set_fs_key(ctx)->coord_replace_yinvert = coord_replace_yinvert; + zink_set_fs_key(ctx)->point_coord_yinvert = point_coord_yinvert; } } diff --git a/lib/mesa/src/gallium/drivers/zink/zink_query.c b/lib/mesa/src/gallium/drivers/zink/zink_query.c index 67ac1f915..e9dc921c2 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_query.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_query.c @@ -1070,8 +1070,8 @@ zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch) { struct zink_query *query, *next; LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) { - begin_query(ctx, batch, query); list_delinit(&query->active_list); + begin_query(ctx, batch, query); } } diff --git a/lib/mesa/src/gallium/drivers/zink/zink_resource.c b/lib/mesa/src/gallium/drivers/zink/zink_resource.c index eb7bb894c..56f445f63 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_resource.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_resource.c @@ -105,6 +105,7 @@ zink_destroy_resource_object(struct zink_screen *screen, struct zink_resource_ob while (util_dynarray_contains(&obj->views, VkImageView)) VKSCR(DestroyImageView)(screen->dev, util_dynarray_pop(&obj->views, VkImageView), NULL); } + util_dynarray_fini(&obj->views); if (obj->is_buffer) { VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL); VKSCR(DestroyBuffer)(screen->dev, obj->storage_buffer, NULL); @@ -190,6 +191,9 @@ create_bci(struct zink_screen *screen, const struct pipe_resource *templ, unsign VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT | 
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT; + if (screen->info.have_KHR_buffer_device_address) + bci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (bind & PIPE_BIND_SHADER_IMAGE) bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; @@ -630,9 +634,12 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t #else external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; #endif - } else { + } else if (screen->info.have_EXT_external_memory_dma_buf) { external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + } else { + /* can't export anything, fail early */ + return NULL; } } @@ -672,6 +679,8 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; obj->is_buffer = true; obj->transfer_dst = true; + obj->vkflags = bci.flags; + obj->vkusage = bci.usage; } else { bool winsys_modifier = (export_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) && whandle && whandle->modifier != DRM_FORMAT_MOD_INVALID; uint64_t mods[10]; @@ -1283,7 +1292,7 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned } struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->linear, res->modifiers, res->modifiers_count, NULL); if (!new_obj) { - debug_printf("new backing resource alloc failed!"); + debug_printf("new backing resource alloc failed!\n"); res->base.b.bind &= ~bind; return false; } @@ -1293,11 +1302,6 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned res->layout = VK_IMAGE_LAYOUT_UNDEFINED; res->obj->access = 0; res->obj->access_stage = 0; - bool needs_unref = true; - if (zink_resource_has_usage(res)) { - zink_batch_reference_resource_move(&ctx->batch, res); - needs_unref = false; - } res->obj = new_obj; for (unsigned i = 0; i <= res->base.b.last_level; 
i++) { struct pipe_box box = {0, 0, 0, @@ -1306,8 +1310,7 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned box.depth = util_num_layers(&res->base.b, i); ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box); } - if (needs_unref) - zink_resource_object_reference(screen, &old_obj, NULL); + zink_resource_object_reference(screen, &old_obj, NULL); return true; } @@ -1638,7 +1641,7 @@ invalidate_buffer(struct zink_context *ctx, struct zink_resource *res) struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL, NULL, 0, NULL); if (!new_obj) { - debug_printf("new backing resource alloc failed!"); + debug_printf("new backing resource alloc failed!\n"); return false; } /* this ref must be transferred before rebind or else BOOM */ @@ -1864,9 +1867,7 @@ zink_buffer_map(struct pipe_context *pctx, goto success; usage |= PIPE_MAP_UNSYNCHRONIZED; } else if (!(usage & PIPE_MAP_UNSYNCHRONIZED) && - (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) && - ((screen->info.mem_props.memoryTypes[res->obj->bo->base.placement].propertyFlags & VK_STAGING_RAM) != VK_STAGING_RAM)) || - !res->obj->host_visible)) { + (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) && res->base.b.usage != PIPE_USAGE_STAGING) || !res->obj->host_visible)) { assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE))); if (!res->obj->host_visible || !(usage & PIPE_MAP_ONCE)) { overwrite: diff --git a/lib/mesa/src/gallium/drivers/zink/zink_screen.c b/lib/mesa/src/gallium/drivers/zink/zink_screen.c index 452f48dd4..3427b68cd 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_screen.c +++ b/lib/mesa/src/gallium/drivers/zink/zink_screen.c @@ -879,8 +879,12 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS: return screen->info.feats.features.sparseResidencyImage2D ? 
1 : 0; case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY: + return screen->info.feats.features.sparseResidency2Samples && + screen->info.feats.features.shaderResourceResidency ? 1 : 0; case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD: - return screen->info.feats.features.sparseResidency2Samples ? 1 : 0; + return screen->info.feats.features.shaderResourceMinLod && + screen->info.feats.features.sparseResidency2Samples && + screen->info.feats.features.shaderResourceResidency ? 1 : 0; case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: return screen->info.props.limits.viewportSubPixelBits; diff --git a/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h b/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h index 295cbe3cf..fab6fb403 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h +++ b/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h @@ -29,9 +29,9 @@ #include "compiler/shader_info.h" struct zink_vs_key_base { + bool last_vertex_stage : 1; bool clip_halfz : 1; bool push_drawid : 1; - bool last_vertex_stage : 1; uint8_t pad : 5; }; @@ -57,7 +57,7 @@ struct zink_vs_key { }; struct zink_fs_key { - bool coord_replace_yinvert : 1; + bool point_coord_yinvert : 1; bool samples : 1; bool force_dual_color_blend : 1; bool force_persample_interp : 1; @@ -107,6 +107,19 @@ union zink_shader_key_optimal { uint32_t val; }; +/* the default key has only last_vertex_stage set*/ +#define ZINK_SHADER_KEY_OPTIMAL_DEFAULT (1<<0) +/* Ignore patch_vertices bits that would only be used if we had to generate the missing TCS */ +static inline uint32_t +zink_shader_key_optimal_no_tcs(uint32_t key) +{ + union zink_shader_key_optimal k; + k.val = key; + k.tcs_bits = 0; + return k.val; +} +#define ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(key) (zink_shader_key_optimal_no_tcs(key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT) + static inline const struct zink_fs_key * zink_fs_key(const struct zink_shader_key *key) { diff --git a/lib/mesa/src/gallium/drivers/zink/zink_types.h 
b/lib/mesa/src/gallium/drivers/zink/zink_types.h index 93ae9ac8c..b05ad12ed 100644 --- a/lib/mesa/src/gallium/drivers/zink/zink_types.h +++ b/lib/mesa/src/gallium/drivers/zink/zink_types.h @@ -128,6 +128,7 @@ enum zink_blit_flags { ZINK_BLIT_SAVE_FB = 1 << 2, ZINK_BLIT_SAVE_TEXTURES = 1 << 3, ZINK_BLIT_NO_COND_RENDER = 1 << 4, + ZINK_BLIT_SAVE_FS_CONST_BUF = 1 << 5, }; /* descriptor types; also the ordering of the sets diff --git a/lib/mesa/src/gallium/frontends/dri/kopper.c b/lib/mesa/src/gallium/frontends/dri/kopper.c index 0102e2cf4..4abc5037b 100644 --- a/lib/mesa/src/gallium/frontends/dri/kopper.c +++ b/lib/mesa/src/gallium/frontends/dri/kopper.c @@ -194,7 +194,6 @@ fail: dri_destroy_screen_helper(screen); if (screen->dev) pipe_loader_release(&screen->dev, 1); - FREE(screen); return NULL; } @@ -608,6 +607,7 @@ XXX do this once swapinterval is hooked up unsigned bind; dri_drawable_get_format(drawable, statts[i], &format, &bind); + templ.format = format; /* the texture already exists or not requested */ if (!drawable->textures[statts[i]]) { @@ -619,7 +619,6 @@ XXX do this once swapinterval is hooked up if (format == PIPE_FORMAT_NONE) continue; - templ.format = format; templ.bind = bind; templ.nr_samples = 0; templ.nr_storage_samples = 0; @@ -646,7 +645,7 @@ XXX do this once swapinterval is hooked up } } if (drawable->stvis.samples > 1 && !drawable->msaa_textures[statts[i]]) { - templ.bind = templ.bind & + templ.bind = bind & ~(PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_DISPLAY_TARGET); templ.nr_samples = drawable->stvis.samples; templ.nr_storage_samples = drawable->stvis.samples; diff --git a/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c b/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c index 4a54a8981..6cc723d01 100644 --- a/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c +++ b/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c @@ -135,7 +135,7 @@ lvp_physical_device_get_format_properties(struct lvp_physical_device *physical_d 
PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_RENDER_TARGET)) { features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT; /* SNORM blending on llvmpipe fails CTS - disable for now */ - if (!util_format_is_snorm(pformat)) + if (!util_format_is_snorm(pformat) && !util_format_is_pure_integer(pformat)) features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT; } diff --git a/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c b/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c index 76c0b5175..1afb29be8 100644 --- a/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c +++ b/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c @@ -101,7 +101,7 @@ src_only_uses_uniforms(const nir_src *src, int component, /* Record the uniform offset. */ if (uni_offsets) - uni_offsets[ubo * MAX_INLINABLE_UNIFORMS + num_offsets[ubo]++] = offset; + uni_offsets[ubo * PIPE_MAX_CONSTANT_BUFFERS + num_offsets[ubo]++] = offset; return true; } return false; diff --git a/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs b/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs index 2e793a8cf..e9d5ec205 100644 --- a/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs +++ b/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs @@ -240,7 +240,11 @@ pub fn set_kernel_arg( return Err(CL_INVALID_ARG_SIZE); } } - KernelArgType::MemGlobal | KernelArgType::MemConstant => { + KernelArgType::MemGlobal + | KernelArgType::MemConstant + | KernelArgType::Image + | KernelArgType::RWImage + | KernelArgType::Texture => { if arg_size != std::mem::size_of::<cl_mem>() { return Err(CL_INVALID_ARG_SIZE); } diff --git a/lib/mesa/src/gallium/frontends/va/picture_vp9.c b/lib/mesa/src/gallium/frontends/va/picture_vp9.c index 3d5189a67..ff3da929f 100644 --- a/lib/mesa/src/gallium/frontends/va/picture_vp9.c +++ b/lib/mesa/src/gallium/frontends/va/picture_vp9.c @@ -37,12 +37,15 @@ void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context, assert(buf->size >= 
sizeof(VADecPictureParameterBufferVP9) && buf->num_elements == 1); + context->desc.vp9.picture_parameter.prev_frame_width = context->desc.vp9.picture_parameter.frame_width; + context->desc.vp9.picture_parameter.prev_frame_height = context->desc.vp9.picture_parameter.frame_height; context->desc.vp9.picture_parameter.frame_width = vp9->frame_width; context->desc.vp9.picture_parameter.frame_height = vp9->frame_height; context->desc.vp9.picture_parameter.pic_fields.subsampling_x = vp9->pic_fields.bits.subsampling_x; context->desc.vp9.picture_parameter.pic_fields.subsampling_y = vp9->pic_fields.bits.subsampling_y; context->desc.vp9.picture_parameter.pic_fields.frame_type = vp9->pic_fields.bits.frame_type; + context->desc.vp9.picture_parameter.pic_fields.prev_show_frame = context->desc.vp9.picture_parameter.pic_fields.show_frame; context->desc.vp9.picture_parameter.pic_fields.show_frame = vp9->pic_fields.bits.show_frame; context->desc.vp9.picture_parameter.pic_fields.error_resilient_mode = vp9->pic_fields.bits.error_resilient_mode; context->desc.vp9.picture_parameter.pic_fields.intra_only = vp9->pic_fields.bits.intra_only; diff --git a/lib/mesa/src/intel/compiler/brw_mesh.cpp b/lib/mesa/src/intel/compiler/brw_mesh.cpp index cea5aef67..b1474c04b 100644 --- a/lib/mesa/src/intel/compiler/brw_mesh.cpp +++ b/lib/mesa/src/intel/compiler/brw_mesh.cpp @@ -1146,7 +1146,7 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr, retype(quarter(dest_comp, q), BRW_REGISTER_TYPE_UD), data, comp, - brw_imm_ud(4)); + brw_imm_ud(4 * REG_SIZE)); } } } diff --git a/lib/mesa/src/intel/ds/intel_driver_ds.cc b/lib/mesa/src/intel/ds/intel_driver_ds.cc index 344e7ed21..dc51faae0 100644 --- a/lib/mesa/src/intel/ds/intel_driver_ds.cc +++ b/lib/mesa/src/intel/ds/intel_driver_ds.cc @@ -185,12 +185,10 @@ static void send_descriptors(IntelRenderpassDataSource::TraceContext &ctx, struct intel_ds_device *device) { - struct intel_ds_queue *queue; - PERFETTO_LOG("Sending renderstage 
descriptors"); device->event_id = 0; - u_vector_foreach(queue, &device->queues) { + list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) { for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) { queue->stages[s].start_ns = 0; } @@ -222,7 +220,7 @@ send_descriptors(IntelRenderpassDataSource::TraceContext &ctx, } /* Emit all the IID picked at device/queue creation. */ - u_vector_foreach(queue, &device->queues) { + list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) { for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) { { /* We put the stage number in there so that all rows are order @@ -528,29 +526,26 @@ intel_ds_device_init(struct intel_ds_device *device, device->info = *devinfo; device->iid = get_iid(); device->api = api; - u_vector_init(&device->queues, 4, sizeof(struct intel_ds_queue)); + list_inithead(&device->queues); } void intel_ds_device_fini(struct intel_ds_device *device) { u_trace_context_fini(&device->trace_context); - u_vector_finish(&device->queues); } struct intel_ds_queue * -intel_ds_device_add_queue(struct intel_ds_device *device, - const char *fmt_name, - ...) +intel_ds_device_init_queue(struct intel_ds_device *device, + struct intel_ds_queue *queue, + const char *fmt_name, + ...) 
{ - struct intel_ds_queue *queue = - (struct intel_ds_queue *) u_vector_add(&device->queues); va_list ap; memset(queue, 0, sizeof(*queue)); queue->device = device; - queue->queue_id = u_vector_length(&device->queues) - 1; va_start(ap, fmt_name); vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap); @@ -561,6 +556,8 @@ intel_ds_device_add_queue(struct intel_ds_device *device, queue->stages[s].stage_iid = get_iid(); } + list_add(&queue->link, &device->queues); + return queue; } diff --git a/lib/mesa/src/intel/ds/intel_driver_ds.h b/lib/mesa/src/intel/ds/intel_driver_ds.h index ca03c6516..f88f5f7ee 100644 --- a/lib/mesa/src/intel/ds/intel_driver_ds.h +++ b/lib/mesa/src/intel/ds/intel_driver_ds.h @@ -107,7 +107,7 @@ struct intel_ds_device { struct u_trace_context trace_context; /* List of intel_ds_queue */ - struct u_vector queues; + struct list_head queues; }; struct intel_ds_stage { @@ -122,12 +122,11 @@ struct intel_ds_stage { }; struct intel_ds_queue { + struct list_head link; + /* Device this queue belongs to */ struct intel_ds_device *device; - /* Unique queue ID across the device */ - uint32_t queue_id; - /* Unique name of the queue */ char name[80]; @@ -158,9 +157,11 @@ void intel_ds_device_init(struct intel_ds_device *device, enum intel_ds_api api); void intel_ds_device_fini(struct intel_ds_device *device); -struct intel_ds_queue *intel_ds_device_add_queue(struct intel_ds_device *device, - const char *fmt_name, - ...); +struct intel_ds_queue * +intel_ds_device_init_queue(struct intel_ds_device *device, + struct intel_ds_queue *queue, + const char *fmt_name, + ...); void intel_ds_flush_data_init(struct intel_ds_flush_data *data, struct intel_ds_queue *queue, diff --git a/lib/mesa/src/intel/genxml/gen12.xml b/lib/mesa/src/intel/genxml/gen12.xml index bc066d48c..f3ecca813 100644 --- a/lib/mesa/src/intel/genxml/gen12.xml +++ b/lib/mesa/src/intel/genxml/gen12.xml @@ -886,7 +886,7 @@ <field name="Return Filter Weight for Null Texels" start="66" end="66" 
type="uint" /> <field name="Return Filter Weight for Border Texels" start="67" end="67" type="uint" /> <field name="Force gather4 Behavior" start="69" end="69" type="bool" /> - <field name="Border Color Pointer" start="70" end="87" type="offset" /> + <field name="Border Color Pointer" start="70" end="95" type="offset" /> <field name="TCZ Address Control Mode" start="96" end="98" type="Texture Coordinate Mode" /> <field name="TCY Address Control Mode" start="99" end="101" type="Texture Coordinate Mode" /> <field name="TCX Address Control Mode" start="102" end="104" type="Texture Coordinate Mode" /> diff --git a/lib/mesa/src/intel/genxml/gen125.xml b/lib/mesa/src/intel/genxml/gen125.xml index 6d27fb84b..1b9ad3332 100644 --- a/lib/mesa/src/intel/genxml/gen125.xml +++ b/lib/mesa/src/intel/genxml/gen125.xml @@ -1068,7 +1068,7 @@ <field name="Return Filter Weight for Null Texels" start="66" end="66" type="uint" /> <field name="Return Filter Weight for Border Texels" start="67" end="67" type="uint" /> <field name="Force gather4 Behavior" start="69" end="69" type="bool" /> - <field name="Border Color Pointer" start="70" end="87" type="offset" /> + <field name="Border Color Pointer" start="70" end="95" type="offset" /> <field name="TCZ Address Control Mode" start="96" end="98" type="Texture Coordinate Mode" /> <field name="TCY Address Control Mode" start="99" end="101" type="Texture Coordinate Mode" /> <field name="TCX Address Control Mode" start="102" end="104" type="Texture Coordinate Mode" /> @@ -6921,7 +6921,7 @@ <field name="Bindless Surface State Base Address Modify Enable" start="512" end="512" type="bool" /> <field name="Bindless Surface State MOCS" start="516" end="522" type="uint" nonzero="true" /> <field name="Bindless Surface State Base Address" start="524" end="575" type="address" /> - <field name="Bindless Surface State Size" start="588" end="607" type="uint" /> + <field name="Bindless Surface State Size" start="576" end="607" type="uint" /> <field 
name="Bindless Sampler State Base Address Modify Enable" start="608" end="608" type="bool" /> <field name="Bindless Sampler State MOCS" start="612" end="618" type="uint" nonzero="true" /> <field name="Bindless Sampler State Base Address" start="620" end="671" type="address" /> diff --git a/lib/mesa/src/intel/vulkan/anv_utrace.c b/lib/mesa/src/intel/vulkan/anv_utrace.c index 35a744dcb..3a35aefe4 100644 --- a/lib/mesa/src/intel/vulkan/anv_utrace.c +++ b/lib/mesa/src/intel/vulkan/anv_utrace.c @@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, if (!flush) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id); + intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id); result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, 0, 0, &flush->sync); @@ -284,8 +284,7 @@ anv_device_utrace_init(struct anv_device *device) for (uint32_t q = 0; q < device->queue_count; q++) { struct anv_queue *queue = &device->queues[q]; - queue->ds = - intel_ds_device_add_queue(&device->ds, "%s%u", + intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u", intel_engines_class_to_string(queue->family->engine_class), queue->index_in_family); } diff --git a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c index 15f4fdbce..3958452f0 100644 --- a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c +++ b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c @@ -523,7 +523,10 @@ vk_to_grl_VertexFormat(VkFormat format) static struct Geo vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry, - uint32_t prim_count) + uint32_t prim_count, + uint32_t transform_offset, + uint32_t primitive_offset, + uint32_t first_vertex) { struct Geo geo = { .Flags = vk_to_grl_GeometryFlags(pGeometry->flags), @@ -544,18 +547,25 @@ vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry, 
vk_tri->vertexData.deviceAddress; geo.Desc.Triangles.VertexBufferByteStride = vk_tri->vertexStride; + if (geo.Desc.Triangles.pTransformBuffer) + geo.Desc.Triangles.pTransformBuffer += transform_offset; + if (vk_tri->indexType == VK_INDEX_TYPE_NONE_KHR) { geo.Desc.Triangles.IndexCount = 0; geo.Desc.Triangles.VertexCount = prim_count * 3; geo.Desc.Triangles.IndexFormat = INDEX_FORMAT_NONE; + geo.Desc.Triangles.pVertexBuffer += primitive_offset; } else { geo.Desc.Triangles.IndexCount = prim_count * 3; geo.Desc.Triangles.VertexCount = vk_tri->maxVertex; geo.Desc.Triangles.IndexFormat = vk_to_grl_IndexFormat(vk_tri->indexType); + geo.Desc.Triangles.pIndexBuffer += primitive_offset; } + geo.Desc.Triangles.VertexFormat = vk_to_grl_VertexFormat(vk_tri->vertexFormat); + geo.Desc.Triangles.pVertexBuffer += vk_tri->vertexStride * first_vertex; break; } @@ -563,7 +573,8 @@ vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry, const VkAccelerationStructureGeometryAabbsDataKHR *vk_aabbs = &pGeometry->geometry.aabbs; geo.Type = GEOMETRY_TYPE_PROCEDURAL; - geo.Desc.Procedural.pAABBs_GPUVA = vk_aabbs->data.deviceAddress; + geo.Desc.Procedural.pAABBs_GPUVA = + vk_aabbs->data.deviceAddress + primitive_offset; geo.Desc.Procedural.AABBByteStride = vk_aabbs->stride; geo.Desc.Procedural.AABBCount = prim_count; break; @@ -818,7 +829,10 @@ cmd_build_acceleration_structures( for (unsigned g = 0; g < bs->num_geometries; g++) { const VkAccelerationStructureGeometryKHR *pGeometry = get_geometry(pInfo, g); uint32_t prim_count = pBuildRangeInfos[g].primitiveCount; - geos[g] = vk_to_grl_Geo(pGeometry, prim_count); + geos[g] = vk_to_grl_Geo(pGeometry, prim_count, + pBuildRangeInfos[g].transformOffset, + pBuildRangeInfos[g].primitiveOffset, + pBuildRangeInfos[g].firstVertex); prefixes[g] = prefix_sum; prefix_sum += prim_count; diff --git a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c index 84ea02f48..34337c21f 100644 --- 
a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c +++ b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c @@ -679,7 +679,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) || - BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) || BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) { const uint8_t color_writes = dyn->cb.color_write_enables; const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx; @@ -688,10 +689,14 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) && (color_writes & ((1u << state->color_att_count) - 1)) != 0; - uint32_t blend_dws[GENX(BLEND_STATE_length) + - MAX_RTS * GENX(BLEND_STATE_ENTRY_length)]; - uint32_t *dws = blend_dws; - memset(blend_dws, 0, sizeof(blend_dws)); + uint32_t num_dwords = GENX(BLEND_STATE_length) + + GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; + struct anv_state blend_states = + anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, + num_dwords * 4, + 64); + + uint32_t *dws = blend_states.map; struct GENX(BLEND_STATE) blend_state = { .AlphaToCoverageEnable = dyn->ms.alpha_to_coverage_enable, @@ -720,10 +725,29 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) .WriteDisableBlue = write_disabled || (dyn->cb.attachments[i].write_mask & VK_COLOR_COMPONENT_B_BIT) == 0, + /* Vulkan specification 1.2.168, VkLogicOp: + * + * "Logical operations are controlled by the logicOpEnable and + * logicOp members of VkPipelineColorBlendStateCreateInfo. 
If + * logicOpEnable is VK_TRUE, then a logical operation selected + * by logicOp is applied between each color attachment and the + * fragment’s corresponding output value, and blending of all + * attachments is treated as if it were disabled." + * + * From the Broadwell PRM Volume 2d: Command Reference: + * Structures: BLEND_STATE_ENTRY: + * + * "Enabling LogicOp and Color Buffer Blending at the same time + * is UNDEFINED" + */ .LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op], .LogicOpEnable = dyn->cb.logic_op_enable, .ColorBufferBlendEnable = !dyn->cb.logic_op_enable && dyn->cb.attachments[i].blend_enable, + + .ColorClampRange = COLORCLAMP_RTFORMAT, + .PreBlendColorClampEnable = true, + .PostBlendColorClampEnable = true, }; /* Setup blend equation. */ @@ -791,7 +815,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) } /* Generate blend state after entries. */ - GENX(BLEND_STATE_pack)(NULL, blend_dws, &blend_state); + GENX(BLEND_STATE_pack)(NULL, blend_states.map, &blend_state); /* 3DSTATE_PS_BLEND to be consistent with the rest of the * BLEND_STATE_ENTRY. 
@@ -808,12 +832,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) blend.AlphaToCoverageEnable = dyn->ms.alpha_to_coverage_enable; } - uint32_t num_dwords = GENX(BLEND_STATE_length) + - GENX(BLEND_STATE_ENTRY_length) * MAX_RTS; - - struct anv_state blend_states = - anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws, - pipeline->gfx8.blend_state, num_dwords, 64); anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) { bsp.BlendStatePointer = blend_states.offset; bsp.BlendStatePointerValid = true; diff --git a/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h b/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h index dd9ff2c27..119104f15 100644 --- a/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h +++ b/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h @@ -179,17 +179,22 @@ inline float dot(const float3& a, const float3& b) { inline float as_float(uint32_t i) { - return *reinterpret_cast<float*>(&i); + union { float f; uint32_t i; } fi; + + fi.i = i; + return fi.f; } inline float3 as_float3(int3 i3) { - return *reinterpret_cast<float3*>(&i3); + float3 o = { as_float(i3.x), as_float(i3.y), as_float(i3.z) }; + return o; } inline float4 as_float4(int4 i4) { - return *reinterpret_cast<float4*>(&i4); + float4 o = { as_float(i4.x), as_float(i4.y), as_float(i4.z), as_float(i4.w) }; + return o; } inline float4 convert_float4_rtn(int4 i4) diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c b/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c index 1a14a34f0..459f0df9d 100644 --- a/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c +++ b/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c @@ -2393,14 +2393,14 @@ anv_queue_submit(struct vk_queue *vk_queue, return VK_SUCCESS; } - uint64_t start_ts = intel_ds_begin_submit(queue->ds); + uint64_t start_ts = intel_ds_begin_submit(&queue->ds); pthread_mutex_lock(&device->mutex); result = anv_queue_submit_locked(queue, submit); /* Take 
submission ID under lock */ pthread_mutex_unlock(&device->mutex); - intel_ds_end_submit(queue->ds, start_ts); + intel_ds_end_submit(&queue->ds, start_ts); return result; } diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_image.c b/lib/mesa/src/intel/vulkan_hasvk/anv_image.c index f10f46454..807c897cb 100644 --- a/lib/mesa/src/intel/vulkan_hasvk/anv_image.c +++ b/lib/mesa/src/intel/vulkan_hasvk/anv_image.c @@ -375,6 +375,13 @@ can_fast_clear_with_non_zero_color(const struct intel_device_info *devinfo, uint32_t plane, const VkImageFormatListCreateInfo *fmt_list) { + /* Triangles rendered on non-zero fast cleared images with 8xMSAA can get + * black pixels around them on Haswell. + */ + if (devinfo->ver == 7 && image->vk.samples == 8) { + return false; + } + /* If we don't have an AUX surface where fast clears apply, we can return * early. */ diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c b/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c index 58fb1c74c..03a9338df 100644 --- a/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c +++ b/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c @@ -108,7 +108,7 @@ anv_shader_stage_to_nir(struct anv_device *device, .subgroup_shuffle = true, .subgroup_vote = true, .tessellation = true, - .transform_feedback = pdevice->info.ver >= 8, + .transform_feedback = true, .variable_pointers = true, .vk_memory_model = true, .vk_memory_model_device_scope = true, diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_private.h b/lib/mesa/src/intel/vulkan_hasvk/anv_private.h index 0367cefec..39663e858 100644 --- a/lib/mesa/src/intel/vulkan_hasvk/anv_private.h +++ b/lib/mesa/src/intel/vulkan_hasvk/anv_private.h @@ -1074,7 +1074,7 @@ struct anv_queue { /** Synchronization object for debug purposes (DEBUG_SYNC) */ struct vk_sync *sync; - struct intel_ds_queue * ds; + struct intel_ds_queue ds; }; struct nir_xfb_info; @@ -2314,14 +2314,25 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device, pipe_bits |= 
ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; } break; - case VK_ACCESS_2_SHADER_READ_BIT: case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT: case VK_ACCESS_2_TRANSFER_READ_BIT: + case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT: /* Transitioning a buffer to be read through the sampler, so * invalidate the texture cache, we don't want any stale data. */ pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; break; + case VK_ACCESS_2_SHADER_READ_BIT: + /* Same as VK_ACCESS_2_UNIFORM_READ_BIT and + * VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above + */ + pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | + ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT; + if (!device->physical->compiler->indirect_ubos_use_sampler) { + pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT; + pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT; + } + break; case VK_ACCESS_2_MEMORY_READ_BIT: /* Transitioning a buffer for generic read, invalidate all the * caches. @@ -2360,6 +2371,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device, */ pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT; break; + case VK_ACCESS_2_SHADER_STORAGE_READ_BIT: default: break; /* Nothing to do */ } diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c b/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c index 35a744dcb..3a35aefe4 100644 --- a/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c +++ b/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c @@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue, if (!flush) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id); + intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id); result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type, 0, 0, &flush->sync); @@ -284,8 +284,7 @@ anv_device_utrace_init(struct anv_device *device) for (uint32_t q = 0; q < device->queue_count; q++) { struct anv_queue *queue = &device->queues[q]; - queue->ds = - 
intel_ds_device_add_queue(&device->ds, "%s%u", + intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u", intel_engines_class_to_string(queue->family->engine_class), queue->index_in_family); } diff --git a/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c index df815c55b..9a60486fa 100644 --- a/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c +++ b/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c @@ -690,7 +690,8 @@ vk_image_layout_stencil_write_optimal(VkImageLayout layout) { return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL || - layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL; + layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL || + layout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL; } #endif @@ -721,6 +722,7 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer, * - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL * - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL * - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL + * - VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL * * For general, we have no nice opportunity to transition so we do the copy * to the shadow unconditionally at the end of the subpass. 
For transfer @@ -6701,6 +6703,7 @@ void genX(CmdEndRendering)( * - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL * - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL * - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL + * - VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL * - VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT * * For general, we have no nice opportunity to transition so we do the copy diff --git a/lib/mesa/src/loader/meson.build b/lib/mesa/src/loader/meson.build index 6334cb981..e5a8ceac1 100644 --- a/lib/mesa/src/loader/meson.build +++ b/lib/mesa/src/loader/meson.build @@ -28,7 +28,7 @@ if with_platform_x11 and with_dri3 include_directories : [inc_include, inc_src], dependencies : [ dep_libdrm, dep_xcb_dri3, dep_xcb_present, dep_xcb_sync, dep_xshmfence, - dep_xcb_xfixes, + dep_xcb_xfixes, dep_xcb_xrandr, ], build_by_default : false, ) diff --git a/lib/mesa/src/mapi/glapi/gen/meson.build b/lib/mesa/src/mapi/glapi/gen/meson.build index 8866701e4..7baa275ed 100644 --- a/lib/mesa/src/mapi/glapi/gen/meson.build +++ b/lib/mesa/src/mapi/glapi/gen/meson.build @@ -22,7 +22,6 @@ glapi_gen_gl_xml = files('../registry/gl.xml') glapi_gen_mapi_deps = [ glapi_gen_gl_xml, genCommon_py, - glapi_gen_gl_xml, ] gl_and_es_api_files = files('gl_and_es_API.xml') diff --git a/lib/mesa/src/mesa/main/consts_exts.h b/lib/mesa/src/mesa/main/consts_exts.h index 11221ca58..105c762e1 100644 --- a/lib/mesa/src/mesa/main/consts_exts.h +++ b/lib/mesa/src/mesa/main/consts_exts.h @@ -936,6 +936,9 @@ struct gl_constants /** GL_ARB_get_program_binary */ GLuint NumProgramBinaryFormats; + /** GL_ARB_gl_spirv */ + GLuint NumShaderBinaryFormats; + /** GL_NV_conservative_raster */ GLuint MaxSubpixelPrecisionBiasBits; diff --git a/lib/mesa/src/mesa/main/draw.c b/lib/mesa/src/mesa/main/draw.c index 87d88a81c..d97ff82b8 100644 --- a/lib/mesa/src/mesa/main/draw.c +++ b/lib/mesa/src/mesa/main/draw.c @@ -1971,9 +1971,11 @@ _mesa_validated_multidrawelements(struct gl_context *ctx, GLenum mode, 
min_index_ptr = (uintptr_t) indices[0]; max_index_ptr = 0; for (i = 0; i < primcount; i++) { - min_index_ptr = MIN2(min_index_ptr, (uintptr_t) indices[i]); - max_index_ptr = MAX2(max_index_ptr, (uintptr_t) indices[i] + - (count[i] << index_size_shift)); + if (count[i]) { + min_index_ptr = MIN2(min_index_ptr, (uintptr_t) indices[i]); + max_index_ptr = MAX2(max_index_ptr, (uintptr_t) indices[i] + + (count[i] << index_size_shift)); + } } /* Check if we can handle this thing as a bunch of index offsets from the @@ -1984,7 +1986,8 @@ _mesa_validated_multidrawelements(struct gl_context *ctx, GLenum mode, */ if (index_size_shift) { for (i = 0; i < primcount; i++) { - if ((((uintptr_t) indices[i] - min_index_ptr) & + if (count[i] && + (((uintptr_t)indices[i] - min_index_ptr) & ((1 << index_size_shift) - 1)) != 0) { fallback = true; break; diff --git a/lib/mesa/src/mesa/state_tracker/st_extensions.h b/lib/mesa/src/mesa/state_tracker/st_extensions.h index 7bf1aa8c8..fdfac7ece 100644 --- a/lib/mesa/src/mesa/state_tracker/st_extensions.h +++ b/lib/mesa/src/mesa/state_tracker/st_extensions.h @@ -35,7 +35,8 @@ struct pipe_screen; extern void st_init_limits(struct pipe_screen *screen, struct gl_constants *c, - struct gl_extensions *extensions); + struct gl_extensions *extensions, + gl_api api); extern void st_init_extensions(struct pipe_screen *screen, struct gl_constants *consts, diff --git a/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c b/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c index fe7b7b212..660933fed 100644 --- a/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c +++ b/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c @@ -645,7 +645,7 @@ dzn_pipeline_layout_create(struct dzn_device *device, D3D12_ROOT_PARAMETER1 root_params[MAX_ROOT_PARAMS] = { 0 }; D3D12_DESCRIPTOR_RANGE1 *range_ptr = ranges; D3D12_ROOT_PARAMETER1 *root_param; - uint32_t root_dwords = 0; + ASSERTED uint32_t root_dwords = 0; for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) { 
dzn_foreach_pool_type (type) { diff --git a/lib/mesa/src/panfrost/lib/pan_props.c b/lib/mesa/src/panfrost/lib/pan_props.c index 048954b4c..e37c68258 100644 --- a/lib/mesa/src/panfrost/lib/pan_props.c +++ b/lib/mesa/src/panfrost/lib/pan_props.c @@ -341,6 +341,7 @@ panfrost_close_device(struct panfrost_device *dev) if (dev->model) { pthread_mutex_destroy(&dev->submit_lock); panfrost_bo_unreference(dev->tiler_heap); + panfrost_bo_unreference(dev->sample_positions); panfrost_bo_cache_evict_all(dev); pthread_mutex_destroy(&dev->bo_cache.lock); util_sparse_array_finish(&dev->bo_map); diff --git a/lib/mesa/src/util/00-mesa-defaults.conf b/lib/mesa/src/util/00-mesa-defaults.conf index bee0e9ae2..6831de3ba 100644 --- a/lib/mesa/src/util/00-mesa-defaults.conf +++ b/lib/mesa/src/util/00-mesa-defaults.conf @@ -948,6 +948,13 @@ TODO: document the other workarounds. <option name="radeonsi_zerovram" value="true" /> </application> </device> + <device driver="zink"> + <application name="Hyperdimension Neptunia Re;Birth1" executable="NeptuniaReBirth1.exe"> + <!-- glthread uploads need too much vram and exceed 32bit VA limit --> + <!-- https://gitlab.freedesktop.org/mesa/mesa/-/issues/8333 --> + <option name="mesa_glthread" value="false"/> + </application> + </device> <device driver="iris"> <application name="Middle Earth: Shadow of Mordor" executable="ShadowOfMordor"> <option name="vs_position_always_invariant" value="true" /> @@ -987,6 +994,12 @@ TODO: document the other workarounds. 
<application name="Batman™: Arkham Knight" executable="BatmanAK.exe"> <option name="anv_sample_mask_out_opengl_behaviour" value="true"/> </application> + <application name="Rise of the Tomb Raider" executable="RiseOfTheTombRaider"> + <option name="limit_trig_input_range" value="true" /> + </application> + <application name="Rise of the Tomb Raider" executable="ROTTR.exe"> + <option name="limit_trig_input_range" value="true" /> + </application> </device> <device driver="r600"> diff --git a/lib/mesa/src/util/disk_cache_os.c b/lib/mesa/src/util/disk_cache_os.c index 6ef0e0dd4..158501045 100644 --- a/lib/mesa/src/util/disk_cache_os.c +++ b/lib/mesa/src/util/disk_cache_os.c @@ -200,12 +200,24 @@ choose_lru_file_matching(const char *dir_path, if (dir == NULL) return NULL; + const int dir_fd = dirfd(dir); + /* First count the number of files in the directory */ unsigned total_file_count = 0; while ((dir_ent = readdir(dir)) != NULL) { +#ifdef HAVE_DIRENT_D_TYPE if (dir_ent->d_type == DT_REG) { /* If the entry is a regular file */ total_file_count++; } +#else + struct stat st; + + if (fstatat(dir_fd, dir_ent->d_name, &st, AT_SYMLINK_NOFOLLOW) == 0) { + if (S_ISREG(st.st_mode)) { + total_file_count++; + } + } +#endif } /* Reset to the start of the directory */ @@ -225,7 +237,7 @@ choose_lru_file_matching(const char *dir_path, break; struct stat sb; - if (fstatat(dirfd(dir), dir_ent->d_name, &sb, 0) == 0) { + if (fstatat(dir_fd, dir_ent->d_name, &sb, 0) == 0) { struct lru_file *entry = NULL; if (!list_is_empty(lru_file_list)) entry = list_first_entry(lru_file_list, struct lru_file, node); diff --git a/lib/mesa/src/virtio/vulkan/vn_physical_device.c b/lib/mesa/src/virtio/vulkan/vn_physical_device.c index e45fd5e48..9ffff258e 100644 --- a/lib/mesa/src/virtio/vulkan/vn_physical_device.c +++ b/lib/mesa/src/virtio/vulkan/vn_physical_device.c @@ -1724,6 +1724,7 @@ vn_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, CASE(CUSTOM_BORDER_COLOR_FEATURES_EXT, 
custom_border_color); CASE(DEPTH_CLIP_CONTROL_FEATURES_EXT, depth_clip_control); CASE(DEPTH_CLIP_ENABLE_FEATURES_EXT, depth_clip_enable); + CASE(IMAGE_VIEW_MIN_LOD_FEATURES_EXT, image_view_min_lod); CASE(INDEX_TYPE_UINT8_FEATURES_EXT, index_type_uint8); CASE(LINE_RASTERIZATION_FEATURES_EXT, line_rasterization); CASE(MULTI_DRAW_FEATURES_EXT, multi_draw); diff --git a/lib/mesa/src/vulkan/runtime/vk_graphics_state.c b/lib/mesa/src/vulkan/runtime/vk_graphics_state.c index 80889b6ed..3679281e1 100644 --- a/lib/mesa/src/vulkan/runtime/vk_graphics_state.c +++ b/lib/mesa/src/vulkan/runtime/vk_graphics_state.c @@ -1156,10 +1156,25 @@ vk_graphics_pipeline_state_fill(const struct vk_device *device, */ VkGraphicsPipelineLibraryFlagsEXT lib; - if (info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) { - const VkGraphicsPipelineLibraryCreateInfoEXT *gfx_lib_info = - vk_find_struct_const(info->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT); - lib = gfx_lib_info->flags; + const VkGraphicsPipelineLibraryCreateInfoEXT *gpl_info = + vk_find_struct_const(info->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT); + const VkPipelineLibraryCreateInfoKHR *lib_info = + vk_find_struct_const(info->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR); + + if (gpl_info) { + lib = gpl_info->flags; + } else if ((lib_info && lib_info->libraryCount > 0) || + (info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) { + /* + * From the Vulkan 1.3.210 spec: + * "If this structure is omitted, and either VkGraphicsPipelineCreateInfo::flags + * includes VK_PIPELINE_CREATE_LIBRARY_BIT_KHR or the + * VkGraphicsPipelineCreateInfo::pNext chain includes a + * VkPipelineLibraryCreateInfoKHR structure with a libraryCount greater than 0, + * it is as if flags is 0. Otherwise if this structure is omitted, it is as if + * flags includes all possible subsets of the graphics pipeline." + */ + lib = 0; } else { /* We're building a complete pipeline. 
From the Vulkan 1.3.218 spec: * @@ -2527,7 +2542,7 @@ vk_common_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t a = firstAttachment + i; assert(a < ARRAY_SIZE(dyn->cb.attachments)); - SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS, + SET_DYN_VALUE(dyn, CB_WRITE_MASKS, cb.attachments[a].write_mask, pColorWriteMasks[i]); } } diff --git a/lib/mesa/src/vulkan/wsi/wsi_common_display.c b/lib/mesa/src/vulkan/wsi/wsi_common_display.c index 0e5d27278..4d92cd13c 100644 --- a/lib/mesa/src/vulkan/wsi/wsi_common_display.c +++ b/lib/mesa/src/vulkan/wsi/wsi_common_display.c @@ -294,6 +294,8 @@ wsi_display_alloc_connector(struct wsi_display *wsi, struct wsi_display_connector *connector = vk_zalloc(wsi->alloc, sizeof (struct wsi_display_connector), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!connector) + return NULL; connector->id = connector_id; connector->wsi = wsi; |