summaryrefslogtreecommitdiff
path: root/lib/mesa/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src')
-rw-r--r--lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h6
-rw-r--r--lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp3
-rw-r--r--lib/mesa/src/amd/ci/radeonsi-raven-fails.txt1
-rw-r--r--lib/mesa/src/amd/common/ac_nir_lower_ngg.c50
-rw-r--r--lib/mesa/src/amd/common/ac_rgp.c18
-rw-r--r--lib/mesa/src/amd/common/ac_shadowed_regs.c4
-rw-r--r--lib/mesa/src/amd/common/amd_family.c6
-rw-r--r--lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp25
-rw-r--r--lib/mesa/src/amd/compiler/aco_opcodes.py180
-rw-r--r--lib/mesa/src/amd/compiler/aco_reduce_assign.cpp45
-rw-r--r--lib/mesa/src/amd/compiler/aco_register_allocation.cpp7
-rw-r--r--lib/mesa/src/amd/compiler/aco_scheduler.cpp2
-rw-r--r--lib/mesa/src/amd/compiler/aco_spill.cpp129
-rw-r--r--lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp9
-rw-r--r--lib/mesa/src/amd/llvm/ac_nir_to_llvm.c2
-rw-r--r--lib/mesa/src/amd/registers/gfx11.json8
-rw-r--r--lib/mesa/src/amd/registers/parse_kernel_headers.py28
-rw-r--r--lib/mesa/src/amd/vulkan/layers/radv_rage2.c54
-rw-r--r--lib/mesa/src/amd/vulkan/meson.build12
-rw-r--r--lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c7
-rw-r--r--lib/mesa/src/amd/vulkan/vulkan-android.sym16
-rw-r--r--lib/mesa/src/broadcom/common/v3d_limits.h2
-rw-r--r--lib/mesa/src/broadcom/vulkan/v3dv_cl.c18
-rw-r--r--lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c7
-rw-r--r--lib/mesa/src/broadcom/vulkan/v3dv_private.h3
-rw-r--r--lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c16
-rw-r--r--lib/mesa/src/compiler/nir/nir_deref.c7
-rw-r--r--lib/mesa/src/compiler/nir/nir_range_analysis.c52
-rw-r--r--lib/mesa/src/etnaviv/drm/etnaviv_bo.c4
-rw-r--r--lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c2
-rw-r--r--lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c5
-rw-r--r--lib/mesa/src/freedreno/vulkan/tu_pipeline.c3
-rw-r--r--lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c5
-rw-r--r--lib/mesa/src/gallium/drivers/crocus/crocus_context.c2
-rw-r--r--lib/mesa/src/gallium/drivers/crocus/crocus_resource.c7
-rw-r--r--lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp14
-rw-r--r--lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h3
-rw-r--r--lib/mesa/src/gallium/drivers/iris/iris_batch.c6
-rw-r--r--lib/mesa/src/gallium/drivers/iris/iris_batch.h2
-rw-r--r--lib/mesa/src/gallium/drivers/iris/iris_state.c15
-rw-r--r--lib/mesa/src/gallium/drivers/iris/iris_utrace.c7
-rw-r--r--lib/mesa/src/gallium/drivers/lima/lima_resource.c26
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h1
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c17
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c2
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c16
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c3
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c3
-rw-r--r--lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp5
-rw-r--r--lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json50
-rw-r--r--lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt30
-rw-r--r--lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt3
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_blit.c11
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_bo.h1
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_clear.c4
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_compiler.c70
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_context.c32
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_descriptors.c14
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_kopper.c4
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_program.c55
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_program.h8
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_query.c2
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_resource.c27
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_screen.c6
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h17
-rw-r--r--lib/mesa/src/gallium/drivers/zink/zink_types.h1
-rw-r--r--lib/mesa/src/gallium/frontends/dri/kopper.c5
-rw-r--r--lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c2
-rw-r--r--lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c2
-rw-r--r--lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs6
-rw-r--r--lib/mesa/src/gallium/frontends/va/picture_vp9.c3
-rw-r--r--lib/mesa/src/intel/compiler/brw_mesh.cpp2
-rw-r--r--lib/mesa/src/intel/ds/intel_driver_ds.cc21
-rw-r--r--lib/mesa/src/intel/ds/intel_driver_ds.h15
-rw-r--r--lib/mesa/src/intel/genxml/gen12.xml2
-rw-r--r--lib/mesa/src/intel/genxml/gen125.xml4
-rw-r--r--lib/mesa/src/intel/vulkan/anv_utrace.c5
-rw-r--r--lib/mesa/src/intel/vulkan/genX_acceleration_structure.c20
-rw-r--r--lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c42
-rw-r--r--lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h11
-rw-r--r--lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c4
-rw-r--r--lib/mesa/src/intel/vulkan_hasvk/anv_image.c7
-rw-r--r--lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c2
-rw-r--r--lib/mesa/src/intel/vulkan_hasvk/anv_private.h16
-rw-r--r--lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c5
-rw-r--r--lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c5
-rw-r--r--lib/mesa/src/loader/meson.build2
-rw-r--r--lib/mesa/src/mapi/glapi/gen/meson.build1
-rw-r--r--lib/mesa/src/mesa/main/consts_exts.h3
-rw-r--r--lib/mesa/src/mesa/main/draw.c11
-rw-r--r--lib/mesa/src/mesa/state_tracker/st_extensions.h3
-rw-r--r--lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c2
-rw-r--r--lib/mesa/src/panfrost/lib/pan_props.c1
-rw-r--r--lib/mesa/src/util/00-mesa-defaults.conf13
-rw-r--r--lib/mesa/src/util/disk_cache_os.c14
-rw-r--r--lib/mesa/src/virtio/vulkan/vn_physical_device.c1
-rw-r--r--lib/mesa/src/vulkan/runtime/vk_graphics_state.c25
-rw-r--r--lib/mesa/src/vulkan/wsi/wsi_common_display.c2
98 files changed, 951 insertions, 506 deletions
diff --git a/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h b/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h
index 36d39e5e4..34c16e2d5 100644
--- a/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h
+++ b/lib/mesa/src/amd/addrlib/src/amdgpu_asic_addr.h
@@ -116,7 +116,8 @@
#define AMDGPU_GFX1101_RANGE 0x20, 0xFF //# 32 <= x < 255
#define AMDGPU_GFX1102_RANGE 0x10, 0x20 //# 16 <= x < 32
-#define AMDGPU_GFX1103_RANGE 0x01, 0xFF //# 1 <= x < max
+#define AMDGPU_GFX1103_R1_RANGE 0x01, 0x10 //# 1 <= x < 16
+#define AMDGPU_GFX1103_R2_RANGE 0x80, 0xFF //# 128 <= x < max
#define AMDGPU_REMBRANDT_RANGE 0x01, 0xFF //# 01 <= x < 255
@@ -187,7 +188,8 @@
#define ASICREV_IS_GFX1100(r) ASICREV_IS(r, GFX1100)
#define ASICREV_IS_GFX1101(r) ASICREV_IS(r, GFX1101)
#define ASICREV_IS_GFX1102(r) ASICREV_IS(r, GFX1102)
-#define ASICREV_IS_GFX1103(r) ASICREV_IS(r, GFX1103)
+#define ASICREV_IS_GFX1103_R1(r) ASICREV_IS(r, GFX1103_R1)
+#define ASICREV_IS_GFX1103_R2(r) ASICREV_IS(r, GFX1103_R2)
#define ASICREV_IS_REMBRANDT(r) ASICREV_IS(r, REMBRANDT)
diff --git a/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp b/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp
index 9adc28a63..af48e7716 100644
--- a/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp
+++ b/lib/mesa/src/amd/addrlib/src/gfx11/gfx11addrlib.cpp
@@ -752,9 +752,6 @@ ChipFamily Gfx11Lib::HwlConvertChipFamily(
}
break;
case FAMILY_GFX1103:
- if (ASICREV_IS_GFX1103(chipRevision))
- {
- }
break;
default:
ADDR_ASSERT(!"Unknown chip family");
diff --git a/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt b/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt
index a66c79b96..59844a339 100644
--- a/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt
+++ b/lib/mesa/src/amd/ci/radeonsi-raven-fails.txt
@@ -1,6 +1,5 @@
glx@glx-make-current,Crash
glx@glx-multi-window-single-context,Fail
-glx@glx-swap-event_async,Fail
glx@glx-swap-pixmap-bad,Fail
glx@glx-visuals-depth -pixmap,Crash
glx@glx-visuals-stencil -pixmap,Crash
diff --git a/lib/mesa/src/amd/common/ac_nir_lower_ngg.c b/lib/mesa/src/amd/common/ac_nir_lower_ngg.c
index cdc7ad05b..901245b35 100644
--- a/lib/mesa/src/amd/common/ac_nir_lower_ngg.c
+++ b/lib/mesa/src/amd/common/ac_nir_lower_ngg.c
@@ -468,6 +468,29 @@ has_input_primitive(nir_builder *b)
}
static void
+nogs_prim_gen_query(nir_builder *b, lower_ngg_nogs_state *s)
+{
+ if (!s->options->has_gen_prim_query)
+ return;
+
+ nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b));
+ {
+ /* Activate only 1 lane and add the number of primitives to query result. */
+ nir_if *if_elected = nir_push_if(b, nir_elect(b, 1));
+ {
+ /* Number of input primitives in the current wave. */
+ nir_ssa_def *num_input_prims = nir_ubfe(b, nir_load_merged_wave_info_amd(b),
+ nir_imm_int(b, 8), nir_imm_int(b, 8));
+
+ /* Add to stream 0 primitive generated counter. */
+ nir_atomic_add_gen_prim_count_amd(b, num_input_prims, .stream_id = 0);
+ }
+ nir_pop_if(b, if_elected);
+ }
+ nir_pop_if(b, if_shader_query);
+}
+
+static void
emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg)
{
nir_ssa_def *gs_thread =
@@ -506,23 +529,6 @@ emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def
arg = nir_iand(b, arg, mask);
}
- if (st->options->has_gen_prim_query) {
- nir_if *if_shader_query = nir_push_if(b, nir_load_prim_gen_query_enabled_amd(b));
- {
- /* Number of active GS threads. Each has 1 output primitive. */
- nir_ssa_def *num_gs_threads =
- nir_bit_count(b, nir_ballot(b, 1, st->options->wave_size, nir_imm_bool(b, true)));
- /* Activate only 1 lane and add the number of primitives to query result. */
- nir_if *if_elected = nir_push_if(b, nir_elect(b, 1));
- {
- /* Add to stream 0 primitive generated counter. */
- nir_atomic_add_gen_prim_count_amd(b, num_gs_threads, .stream_id = 0);
- }
- nir_pop_if(b, if_elected);
- }
- nir_pop_if(b, if_shader_query);
- }
-
nir_export_primitive_amd(b, arg);
}
nir_pop_if(b, if_gs_thread);
@@ -1373,6 +1379,9 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
nir_local_variable_create(impl, glsl_vec4_type(), "clip_vertex");
nogs_state->clipdist_neg_mask_var =
nir_local_variable_create(impl, glsl_uint8_t_type(), "clipdist_neg_mask");
+
+ /* init mask to 0 */
+ nir_store_var(b, nogs_state->clipdist_neg_mask_var, nir_imm_intN_t(b, 0, 8), 1);
}
/* Top part of the culling shader (aka. position shader part)
@@ -1382,8 +1391,6 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
* The position output is stored into a temporary variable, and reloaded later.
*/
- b->cursor = nir_before_cf_list(&impl->body);
-
nir_ssa_def *es_thread = has_input_vertex(b);
nir_if *if_es_thread = nir_push_if(b, es_thread);
{
@@ -2150,6 +2157,11 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
ngg_nogs_init_vertex_indices_vars(b, impl, &state);
+ /* Emit primitives generated query code here, so that
+ * it executes before culling and isn't in the extracted CF.
+ */
+ nogs_prim_gen_query(b, &state);
+
if (!options->can_cull) {
/* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */
if (!(options->passthrough && options->family >= CHIP_NAVI23)) {
diff --git a/lib/mesa/src/amd/common/ac_rgp.c b/lib/mesa/src/amd/common/ac_rgp.c
index d323b4b84..bbbf985c2 100644
--- a/lib/mesa/src/amd/common/ac_rgp.c
+++ b/lib/mesa/src/amd/common/ac_rgp.c
@@ -286,6 +286,7 @@ enum sqtt_memory_type
SQTT_MEMORY_TYPE_DDR2 = 0x2,
SQTT_MEMORY_TYPE_DDR3 = 0x3,
SQTT_MEMORY_TYPE_DDR4 = 0x4,
+ SQTT_MEMORY_TYPE_DDR5 = 0x5,
SQTT_MEMORY_TYPE_GDDR3 = 0x10,
SQTT_MEMORY_TYPE_GDDR4 = 0x11,
SQTT_MEMORY_TYPE_GDDR5 = 0x12,
@@ -375,17 +376,22 @@ static enum sqtt_memory_type ac_vram_type_to_sqtt_memory_type(uint32_t vram_type
return SQTT_MEMORY_TYPE_DDR3;
case AMD_VRAM_TYPE_DDR4:
return SQTT_MEMORY_TYPE_DDR4;
+ case AMD_VRAM_TYPE_DDR5:
+ return SQTT_MEMORY_TYPE_DDR5;
+ case AMD_VRAM_TYPE_GDDR3:
+ return SQTT_MEMORY_TYPE_GDDR3;
+ case AMD_VRAM_TYPE_GDDR4:
+ return SQTT_MEMORY_TYPE_GDDR4;
case AMD_VRAM_TYPE_GDDR5:
return SQTT_MEMORY_TYPE_GDDR5;
- case AMD_VRAM_TYPE_HBM:
- return SQTT_MEMORY_TYPE_HBM;
case AMD_VRAM_TYPE_GDDR6:
return SQTT_MEMORY_TYPE_GDDR6;
- case AMD_VRAM_TYPE_DDR5:
+ case AMD_VRAM_TYPE_HBM:
+ return SQTT_MEMORY_TYPE_HBM;
+ case AMD_VRAM_TYPE_LPDDR4:
+ return SQTT_MEMORY_TYPE_LPDDR4;
+ case AMD_VRAM_TYPE_LPDDR5:
return SQTT_MEMORY_TYPE_LPDDR5;
- case AMD_VRAM_TYPE_GDDR1:
- case AMD_VRAM_TYPE_GDDR3:
- case AMD_VRAM_TYPE_GDDR4:
default:
unreachable("Invalid vram type");
}
diff --git a/lib/mesa/src/amd/common/ac_shadowed_regs.c b/lib/mesa/src/amd/common/ac_shadowed_regs.c
index 65a49bd9c..a532d2d99 100644
--- a/lib/mesa/src/amd/common/ac_shadowed_regs.c
+++ b/lib/mesa/src/amd/common/ac_shadowed_regs.c
@@ -1120,8 +1120,8 @@ static const struct ac_reg_range Gfx11UserConfigShadowRange[] =
R_03092C_GE_MULTI_PRIM_IB_RESET_EN - R_030924_GE_MIN_VTX_INDX + 4,
},
{
- R_008974_VGT_NUM_INSTANCES,
- R_030940_VGT_TF_MEMORY_BASE - R_008974_VGT_NUM_INSTANCES + 4,
+ R_030934_VGT_NUM_INSTANCES,
+ R_030940_VGT_TF_MEMORY_BASE - R_030934_VGT_NUM_INSTANCES + 4,
},
{
R_03097C_GE_STEREO_CNTL,
diff --git a/lib/mesa/src/amd/common/amd_family.c b/lib/mesa/src/amd/common/amd_family.c
index be6575791..90ec21cb9 100644
--- a/lib/mesa/src/amd/common/amd_family.c
+++ b/lib/mesa/src/amd/common/amd_family.c
@@ -108,8 +108,10 @@ const char *ac_get_family_name(enum radeon_family family)
return "GFX1101";
case CHIP_GFX1102:
return "GFX1102";
- case CHIP_GFX1103:
- return "GFX1103";
+ case CHIP_GFX1103_R1:
+ return "GFX1103_R1";
+ case CHIP_GFX1103_R2:
+ return "GFX1103_R2";
default:
unreachable("Unknown GPU family");
}
diff --git a/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp b/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp
index 2269324e7..068ff4f4b 100644
--- a/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/lib/mesa/src/amd/compiler/aco_insert_NOPs.cpp
@@ -1044,6 +1044,9 @@ struct LdsDirectVALUHazardGlobalState {
struct LdsDirectVALUHazardBlockState {
unsigned num_valu = 0;
bool has_trans = false;
+
+ unsigned num_instrs = 0;
+ unsigned num_blocks = 0;
};
bool
@@ -1076,6 +1079,14 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state
if (parse_vdst_wait(instr) == 0)
return true;
+ block_state.num_instrs++;
+ if (block_state.num_instrs > 256 || block_state.num_blocks > 32) {
+ /* Exit to limit compile times and set wait_vdst to be safe. */
+ global_state.wait_vdst =
+ MIN2(global_state.wait_vdst, block_state.has_trans ? 0 : block_state.num_valu);
+ return true;
+ }
+
return block_state.num_valu >= global_state.wait_vdst;
}
@@ -1089,6 +1100,8 @@ handle_lds_direct_valu_hazard_block(LdsDirectVALUHazardGlobalState& global_state
global_state.loop_headers_visited.insert(block->index);
}
+ block_state.num_blocks++;
+
return true;
}
@@ -1129,6 +1142,9 @@ struct VALUPartialForwardingHazardBlockState {
enum VALUPartialForwardingHazardState state = nothing_written;
unsigned num_valu_since_read = 0;
unsigned num_valu_since_write = 0;
+
+ unsigned num_instrs = 0;
+ unsigned num_blocks = 0;
};
bool
@@ -1191,6 +1207,13 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta
if (block_state.num_vgprs_read == 0)
return true; /* All VGPRs have been written and a hazard was never found. */
+ block_state.num_instrs++;
+ if (block_state.num_instrs > 256 || block_state.num_blocks > 32) {
+ /* Exit to limit compile times and set hazard_found=true to be safe. */
+ global_state.hazard_found = true;
+ return true;
+ }
+
return false;
}
@@ -1205,6 +1228,8 @@ handle_valu_partial_forwarding_hazard_block(VALUPartialForwardingHazardGlobalSta
global_state.loop_headers_visited.insert(block->index);
}
+ block_state.num_blocks++;
+
return true;
}
diff --git a/lib/mesa/src/amd/compiler/aco_opcodes.py b/lib/mesa/src/amd/compiler/aco_opcodes.py
index 2c11cf255..862696b11 100644
--- a/lib/mesa/src/amd/compiler/aco_opcodes.py
+++ b/lib/mesa/src/amd/compiler/aco_opcodes.py
@@ -698,85 +698,85 @@ for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name) in SMEM:
# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8
VOP2 = {
- # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input/output modifiers
- (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False),
- (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False),
- (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True),
- (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True),
- (0x05, 0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True),
- (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True), #GFX6,7,10
- ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11
- (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True), #v_mul_dx9_zero_f32 in GFX11
- (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True),
- (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False),
- (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False),
- (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False),
- (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False),
- ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False),
- (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True),
- (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True),
- (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True),
- (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True),
- (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False),
- (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", False),
- (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False),
- (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False),
- (0x15, 0x15, -1, -1, -1, -1, "v_lshr_b32", False),
- (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False),
- (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False),
- (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False),
- (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False),
- (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False),
- (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False),
- (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False),
- (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False),
- ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False),
- (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True),
- (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False),
- (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False),
- (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False),
- (0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False), # VOP3B only in RDNA
- (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False), # VOP3B only in RDNA
- (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA
- (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA
- (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA
- (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA
- ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True),
- ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", True),
- ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", True),
- (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True), #v_cvt_pk_rtz_f16_f32 in GFX11
- ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True),
- ( -1, -1, 0x20, 0x20, 0x33, 0x33, "v_sub_f16", True),
- ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True),
- ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True),
- ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True),
- ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False),
- ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False),
- ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False),
- ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False),
- ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False),
- ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False),
- ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False),
- ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False),
- ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False),
- ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True),
- ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True),
- ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False),
- ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False),
- ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False),
- ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False),
- ( -1, -1, 0x33, 0x33, 0x3b, 0x3b, "v_ldexp_f16", False),
- ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False), # called v_add_nc_u32 in RDNA
- ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False), # called v_sub_nc_u32 in RDNA
- ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False), # called v_subrev_nc_u32 in RDNA
- ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", False),
- ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False),
- ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False),
- ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False),
- ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False), #v_dot2acc_f32_f16 in GFX11
+ # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name, input modifiers, output modifiers
+ (0x01, 0x01, -1, -1, -1, -1, "v_readlane_b32", False, False),
+ (0x02, 0x02, -1, -1, -1, -1, "v_writelane_b32", False, False),
+ (0x03, 0x03, 0x01, 0x01, 0x03, 0x03, "v_add_f32", True, True),
+ (0x04, 0x04, 0x02, 0x02, 0x04, 0x04, "v_sub_f32", True, True),
+ (0x05, 0x05, 0x03, 0x03, 0x05, 0x05, "v_subrev_f32", True, True),
+ (0x06, 0x06, -1, -1, 0x06, -1, "v_mac_legacy_f32", True, True), #GFX6,7,10
+ ( -1, -1, -1, -1, 0x06, 0x06, "v_fmac_legacy_f32", True, True), #GFX10.3+, v_fmac_dx9_zero_f32 in GFX11
+ (0x07, 0x07, 0x04, 0x04, 0x07, 0x07, "v_mul_legacy_f32", True, True), #v_mul_dx9_zero_f32 in GFX11
+ (0x08, 0x08, 0x05, 0x05, 0x08, 0x08, "v_mul_f32", True, True),
+ (0x09, 0x09, 0x06, 0x06, 0x09, 0x09, "v_mul_i32_i24", False, False),
+ (0x0a, 0x0a, 0x07, 0x07, 0x0a, 0x0a, "v_mul_hi_i32_i24", False, False),
+ (0x0b, 0x0b, 0x08, 0x08, 0x0b, 0x0b, "v_mul_u32_u24", False, False),
+ (0x0c, 0x0c, 0x09, 0x09, 0x0c, 0x0c, "v_mul_hi_u32_u24", False, False),
+ ( -1, -1, -1, 0x39, 0x0d, -1, "v_dot4c_i32_i8", False, False),
+ (0x0d, 0x0d, -1, -1, -1, -1, "v_min_legacy_f32", True, True),
+ (0x0e, 0x0e, -1, -1, -1, -1, "v_max_legacy_f32", True, True),
+ (0x0f, 0x0f, 0x0a, 0x0a, 0x0f, 0x0f, "v_min_f32", True, True),
+ (0x10, 0x10, 0x0b, 0x0b, 0x10, 0x10, "v_max_f32", True, True),
+ (0x11, 0x11, 0x0c, 0x0c, 0x11, 0x11, "v_min_i32", False, False),
+ (0x12, 0x12, 0x0d, 0x0d, 0x12, 0x12, "v_max_i32", False, False),
+ (0x13, 0x13, 0x0e, 0x0e, 0x13, 0x13, "v_min_u32", False, False),
+ (0x14, 0x14, 0x0f, 0x0f, 0x14, 0x14, "v_max_u32", False, False),
+ (0x15, 0x15, -1, -1, -1, -1, "v_lshr_b32", False, False),
+ (0x16, 0x16, 0x10, 0x10, 0x16, 0x19, "v_lshrrev_b32", False, False),
+ (0x17, 0x17, -1, -1, -1, -1, "v_ashr_i32", False, False),
+ (0x18, 0x18, 0x11, 0x11, 0x18, 0x1a, "v_ashrrev_i32", False, False),
+ (0x19, 0x19, -1, -1, -1, -1, "v_lshl_b32", False, False),
+ (0x1a, 0x1a, 0x12, 0x12, 0x1a, 0x18, "v_lshlrev_b32", False, False),
+ (0x1b, 0x1b, 0x13, 0x13, 0x1b, 0x1b, "v_and_b32", False, False),
+ (0x1c, 0x1c, 0x14, 0x14, 0x1c, 0x1c, "v_or_b32", False, False),
+ (0x1d, 0x1d, 0x15, 0x15, 0x1d, 0x1d, "v_xor_b32", False, False),
+ ( -1, -1, -1, -1, 0x1e, 0x1e, "v_xnor_b32", False, False),
+ (0x1f, 0x1f, 0x16, 0x16, 0x1f, -1, "v_mac_f32", True, True),
+ (0x20, 0x20, 0x17, 0x17, 0x20, -1, "v_madmk_f32", False, False),
+ (0x21, 0x21, 0x18, 0x18, 0x21, -1, "v_madak_f32", False, False),
+ (0x24, 0x24, -1, -1, -1, -1, "v_mbcnt_hi_u32_b32", False, False),
+ (0x25, 0x25, 0x19, 0x19, -1, -1, "v_add_co_u32", False, False), # VOP3B only in RDNA
+ (0x26, 0x26, 0x1a, 0x1a, -1, -1, "v_sub_co_u32", False, False), # VOP3B only in RDNA
+ (0x27, 0x27, 0x1b, 0x1b, -1, -1, "v_subrev_co_u32", False, False), # VOP3B only in RDNA
+ (0x28, 0x28, 0x1c, 0x1c, 0x28, 0x20, "v_addc_co_u32", False, False), # v_add_co_ci_u32 in RDNA
+ (0x29, 0x29, 0x1d, 0x1d, 0x29, 0x21, "v_subb_co_u32", False, False), # v_sub_co_ci_u32 in RDNA
+ (0x2a, 0x2a, 0x1e, 0x1e, 0x2a, 0x22, "v_subbrev_co_u32", False, False), # v_subrev_co_ci_u32 in RDNA
+ ( -1, -1, -1, -1, 0x2b, 0x2b, "v_fmac_f32", True, True),
+ ( -1, -1, -1, -1, 0x2c, 0x2c, "v_fmamk_f32", True, True),
+ ( -1, -1, -1, -1, 0x2d, 0x2d, "v_fmaak_f32", True, True),
+ (0x2f, 0x2f, -1, -1, 0x2f, 0x2f, "v_cvt_pkrtz_f16_f32", True, False), #v_cvt_pk_rtz_f16_f32 in GFX11
+ ( -1, -1, 0x1f, 0x1f, 0x32, 0x32, "v_add_f16", True, True),
+ ( -1, -1, 0x20, 0x20, 0x33, 0x33, "v_sub_f16", True, True),
+ ( -1, -1, 0x21, 0x21, 0x34, 0x34, "v_subrev_f16", True, True),
+ ( -1, -1, 0x22, 0x22, 0x35, 0x35, "v_mul_f16", True, True),
+ ( -1, -1, 0x23, 0x23, -1, -1, "v_mac_f16", True, True),
+ ( -1, -1, 0x24, 0x24, -1, -1, "v_madmk_f16", False, False),
+ ( -1, -1, 0x25, 0x25, -1, -1, "v_madak_f16", False, False),
+ ( -1, -1, 0x26, 0x26, -1, -1, "v_add_u16", False, False),
+ ( -1, -1, 0x27, 0x27, -1, -1, "v_sub_u16", False, False),
+ ( -1, -1, 0x28, 0x28, -1, -1, "v_subrev_u16", False, False),
+ ( -1, -1, 0x29, 0x29, -1, -1, "v_mul_lo_u16", False, False),
+ ( -1, -1, 0x2a, 0x2a, -1, -1, "v_lshlrev_b16", False, False),
+ ( -1, -1, 0x2b, 0x2b, -1, -1, "v_lshrrev_b16", False, False),
+ ( -1, -1, 0x2c, 0x2c, -1, -1, "v_ashrrev_i16", False, False),
+ ( -1, -1, 0x2d, 0x2d, 0x39, 0x39, "v_max_f16", True, True),
+ ( -1, -1, 0x2e, 0x2e, 0x3a, 0x3a, "v_min_f16", True, True),
+ ( -1, -1, 0x2f, 0x2f, -1, -1, "v_max_u16", False, False),
+ ( -1, -1, 0x30, 0x30, -1, -1, "v_max_i16", False, False),
+ ( -1, -1, 0x31, 0x31, -1, -1, "v_min_u16", False, False),
+ ( -1, -1, 0x32, 0x32, -1, -1, "v_min_i16", False, False),
+ ( -1, -1, 0x33, 0x33, 0x3b, 0x3b, "v_ldexp_f16", False, False),
+ ( -1, -1, -1, 0x34, 0x25, 0x25, "v_add_u32", False, False), # called v_add_nc_u32 in RDNA
+ ( -1, -1, -1, 0x35, 0x26, 0x26, "v_sub_u32", False, False), # called v_sub_nc_u32 in RDNA
+ ( -1, -1, -1, 0x36, 0x27, 0x27, "v_subrev_u32", False, False), # called v_subrev_nc_u32 in RDNA
+ ( -1, -1, -1, -1, 0x36, 0x36, "v_fmac_f16", False, False),
+ ( -1, -1, -1, -1, 0x37, 0x37, "v_fmamk_f16", False, False),
+ ( -1, -1, -1, -1, 0x38, 0x38, "v_fmaak_f16", False, False),
+ ( -1, -1, -1, -1, 0x3c, 0x3c, "v_pk_fmac_f16", False, False),
+ ( -1, -1, -1, 0x37, 0x02, 0x02, "v_dot2c_f32_f16", False, False), #v_dot2acc_f32_f16 in GFX11
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, modifiers) in VOP2:
- opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, modifiers, modifiers)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod) in VOP2:
+ opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP2, InstrClass.Valu32, in_mod, out_mod)
if True:
# v_cndmask_b32 can use input modifiers but not output modifiers
@@ -1173,18 +1173,18 @@ VOP3 = {
( -1, -1, -1, -1, -1, 0x25f, "v_minmax_f32", True, True),
( -1, -1, -1, -1, -1, 0x260, "v_maxmin_f16", True, True),
( -1, -1, -1, -1, -1, 0x261, "v_minmax_f16", True, True),
- ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", True, True),
- ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", True, True),
- ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", True, True),
- ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", True, True),
- ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", True, True),
- ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", True, True),
- ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, True),
- ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, True),
- ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", True, True),
- ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", True, True),
- ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", True, True),
- ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, True),
+ ( -1, -1, -1, -1, -1, 0x262, "v_maxmin_u32", False, False),
+ ( -1, -1, -1, -1, -1, 0x263, "v_minmax_u32", False, False),
+ ( -1, -1, -1, -1, -1, 0x264, "v_maxmin_i32", False, False),
+ ( -1, -1, -1, -1, -1, 0x265, "v_minmax_i32", False, False),
+ ( -1, -1, -1, -1, -1, 0x266, "v_dot2_f16_f16", False, False),
+ ( -1, -1, -1, -1, -1, 0x267, "v_dot2_bf16_bf16", False, False),
+ ( -1, -1, -1, -1, -1, 0x306, "v_cvt_pk_i16_f32", True, False),
+ ( -1, -1, -1, -1, -1, 0x307, "v_cvt_pk_u16_f32", True, False),
+ ( -1, -1, -1, -1, -1, 0x362, "v_and_b16", False, False),
+ ( -1, -1, -1, -1, -1, 0x363, "v_or_b16", False, False),
+ ( -1, -1, -1, -1, -1, 0x364, "v_xor_b16", False, False),
+ ( -1, -1, -1, -1, -1, 0x25d, "v_cndmask_b16", True, False),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, gfx11, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32):
opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod)
diff --git a/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp b/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp
index 3c31b468f..535e0315e 100644
--- a/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp
+++ b/lib/mesa/src/amd/compiler/aco_reduce_assign.cpp
@@ -63,31 +63,36 @@ setup_reduce_temp(Program* program)
Temp vtmp(0, RegClass(RegType::vgpr, maxSize).as_linear());
int inserted_at = -1;
int vtmp_inserted_at = -1;
- bool reduceTmp_in_loop = false;
bool vtmp_in_loop = false;
for (Block& block : program->blocks) {
- /* insert p_end_linear_vgpr after the outermost loop */
- if (reduceTmp_in_loop && block.loop_nest_depth == 0) {
- assert(inserted_at == (int)last_top_level_block_idx);
-
- aco_ptr<Instruction> end{create_instruction<Instruction>(
- aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_in_loop ? 2 : 1, 0)};
- end->operands[0] = Operand(reduceTmp);
- if (vtmp_in_loop)
- end->operands[1] = Operand(vtmp);
- /* insert after the phis of the loop exit block */
- std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
- while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
- ++it;
- block.instructions.insert(it, std::move(end));
- reduceTmp_in_loop = false;
- }
-
- if (block.kind & block_kind_top_level)
+ if (block.kind & block_kind_top_level) {
last_top_level_block_idx = block.index;
+ /* TODO: this could be improved in this case:
+ * start_linear_vgpr
+ * if (...) {
+ * use_linear_vgpr
+ * }
+ * end_linear_vgpr
+ * Here, the linear vgpr is used before any phi copies, so this isn't necessary.
+ */
+ if (inserted_at >= 0) {
+ aco_ptr<Instruction> end{create_instruction<Instruction>(
+ aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_inserted_at >= 0 ? 2 : 1, 0)};
+ end->operands[0] = Operand(reduceTmp);
+ if (vtmp_inserted_at >= 0)
+ end->operands[1] = Operand(vtmp);
+ /* insert after the phis of the block */
+ std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
+ while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
+ ++it;
+ block.instructions.insert(it, std::move(end));
+ inserted_at = vtmp_inserted_at = -1;
+ }
+ }
+
if (!hasReductions[block.index])
continue;
@@ -98,8 +103,6 @@ setup_reduce_temp(Program* program)
instr->opcode != aco_opcode::p_interp_gfx11)
continue;
- reduceTmp_in_loop |= block.loop_nest_depth > 0;
-
if ((int)last_top_level_block_idx != inserted_at) {
reduceTmp = program->allocateTmp(reduceTmp.regClass());
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
diff --git a/lib/mesa/src/amd/compiler/aco_register_allocation.cpp b/lib/mesa/src/amd/compiler/aco_register_allocation.cpp
index c59c63538..83d19656d 100644
--- a/lib/mesa/src/amd/compiler/aco_register_allocation.cpp
+++ b/lib/mesa/src/amd/compiler/aco_register_allocation.cpp
@@ -2078,11 +2078,8 @@ get_reg_phi(ra_ctx& ctx, IDSet& live_in, RegisterFile& register_file,
/* rename */
std::unordered_map<unsigned, Temp>::iterator orig_it = ctx.orig_names.find(pc.first.tempId());
- Temp orig = pc.first.getTemp();
- if (orig_it != ctx.orig_names.end())
- orig = orig_it->second;
- else
- ctx.orig_names[pc.second.tempId()] = orig;
+ Temp orig = orig_it != ctx.orig_names.end() ? orig_it->second : pc.first.getTemp();
+ ctx.orig_names[pc.second.tempId()] = orig;
ctx.renames[block.index][orig.id()] = pc.second.getTemp();
/* otherwise, this is a live-in and we need to create a new phi
diff --git a/lib/mesa/src/amd/compiler/aco_scheduler.cpp b/lib/mesa/src/amd/compiler/aco_scheduler.cpp
index f09781435..4ab13fe7c 100644
--- a/lib/mesa/src/amd/compiler/aco_scheduler.cpp
+++ b/lib/mesa/src/amd/compiler/aco_scheduler.cpp
@@ -679,7 +679,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
current->operands[0].size() == 4))
break;
/* don't move descriptor loads below buffer loads */
- if (candidate->format == Format::SMEM && current->operands[0].size() == 4 &&
+ if (candidate->isSMEM() && !candidate->operands.empty() && current->operands[0].size() == 4 &&
candidate->operands[0].size() == 2)
break;
diff --git a/lib/mesa/src/amd/compiler/aco_spill.cpp b/lib/mesa/src/amd/compiler/aco_spill.cpp
index 38a5cf8d4..12cb33325 100644
--- a/lib/mesa/src/amd/compiler/aco_spill.cpp
+++ b/lib/mesa/src/amd/compiler/aco_spill.cpp
@@ -1662,6 +1662,38 @@ assign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector<bool>& is_as
}
void
+end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spill_temps,
+ const std::vector<uint32_t>& slots,
+ const std::unordered_map<Temp, uint32_t>& spills)
+{
+ std::vector<bool> is_used(vgpr_spill_temps.size());
+ for (std::pair<Temp, uint32_t> pair : spills) {
+ if (pair.first.type() == RegType::sgpr && ctx.is_reloaded[pair.second])
+ is_used[slots[pair.second] / ctx.wave_size] = true;
+ }
+
+ std::vector<Temp> temps;
+ for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
+ if (vgpr_spill_temps[i].id() && !is_used[i]) {
+ temps.push_back(vgpr_spill_temps[i]);
+ vgpr_spill_temps[i] = Temp();
+ }
+ }
+ if (temps.empty())
+ return;
+
+ aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(
+ aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)};
+ for (unsigned i = 0; i < temps.size(); i++)
+ destr->operands[i] = Operand(temps[i]);
+
+ std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
+ while (is_phi(*it))
+ ++it;
+ block.instructions.insert(it, std::move(destr));
+}
+
+void
assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
{
std::vector<uint32_t> slots(ctx.interferences.size());
@@ -1709,54 +1741,12 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
/* replace pseudo instructions with actual hardware instructions */
unsigned last_top_level_block_idx = 0;
- std::vector<bool> reload_in_loop(vgpr_spill_temps.size());
for (Block& block : ctx.program->blocks) {
- /* after loops, we insert a user if there was a reload inside the loop */
- if (block.loop_nest_depth == 0) {
- int end_vgprs = 0;
- for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
- if (reload_in_loop[i])
- end_vgprs++;
- }
-
- if (end_vgprs > 0) {
- aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(
- aco_opcode::p_end_linear_vgpr, Format::PSEUDO, end_vgprs, 0)};
- int k = 0;
- for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
- if (reload_in_loop[i])
- destr->operands[k++] = Operand(vgpr_spill_temps[i]);
- reload_in_loop[i] = false;
- }
- /* find insertion point */
- std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
- while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
- ++it;
- block.instructions.insert(it, std::move(destr));
- }
- }
-
if (block.kind & block_kind_top_level && !block.linear_preds.empty()) {
last_top_level_block_idx = block.index;
- /* check if any spilled variables use a created linear vgpr, otherwise destroy them */
- for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
- if (vgpr_spill_temps[i] == Temp())
- continue;
-
- bool can_destroy = true;
- for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block.index]) {
-
- if (ctx.interferences[pair.second].first.type() == RegType::sgpr &&
- slots[pair.second] / ctx.wave_size == i) {
- can_destroy = false;
- break;
- }
- }
- if (can_destroy)
- vgpr_spill_temps[i] = Temp();
- }
+ end_unused_spill_vgprs(ctx, block, vgpr_spill_temps, slots, ctx.spills_entry[block.index]);
}
std::vector<aco_ptr<Instruction>>::iterator it;
@@ -1818,7 +1808,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
reload_vgpr(ctx, block, instructions, *it, slots);
} else {
uint32_t spill_slot = slots[spill_id];
- reload_in_loop[spill_slot / ctx.wave_size] = block.loop_nest_depth > 0;
/* check if the linear vgpr already exists */
if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
@@ -1858,58 +1847,6 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
/* update required scratch memory */
ctx.program->config->scratch_bytes_per_wave +=
align(ctx.vgpr_spill_slots * 4 * ctx.program->wave_size, 1024);
-
- /* SSA elimination inserts copies for logical phis right before p_logical_end
- * So if a linear vgpr is used between that p_logical_end and the branch,
- * we need to ensure logical phis don't choose a definition which aliases
- * the linear vgpr.
- * TODO: Moving the spills and reloads to before p_logical_end might produce
- * slightly better code. */
- for (Block& block : ctx.program->blocks) {
- /* loops exits are already handled */
- if (block.logical_preds.size() <= 1)
- continue;
-
- bool has_logical_phis = false;
- for (aco_ptr<Instruction>& instr : block.instructions) {
- if (instr->opcode == aco_opcode::p_phi) {
- has_logical_phis = true;
- break;
- } else if (instr->opcode != aco_opcode::p_linear_phi) {
- break;
- }
- }
- if (!has_logical_phis)
- continue;
-
- std::set<Temp> vgprs;
- for (unsigned pred_idx : block.logical_preds) {
- Block& pred = ctx.program->blocks[pred_idx];
- for (int i = pred.instructions.size() - 1; i >= 0; i--) {
- aco_ptr<Instruction>& pred_instr = pred.instructions[i];
- if (pred_instr->opcode == aco_opcode::p_logical_end) {
- break;
- } else if (pred_instr->opcode == aco_opcode::p_spill ||
- pred_instr->opcode == aco_opcode::p_reload) {
- vgprs.insert(pred_instr->operands[0].getTemp());
- }
- }
- }
- if (!vgprs.size())
- continue;
-
- aco_ptr<Instruction> destr{create_instruction<Pseudo_instruction>(
- aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vgprs.size(), 0)};
- int k = 0;
- for (Temp tmp : vgprs) {
- destr->operands[k++] = Operand(tmp);
- }
- /* find insertion point */
- std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
- while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
- ++it;
- block.instructions.insert(it, std::move(destr));
- }
}
} /* end namespace */
diff --git a/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp b/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp
index b6b51fe4f..42a860d10 100644
--- a/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/lib/mesa/src/amd/compiler/aco_ssa_elimination.cpp
@@ -361,6 +361,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
exec_val->isVOPC() ? get_vcmpx(exec_val->opcode) : aco_opcode::num_opcodes;
const bool vopc = v_cmpx_op != aco_opcode::num_opcodes;
+ /* V_CMPX+DPP returns 0 with reads from disabled lanes, unlike V_CMP+DPP (RDNA3 ISA doc, 7.7) */
+ if (vopc && exec_val->isDPP())
+ return;
+
/* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */
const bool save_original_exec = exec_copy->opcode == and_saveexec;
/* Position where the original exec mask copy should be inserted. */
@@ -427,11 +431,10 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
if (vopc) {
/* Add one extra definition for exec and copy the VOP3-specific fields if present. */
if (ctx.program->gfx_level < GFX10) {
- if (exec_val->isSDWA() || exec_val->isDPP()) {
+ if (exec_val->isSDWA()) {
/* This might work but it needs testing and more code to copy the instruction. */
return;
- }
- else if (!exec_val->isVOP3()) {
+ } else if (!exec_val->isVOP3()) {
aco_ptr<Instruction> tmp = std::move(exec_val);
exec_val.reset(create_instruction<VOPC_instruction>(
tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
diff --git a/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c b/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c
index feda6aa0c..98c99d002 100644
--- a/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c
+++ b/lib/mesa/src/amd/llvm/ac_nir_to_llvm.c
@@ -2163,7 +2163,7 @@ static LLVMValueRef get_global_address(struct ac_nir_context *ctx,
LLVMTypeRef i8_ptr_type = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_GLOBAL);
addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, i8_ptr_type, "");
addr = LLVMBuildGEP2(ctx->ac.builder, ctx->ac.i8, addr, &offset, 1, "");
- return addr;
+ return LLVMBuildPointerCast(ctx->ac.builder, addr, ptr_type, "");
} else {
return LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
}
diff --git a/lib/mesa/src/amd/registers/gfx11.json b/lib/mesa/src/amd/registers/gfx11.json
index 38308f066..39de995af 100644
--- a/lib/mesa/src/amd/registers/gfx11.json
+++ b/lib/mesa/src/amd/registers/gfx11.json
@@ -10911,7 +10911,10 @@
{"bits": [21, 21], "name": "SKIP_LOW_COMP_RATIO"},
{"bits": [22, 22], "name": "FDCC_ENABLE"},
{"bits": [23, 23], "name": "DCC_COMPRESS_DISABLE"},
- {"bits": [24, 24], "name": "FRAGMENT_COMPRESS_DISABLE"}
+ {"bits": [24, 24], "name": "FRAGMENT_COMPRESS_DISABLE"},
+ {"bits": [25, 25], "name": "DISABLE_OVERRIDE_INCONSISTENT_KEYS"},
+ {"bits": [26, 26], "name": "ENABLE_MAX_COMP_FRAG_OVERRIDE"},
+ {"bits": [27, 29], "name": "MAX_COMP_FRAGS"}
]
},
"CB_COLOR0_INFO": {
@@ -14558,7 +14561,8 @@
"fields": [
{"bits": [1, 1], "name": "EN_REG_RT_INDEX"},
{"bits": [3, 3], "name": "EN_PRIM_PAYLOAD"},
- {"bits": [4, 4], "name": "EN_DRAW_VP"}
+ {"bits": [4, 4], "name": "EN_DRAW_VP"},
+ {"bits": [6, 6], "name": "EN_VRS_RATE"}
]
},
"VGT_ESGS_RING_ITEMSIZE": {
diff --git a/lib/mesa/src/amd/registers/parse_kernel_headers.py b/lib/mesa/src/amd/registers/parse_kernel_headers.py
index 67883f409..b79c0cc7d 100644
--- a/lib/mesa/src/amd/registers/parse_kernel_headers.py
+++ b/lib/mesa/src/amd/registers/parse_kernel_headers.py
@@ -683,6 +683,34 @@ fields_missing = {
"VGT_DRAW_PAYLOAD_CNTL": [["EN_VRS_RATE", 6, 6]],
"VGT_SHADER_STAGES_EN": [["PRIMGEN_PASSTHRU_NO_MSG", 26, 26]],
},
+ 'gfx11': {
+ "VGT_DRAW_PAYLOAD_CNTL": [["EN_VRS_RATE", 6, 6]],
+ # Only GFX1103_R2:
+ "CB_COLOR0_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR1_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR2_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR3_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR4_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR5_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR6_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ "CB_COLOR7_FDCC_CONTROL": [["DISABLE_OVERRIDE_INCONSISTENT_KEYS", 25, 25],
+ ["ENABLE_MAX_COMP_FRAG_OVERRIDE", 26, 26],
+ ["MAX_COMP_FRAGS", 27, 29]],
+ },
}
######### END HARDCODED CONFIGURATION
diff --git a/lib/mesa/src/amd/vulkan/layers/radv_rage2.c b/lib/mesa/src/amd/vulkan/layers/radv_rage2.c
new file mode 100644
index 000000000..52438db6c
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/layers/radv_rage2.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2023 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+#include "vk_framebuffer.h"
+#include "vk_common_entrypoints.h"
+
+VKAPI_ATTR void VKAPI_CALL
+rage2_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo* pRenderPassBegin,
+ VkSubpassContents contents)
+{
+ VK_FROM_HANDLE(vk_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
+
+ VkRenderPassBeginInfo render_pass_begin = {
+ .sType = pRenderPassBegin->sType,
+ .pNext = pRenderPassBegin->pNext,
+ .renderPass = pRenderPassBegin->renderPass,
+ .framebuffer = pRenderPassBegin->framebuffer,
+ .clearValueCount = pRenderPassBegin->clearValueCount,
+ .pClearValues = pRenderPassBegin->pClearValues,
+ };
+
+ /* RAGE2 seems to incorrectly set the render area and with dynamic rendering the concept of
+ * framebuffer dimensions goes away. Forcing the render area to be the framebuffer dimensions
+ * restores previous logic and it fixes rendering issues.
+ */
+ render_pass_begin.renderArea.offset.x = 0;
+ render_pass_begin.renderArea.offset.y = 0;
+ render_pass_begin.renderArea.extent.width = framebuffer->width;
+ render_pass_begin.renderArea.extent.height = framebuffer->height;
+
+ vk_common_CmdBeginRenderPass(commandBuffer, &render_pass_begin, contents);
+}
diff --git a/lib/mesa/src/amd/vulkan/meson.build b/lib/mesa/src/amd/vulkan/meson.build
index ad75d68b1..b0b8e2ca9 100644
--- a/lib/mesa/src/amd/vulkan/meson.build
+++ b/lib/mesa/src/amd/vulkan/meson.build
@@ -26,7 +26,7 @@ radv_entrypoints = custom_target(
prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
'--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'radv',
'--device-prefix', 'sqtt', '--device-prefix', 'metro_exodus',
- '--device-prefix', 'rra',
+ '--device-prefix', 'rra', '--device-prefix', 'rage2',
],
depend_files : vk_entrypoints_gen_depend_files,
)
@@ -34,6 +34,7 @@ radv_entrypoints = custom_target(
libradv_files = files(
'bvh/bvh.h',
'layers/radv_metro_exodus.c',
+ 'layers/radv_rage2.c',
'layers/radv_rra_layer.c',
'layers/radv_sqtt_layer.c',
'winsys/null/radv_null_bo.c',
@@ -155,8 +156,13 @@ libvulkan_radeon_ld_args = []
libvulkan_radeon_link_depends = []
if with_ld_version_script
- libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan.sym')]
- libvulkan_radeon_link_depends += files('vulkan.sym')
+ if with_platform_android
+ libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan-android.sym')]
+ libvulkan_radeon_link_depends += files('vulkan-android.sym')
+ else
+ libvulkan_radeon_ld_args += ['-Wl,--version-script', join_paths(meson.current_source_dir(), 'vulkan.sym')]
+ libvulkan_radeon_link_depends += files('vulkan.sym')
+ endif
endif
radv_build_id = get_option('radv-build-id')
diff --git a/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c b/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c
index 93044c6e6..1bee2b0d0 100644
--- a/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c
+++ b/lib/mesa/src/amd/vulkan/radv_sdma_copy_image.c
@@ -99,7 +99,7 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct
radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
- radeon_emit(cmd_buffer->cs, bytes);
+ radeon_emit(cmd_buffer->cs, bytes - 1);
radeon_emit(cmd_buffer->cs, 0);
radeon_emit(cmd_buffer->cs, src_address);
radeon_emit(cmd_buffer->cs, src_address >> 32);
@@ -139,8 +139,9 @@ radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct
(tmz ? 4 : 0)) |
dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 |
1u << 31);
- radeon_emit(cmd_buffer->cs,
- (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
+ radeon_emit(
+ cmd_buffer->cs,
+ (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32));
radeon_emit(cmd_buffer->cs, 0);
radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16));
diff --git a/lib/mesa/src/amd/vulkan/vulkan-android.sym b/lib/mesa/src/amd/vulkan/vulkan-android.sym
new file mode 100644
index 000000000..2ca40faa0
--- /dev/null
+++ b/lib/mesa/src/amd/vulkan/vulkan-android.sym
@@ -0,0 +1,16 @@
+{
+ global:
+ vk_icdGetInstanceProcAddr;
+ vk_icdGetPhysicalDeviceProcAddr;
+ vk_icdNegotiateLoaderICDInterfaceVersion;
+
+ # Android looks for this global in HAL modules. In the source it occurs
+ # as HAL_MODULE_INFO_SYM (which is just a #define for HMI) and it's an
+ # instance of struct hwvulkan_module_t.
+ HMI;
+
+ local:
+ # When static linking LLVM, all its symbols are public API.
+ # That may cause symbol collision, so explicitly demote everything.
+ *;
+};
diff --git a/lib/mesa/src/broadcom/common/v3d_limits.h b/lib/mesa/src/broadcom/common/v3d_limits.h
index 755aedd78..492740c44 100644
--- a/lib/mesa/src/broadcom/common/v3d_limits.h
+++ b/lib/mesa/src/broadcom/common/v3d_limits.h
@@ -24,6 +24,8 @@
#ifndef V3D_LIMITS_H
#define V3D_LIMITS_H
+#define V3D_CL_MAX_INSTR_SIZE 25
+
/* Number of channels a QPU thread executes in parallel. Also known as
* gl_SubGroupSizeARB.
*/
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c
index 55cf122cc..d1181bac4 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cl.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cl.c
@@ -114,14 +114,18 @@ v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
* end with a 'return from sub list' command.
*/
bool needs_return_from_sub_list = false;
- if (cl->job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) {
- if (cl->size > 0) {
+ if (cl->job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY && cl->size > 0)
needs_return_from_sub_list = true;
- space += cl_packet_length(RETURN_FROM_SUB_LIST);
- }
- } else {
- space += cl_packet_length(BRANCH);
- }
+
+ /*
+ * The CLE processor in the simulator tries to read V3D_CL_MAX_INSTR_SIZE
+ * bytes from the CL for each new instruction. If the last instruction in our
+ * CL is smaller than that, and there are not at least V3D_CL_MAX_INSTR_SIZE
+ * bytes until the end of the BO, it will read out of bounds and possibly
+ * cause a GMP violation interrupt to trigger. Ensure we always have at
+ * least that many bytes available to read with the last instruction.
+ */
+ space += V3D_CL_MAX_INSTR_SIZE;
if (v3dv_cl_offset(cl) + space <= cl->size)
return;
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
index c92794bd9..dba43223d 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -2789,8 +2789,9 @@ cmd_buffer_binning_sync_required(struct v3dv_cmd_buffer *cmd_buffer,
return false;
}
-static void
-consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
+void
+v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_job *job)
{
job->needs_bcl_sync = true;
cmd_buffer->state.barrier.bcl_buffer_access = 0;
@@ -2890,7 +2891,7 @@ v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer,
assert(!job->needs_bcl_sync);
if (cmd_buffer_binning_sync_required(cmd_buffer, pipeline,
indexed, indirect)) {
- consume_bcl_sync(cmd_buffer, job);
+ v3dv_cmd_buffer_consume_bcl_sync(cmd_buffer, job);
}
}
diff --git a/lib/mesa/src/broadcom/vulkan/v3dv_private.h b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
index 27b0646d3..bbdb1ef57 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dv_private.h
+++ b/lib/mesa/src/broadcom/vulkan/v3dv_private.h
@@ -1660,6 +1660,9 @@ void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
void v3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst,
struct v3dv_barrier_state *src);
+void v3dv_cmd_buffer_consume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer,
+ struct v3dv_job *job);
+
bool v3dv_cmd_buffer_check_needs_load(const struct v3dv_cmd_buffer_state *state,
VkImageAspectFlags aspect,
uint32_t first_subpass_idx,
diff --git a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
index 8f78dfb09..5219f4801 100644
--- a/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
+++ b/lib/mesa/src/broadcom/vulkan/v3dvx_cmd_buffer.c
@@ -1623,6 +1623,20 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
{
assert(primary->state.job);
+ /* Typically we postpone applying binning syncs until we see a draw call
+ * that may actually access protected resources in the binning stage. However,
+ * if the draw calls are recorded in a secondary command buffer and the
+ * barriers were recorded in a primary command buffer, that won't work
+ * and we will have to check if we need a binning sync when executing the
+ * secondary.
+ */
+ struct v3dv_job *primary_job = primary->state.job;
+ if (primary_job->serialize &&
+ (primary->state.barrier.bcl_buffer_access ||
+ primary->state.barrier.bcl_image_access)) {
+ v3dv_cmd_buffer_consume_bcl_sync(primary, primary_job);
+ }
+
/* Emit occlusion query state if needed so the draw calls inside our
* secondaries update the counters.
*/
@@ -1668,7 +1682,7 @@ v3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary,
* the RETURN_FROM_SUB_LIST into the primary job to skip the
* branch?
*/
- struct v3dv_job *primary_job = primary->state.job;
+ primary_job = primary->state.job;
if (!primary_job || secondary_job->serialize ||
pending_barrier.dst_mask) {
const bool needs_bcl_barrier =
diff --git a/lib/mesa/src/compiler/nir/nir_deref.c b/lib/mesa/src/compiler/nir/nir_deref.c
index e03be3ac6..c883d2229 100644
--- a/lib/mesa/src/compiler/nir/nir_deref.c
+++ b/lib/mesa/src/compiler/nir/nir_deref.c
@@ -1134,7 +1134,12 @@ opt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
if (glsl_get_struct_field_offset(parent->type, 0) != 0)
return false;
- if (cast->type != glsl_get_struct_field(parent->type, 0))
+ const struct glsl_type *field_type = glsl_get_struct_field(parent->type, 0);
+ if (cast->type != field_type)
+ return false;
+
+ /* we can't drop the stride information */
+ if (cast->cast.ptr_stride != glsl_get_explicit_stride(field_type))
return false;
nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
diff --git a/lib/mesa/src/compiler/nir/nir_range_analysis.c b/lib/mesa/src/compiler/nir/nir_range_analysis.c
index 56fd3f092..06dd3eea5 100644
--- a/lib/mesa/src/compiler/nir/nir_range_analysis.c
+++ b/lib/mesa/src/compiler/nir/nir_range_analysis.c
@@ -1315,10 +1315,11 @@ static const nir_unsigned_upper_bound_config default_ub_config = {
},
};
-uint32_t
-nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
- nir_ssa_scalar scalar,
- const nir_unsigned_upper_bound_config *config)
+static uint32_t
+nir_unsigned_upper_bound_impl(nir_shader *shader, struct hash_table *range_ht,
+ nir_ssa_scalar scalar,
+ const nir_unsigned_upper_bound_config *config,
+ unsigned stack_depth)
{
assert(scalar.def->bit_size <= 32);
@@ -1335,6 +1336,11 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
uint32_t max = bitmask(scalar.def->bit_size);
+ /* Avoid stack overflows. 200 is just an arbitrary limit that happened to work with wine stacks
+ * which tend to be smaller than normal Linux ones. */
+ if (stack_depth >= 200)
+ return max;
+
if (scalar.def->parent_instr->type == nir_instr_type_intrinsic) {
uint32_t res = max;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(scalar.def->parent_instr);
@@ -1389,7 +1395,8 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
break;
case nir_intrinsic_mbcnt_amd: {
uint32_t src0 = config->max_subgroup_size - 1;
- uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config);
+ uint32_t src1 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0),
+ config, stack_depth + 1);
if (src0 + src1 < src0)
res = max; /* overflow */
@@ -1430,7 +1437,8 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_intrinsic_exclusive_scan: {
nir_op op = nir_intrinsic_reduction_op(intrin);
if (op == nir_op_umin || op == nir_op_umax || op == nir_op_imin || op == nir_op_imax)
- res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
+ res = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0),
+ config, stack_depth + 1);
break;
}
case nir_intrinsic_read_first_invocation:
@@ -1445,11 +1453,14 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_quad_swizzle_amd:
case nir_intrinsic_masked_swizzle_amd:
- res = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
+ res = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0),
+ config, stack_depth + 1);
break;
case nir_intrinsic_write_invocation_amd: {
- uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0), config);
- uint32_t src1 = nir_unsigned_upper_bound(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0), config);
+ uint32_t src0 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[0].ssa, 0),
+ config, stack_depth + 1);
+ uint32_t src1 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_get_ssa_scalar(intrin->src[1].ssa, 0),
+ config, stack_depth + 1);
res = MAX2(src0, src1);
break;
}
@@ -1486,11 +1497,11 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
_mesa_set_destroy(visited, NULL);
for (unsigned i = 0; i < def_count; i++)
- res = MAX2(res, nir_unsigned_upper_bound(shader, range_ht, defs[i], config));
+ res = MAX2(res, nir_unsigned_upper_bound_impl(shader, range_ht, defs[i], config, stack_depth + 1));
} else {
nir_foreach_phi_src(src, nir_instr_as_phi(scalar.def->parent_instr)) {
- res = MAX2(res, nir_unsigned_upper_bound(
- shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config));
+ res = MAX2(res, nir_unsigned_upper_bound_impl(
+ shader, range_ht, nir_get_ssa_scalar(src->src.ssa, 0), config, stack_depth + 1));
}
}
@@ -1541,12 +1552,15 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
return max;
}
- uint32_t src0 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0), config);
+ uint32_t src0 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 0),
+ config, stack_depth + 1);
uint32_t src1 = max, src2 = max;
if (nir_op_infos[op].num_inputs > 1)
- src1 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1), config);
+ src1 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 1),
+ config, stack_depth + 1);
if (nir_op_infos[op].num_inputs > 2)
- src2 = nir_unsigned_upper_bound(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2), config);
+ src2 = nir_unsigned_upper_bound_impl(shader, range_ht, nir_ssa_scalar_chase_alu_src(scalar, 2),
+ config, stack_depth + 1);
uint32_t res = max;
switch (op) {
@@ -1683,6 +1697,14 @@ nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
return max;
}
+uint32_t
+nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
+ nir_ssa_scalar scalar,
+ const nir_unsigned_upper_bound_config *config)
+{
+ return nir_unsigned_upper_bound_impl(shader, range_ht, scalar, config, 0);
+}
+
bool
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
nir_ssa_scalar ssa, unsigned const_val,
diff --git a/lib/mesa/src/etnaviv/drm/etnaviv_bo.c b/lib/mesa/src/etnaviv/drm/etnaviv_bo.c
index d790823bc..aae81a015 100644
--- a/lib/mesa/src/etnaviv/drm/etnaviv_bo.c
+++ b/lib/mesa/src/etnaviv/drm/etnaviv_bo.c
@@ -138,8 +138,8 @@ static struct etna_bo *lookup_bo(void *tbl, uint32_t handle)
/* found, incr refcnt and return: */
bo = etna_bo_ref(entry->data);
- /* don't break the bucket if this bo was found in one */
- if (list_is_linked(&bo->list)) {
+ /* don't break the bucket/zombie list if this bo was found in one */
+ if (!list_is_empty(&bo->list)) {
VG_BO_OBTAIN(bo);
etna_device_ref(bo->dev);
list_delinit(&bo->list);
diff --git a/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c b/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c
index 3630d155d..657ef0c05 100644
--- a/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c
+++ b/lib/mesa/src/etnaviv/drm/etnaviv_cmd_stream.c
@@ -135,6 +135,8 @@ void etna_cmd_stream_del(struct etna_cmd_stream *stream)
_mesa_hash_table_destroy(priv->bo_table, NULL);
free(stream->buffer);
+ free(priv->bos);
+ free(priv->submit.bos);
free(priv->submit.relocs);
free(priv->submit.pmrs);
free(priv);
diff --git a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c
index 6f304634c..0acb45d71 100644
--- a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -5387,6 +5387,11 @@ tu_CmdEndRendering(VkCommandBuffer commandBuffer)
if (cmd_buffer->state.suspend_resume == SR_IN_PRE_CHAIN) {
cmd_buffer->trace_renderpass_end = u_trace_end_iterator(&cmd_buffer->trace);
tu_save_pre_chain(cmd_buffer);
+
+ /* Even though we don't call tu_cmd_render here, renderpass is finished
+ * and draw states should be disabled.
+ */
+ tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
} else {
tu_cmd_render(cmd_buffer);
}
diff --git a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c
index 0841d56f1..d4d3c9735 100644
--- a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c
+++ b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c
@@ -3158,7 +3158,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
for (unsigned j = 0; j < ARRAY_SIZE(library->shaders); j++) {
if (library->shaders[j].nir) {
assert(!nir[j]);
- nir[j] = nir_shader_clone(NULL, library->shaders[j].nir);
+ nir[j] = nir_shader_clone(builder->mem_ctx,
+ library->shaders[j].nir);
keys[j] = library->shaders[j].key;
must_compile = true;
}
diff --git a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index 78a3c7c87..aa18851d7 100644
--- a/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/lib/mesa/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -2152,9 +2152,12 @@ visit_intrinsic(struct lp_build_nir_context *bld_base,
visit_shared_atomic(bld_base, instr, result);
break;
case nir_intrinsic_control_barrier:
- case nir_intrinsic_scoped_barrier:
visit_barrier(bld_base);
break;
+ case nir_intrinsic_scoped_barrier:
+ if (nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE)
+ visit_barrier(bld_base);
+ break;
case nir_intrinsic_group_memory_barrier:
case nir_intrinsic_memory_barrier:
case nir_intrinsic_memory_barrier_shared:
diff --git a/lib/mesa/src/gallium/drivers/crocus/crocus_context.c b/lib/mesa/src/gallium/drivers/crocus/crocus_context.c
index 903be3585..77ddb2e52 100644
--- a/lib/mesa/src/gallium/drivers/crocus/crocus_context.c
+++ b/lib/mesa/src/gallium/drivers/crocus/crocus_context.c
@@ -61,7 +61,7 @@ crocus_init_identifier_bo(struct crocus_context *ice)
ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
ice->workaround_offset = ALIGN(
- intel_debug_write_identifiers(bo_map, 4096, "Crocus") + 8, 8);
+ intel_debug_write_identifiers(bo_map, 4096, "Crocus"), 32);
crocus_bo_unmap(ice->workaround_bo);
diff --git a/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c b/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c
index 7b4d50a66..789a04db1 100644
--- a/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c
+++ b/lib/mesa/src/gallium/drivers/crocus/crocus_resource.c
@@ -189,11 +189,8 @@ crocus_resource_configure_main(const struct crocus_screen *screen,
tiling_flags = 1 << res->mod_info->tiling;
} else {
- if (templ->bind & PIPE_BIND_RENDER_TARGET && devinfo->ver < 6) {
- modifier = I915_FORMAT_MOD_X_TILED;
- res->mod_info = isl_drm_modifier_get_info(modifier);
- tiling_flags = 1 << res->mod_info->tiling;
- }
+ if (templ->bind & PIPE_BIND_RENDER_TARGET && devinfo->ver < 6)
+ tiling_flags &= ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT;
/* Use linear for staging buffers */
if (templ->usage == PIPE_USAGE_STAGING ||
templ->bind & (PIPE_BIND_LINEAR | PIPE_BIND_CURSOR) )
diff --git a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp
index 26638d429..0e7c5b45c 100644
--- a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp
+++ b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.cpp
@@ -310,7 +310,7 @@ d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(
dxvaStructure.ref_frame_coded_width[i] = pipe_vp9->ref[i]->width;
dxvaStructure.ref_frame_coded_height[i] = pipe_vp9->ref[i]->height;
} else
- dxvaStructure.ref_frame_map[i].bPicEntry = DXVA_VP9_INVALID_PICTURE_INDEX;
+ dxvaStructure.ref_frame_map[i].bPicEntry = DXVA_VP9_INVALID_PICTURE_ENTRY;
}
/* DXVA spec The enums and indices for ref_frame_sign_bias[] are defined */
@@ -319,7 +319,7 @@ d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(
const uint8_t signbias_alt_index = 3;
/* AssociatedFlag When Index7Bits does not contain an index to a valid uncompressed surface, the value shall be set to 127, to indicate that the index is invalid. */
- memset(&dxvaStructure.frame_refs[0], DXVA_VP9_INVALID_PICTURE_INDEX, sizeof(dxvaStructure.frame_refs));
+ memset(&dxvaStructure.frame_refs[0], DXVA_VP9_INVALID_PICTURE_ENTRY, sizeof(dxvaStructure.frame_refs));
if (pipe_vp9->ref[pipe_vp9->picture_parameter.pic_fields.last_ref_frame]) {
/* AssociatedFlag When Index7Bits does not contain an index to a valid uncompressed surface, the value shall be set to 127, to indicate that the index is invalid. */
@@ -348,10 +348,16 @@ d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(
dxvaStructure.filter_level = pipe_vp9->picture_parameter.filter_level;
dxvaStructure.sharpness_level = pipe_vp9->picture_parameter.sharpness_level;
- bool use_last_frame_mvs = !pipe_vp9->picture_parameter.pic_fields.error_resilient_mode && pipe_vp9->picture_parameter.pic_fields.show_frame;
+ bool use_prev_in_find_mv_refs =
+ !pipe_vp9->picture_parameter.pic_fields.error_resilient_mode &&
+ !(pipe_vp9->picture_parameter.pic_fields.frame_type == 0 /*KEY_FRAME*/ || pipe_vp9->picture_parameter.pic_fields.intra_only) &&
+ pipe_vp9->picture_parameter.pic_fields.prev_show_frame &&
+ pipe_vp9->picture_parameter.frame_width == pipe_vp9->picture_parameter.prev_frame_width &&
+ pipe_vp9->picture_parameter.frame_height == pipe_vp9->picture_parameter.prev_frame_height;
+
dxvaStructure.wControlInfoFlags = (pipe_vp9->picture_parameter.mode_ref_delta_enabled << 0) |
(pipe_vp9->picture_parameter.mode_ref_delta_update << 1) |
- (use_last_frame_mvs << 2) |
+ (use_prev_in_find_mv_refs << 2) |
(0 << 3);
for (uint32_t i = 0; i < 4; i++)
diff --git a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h
index 322daf7bd..24aa032ef 100644
--- a/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h
+++ b/lib/mesa/src/gallium/drivers/d3d12/d3d12_video_dec_vp9.h
@@ -27,7 +27,8 @@
#include "d3d12_video_types.h"
-constexpr uint16_t DXVA_VP9_INVALID_PICTURE_INDEX = 0xFF;
+constexpr uint16_t DXVA_VP9_INVALID_PICTURE_INDEX = 0x7F;
+constexpr uint16_t DXVA_VP9_INVALID_PICTURE_ENTRY = 0xFF;
#pragma pack(push, BeforeDXVApacking, 1)
diff --git a/lib/mesa/src/gallium/drivers/iris/iris_batch.c b/lib/mesa/src/gallium/drivers/iris/iris_batch.c
index ec32d88cc..d598fd701 100644
--- a/lib/mesa/src/gallium/drivers/iris/iris_batch.c
+++ b/lib/mesa/src/gallium/drivers/iris/iris_batch.c
@@ -1053,10 +1053,10 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
}
- uint64_t start_ts = intel_ds_begin_submit(batch->ds);
- uint64_t submission_id = batch->ds->submission_id;
+ uint64_t start_ts = intel_ds_begin_submit(&batch->ds);
+ uint64_t submission_id = batch->ds.submission_id;
int ret = submit_batch(batch);
- intel_ds_end_submit(batch->ds, start_ts);
+ intel_ds_end_submit(&batch->ds, start_ts);
/* When batch submission fails, our end-of-batch syncobj remains
* unsignalled, and in fact is not even considered submitted.
diff --git a/lib/mesa/src/gallium/drivers/iris/iris_batch.h b/lib/mesa/src/gallium/drivers/iris/iris_batch.h
index a1dfa6e63..437352a5a 100644
--- a/lib/mesa/src/gallium/drivers/iris/iris_batch.h
+++ b/lib/mesa/src/gallium/drivers/iris/iris_batch.h
@@ -197,7 +197,7 @@ struct iris_batch {
struct u_trace trace;
/** Batch wrapper structure for perfetto */
- struct intel_ds_queue *ds;
+ struct intel_ds_queue ds;
};
void iris_init_batches(struct iris_context *ice, int priority);
diff --git a/lib/mesa/src/gallium/drivers/iris/iris_state.c b/lib/mesa/src/gallium/drivers/iris/iris_state.c
index b281393dc..30a0ba17e 100644
--- a/lib/mesa/src/gallium/drivers/iris/iris_state.c
+++ b/lib/mesa/src/gallium/drivers/iris/iris_state.c
@@ -2799,6 +2799,21 @@ iris_create_surface(struct pipe_context *ctx,
&res->surf, view,
&isl_surf, view, &offset_B,
&tile_x_el, &tile_y_el);
+
+ /* On Broadwell, HALIGN and VALIGN are specified in pixels and are
+ * hard-coded to align to exactly the block size of the compressed
+ * texture. This means that, when reinterpreted as a non-compressed
+ * texture, the tile offsets may be anything.
+ *
+ * We need them to be multiples of 4 to be usable in RENDER_SURFACE_STATE,
+ * so force the state tracker to take fallback paths if they're not.
+ */
+#if GFX_VER == 8
+ if (tile_x_el % 4 != 0 || tile_y_el % 4 != 0) {
+ ok = false;
+ }
+#endif
+
if (!ok) {
free(surf);
return NULL;
diff --git a/lib/mesa/src/gallium/drivers/iris/iris_utrace.c b/lib/mesa/src/gallium/drivers/iris/iris_utrace.c
index 7f49826d7..e66a56092 100644
--- a/lib/mesa/src/gallium/drivers/iris/iris_utrace.c
+++ b/lib/mesa/src/gallium/drivers/iris/iris_utrace.c
@@ -95,7 +95,7 @@ iris_utrace_delete_flush_data(struct u_trace_context *utctx,
void iris_utrace_flush(struct iris_batch *batch, uint64_t submission_id)
{
struct intel_ds_flush_data *flush_data = malloc(sizeof(*flush_data));
- intel_ds_flush_data_init(flush_data, batch->ds, submission_id);
+ intel_ds_flush_data_init(flush_data, &batch->ds, submission_id);
u_trace_flush(&batch->trace, flush_data, false);
}
@@ -122,9 +122,8 @@ void iris_utrace_init(struct iris_context *ice)
iris_utrace_delete_flush_data);
for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
- ice->batches[i].ds =
- intel_ds_device_add_queue(&ice->ds, "%s",
- iris_batch_name_to_string(i));
+ intel_ds_device_init_queue(&ice->ds, &ice->batches[i].ds, "%s",
+ iris_batch_name_to_string(i));
}
}
diff --git a/lib/mesa/src/gallium/drivers/lima/lima_resource.c b/lib/mesa/src/gallium/drivers/lima/lima_resource.c
index 260212178..ad55fa5c8 100644
--- a/lib/mesa/src/gallium/drivers/lima/lima_resource.c
+++ b/lib/mesa/src/gallium/drivers/lima/lima_resource.c
@@ -59,7 +59,10 @@ lima_resource_create_scanout(struct pipe_screen *pscreen,
struct lima_screen *screen = lima_screen(pscreen);
struct renderonly_scanout *scanout;
struct winsys_handle handle;
- struct pipe_resource *pres;
+
+ struct lima_resource *res = CALLOC_STRUCT(lima_resource);
+ if (!res)
+ return NULL;
struct pipe_resource scanout_templat = *templat;
scanout_templat.width0 = width;
@@ -71,20 +74,31 @@ lima_resource_create_scanout(struct pipe_screen *pscreen,
if (!scanout)
return NULL;
+ res->base = *templat;
+ res->base.screen = pscreen;
+ pipe_reference_init(&res->base.reference, 1);
+ res->levels[0].offset = handle.offset;
+ res->levels[0].stride = handle.stride;
+
assert(handle.type == WINSYS_HANDLE_TYPE_FD);
- pres = pscreen->resource_from_handle(pscreen, templat, &handle,
- PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE);
+ res->bo = lima_bo_import(screen, &handle);
+ if (!res->bo) {
+ FREE(res);
+ return NULL;
+ }
+
+ res->modifier_constant = true;
close(handle.handle);
- if (!pres) {
+ if (!res->bo) {
renderonly_scanout_destroy(scanout, screen->ro);
+ FREE(res);
return NULL;
}
- struct lima_resource *res = lima_resource(pres);
res->scanout = scanout;
- return pres;
+ return &res->base;
}
static uint32_t
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
index e8a2b4674..7cfe86175 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
+++ b/lib/mesa/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
@@ -1,7 +1,6 @@
// DriConf options specific to radeonsi
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_ADAPTIVE_SYNC(true)
-DRI_CONF_MESA_GLTHREAD(true)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c b/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c
index 414a8d699..1c8038325 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/radeon_vcn_dec.c
@@ -278,6 +278,20 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
}
}
+ /* if reference picture exists, however no reference picture found at the end
+ curr_pic_ref_frame_num == 0, which is not reasonable, should be corrected. */
+ if (result.used_for_reference_flags && (result.curr_pic_ref_frame_num == 0)) {
+ for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
+ result.ref_frame_list[i] = pic->ref[i] ?
+ (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff;
+ if (result.ref_frame_list[i] != 0xff) {
+ result.curr_pic_ref_frame_num++;
+ result.non_existing_frame_flags &= ~(1 << i);
+ break;
+ }
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
if (result.ref_frame_list[i] != 0xff) {
dec->h264_valid_ref_num[i] = result.frame_num_list[i];
@@ -3160,7 +3174,8 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
case CHIP_GFX1100:
case CHIP_GFX1101:
case CHIP_GFX1102:
- case CHIP_GFX1103:
+ case CHIP_GFX1103_R1:
+ case CHIP_GFX1103_R2:
dec->jpg.direct_reg = true;
dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
dec->av1_version = RDECODE_AV1_VER_1;
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c b/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
index b1b408b8f..c5e8b9eed 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_sdma_copy_image.c
@@ -139,7 +139,7 @@ bool si_sdma_v4_v5_copy_texture(struct si_context *sctx, struct si_texture *sdst
radeon_emit(CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
(tmz ? 4 : 0)));
- radeon_emit(bytes);
+ radeon_emit(bytes - 1);
radeon_emit(0);
radeon_emit(src_address);
radeon_emit(src_address >> 32);
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
index 218a3c2a3..9e717b926 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
@@ -570,7 +570,8 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
* overriden by other states. (e.g. per-sample interpolation)
* Interpolated colors are stored after the preloaded VGPRs.
*/
-void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
+void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key,
+ bool separate_prolog)
{
LLVMValueRef ret, func;
int num_returns, i, num_color_channels;
@@ -694,13 +695,13 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
/* Read LINEAR_SAMPLE. */
for (i = 0; i < 2; i++)
- linear_sample[i] = LLVMGetParam(func, base + 6 + i);
+ linear_sample[i] = LLVMGetParam(func, base + (separate_prolog ? 6 : 9) + i);
/* Overwrite LINEAR_CENTER. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 8 + i, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + (separate_prolog ? 8 : 11) + i, "");
/* Overwrite LINEAR_CENTROID. */
for (i = 0; i < 2; i++)
- ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 10 + i, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + (separate_prolog ? 10 : 13) + i, "");
}
/* Force center interpolation. */
@@ -825,7 +826,8 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
* Build the pixel shader epilog function. This handles everything that must be
* emulated for pixel shader exports. (alpha-test, format conversions, etc)
*/
-void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
+void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key,
+ UNUSED bool separate_epilog)
{
int i;
struct si_ps_exports exp = {};
@@ -947,7 +949,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
si_get_ps_prolog_key(shader, &prolog_key, false);
if (si_need_ps_prolog(&prolog_key)) {
- si_llvm_build_ps_prolog(ctx, &prolog_key);
+ si_llvm_build_ps_prolog(ctx, &prolog_key, false);
parts[num_parts++] = ctx->main_fn;
}
@@ -956,7 +958,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader
union si_shader_part_key epilog_key;
si_get_ps_epilog_key(shader, &epilog_key);
- si_llvm_build_ps_epilog(ctx, &epilog_key);
+ si_llvm_build_ps_epilog(ctx, &epilog_key, false);
parts[num_parts++] = ctx->main_fn;
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, main_arg_types, false);
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 5a5665a51..03da1e3e7 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -602,7 +602,8 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
* Compile the TCS epilog function. This writes tesselation factors to memory
* based on the output primitive type of the tesselator (determined by TES).
*/
-void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
+void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key,
+ UNUSED bool separate_epilog)
{
memset(&ctx->args, 0, sizeof(ctx->args));
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index 950daf49f..ca6c4c6f8 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -923,7 +923,8 @@ void si_llvm_vs_build_end(struct si_shader_context *ctx)
* (InstanceID + StartInstance),
* (InstanceID / 2 + StartInstance)
*/
-void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
+void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key,
+ UNUSED bool separate_prolog)
{
LLVMTypeRef *returns;
LLVMValueRef ret, func;
diff --git a/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 16012344a..b3c0f85a3 100644
--- a/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/lib/mesa/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -673,7 +673,7 @@ unsigned si_get_shader_prefetch_size(struct si_shader *shader)
/* Return 0 for some A0 chips only. Other chips don't need it. */
if ((shader->selector->screen->info.family == CHIP_GFX1100 ||
shader->selector->screen->info.family == CHIP_GFX1102 ||
- shader->selector->screen->info.family == CHIP_GFX1103) &&
+ shader->selector->screen->info.family == CHIP_GFX1103_R1) &&
shader->selector->screen->info.chip_rev == 0)
return 0;
@@ -1234,7 +1234,8 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
(sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
ac_set_reg_cu_en(&sctx->gfx_cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs,
- C_00B204_CU_EN_GFX10, 16, &sctx->screen->info,
+ sctx->gfx_level >= GFX11 ? C_00B204_CU_EN_GFX11 : C_00B204_CU_EN_GFX10, 16,
+ &sctx->screen->info,
(void (*)(void*, unsigned, uint32_t))
(sctx->gfx_level >= GFX10 ? radeon_set_sh_reg_idx3_func : radeon_set_sh_reg_func));
sctx->tracked_regs.reg_saved &= ~BITFIELD64_BIT(SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS) &
diff --git a/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json b/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json
index f42f4c8d7..83d972b83 100644
--- a/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json
+++ b/lib/mesa/src/gallium/drivers/zink/VP_ZINK_requirements.json
@@ -31,9 +31,15 @@
"VkPhysicalDeviceCustomBorderColorFeaturesEXT": {
"customBorderColorWithoutFormat": true
},
+ "VkPhysicalDeviceBorderColorSwizzleFeaturesEXT": {
+ "borderColorSwizzleFromImage": true
+ },
"VkPhysicalDeviceLineRasterizationFeaturesEXT": {
"rectangularLines": true,
"bresenhamLines": true
+ },
+ "VkPhysicalDeviceProvokingVertexFeaturesEXT": {
+ "provokingVertexLast": true
}
},
"properties": {
@@ -49,13 +55,22 @@
"features": {
"VkPhysicalDeviceScalarBlockLayoutFeaturesEXT": {
"scalarBlockLayout": true
+ },
+ "VkPhysicalDeviceTimelineSemaphoreFeaturesKHR": {
+ "timelineSemaphore": true
+ },
+ "VkPhysicalDeviceImagelessFramebufferFeatures": {
+ "imagelessFramebuffer": true
}
}
},
"gl21_baseline_vk12": {
"features": {
"VkPhysicalDeviceVulkan12Features": {
- "scalarBlockLayout": true
+ "scalarBlockLayout": true,
+ "drawIndirectCount": true,
+ "imagelessFramebuffer": true,
+ "timelineSemaphore": true
}
}
},
@@ -72,6 +87,12 @@
"features": {
"VkPhysicalDeviceFeatures": {
"independentBlend": true
+ },
+ "VkPhysicalDeviceTransformFeedbackFeaturesEXT": {
+ "transformFeedback": true
+ },
+ "VkPhysicalDeviceConditionalRenderingFeaturesEXT": {
+ "conditionalRendering": true
}
}
},
@@ -107,6 +128,9 @@
"VkPhysicalDeviceFeatures": {
"occlusionQueryPrecise": true,
"dualSrcBlend": true
+ },
+ "VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT": {
+ "vertexAttributeInstanceRateDivisor": true
}
}
},
@@ -170,6 +194,9 @@
"shaderStorageImageWriteWithoutFormat": true,
"vertexPipelineStoresAndAtomics": true,
"fragmentStoresAndAtomics": true
+ },
+ "VkPhysicalDeviceImage2DViewOf3DFeaturesEXT": {
+ "image2DViewOf3D": true
}
}
},
@@ -271,9 +298,6 @@
}
},
"gl44_baseline": {
- "extensions": {
- "VK_KHR_sampler_mirror_clamp_to_edge": 1
- },
"formats": {
"VK_FORMAT_B10G11R11_UFLOAT_PACK32": {
"VkFormatProperties": {
@@ -284,6 +308,18 @@
}
}
},
+ "gl44_baseline_ext": {
+ "extensions": {
+ "VK_KHR_sampler_mirror_clamp_to_edge": 1
+ }
+ },
+ "gl44_baseline_vk12": {
+ "features": {
+ "VkPhysicalDeviceVulkan12Features": {
+ "samplerMirrorClampToEdge": true
+ }
+ }
+ },
"gl45_baseline": {
"features": {
"VkPhysicalDeviceFeatures": {
@@ -698,7 +734,8 @@
"gl42_baseline",
[ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
"gl43_baseline",
- "gl44_baseline"
+ "gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ]
]
},
"VP_ZINK_gl45_baseline": {
@@ -720,6 +757,7 @@
[ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
"gl43_baseline",
"gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ],
"gl45_baseline"
]
},
@@ -742,6 +780,7 @@
[ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
"gl43_baseline",
"gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ],
"gl45_baseline",
"gl46_baseline"
]
@@ -765,6 +804,7 @@
[ "gl42_baseline_vk10", "gl42_baseline_vk12" ],
"gl43_baseline",
"gl44_baseline",
+ [ "gl44_baseline_ext", "gl44_baseline_vk12" ],
"gl45_baseline",
"gl46_baseline",
"gl46_optimal",
diff --git a/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt b/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt
index e61edddab..54d5bbc7e 100644
--- a/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt
+++ b/lib/mesa/src/gallium/drivers/zink/ci/zink-lvp-fails.txt
@@ -53,7 +53,6 @@ glx@glx_ext_import_context@imported context has same context id,Fail
glx@glx_ext_import_context@make current- multi process,Fail
glx@glx_ext_import_context@make current- single process,Fail
glx@glx_ext_import_context@query context info,Fail
-shaders@glsl-fs-pointcoord,Fail
shaders@point-vertex-id divisor,Fail
shaders@point-vertex-id gl_instanceid,Fail
shaders@point-vertex-id gl_instanceid divisor,Fail
@@ -92,9 +91,7 @@ spec@!opengl 2.0@gl-2.0-edgeflag-immediate,Fail
spec@!opengl 2.1@pbo,Fail
spec@!opengl 2.1@pbo@test_polygon_stip,Fail
spec@!opengl 2.1@polygon-stipple-fs,Fail
-spec@!opengl es 2.0@glsl-fs-pointcoord,Fail
spec@arb_depth_texture@depth-tex-modes,Fail
-spec@arb_framebuffer_object@fbo-gl_pointcoord,Fail
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_pipeline_statistics_query@arb_pipeline_statistics_query-frag,Fail
spec@arb_point_sprite@arb_point_sprite-checkerboard,Fail
@@ -156,33 +153,6 @@ spec@ext_framebuffer_multisample@interpolation 4 centroid-disabled,Fail
spec@ext_framebuffer_multisample@interpolation 4 centroid-edges,Fail
spec@ext_framebuffer_multisample@interpolation 4 non-centroid-deriv-disabled,Fail
spec@ext_framebuffer_multisample@interpolation 4 non-centroid-disabled,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-intel_external_sampler_only,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_attributes,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-invalid_hints,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-missing_attributes,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-ownership_transfer,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-reimport-bug,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_argb8888,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_ayuv,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_nv12,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p010,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p012,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_p016,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_uyvy,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xrgb8888,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_xyuv,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y210,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y212,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y216,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y410,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y412,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_y416,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuyv,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail
-spec@ext_image_dma_buf_import@ext_image_dma_buf_import-unsupported_format,Fail
spec@ext_packed_float@query-rgba-signed-components,Fail
spec@ext_texture_swizzle@depth_texture_mode_and_swizzle,Fail
spec@intel_performance_query@intel_performance_query-issue_2235,Fail
diff --git a/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt b/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt
index 3fa433d8f..02c86646a 100644
--- a/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt
+++ b/lib/mesa/src/gallium/drivers/zink/ci/zink-radv-fails.txt
@@ -38,7 +38,6 @@ glx@glx-swap-pixmap-bad,Fail
spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
-shaders@glsl-fs-pointcoord,Fail
shaders@point-vertex-id divisor,Fail
shaders@point-vertex-id gl_instanceid divisor,Fail
shaders@point-vertex-id gl_instanceid,Fail
@@ -59,7 +58,6 @@ spec@arb_fragment_program_shadow@txp-shadow2d,Fail
spec@arb_fragment_program_shadow@txp-shadow2drect,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@MS8,Fail
-spec@arb_framebuffer_object@fbo-gl_pointcoord,Fail
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail
@@ -388,7 +386,6 @@ spec@!opengl 2.1@pbo,Fail
spec@!opengl 2.1@pbo@test_polygon_stip,Fail
spec@!opengl 2.1@polygon-stipple-fs,Fail
spec@!opengl 3.0@clearbuffer-depth-cs-probe,Fail
-spec@!opengl es 2.0@glsl-fs-pointcoord,Fail
spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_blit.c b/lib/mesa/src/gallium/drivers/zink/zink_blit.c
index 135378f4d..df718afc7 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_blit.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_blit.c
@@ -128,7 +128,7 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *n
return false;
if (util_format_is_depth_or_stencil(info->dst.format) &&
- info->dst.format != info->src.format)
+ (info->dst.format != info->src.format || info->filter == PIPE_TEX_FILTER_LINEAR))
return false;
/* vkCmdBlitImage must not be used for multisampled source or destination images. */
@@ -252,7 +252,8 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info, bool *n
VKCTX(CmdBlitImage)(cmdbuf, src->obj->image, src->layout,
dst->obj->image, dst->layout,
1, &region,
- zink_filter(info->filter));
+ /* VUID-vkCmdBlitImage-srcImage-00232: zs formats must use NEAREST filtering */
+ util_format_is_depth_or_stencil(info->src.format) ? VK_FILTER_NEAREST : zink_filter(info->filter));
return true;
}
@@ -355,7 +356,7 @@ zink_blit(struct pipe_context *pctx,
util_blitter_clear_depth_stencil(ctx->blitter, dst_view, PIPE_CLEAR_STENCIL,
0, 0, info->dst.box.x, info->dst.box.y,
info->dst.box.width, info->dst.box.height);
- zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES);
+ zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES | ZINK_BLIT_SAVE_FS_CONST_BUF);
util_blitter_stencil_fallback(ctx->blitter,
info->dst.resource,
info->dst.level,
@@ -390,8 +391,10 @@ zink_blit_begin(struct zink_context *ctx, enum zink_blit_flags flags)
util_blitter_save_rasterizer(ctx->blitter, ctx->rast_state);
util_blitter_save_so_targets(ctx->blitter, ctx->num_so_targets, ctx->so_targets);
- if (flags & ZINK_BLIT_SAVE_FS) {
+ if (flags & ZINK_BLIT_SAVE_FS_CONST_BUF)
util_blitter_save_fragment_constant_buffer_slot(ctx->blitter, ctx->ubos[MESA_SHADER_FRAGMENT]);
+
+ if (flags & ZINK_BLIT_SAVE_FS) {
util_blitter_save_blend(ctx->blitter, ctx->gfx_pipeline_state.blend_state);
util_blitter_save_depth_stencil_alpha(ctx->blitter, ctx->dsa_state);
util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_bo.h b/lib/mesa/src/gallium/drivers/zink/zink_bo.h
index 42b1fc643..42e5ec225 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_bo.h
+++ b/lib/mesa/src/gallium/drivers/zink/zink_bo.h
@@ -30,7 +30,6 @@
#include "zink_batch.h"
#define VK_VIS_VRAM (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-#define VK_STAGING_RAM (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT)
#define VK_LAZY_VRAM (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_clear.c b/lib/mesa/src/gallium/drivers/zink/zink_clear.c
index cfb66df3f..1db046757 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_clear.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_clear.c
@@ -455,8 +455,10 @@ zink_clear_texture(struct pipe_context *pctx,
util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
set_clear_fb(pctx, surf, NULL);
ctx->blitting = true;
+ ctx->queries_disabled = true;
pctx->clear(pctx, PIPE_CLEAR_COLOR0, &scissor, &color, 0, 0);
util_blitter_restore_fb_state(ctx->blitter);
+ ctx->queries_disabled = false;
ctx->blitting = false;
} else {
float depth = 0.0;
@@ -477,8 +479,10 @@ zink_clear_texture(struct pipe_context *pctx,
util_blitter_save_framebuffer(ctx->blitter, &ctx->fb_state);
ctx->blitting = true;
set_clear_fb(pctx, NULL, surf);
+ ctx->queries_disabled = true;
pctx->clear(pctx, flags, &scissor, NULL, depth, stencil);
util_blitter_restore_fb_state(ctx->blitter);
+ ctx->queries_disabled = false;
ctx->blitting = false;
}
/* this will never destroy the surface */
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
index c59c4d5ae..a23212aa8 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_compiler.c
@@ -2207,6 +2207,32 @@ prune_io(nir_shader *nir)
}
}
+static bool
+invert_point_coord_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_load_deref)
+ return false;
+ nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
+ if (deref_var->data.location != VARYING_SLOT_PNTC)
+ return false;
+ b->cursor = nir_after_instr(instr);
+ nir_ssa_def *def = nir_vec2(b, nir_channel(b, &intr->dest.ssa, 0),
+ nir_fsub(b, nir_imm_float(b, 1.0), nir_channel(b, &intr->dest.ssa, 1)));
+ nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
+ return true;
+}
+
+static bool
+invert_point_coord(nir_shader *nir)
+{
+ if (!(nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)))
+ return false;
+ return nir_shader_instructions_pass(nir, invert_point_coord_instr, nir_metadata_dominance, NULL);
+}
+
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
{
@@ -2283,10 +2309,10 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad
if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
NIR_PASS_V(nir, lower_dual_blend);
}
- if (zink_fs_key(key)->coord_replace_bits) {
- NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
- false, zink_fs_key(key)->coord_replace_yinvert);
- }
+ if (zink_fs_key(key)->coord_replace_bits)
+ NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits, false, false);
+ if (zink_fs_key(key)->point_coord_yinvert)
+ NIR_PASS_V(nir, invert_point_coord);
if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) {
nir_foreach_shader_in_variable(var, nir)
var->data.sample = true;
@@ -3368,7 +3394,7 @@ struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
const struct pipe_stream_output_info *so_info)
{
- struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
+ struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
bool have_psiz = false;
ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
@@ -3482,6 +3508,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
ret->sinfo.sampler_mask = sampler_mask;
}
+ unsigned ubo_binding_mask = 0;
+ unsigned ssbo_binding_mask = 0;
foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
if (_nir_shader_variable_has_mode(var, nir_var_uniform |
nir_var_image |
@@ -3504,13 +3532,14 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
if (!var->data.driver_location) {
ret->has_uniforms = true;
- } else {
+ } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
ret->num_bindings[ztype]++;
+ ubo_binding_mask |= BITFIELD_BIT(binding);
}
} else if (var->data.mode == nir_var_mem_ssbo) {
ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
@@ -3519,12 +3548,15 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
var->data.driver_location,
screen->compact_descriptors);
- ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
- ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
- ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
- ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
- assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
- ret->num_bindings[ztype]++;
+ if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
+ ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
+ ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
+ ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
+ assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
+ ret->num_bindings[ztype]++;
+ ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
+ }
} else {
assert(var->data.mode == nir_var_uniform ||
var->data.mode == nir_var_image);
@@ -3644,6 +3676,16 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
prog->base.removed = true;
simple_mtx_unlock(&prog->ctx->program_lock[idx]);
util_queue_fence_wait(&prog->base.cache_fence);
+
+ for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
+ for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
+ hash_table_foreach(&prog->pipelines[r][i], entry) {
+ struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data;
+
+ util_queue_fence_wait(&pc_entry->fence);
+ }
+ }
+ }
}
if (stage != MESA_SHADER_TESS_CTRL || !shader->tcs.is_generated) {
prog->shaders[stage] = NULL;
@@ -3663,7 +3705,7 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
_mesa_set_destroy(shader->programs, NULL);
ralloc_free(shader->nir);
ralloc_free(shader->spirv);
- FREE(shader);
+ ralloc_free(shader);
}
@@ -3700,7 +3742,7 @@ void main()
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
{
- struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
+ struct zink_shader *ret = rzalloc(NULL, struct zink_shader);
ret->hash = _mesa_hash_pointer(ret);
ret->programs = _mesa_pointer_set_create(NULL);
simple_mtx_init(&ret->lock, mtx_plain);
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_context.c b/lib/mesa/src/gallium/drivers/zink/zink_context.c
index 0cbb0ccad..18bd412f1 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_context.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_context.c
@@ -99,6 +99,9 @@ zink_context_destroy(struct pipe_context *pctx)
struct zink_context *ctx = zink_context(pctx);
struct zink_screen *screen = zink_screen(pctx->screen);
+ struct pipe_framebuffer_state fb = {0};
+ pctx->set_framebuffer_state(pctx, &fb);
+
if (util_queue_is_initialized(&screen->flush_queue))
util_queue_finish(&screen->flush_queue);
if (ctx->batch.state && !screen->device_lost) {
@@ -1442,8 +1445,9 @@ zink_set_constant_buffer(struct pipe_context *pctx,
ALWAYS_INLINE static void
unbind_descriptor_reads(struct zink_resource *res, gl_shader_stage pstage)
{
- if (!res->sampler_binds[pstage] && !res->image_binds[pstage])
- res->barrier_access[pstage == MESA_SHADER_COMPUTE] &= ~VK_ACCESS_SHADER_READ_BIT;
+ bool is_compute = pstage == MESA_SHADER_COMPUTE;
+ if (!res->sampler_bind_count[is_compute] && !res->image_bind_count[is_compute])
+ res->barrier_access[is_compute] &= ~VK_ACCESS_SHADER_READ_BIT;
}
ALWAYS_INLINE static void
@@ -1520,7 +1524,8 @@ zink_set_shader_buffers(struct pipe_context *pctx,
else
new_res->obj->unordered_read = false;
} else {
- update = !!res;
+ if (res)
+ update = true;
ssbo->buffer_offset = 0;
ssbo->buffer_size = 0;
if (res) {
@@ -1609,6 +1614,7 @@ unbind_shader_image(struct zink_context *ctx, gl_shader_stage stage, unsigned sl
zink_buffer_view_reference(zink_screen(ctx->base.screen), &image_view->buffer_view, NULL);
} else {
unbind_descriptor_stage(res, stage);
+ unbind_descriptor_reads(res, stage);
if (!res->image_bind_count[is_compute])
check_for_layout_update(ctx, res, is_compute);
zink_surface_reference(zink_screen(ctx->base.screen), &image_view->surface, NULL);
@@ -1737,8 +1743,11 @@ zink_set_shader_images(struct pipe_context *pctx,
res->image_bind_count[p_stage == MESA_SHADER_COMPUTE]++;
update_res_bind_count(ctx, res, p_stage == MESA_SHADER_COMPUTE, false);
unbind_shader_image(ctx, p_stage, start_slot + i);
+ image_view->surface = surface;
+ } else {
+ /* create_image_surface will always increment ref */
+ zink_surface_reference(zink_screen(ctx->base.screen), &surface, NULL);
}
- image_view->surface = surface;
finalize_image_bind(ctx, res, p_stage == MESA_SHADER_COMPUTE);
zink_batch_resource_usage_set(&ctx->batch, res,
zink_resource_access_is_write(access), false);
@@ -2890,7 +2899,8 @@ unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, unsigned
check_resource_for_batch_ref(ctx, res);
if (res->sampler_bind_count[0]) {
update_res_sampler_layouts(ctx, res);
- _mesa_set_add(ctx->need_barriers[0], res);
+ if (res->layout != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
+ _mesa_set_add(ctx->need_barriers[0], res);
}
}
}
@@ -3258,7 +3268,7 @@ pipeline_dst_stage(VkImageLayout layout)
bool
zink_resource_access_is_write(VkAccessFlags flags)
{
- return (flags & ALL_READ_ACCESS_FLAGS) != flags;
+ return (flags & ~ALL_READ_ACCESS_FLAGS) > 0;
}
bool
@@ -4632,6 +4642,7 @@ zink_context_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resou
zink_batch_reference_resource(&ctx->batch, d);
/* don't be too creative */
zink_resource_object_reference(screen, &d->obj, s->obj);
+ d->valid_buffer_range = s->valid_buffer_range;
/* force counter buffer reset */
d->so_valid = false;
if (num_rebinds && rebind_buffer(ctx, d, rebind_mask, num_rebinds) < num_rebinds)
@@ -4682,6 +4693,13 @@ zink_get_dummy_pipe_surface(struct zink_context *ctx, int samples_index)
{
if (!ctx->dummy_surface[samples_index]) {
ctx->dummy_surface[samples_index] = zink_surface_create_null(ctx, PIPE_TEXTURE_2D, 1024, 1024, BITFIELD_BIT(samples_index));
+ /* This is possibly used with imageLoad which according to GL spec must return 0 */
+ if (!samples_index) {
+ union pipe_color_union color = {0};
+ struct pipe_box box;
+ u_box_2d(0, 0, 1024, 1024, &box);
+ ctx->base.clear_texture(&ctx->base, ctx->dummy_surface[samples_index]->texture, 0, &box, &color);
+ }
}
return ctx->dummy_surface[samples_index];
}
@@ -4850,6 +4868,8 @@ zink_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
}
ctx->gfx_pipeline_state.rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO;
ctx->gfx_pipeline_state.rendering_info.pColorAttachmentFormats = ctx->gfx_pipeline_state.rendering_formats;
+ ctx->gfx_pipeline_state.feedback_loop = screen->driver_workarounds.always_feedback_loop;
+ ctx->gfx_pipeline_state.feedback_loop_zs = screen->driver_workarounds.always_feedback_loop_zs;
const uint32_t data[] = {0};
if (!is_copy_only) {
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c b/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c
index 6f5762b62..c19af2fe7 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_descriptors.c
@@ -586,6 +586,8 @@ zink_descriptor_program_deinit(struct zink_screen *screen, struct zink_program *
pg->dd.pool_key[i]->use_count--;
pg->dd.pool_key[i] = NULL;
}
+ }
+ for (unsigned i = 0; pg->num_dsl && i < ZINK_DESCRIPTOR_NON_BINDLESS_TYPES; i++) {
if (pg->dd.templates[i]) {
VKSCR(DestroyDescriptorUpdateTemplate)(screen->dev, pg->dd.templates[i], NULL);
pg->dd.templates[i] = VK_NULL_HANDLE;
@@ -972,7 +974,7 @@ zink_descriptors_update(struct zink_context *ctx, bool is_compute)
/* bindless descriptors are context-based and get updated elsewhere */
if (pg->dd.bindless && unlikely(!ctx->dd.bindless_bound)) {
VKCTX(CmdBindDescriptorSets)(ctx->batch.state->cmdbuf, is_compute ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS,
- pg->layout, ZINK_DESCRIPTOR_BINDLESS, 1, &ctx->dd.bindless_set,
+ pg->layout, screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS], 1, &ctx->dd.bindless_set,
0, NULL);
ctx->dd.bindless_bound = true;
}
@@ -1009,11 +1011,11 @@ void
zink_batch_descriptor_deinit(struct zink_screen *screen, struct zink_batch_state *bs)
{
for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) {
- while (util_dynarray_contains(&bs->dd.pools[i], struct zink_descriptor_pool_multi *)) {
- struct zink_descriptor_pool_multi *mpool = util_dynarray_pop(&bs->dd.pools[i], struct zink_descriptor_pool_multi *);
- if (mpool) {
- deinit_multi_pool_overflow(screen, mpool);
- multi_pool_destroy(screen, mpool);
+ for (unsigned j = 0; j < bs->dd.pools[i].capacity / sizeof(struct zink_descriptor_pool_multi *); j++) {
+ struct zink_descriptor_pool_multi **mppool = util_dynarray_element(&bs->dd.pools[i], struct zink_descriptor_pool_multi *, j);
+ if (mppool && *mppool) {
+ deinit_multi_pool_overflow(screen, *mppool);
+ multi_pool_destroy(screen, *mppool);
}
}
util_dynarray_fini(&bs->dd.pools[i]);
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_kopper.c b/lib/mesa/src/gallium/drivers/zink/zink_kopper.c
index 6070abb39..a67b7566a 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_kopper.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_kopper.c
@@ -297,7 +297,6 @@ kopper_CreateSwapchain(struct zink_screen *screen, struct kopper_displaytarget *
*result = error;
return NULL;
}
- cswap->max_acquires = cswap->scci.minImageCount - cdt->caps.minImageCount;
cswap->last_present = UINT32_MAX;
*result = VK_SUCCESS;
@@ -320,6 +319,7 @@ kopper_GetSwapchainImages(struct zink_screen *screen, struct kopper_swapchain *c
for (unsigned i = 0; i < cswap->num_images; i++)
cswap->images[i].image = images[i];
}
+ cswap->max_acquires = cswap->num_images - cswap->scci.minImageCount + 1;
return error;
}
@@ -490,7 +490,7 @@ kopper_acquire(struct zink_screen *screen, struct zink_resource *res, uint64_t t
res->obj->access_stage = 0;
}
if (timeout == UINT64_MAX && util_queue_is_initialized(&screen->flush_queue) &&
- p_atomic_read_relaxed(&cdt->swapchain->num_acquires) > cdt->swapchain->max_acquires) {
+ p_atomic_read_relaxed(&cdt->swapchain->num_acquires) >= cdt->swapchain->max_acquires) {
util_queue_fence_wait(&cdt->present_fence);
}
VkSemaphoreCreateInfo sci = {
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_program.c b/lib/mesa/src/gallium/drivers/zink/zink_program.c
index 66e2161b0..1e742a31c 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_program.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_program.c
@@ -451,17 +451,37 @@ generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_scree
}
static uint32_t
-hash_pipeline_lib(const void *key)
+hash_pipeline_lib_generated_tcs(const void *key)
{
return 1;
}
+
static bool
-equals_pipeline_lib_optimal(const void *a, const void *b)
+equals_pipeline_lib_generated_tcs(const void *a, const void *b)
{
return !memcmp(a, b, sizeof(uint32_t));
}
+static uint32_t
+hash_pipeline_lib(const void *key)
+{
+ const struct zink_gfx_library_key *gkey = key;
+ /* remove generated tcs bits */
+ return zink_shader_key_optimal_no_tcs(gkey->optimal_key);
+}
+
+static bool
+equals_pipeline_lib(const void *a, const void *b)
+{
+ const struct zink_gfx_library_key *ak = a;
+ const struct zink_gfx_library_key *bk = b;
+ /* remove generated tcs bits */
+ uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key);
+ uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key);
+ return val_a == val_b;
+}
+
uint32_t
hash_gfx_input_dynamic(const void *key)
{
@@ -866,20 +886,22 @@ zink_create_gfx_program(struct zink_context *ctx,
prog->ctx = ctx;
for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) {
- util_dynarray_init(&prog->shader_cache[i][0][0], NULL);
- util_dynarray_init(&prog->shader_cache[i][0][1], NULL);
- util_dynarray_init(&prog->shader_cache[i][1][0], NULL);
- util_dynarray_init(&prog->shader_cache[i][1][1], NULL);
+ util_dynarray_init(&prog->shader_cache[i][0][0], prog);
+ util_dynarray_init(&prog->shader_cache[i][0][1], prog);
+ util_dynarray_init(&prog->shader_cache[i][1][0], prog);
+ util_dynarray_init(&prog->shader_cache[i][1][1], prog);
if (stages[i]) {
prog->shaders[i] = stages[i];
prog->stages_present |= BITFIELD_BIT(i);
}
}
+ bool generated_tcs = false;
if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) {
prog->shaders[MESA_SHADER_TESS_EVAL]->tes.generated =
prog->shaders[MESA_SHADER_TESS_CTRL] =
zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch);
prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
+ generated_tcs = true;
}
prog->stages_remaining = prog->stages_present;
@@ -902,7 +924,10 @@ zink_create_gfx_program(struct zink_context *ctx,
}
}
- _mesa_set_init(&prog->libs, prog, hash_pipeline_lib, equals_pipeline_lib_optimal);
+ if (generated_tcs)
+ _mesa_set_init(&prog->libs, prog, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs);
+ else
+ _mesa_set_init(&prog->libs, prog, hash_pipeline_lib, equals_pipeline_lib);
struct mesa_sha1 sctx;
_mesa_sha1_init(&sctx);
@@ -986,8 +1011,8 @@ precompile_compute_job(void *data, void *gdata, int thread_index)
assert(comp->module);
comp->module->shader = zink_shader_compile(screen, comp->shader, comp->shader->nir, NULL);
assert(comp->module->shader);
- util_dynarray_init(&comp->shader_cache[0], NULL);
- util_dynarray_init(&comp->shader_cache[1], NULL);
+ util_dynarray_init(&comp->shader_cache[0], comp);
+ util_dynarray_init(&comp->shader_cache[1], comp);
struct blob blob = {0};
blob_init(&blob);
@@ -1201,8 +1226,13 @@ zink_destroy_compute_program(struct zink_screen *screen,
{
deinit_program(screen, &comp->base);
- if (comp->shader)
- _mesa_set_remove_key(comp->shader->programs, comp);
+ assert(comp->shader);
+ assert(!comp->shader->spirv);
+
+ _mesa_set_destroy(comp->shader->programs, NULL);
+ ralloc_free(comp->shader->nir);
+ ralloc_free(comp->shader);
+
destroy_shader_cache(screen, &comp->shader_cache[0]);
destroy_shader_cache(screen, &comp->shader_cache[1]);
@@ -1600,6 +1630,7 @@ zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *pr
{
struct zink_gfx_library_key *gkey = rzalloc(prog, struct zink_gfx_library_key);
gkey->optimal_key = state->optimal_key;
+ assert(gkey->optimal_key);
memcpy(gkey->modules, prog->modules, sizeof(gkey->modules));
gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog);
_mesa_set_add(&prog->libs, gkey);
@@ -1695,6 +1726,8 @@ precompile_job(void *data, void *gdata, int thread_index)
struct zink_gfx_pipeline_state state = {0};
state.shader_keys_optimal.key.vs_base.last_vertex_stage = true;
+ state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard
+ state.optimal_key = state.shader_keys_optimal.key.val;
generate_gfx_program_modules_optimal(NULL, screen, prog, &state);
zink_screen_get_pipeline_cache(screen, &prog->base, true);
zink_create_pipeline_lib(screen, prog, &state);
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_program.h b/lib/mesa/src/gallium/drivers/zink/zink_program.h
index caa9c573d..12658458e 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_program.h
+++ b/lib/mesa/src/gallium/drivers/zink/zink_program.h
@@ -317,12 +317,12 @@ static inline void
zink_set_fs_point_coord_key(struct zink_context *ctx)
{
const struct zink_fs_key *fs = zink_get_fs_key(ctx);
- bool disable = ctx->gfx_pipeline_state.rast_prim != PIPE_PRIM_POINTS || !ctx->rast_state->base.sprite_coord_enable;
+ bool disable = ctx->gfx_pipeline_state.rast_prim != PIPE_PRIM_POINTS;
uint8_t coord_replace_bits = disable ? 0 : ctx->rast_state->base.sprite_coord_enable;
- bool coord_replace_yinvert = disable ? false : !!ctx->rast_state->base.sprite_coord_mode;
- if (fs->coord_replace_bits != coord_replace_bits || fs->coord_replace_yinvert != coord_replace_yinvert) {
+ bool point_coord_yinvert = disable ? false : !!ctx->rast_state->base.sprite_coord_mode;
+ if (fs->coord_replace_bits != coord_replace_bits || fs->point_coord_yinvert != point_coord_yinvert) {
zink_set_fs_key(ctx)->coord_replace_bits = coord_replace_bits;
- zink_set_fs_key(ctx)->coord_replace_yinvert = coord_replace_yinvert;
+ zink_set_fs_key(ctx)->point_coord_yinvert = point_coord_yinvert;
}
}
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_query.c b/lib/mesa/src/gallium/drivers/zink/zink_query.c
index 67ac1f915..e9dc921c2 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_query.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_query.c
@@ -1070,8 +1070,8 @@ zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
{
struct zink_query *query, *next;
LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
- begin_query(ctx, batch, query);
list_delinit(&query->active_list);
+ begin_query(ctx, batch, query);
}
}
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_resource.c b/lib/mesa/src/gallium/drivers/zink/zink_resource.c
index eb7bb894c..56f445f63 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_resource.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_resource.c
@@ -105,6 +105,7 @@ zink_destroy_resource_object(struct zink_screen *screen, struct zink_resource_ob
while (util_dynarray_contains(&obj->views, VkImageView))
VKSCR(DestroyImageView)(screen->dev, util_dynarray_pop(&obj->views, VkImageView), NULL);
}
+ util_dynarray_fini(&obj->views);
if (obj->is_buffer) {
VKSCR(DestroyBuffer)(screen->dev, obj->buffer, NULL);
VKSCR(DestroyBuffer)(screen->dev, obj->storage_buffer, NULL);
@@ -190,6 +191,9 @@ create_bci(struct zink_screen *screen, const struct pipe_resource *templ, unsign
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
+ if (screen->info.have_KHR_buffer_device_address)
+ bci.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
if (bind & PIPE_BIND_SHADER_IMAGE)
bci.usage |= VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
@@ -630,9 +634,12 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
#else
external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#endif
- } else {
+ } else if (screen->info.have_EXT_external_memory_dma_buf) {
external = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
export_types |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ } else {
+ /* can't export anything, fail early */
+ return NULL;
}
}
@@ -672,6 +679,8 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
obj->is_buffer = true;
obj->transfer_dst = true;
+ obj->vkflags = bci.flags;
+ obj->vkusage = bci.usage;
} else {
bool winsys_modifier = (export_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) && whandle && whandle->modifier != DRM_FORMAT_MOD_INVALID;
uint64_t mods[10];
@@ -1283,7 +1292,7 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned
}
struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, &res->linear, res->modifiers, res->modifiers_count, NULL);
if (!new_obj) {
- debug_printf("new backing resource alloc failed!");
+ debug_printf("new backing resource alloc failed!\n");
res->base.b.bind &= ~bind;
return false;
}
@@ -1293,11 +1302,6 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned
res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
res->obj->access = 0;
res->obj->access_stage = 0;
- bool needs_unref = true;
- if (zink_resource_has_usage(res)) {
- zink_batch_reference_resource_move(&ctx->batch, res);
- needs_unref = false;
- }
res->obj = new_obj;
for (unsigned i = 0; i <= res->base.b.last_level; i++) {
struct pipe_box box = {0, 0, 0,
@@ -1306,8 +1310,7 @@ add_resource_bind(struct zink_context *ctx, struct zink_resource *res, unsigned
box.depth = util_num_layers(&res->base.b, i);
ctx->base.resource_copy_region(&ctx->base, &res->base.b, i, 0, 0, 0, &staging.base.b, i, &box);
}
- if (needs_unref)
- zink_resource_object_reference(screen, &old_obj, NULL);
+ zink_resource_object_reference(screen, &old_obj, NULL);
return true;
}
@@ -1638,7 +1641,7 @@ invalidate_buffer(struct zink_context *ctx, struct zink_resource *res)
struct zink_resource_object *new_obj = resource_object_create(screen, &res->base.b, NULL, NULL, NULL, 0, NULL);
if (!new_obj) {
- debug_printf("new backing resource alloc failed!");
+ debug_printf("new backing resource alloc failed!\n");
return false;
}
/* this ref must be transferred before rebind or else BOOM */
@@ -1864,9 +1867,7 @@ zink_buffer_map(struct pipe_context *pctx,
goto success;
usage |= PIPE_MAP_UNSYNCHRONIZED;
} else if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
- (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) &&
- ((screen->info.mem_props.memoryTypes[res->obj->bo->base.placement].propertyFlags & VK_STAGING_RAM) != VK_STAGING_RAM)) ||
- !res->obj->host_visible)) {
+ (((usage & PIPE_MAP_READ) && !(usage & PIPE_MAP_PERSISTENT) && res->base.b.usage != PIPE_USAGE_STAGING) || !res->obj->host_visible)) {
assert(!(usage & (TC_TRANSFER_MAP_THREADED_UNSYNC | PIPE_MAP_THREAD_SAFE)));
if (!res->obj->host_visible || !(usage & PIPE_MAP_ONCE)) {
overwrite:
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_screen.c b/lib/mesa/src/gallium/drivers/zink/zink_screen.c
index 452f48dd4..3427b68cd 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_screen.c
+++ b/lib/mesa/src/gallium/drivers/zink/zink_screen.c
@@ -879,8 +879,12 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS:
return screen->info.feats.features.sparseResidencyImage2D ? 1 : 0;
case PIPE_CAP_QUERY_SPARSE_TEXTURE_RESIDENCY:
+ return screen->info.feats.features.sparseResidency2Samples &&
+ screen->info.feats.features.shaderResourceResidency ? 1 : 0;
case PIPE_CAP_CLAMP_SPARSE_TEXTURE_LOD:
- return screen->info.feats.features.sparseResidency2Samples ? 1 : 0;
+ return screen->info.feats.features.shaderResourceMinLod &&
+ screen->info.feats.features.sparseResidency2Samples &&
+ screen->info.feats.features.shaderResourceResidency ? 1 : 0;
case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
return screen->info.props.limits.viewportSubPixelBits;
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h b/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h
index 295cbe3cf..fab6fb403 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h
+++ b/lib/mesa/src/gallium/drivers/zink/zink_shader_keys.h
@@ -29,9 +29,9 @@
#include "compiler/shader_info.h"
struct zink_vs_key_base {
+ bool last_vertex_stage : 1;
bool clip_halfz : 1;
bool push_drawid : 1;
- bool last_vertex_stage : 1;
uint8_t pad : 5;
};
@@ -57,7 +57,7 @@ struct zink_vs_key {
};
struct zink_fs_key {
- bool coord_replace_yinvert : 1;
+ bool point_coord_yinvert : 1;
bool samples : 1;
bool force_dual_color_blend : 1;
bool force_persample_interp : 1;
@@ -107,6 +107,19 @@ union zink_shader_key_optimal {
uint32_t val;
};
+/* the default key has only last_vertex_stage set*/
+#define ZINK_SHADER_KEY_OPTIMAL_DEFAULT (1<<0)
+/* Ignore patch_vertices bits that would only be used if we had to generate the missing TCS */
+static inline uint32_t
+zink_shader_key_optimal_no_tcs(uint32_t key)
+{
+ union zink_shader_key_optimal k;
+ k.val = key;
+ k.tcs_bits = 0;
+ return k.val;
+}
+#define ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(key) (zink_shader_key_optimal_no_tcs(key) == ZINK_SHADER_KEY_OPTIMAL_DEFAULT)
+
static inline const struct zink_fs_key *
zink_fs_key(const struct zink_shader_key *key)
{
diff --git a/lib/mesa/src/gallium/drivers/zink/zink_types.h b/lib/mesa/src/gallium/drivers/zink/zink_types.h
index 93ae9ac8c..b05ad12ed 100644
--- a/lib/mesa/src/gallium/drivers/zink/zink_types.h
+++ b/lib/mesa/src/gallium/drivers/zink/zink_types.h
@@ -128,6 +128,7 @@ enum zink_blit_flags {
ZINK_BLIT_SAVE_FB = 1 << 2,
ZINK_BLIT_SAVE_TEXTURES = 1 << 3,
ZINK_BLIT_NO_COND_RENDER = 1 << 4,
+ ZINK_BLIT_SAVE_FS_CONST_BUF = 1 << 5,
};
/* descriptor types; also the ordering of the sets
diff --git a/lib/mesa/src/gallium/frontends/dri/kopper.c b/lib/mesa/src/gallium/frontends/dri/kopper.c
index 0102e2cf4..4abc5037b 100644
--- a/lib/mesa/src/gallium/frontends/dri/kopper.c
+++ b/lib/mesa/src/gallium/frontends/dri/kopper.c
@@ -194,7 +194,6 @@ fail:
dri_destroy_screen_helper(screen);
if (screen->dev)
pipe_loader_release(&screen->dev, 1);
- FREE(screen);
return NULL;
}
@@ -608,6 +607,7 @@ XXX do this once swapinterval is hooked up
unsigned bind;
dri_drawable_get_format(drawable, statts[i], &format, &bind);
+ templ.format = format;
/* the texture already exists or not requested */
if (!drawable->textures[statts[i]]) {
@@ -619,7 +619,6 @@ XXX do this once swapinterval is hooked up
if (format == PIPE_FORMAT_NONE)
continue;
- templ.format = format;
templ.bind = bind;
templ.nr_samples = 0;
templ.nr_storage_samples = 0;
@@ -646,7 +645,7 @@ XXX do this once swapinterval is hooked up
}
}
if (drawable->stvis.samples > 1 && !drawable->msaa_textures[statts[i]]) {
- templ.bind = templ.bind &
+ templ.bind = bind &
~(PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_DISPLAY_TARGET);
templ.nr_samples = drawable->stvis.samples;
templ.nr_storage_samples = drawable->stvis.samples;
diff --git a/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c b/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c
index 4a54a8981..6cc723d01 100644
--- a/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c
+++ b/lib/mesa/src/gallium/frontends/lavapipe/lvp_formats.c
@@ -135,7 +135,7 @@ lvp_physical_device_get_format_properties(struct lvp_physical_device *physical_d
PIPE_TEXTURE_2D, 0, 0, PIPE_BIND_RENDER_TARGET)) {
features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT;
/* SNORM blending on llvmpipe fails CTS - disable for now */
- if (!util_format_is_snorm(pformat))
+ if (!util_format_is_snorm(pformat) && !util_format_is_pure_integer(pformat))
features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT;
}
diff --git a/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c b/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c
index 76c0b5175..1afb29be8 100644
--- a/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c
+++ b/lib/mesa/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c
@@ -101,7 +101,7 @@ src_only_uses_uniforms(const nir_src *src, int component,
/* Record the uniform offset. */
if (uni_offsets)
- uni_offsets[ubo * MAX_INLINABLE_UNIFORMS + num_offsets[ubo]++] = offset;
+ uni_offsets[ubo * PIPE_MAX_CONSTANT_BUFFERS + num_offsets[ubo]++] = offset;
return true;
}
return false;
diff --git a/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs b/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs
index 2e793a8cf..e9d5ec205 100644
--- a/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs
+++ b/lib/mesa/src/gallium/frontends/rusticl/api/kernel.rs
@@ -240,7 +240,11 @@ pub fn set_kernel_arg(
return Err(CL_INVALID_ARG_SIZE);
}
}
- KernelArgType::MemGlobal | KernelArgType::MemConstant => {
+ KernelArgType::MemGlobal
+ | KernelArgType::MemConstant
+ | KernelArgType::Image
+ | KernelArgType::RWImage
+ | KernelArgType::Texture => {
if arg_size != std::mem::size_of::<cl_mem>() {
return Err(CL_INVALID_ARG_SIZE);
}
diff --git a/lib/mesa/src/gallium/frontends/va/picture_vp9.c b/lib/mesa/src/gallium/frontends/va/picture_vp9.c
index 3d5189a67..ff3da929f 100644
--- a/lib/mesa/src/gallium/frontends/va/picture_vp9.c
+++ b/lib/mesa/src/gallium/frontends/va/picture_vp9.c
@@ -37,12 +37,15 @@ void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context,
assert(buf->size >= sizeof(VADecPictureParameterBufferVP9) && buf->num_elements == 1);
+ context->desc.vp9.picture_parameter.prev_frame_width = context->desc.vp9.picture_parameter.frame_width;
+ context->desc.vp9.picture_parameter.prev_frame_height = context->desc.vp9.picture_parameter.frame_height;
context->desc.vp9.picture_parameter.frame_width = vp9->frame_width;
context->desc.vp9.picture_parameter.frame_height = vp9->frame_height;
context->desc.vp9.picture_parameter.pic_fields.subsampling_x = vp9->pic_fields.bits.subsampling_x;
context->desc.vp9.picture_parameter.pic_fields.subsampling_y = vp9->pic_fields.bits.subsampling_y;
context->desc.vp9.picture_parameter.pic_fields.frame_type = vp9->pic_fields.bits.frame_type;
+ context->desc.vp9.picture_parameter.pic_fields.prev_show_frame = context->desc.vp9.picture_parameter.pic_fields.show_frame;
context->desc.vp9.picture_parameter.pic_fields.show_frame = vp9->pic_fields.bits.show_frame;
context->desc.vp9.picture_parameter.pic_fields.error_resilient_mode = vp9->pic_fields.bits.error_resilient_mode;
context->desc.vp9.picture_parameter.pic_fields.intra_only = vp9->pic_fields.bits.intra_only;
diff --git a/lib/mesa/src/intel/compiler/brw_mesh.cpp b/lib/mesa/src/intel/compiler/brw_mesh.cpp
index cea5aef67..b1474c04b 100644
--- a/lib/mesa/src/intel/compiler/brw_mesh.cpp
+++ b/lib/mesa/src/intel/compiler/brw_mesh.cpp
@@ -1146,7 +1146,7 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
retype(quarter(dest_comp, q), BRW_REGISTER_TYPE_UD),
data,
comp,
- brw_imm_ud(4));
+ brw_imm_ud(4 * REG_SIZE));
}
}
}
diff --git a/lib/mesa/src/intel/ds/intel_driver_ds.cc b/lib/mesa/src/intel/ds/intel_driver_ds.cc
index 344e7ed21..dc51faae0 100644
--- a/lib/mesa/src/intel/ds/intel_driver_ds.cc
+++ b/lib/mesa/src/intel/ds/intel_driver_ds.cc
@@ -185,12 +185,10 @@ static void
send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
struct intel_ds_device *device)
{
- struct intel_ds_queue *queue;
-
PERFETTO_LOG("Sending renderstage descriptors");
device->event_id = 0;
- u_vector_foreach(queue, &device->queues) {
+ list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) {
for (uint32_t s = 0; s < ARRAY_SIZE(queue->stages); s++) {
queue->stages[s].start_ns = 0;
}
@@ -222,7 +220,7 @@ send_descriptors(IntelRenderpassDataSource::TraceContext &ctx,
}
/* Emit all the IID picked at device/queue creation. */
- u_vector_foreach(queue, &device->queues) {
+ list_for_each_entry_safe(struct intel_ds_queue, queue, &device->queues, link) {
for (unsigned s = 0; s < INTEL_DS_QUEUE_STAGE_N_STAGES; s++) {
{
/* We put the stage number in there so that all rows are order
@@ -528,29 +526,26 @@ intel_ds_device_init(struct intel_ds_device *device,
device->info = *devinfo;
device->iid = get_iid();
device->api = api;
- u_vector_init(&device->queues, 4, sizeof(struct intel_ds_queue));
+ list_inithead(&device->queues);
}
void
intel_ds_device_fini(struct intel_ds_device *device)
{
u_trace_context_fini(&device->trace_context);
- u_vector_finish(&device->queues);
}
struct intel_ds_queue *
-intel_ds_device_add_queue(struct intel_ds_device *device,
- const char *fmt_name,
- ...)
+intel_ds_device_init_queue(struct intel_ds_device *device,
+ struct intel_ds_queue *queue,
+ const char *fmt_name,
+ ...)
{
- struct intel_ds_queue *queue =
- (struct intel_ds_queue *) u_vector_add(&device->queues);
va_list ap;
memset(queue, 0, sizeof(*queue));
queue->device = device;
- queue->queue_id = u_vector_length(&device->queues) - 1;
va_start(ap, fmt_name);
vsnprintf(queue->name, sizeof(queue->name), fmt_name, ap);
@@ -561,6 +556,8 @@ intel_ds_device_add_queue(struct intel_ds_device *device,
queue->stages[s].stage_iid = get_iid();
}
+ list_add(&queue->link, &device->queues);
+
return queue;
}
diff --git a/lib/mesa/src/intel/ds/intel_driver_ds.h b/lib/mesa/src/intel/ds/intel_driver_ds.h
index ca03c6516..f88f5f7ee 100644
--- a/lib/mesa/src/intel/ds/intel_driver_ds.h
+++ b/lib/mesa/src/intel/ds/intel_driver_ds.h
@@ -107,7 +107,7 @@ struct intel_ds_device {
struct u_trace_context trace_context;
/* List of intel_ds_queue */
- struct u_vector queues;
+ struct list_head queues;
};
struct intel_ds_stage {
@@ -122,12 +122,11 @@ struct intel_ds_stage {
};
struct intel_ds_queue {
+ struct list_head link;
+
/* Device this queue belongs to */
struct intel_ds_device *device;
- /* Unique queue ID across the device */
- uint32_t queue_id;
-
/* Unique name of the queue */
char name[80];
@@ -158,9 +157,11 @@ void intel_ds_device_init(struct intel_ds_device *device,
enum intel_ds_api api);
void intel_ds_device_fini(struct intel_ds_device *device);
-struct intel_ds_queue *intel_ds_device_add_queue(struct intel_ds_device *device,
- const char *fmt_name,
- ...);
+struct intel_ds_queue *
+intel_ds_device_init_queue(struct intel_ds_device *device,
+ struct intel_ds_queue *queue,
+ const char *fmt_name,
+ ...);
void intel_ds_flush_data_init(struct intel_ds_flush_data *data,
struct intel_ds_queue *queue,
diff --git a/lib/mesa/src/intel/genxml/gen12.xml b/lib/mesa/src/intel/genxml/gen12.xml
index bc066d48c..f3ecca813 100644
--- a/lib/mesa/src/intel/genxml/gen12.xml
+++ b/lib/mesa/src/intel/genxml/gen12.xml
@@ -886,7 +886,7 @@
<field name="Return Filter Weight for Null Texels" start="66" end="66" type="uint" />
<field name="Return Filter Weight for Border Texels" start="67" end="67" type="uint" />
<field name="Force gather4 Behavior" start="69" end="69" type="bool" />
- <field name="Border Color Pointer" start="70" end="87" type="offset" />
+ <field name="Border Color Pointer" start="70" end="95" type="offset" />
<field name="TCZ Address Control Mode" start="96" end="98" type="Texture Coordinate Mode" />
<field name="TCY Address Control Mode" start="99" end="101" type="Texture Coordinate Mode" />
<field name="TCX Address Control Mode" start="102" end="104" type="Texture Coordinate Mode" />
diff --git a/lib/mesa/src/intel/genxml/gen125.xml b/lib/mesa/src/intel/genxml/gen125.xml
index 6d27fb84b..1b9ad3332 100644
--- a/lib/mesa/src/intel/genxml/gen125.xml
+++ b/lib/mesa/src/intel/genxml/gen125.xml
@@ -1068,7 +1068,7 @@
<field name="Return Filter Weight for Null Texels" start="66" end="66" type="uint" />
<field name="Return Filter Weight for Border Texels" start="67" end="67" type="uint" />
<field name="Force gather4 Behavior" start="69" end="69" type="bool" />
- <field name="Border Color Pointer" start="70" end="87" type="offset" />
+ <field name="Border Color Pointer" start="70" end="95" type="offset" />
<field name="TCZ Address Control Mode" start="96" end="98" type="Texture Coordinate Mode" />
<field name="TCY Address Control Mode" start="99" end="101" type="Texture Coordinate Mode" />
<field name="TCX Address Control Mode" start="102" end="104" type="Texture Coordinate Mode" />
@@ -6921,7 +6921,7 @@
<field name="Bindless Surface State Base Address Modify Enable" start="512" end="512" type="bool" />
<field name="Bindless Surface State MOCS" start="516" end="522" type="uint" nonzero="true" />
<field name="Bindless Surface State Base Address" start="524" end="575" type="address" />
- <field name="Bindless Surface State Size" start="588" end="607" type="uint" />
+ <field name="Bindless Surface State Size" start="576" end="607" type="uint" />
<field name="Bindless Sampler State Base Address Modify Enable" start="608" end="608" type="bool" />
<field name="Bindless Sampler State MOCS" start="612" end="618" type="uint" nonzero="true" />
<field name="Bindless Sampler State Base Address" start="620" end="671" type="address" />
diff --git a/lib/mesa/src/intel/vulkan/anv_utrace.c b/lib/mesa/src/intel/vulkan/anv_utrace.c
index 35a744dcb..3a35aefe4 100644
--- a/lib/mesa/src/intel/vulkan/anv_utrace.c
+++ b/lib/mesa/src/intel/vulkan/anv_utrace.c
@@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
if (!flush)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
+ intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
0, 0, &flush->sync);
@@ -284,8 +284,7 @@ anv_device_utrace_init(struct anv_device *device)
for (uint32_t q = 0; q < device->queue_count; q++) {
struct anv_queue *queue = &device->queues[q];
- queue->ds =
- intel_ds_device_add_queue(&device->ds, "%s%u",
+ intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u",
intel_engines_class_to_string(queue->family->engine_class),
queue->index_in_family);
}
diff --git a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c
index 15f4fdbce..3958452f0 100644
--- a/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c
+++ b/lib/mesa/src/intel/vulkan/genX_acceleration_structure.c
@@ -523,7 +523,10 @@ vk_to_grl_VertexFormat(VkFormat format)
static struct Geo
vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry,
- uint32_t prim_count)
+ uint32_t prim_count,
+ uint32_t transform_offset,
+ uint32_t primitive_offset,
+ uint32_t first_vertex)
{
struct Geo geo = {
.Flags = vk_to_grl_GeometryFlags(pGeometry->flags),
@@ -544,18 +547,25 @@ vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry,
vk_tri->vertexData.deviceAddress;
geo.Desc.Triangles.VertexBufferByteStride = vk_tri->vertexStride;
+ if (geo.Desc.Triangles.pTransformBuffer)
+ geo.Desc.Triangles.pTransformBuffer += transform_offset;
+
if (vk_tri->indexType == VK_INDEX_TYPE_NONE_KHR) {
geo.Desc.Triangles.IndexCount = 0;
geo.Desc.Triangles.VertexCount = prim_count * 3;
geo.Desc.Triangles.IndexFormat = INDEX_FORMAT_NONE;
+ geo.Desc.Triangles.pVertexBuffer += primitive_offset;
} else {
geo.Desc.Triangles.IndexCount = prim_count * 3;
geo.Desc.Triangles.VertexCount = vk_tri->maxVertex;
geo.Desc.Triangles.IndexFormat =
vk_to_grl_IndexFormat(vk_tri->indexType);
+ geo.Desc.Triangles.pIndexBuffer += primitive_offset;
}
+
geo.Desc.Triangles.VertexFormat =
vk_to_grl_VertexFormat(vk_tri->vertexFormat);
+ geo.Desc.Triangles.pVertexBuffer += vk_tri->vertexStride * first_vertex;
break;
}
@@ -563,7 +573,8 @@ vk_to_grl_Geo(const VkAccelerationStructureGeometryKHR *pGeometry,
const VkAccelerationStructureGeometryAabbsDataKHR *vk_aabbs =
&pGeometry->geometry.aabbs;
geo.Type = GEOMETRY_TYPE_PROCEDURAL;
- geo.Desc.Procedural.pAABBs_GPUVA = vk_aabbs->data.deviceAddress;
+ geo.Desc.Procedural.pAABBs_GPUVA =
+ vk_aabbs->data.deviceAddress + primitive_offset;
geo.Desc.Procedural.AABBByteStride = vk_aabbs->stride;
geo.Desc.Procedural.AABBCount = prim_count;
break;
@@ -818,7 +829,10 @@ cmd_build_acceleration_structures(
for (unsigned g = 0; g < bs->num_geometries; g++) {
const VkAccelerationStructureGeometryKHR *pGeometry = get_geometry(pInfo, g);
uint32_t prim_count = pBuildRangeInfos[g].primitiveCount;
- geos[g] = vk_to_grl_Geo(pGeometry, prim_count);
+ geos[g] = vk_to_grl_Geo(pGeometry, prim_count,
+ pBuildRangeInfos[g].transformOffset,
+ pBuildRangeInfos[g].primitiveOffset,
+ pBuildRangeInfos[g].firstVertex);
prefixes[g] = prefix_sum;
prefix_sum += prim_count;
diff --git a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c
index 84ea02f48..34337c21f 100644
--- a/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c
+++ b/lib/mesa/src/intel/vulkan/gfx8_cmd_buffer.c
@@ -679,7 +679,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
- BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
const uint8_t color_writes = dyn->cb.color_write_enables;
const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
@@ -688,10 +689,14 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
(color_writes & ((1u << state->color_att_count) - 1)) != 0;
- uint32_t blend_dws[GENX(BLEND_STATE_length) +
- MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
- uint32_t *dws = blend_dws;
- memset(blend_dws, 0, sizeof(blend_dws));
+ uint32_t num_dwords = GENX(BLEND_STATE_length) +
+ GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
+ struct anv_state blend_states =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+ num_dwords * 4,
+ 64);
+
+ uint32_t *dws = blend_states.map;
struct GENX(BLEND_STATE) blend_state = {
.AlphaToCoverageEnable = dyn->ms.alpha_to_coverage_enable,
@@ -720,10 +725,29 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
.WriteDisableBlue = write_disabled ||
(dyn->cb.attachments[i].write_mask &
VK_COLOR_COMPONENT_B_BIT) == 0,
+ /* Vulkan specification 1.2.168, VkLogicOp:
+ *
+ * "Logical operations are controlled by the logicOpEnable and
+ * logicOp members of VkPipelineColorBlendStateCreateInfo. If
+ * logicOpEnable is VK_TRUE, then a logical operation selected
+ * by logicOp is applied between each color attachment and the
+ * fragment’s corresponding output value, and blending of all
+ * attachments is treated as if it were disabled."
+ *
+ * From the Broadwell PRM Volume 2d: Command Reference:
+ * Structures: BLEND_STATE_ENTRY:
+ *
+ * "Enabling LogicOp and Color Buffer Blending at the same time
+ * is UNDEFINED"
+ */
.LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
.LogicOpEnable = dyn->cb.logic_op_enable,
.ColorBufferBlendEnable =
!dyn->cb.logic_op_enable && dyn->cb.attachments[i].blend_enable,
+
+ .ColorClampRange = COLORCLAMP_RTFORMAT,
+ .PreBlendColorClampEnable = true,
+ .PostBlendColorClampEnable = true,
};
/* Setup blend equation. */
@@ -791,7 +815,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
}
/* Generate blend state after entries. */
- GENX(BLEND_STATE_pack)(NULL, blend_dws, &blend_state);
+ GENX(BLEND_STATE_pack)(NULL, blend_states.map, &blend_state);
/* 3DSTATE_PS_BLEND to be consistent with the rest of the
* BLEND_STATE_ENTRY.
@@ -808,12 +832,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
blend.AlphaToCoverageEnable = dyn->ms.alpha_to_coverage_enable;
}
- uint32_t num_dwords = GENX(BLEND_STATE_length) +
- GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
-
- struct anv_state blend_states =
- anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
- pipeline->gfx8.blend_state, num_dwords, 64);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = blend_states.offset;
bsp.BlendStatePointerValid = true;
diff --git a/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h b/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h
index dd9ff2c27..119104f15 100644
--- a/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h
+++ b/lib/mesa/src/intel/vulkan/grl/include/GRLOCLCompatibility.h
@@ -179,17 +179,22 @@ inline float dot(const float3& a, const float3& b) {
inline float as_float(uint32_t i)
{
- return *reinterpret_cast<float*>(&i);
+ union { float f; uint32_t i; } fi;
+
+ fi.i = i;
+ return fi.f;
}
inline float3 as_float3(int3 i3)
{
- return *reinterpret_cast<float3*>(&i3);
+ float3 o = { as_float(i3.x), as_float(i3.y), as_float(i3.z) };
+ return o;
}
inline float4 as_float4(int4 i4)
{
- return *reinterpret_cast<float4*>(&i4);
+ float4 o = { as_float(i4.x), as_float(i4.y), as_float(i4.z), as_float(i4.w) };
+ return o;
}
inline float4 convert_float4_rtn(int4 i4)
diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c b/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c
index 1a14a34f0..459f0df9d 100644
--- a/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c
+++ b/lib/mesa/src/intel/vulkan_hasvk/anv_batch_chain.c
@@ -2393,14 +2393,14 @@ anv_queue_submit(struct vk_queue *vk_queue,
return VK_SUCCESS;
}
- uint64_t start_ts = intel_ds_begin_submit(queue->ds);
+ uint64_t start_ts = intel_ds_begin_submit(&queue->ds);
pthread_mutex_lock(&device->mutex);
result = anv_queue_submit_locked(queue, submit);
/* Take submission ID under lock */
pthread_mutex_unlock(&device->mutex);
- intel_ds_end_submit(queue->ds, start_ts);
+ intel_ds_end_submit(&queue->ds, start_ts);
return result;
}
diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_image.c b/lib/mesa/src/intel/vulkan_hasvk/anv_image.c
index f10f46454..807c897cb 100644
--- a/lib/mesa/src/intel/vulkan_hasvk/anv_image.c
+++ b/lib/mesa/src/intel/vulkan_hasvk/anv_image.c
@@ -375,6 +375,13 @@ can_fast_clear_with_non_zero_color(const struct intel_device_info *devinfo,
uint32_t plane,
const VkImageFormatListCreateInfo *fmt_list)
{
+ /* Triangles rendered on non-zero fast cleared images with 8xMSAA can get
+ * black pixels around them on Haswell.
+ */
+ if (devinfo->ver == 7 && image->vk.samples == 8) {
+ return false;
+ }
+
/* If we don't have an AUX surface where fast clears apply, we can return
* early.
*/
diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c b/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c
index 58fb1c74c..03a9338df 100644
--- a/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c
+++ b/lib/mesa/src/intel/vulkan_hasvk/anv_pipeline.c
@@ -108,7 +108,7 @@ anv_shader_stage_to_nir(struct anv_device *device,
.subgroup_shuffle = true,
.subgroup_vote = true,
.tessellation = true,
- .transform_feedback = pdevice->info.ver >= 8,
+ .transform_feedback = true,
.variable_pointers = true,
.vk_memory_model = true,
.vk_memory_model_device_scope = true,
diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_private.h b/lib/mesa/src/intel/vulkan_hasvk/anv_private.h
index 0367cefec..39663e858 100644
--- a/lib/mesa/src/intel/vulkan_hasvk/anv_private.h
+++ b/lib/mesa/src/intel/vulkan_hasvk/anv_private.h
@@ -1074,7 +1074,7 @@ struct anv_queue {
/** Synchronization object for debug purposes (DEBUG_SYNC) */
struct vk_sync *sync;
- struct intel_ds_queue * ds;
+ struct intel_ds_queue ds;
};
struct nir_xfb_info;
@@ -2314,14 +2314,25 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
}
break;
- case VK_ACCESS_2_SHADER_READ_BIT:
case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_2_TRANSFER_READ_BIT:
+ case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT:
/* Transitioning a buffer to be read through the sampler, so
* invalidate the texture cache, we don't want any stale data.
*/
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
+ case VK_ACCESS_2_SHADER_READ_BIT:
+ /* Same as VK_ACCESS_2_UNIFORM_READ_BIT and
+ * VK_ACCESS_2_SHADER_SAMPLED_READ_BIT cases above
+ */
+ pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT |
+ ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+ if (!device->physical->compiler->indirect_ubos_use_sampler) {
+ pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
+ pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
+ }
+ break;
case VK_ACCESS_2_MEMORY_READ_BIT:
/* Transitioning a buffer for generic read, invalidate all the
* caches.
@@ -2360,6 +2371,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
*/
pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
break;
+ case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
default:
break; /* Nothing to do */
}
diff --git a/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c b/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c
index 35a744dcb..3a35aefe4 100644
--- a/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c
+++ b/lib/mesa/src/intel/vulkan_hasvk/anv_utrace.c
@@ -111,7 +111,7 @@ anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
if (!flush)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
- intel_ds_flush_data_init(&flush->ds, queue->ds, queue->ds->submission_id);
+ intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id);
result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
0, 0, &flush->sync);
@@ -284,8 +284,7 @@ anv_device_utrace_init(struct anv_device *device)
for (uint32_t q = 0; q < device->queue_count; q++) {
struct anv_queue *queue = &device->queues[q];
- queue->ds =
- intel_ds_device_add_queue(&device->ds, "%s%u",
+ intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u",
intel_engines_class_to_string(queue->family->engine_class),
queue->index_in_family);
}
diff --git a/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c
index df815c55b..9a60486fa 100644
--- a/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c
+++ b/lib/mesa/src/intel/vulkan_hasvk/genX_cmd_buffer.c
@@ -690,7 +690,8 @@ vk_image_layout_stencil_write_optimal(VkImageLayout layout)
{
return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL ||
- layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL;
+ layout == VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL ||
+ layout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL;
}
#endif
@@ -721,6 +722,7 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer,
* - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL
+ * - VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL
*
* For general, we have no nice opportunity to transition so we do the copy
* to the shadow unconditionally at the end of the subpass. For transfer
@@ -6701,6 +6703,7 @@ void genX(CmdEndRendering)(
* - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL
+ * - VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL
* - VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT
*
* For general, we have no nice opportunity to transition so we do the copy
diff --git a/lib/mesa/src/loader/meson.build b/lib/mesa/src/loader/meson.build
index 6334cb981..e5a8ceac1 100644
--- a/lib/mesa/src/loader/meson.build
+++ b/lib/mesa/src/loader/meson.build
@@ -28,7 +28,7 @@ if with_platform_x11 and with_dri3
include_directories : [inc_include, inc_src],
dependencies : [
dep_libdrm, dep_xcb_dri3, dep_xcb_present, dep_xcb_sync, dep_xshmfence,
- dep_xcb_xfixes,
+ dep_xcb_xfixes, dep_xcb_xrandr,
],
build_by_default : false,
)
diff --git a/lib/mesa/src/mapi/glapi/gen/meson.build b/lib/mesa/src/mapi/glapi/gen/meson.build
index 8866701e4..7baa275ed 100644
--- a/lib/mesa/src/mapi/glapi/gen/meson.build
+++ b/lib/mesa/src/mapi/glapi/gen/meson.build
@@ -22,7 +22,6 @@ glapi_gen_gl_xml = files('../registry/gl.xml')
glapi_gen_mapi_deps = [
glapi_gen_gl_xml,
genCommon_py,
- glapi_gen_gl_xml,
]
gl_and_es_api_files = files('gl_and_es_API.xml')
diff --git a/lib/mesa/src/mesa/main/consts_exts.h b/lib/mesa/src/mesa/main/consts_exts.h
index 11221ca58..105c762e1 100644
--- a/lib/mesa/src/mesa/main/consts_exts.h
+++ b/lib/mesa/src/mesa/main/consts_exts.h
@@ -936,6 +936,9 @@ struct gl_constants
/** GL_ARB_get_program_binary */
GLuint NumProgramBinaryFormats;
+ /** GL_ARB_gl_spirv */
+ GLuint NumShaderBinaryFormats;
+
/** GL_NV_conservative_raster */
GLuint MaxSubpixelPrecisionBiasBits;
diff --git a/lib/mesa/src/mesa/main/draw.c b/lib/mesa/src/mesa/main/draw.c
index 87d88a81c..d97ff82b8 100644
--- a/lib/mesa/src/mesa/main/draw.c
+++ b/lib/mesa/src/mesa/main/draw.c
@@ -1971,9 +1971,11 @@ _mesa_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
min_index_ptr = (uintptr_t) indices[0];
max_index_ptr = 0;
for (i = 0; i < primcount; i++) {
- min_index_ptr = MIN2(min_index_ptr, (uintptr_t) indices[i]);
- max_index_ptr = MAX2(max_index_ptr, (uintptr_t) indices[i] +
- (count[i] << index_size_shift));
+ if (count[i]) {
+ min_index_ptr = MIN2(min_index_ptr, (uintptr_t) indices[i]);
+ max_index_ptr = MAX2(max_index_ptr, (uintptr_t) indices[i] +
+ (count[i] << index_size_shift));
+ }
}
/* Check if we can handle this thing as a bunch of index offsets from the
@@ -1984,7 +1986,8 @@ _mesa_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
*/
if (index_size_shift) {
for (i = 0; i < primcount; i++) {
- if ((((uintptr_t) indices[i] - min_index_ptr) &
+ if (count[i] &&
+ (((uintptr_t)indices[i] - min_index_ptr) &
((1 << index_size_shift) - 1)) != 0) {
fallback = true;
break;
diff --git a/lib/mesa/src/mesa/state_tracker/st_extensions.h b/lib/mesa/src/mesa/state_tracker/st_extensions.h
index 7bf1aa8c8..fdfac7ece 100644
--- a/lib/mesa/src/mesa/state_tracker/st_extensions.h
+++ b/lib/mesa/src/mesa/state_tracker/st_extensions.h
@@ -35,7 +35,8 @@ struct pipe_screen;
extern void st_init_limits(struct pipe_screen *screen,
struct gl_constants *c,
- struct gl_extensions *extensions);
+ struct gl_extensions *extensions,
+ gl_api api);
extern void st_init_extensions(struct pipe_screen *screen,
struct gl_constants *consts,
diff --git a/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c b/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c
index fe7b7b212..660933fed 100644
--- a/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c
+++ b/lib/mesa/src/microsoft/vulkan/dzn_descriptor_set.c
@@ -645,7 +645,7 @@ dzn_pipeline_layout_create(struct dzn_device *device,
D3D12_ROOT_PARAMETER1 root_params[MAX_ROOT_PARAMS] = { 0 };
D3D12_DESCRIPTOR_RANGE1 *range_ptr = ranges;
D3D12_ROOT_PARAMETER1 *root_param;
- uint32_t root_dwords = 0;
+ ASSERTED uint32_t root_dwords = 0;
for (uint32_t i = 0; i < MAX_SHADER_VISIBILITIES; i++) {
dzn_foreach_pool_type (type) {
diff --git a/lib/mesa/src/panfrost/lib/pan_props.c b/lib/mesa/src/panfrost/lib/pan_props.c
index 048954b4c..e37c68258 100644
--- a/lib/mesa/src/panfrost/lib/pan_props.c
+++ b/lib/mesa/src/panfrost/lib/pan_props.c
@@ -341,6 +341,7 @@ panfrost_close_device(struct panfrost_device *dev)
if (dev->model) {
pthread_mutex_destroy(&dev->submit_lock);
panfrost_bo_unreference(dev->tiler_heap);
+ panfrost_bo_unreference(dev->sample_positions);
panfrost_bo_cache_evict_all(dev);
pthread_mutex_destroy(&dev->bo_cache.lock);
util_sparse_array_finish(&dev->bo_map);
diff --git a/lib/mesa/src/util/00-mesa-defaults.conf b/lib/mesa/src/util/00-mesa-defaults.conf
index bee0e9ae2..6831de3ba 100644
--- a/lib/mesa/src/util/00-mesa-defaults.conf
+++ b/lib/mesa/src/util/00-mesa-defaults.conf
@@ -948,6 +948,13 @@ TODO: document the other workarounds.
<option name="radeonsi_zerovram" value="true" />
</application>
</device>
+ <device driver="zink">
+ <application name="Hyperdimension Neptunia Re;Birth1" executable="NeptuniaReBirth1.exe">
+ <!-- glthread uploads need too much vram and exceed 32bit VA limit -->
+ <!-- https://gitlab.freedesktop.org/mesa/mesa/-/issues/8333 -->
+ <option name="mesa_glthread" value="false"/>
+ </application>
+ </device>
<device driver="iris">
<application name="Middle Earth: Shadow of Mordor" executable="ShadowOfMordor">
<option name="vs_position_always_invariant" value="true" />
@@ -987,6 +994,12 @@ TODO: document the other workarounds.
<application name="Batman™: Arkham Knight" executable="BatmanAK.exe">
<option name="anv_sample_mask_out_opengl_behaviour" value="true"/>
</application>
+ <application name="Rise of the Tomb Raider" executable="RiseOfTheTombRaider">
+ <option name="limit_trig_input_range" value="true" />
+ </application>
+ <application name="Rise of the Tomb Raider" executable="ROTTR.exe">
+ <option name="limit_trig_input_range" value="true" />
+ </application>
</device>
<device driver="r600">
diff --git a/lib/mesa/src/util/disk_cache_os.c b/lib/mesa/src/util/disk_cache_os.c
index 6ef0e0dd4..158501045 100644
--- a/lib/mesa/src/util/disk_cache_os.c
+++ b/lib/mesa/src/util/disk_cache_os.c
@@ -200,12 +200,24 @@ choose_lru_file_matching(const char *dir_path,
if (dir == NULL)
return NULL;
+ const int dir_fd = dirfd(dir);
+
/* First count the number of files in the directory */
unsigned total_file_count = 0;
while ((dir_ent = readdir(dir)) != NULL) {
+#ifdef HAVE_DIRENT_D_TYPE
if (dir_ent->d_type == DT_REG) { /* If the entry is a regular file */
total_file_count++;
}
+#else
+ struct stat st;
+
+ if (fstatat(dir_fd, dir_ent->d_name, &st, AT_SYMLINK_NOFOLLOW) == 0) {
+ if (S_ISREG(st.st_mode)) {
+ total_file_count++;
+ }
+ }
+#endif
}
/* Reset to the start of the directory */
@@ -225,7 +237,7 @@ choose_lru_file_matching(const char *dir_path,
break;
struct stat sb;
- if (fstatat(dirfd(dir), dir_ent->d_name, &sb, 0) == 0) {
+ if (fstatat(dir_fd, dir_ent->d_name, &sb, 0) == 0) {
struct lru_file *entry = NULL;
if (!list_is_empty(lru_file_list))
entry = list_first_entry(lru_file_list, struct lru_file, node);
diff --git a/lib/mesa/src/virtio/vulkan/vn_physical_device.c b/lib/mesa/src/virtio/vulkan/vn_physical_device.c
index e45fd5e48..9ffff258e 100644
--- a/lib/mesa/src/virtio/vulkan/vn_physical_device.c
+++ b/lib/mesa/src/virtio/vulkan/vn_physical_device.c
@@ -1724,6 +1724,7 @@ vn_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
CASE(CUSTOM_BORDER_COLOR_FEATURES_EXT, custom_border_color);
CASE(DEPTH_CLIP_CONTROL_FEATURES_EXT, depth_clip_control);
CASE(DEPTH_CLIP_ENABLE_FEATURES_EXT, depth_clip_enable);
+ CASE(IMAGE_VIEW_MIN_LOD_FEATURES_EXT, image_view_min_lod);
CASE(INDEX_TYPE_UINT8_FEATURES_EXT, index_type_uint8);
CASE(LINE_RASTERIZATION_FEATURES_EXT, line_rasterization);
CASE(MULTI_DRAW_FEATURES_EXT, multi_draw);
diff --git a/lib/mesa/src/vulkan/runtime/vk_graphics_state.c b/lib/mesa/src/vulkan/runtime/vk_graphics_state.c
index 80889b6ed..3679281e1 100644
--- a/lib/mesa/src/vulkan/runtime/vk_graphics_state.c
+++ b/lib/mesa/src/vulkan/runtime/vk_graphics_state.c
@@ -1156,10 +1156,25 @@ vk_graphics_pipeline_state_fill(const struct vk_device *device,
*/
VkGraphicsPipelineLibraryFlagsEXT lib;
- if (info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) {
- const VkGraphicsPipelineLibraryCreateInfoEXT *gfx_lib_info =
- vk_find_struct_const(info->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
- lib = gfx_lib_info->flags;
+ const VkGraphicsPipelineLibraryCreateInfoEXT *gpl_info =
+ vk_find_struct_const(info->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
+ const VkPipelineLibraryCreateInfoKHR *lib_info =
+ vk_find_struct_const(info->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
+
+ if (gpl_info) {
+ lib = gpl_info->flags;
+ } else if ((lib_info && lib_info->libraryCount > 0) ||
+ (info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
+ /*
+ * From the Vulkan 1.3.210 spec:
+ * "If this structure is omitted, and either VkGraphicsPipelineCreateInfo::flags
+ * includes VK_PIPELINE_CREATE_LIBRARY_BIT_KHR or the
+ * VkGraphicsPipelineCreateInfo::pNext chain includes a
+ * VkPipelineLibraryCreateInfoKHR structure with a libraryCount greater than 0,
+ * it is as if flags is 0. Otherwise if this structure is omitted, it is as if
+ * flags includes all possible subsets of the graphics pipeline."
+ */
+ lib = 0;
} else {
/* We're building a complete pipeline. From the Vulkan 1.3.218 spec:
*
@@ -2527,7 +2542,7 @@ vk_common_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer,
uint32_t a = firstAttachment + i;
assert(a < ARRAY_SIZE(dyn->cb.attachments));
- SET_DYN_VALUE(dyn, CB_BLEND_EQUATIONS,
+ SET_DYN_VALUE(dyn, CB_WRITE_MASKS,
cb.attachments[a].write_mask, pColorWriteMasks[i]);
}
}
diff --git a/lib/mesa/src/vulkan/wsi/wsi_common_display.c b/lib/mesa/src/vulkan/wsi/wsi_common_display.c
index 0e5d27278..4d92cd13c 100644
--- a/lib/mesa/src/vulkan/wsi/wsi_common_display.c
+++ b/lib/mesa/src/vulkan/wsi/wsi_common_display.c
@@ -294,6 +294,8 @@ wsi_display_alloc_connector(struct wsi_display *wsi,
struct wsi_display_connector *connector =
vk_zalloc(wsi->alloc, sizeof (struct wsi_display_connector),
8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!connector)
+ return NULL;
connector->id = connector_id;
connector->wsi = wsi;