Diffstat (limited to 'lib/mesa/src/intel/compiler/brw_fs_nir.cpp')
-rw-r--r--  lib/mesa/src/intel/compiler/brw_fs_nir.cpp | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/lib/mesa/src/intel/compiler/brw_fs_nir.cpp b/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
index 769077473..9065fd39d 100644
--- a/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
+++ b/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
@@ -4608,6 +4608,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
       assert(fence_regs_count <= ARRAY_SIZE(fence_regs));
 
+      /* Be conservative on Gen11+ and always stall in a fence.  Since
+       * there are two different fences, the shader might want to
+       * synchronize between them.
+       *
+       * TODO: Use scope and visibility information for the barriers from NIR
+       * to make a better decision on whether we need to stall.
+       */
+      bool force_stall = devinfo->ver >= 11;
+
       /* There are four cases where we want to insert a stall:
        *
        * 1. If we're a nir_intrinsic_end_invocation_interlock.  This is
@@ -4623,10 +4632,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        *    scheduling barrier to keep the compiler from moving things
        *    around in an invalid way.
        *
-       * 4. On platforms with LSC.
+       * 4. On Gen11+ and platforms with LSC, we have multiple fence types;
+       *    without further information about the fence, we need to force
+       *    a stall.
        */
       if (instr->intrinsic == nir_intrinsic_end_invocation_interlock ||
-          fence_regs_count != 1 || devinfo->has_lsc) {
+          fence_regs_count != 1 || devinfo->has_lsc || force_stall) {
          ubld.exec_all().group(1, 0).emit(
             FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(),
             fence_regs, fence_regs_count);
@@ -5441,10 +5452,22 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
   case nir_intrinsic_read_invocation: {
      const fs_reg value = get_nir_src(instr->src[0]);
      const fs_reg invocation = get_nir_src(instr->src[1]);
      fs_reg tmp = bld.vgrf(value.type);
 
+      /* When the subgroup_size picked by NIR is larger than the dispatch
+       * size picked by the backend (this can happen in RT and FS), bound
+       * the invocation to the dispatch size.
+       */
+      fs_reg bound_invocation;
+      if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) {
+         bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD);
+         bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1));
+      } else {
+         bound_invocation = invocation;
+      }
+
      bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value,
-                          bld.emit_uniformize(invocation));
+                          bld.emit_uniformize(bound_invocation));
 
      bld.MOV(retype(dest, value.type), fs_reg(component(tmp, 0)));
      break;
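
The fence hunks above collapse the four stall cases into a single predicate. The following is a minimal standalone sketch of that combined condition; needs_stall(), DevInfo and its fields are hypothetical stand-ins for the visitor's state, not the actual Mesa API:

/* Sketch of the combined stall decision from the diff above. */
struct DevInfo {
   int ver;       /* hardware generation, e.g. 11 for Gen11 */
   bool has_lsc;  /* LSC present, bringing multiple fence types */
};

static bool
needs_stall(bool is_end_invocation_interlock, unsigned fence_regs_count,
            const DevInfo &devinfo)
{
   /* Case 4: be conservative on Gen11+, since the shader might want to
    * synchronize between the two different fence types. */
   const bool force_stall = devinfo.ver >= 11;

   return is_end_invocation_interlock || /* case 1: writes complete before EOT */
          fence_regs_count != 1 ||       /* cases 2-3: several or zero fences  */
          devinfo.has_lsc ||             /* case 4: LSC fence types            */
          force_stall;                   /* case 4: Gen11+ conservatism        */
}

Note that force_stall is computed separately (as in the diff) so it can later be refined using the NIR scope and visibility information mentioned in the TODO, without touching the other three cases.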
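
The read_invocation hunk relies on dispatch widths being powers of two (SIMD8/16/32), so ANDing with dispatch_width - 1 is equivalent to taking the invocation modulo the dispatch width; that is exactly what bld.AND(..., brw_imm_ud(dispatch_width - 1)) computes per lane. A small self-contained sketch of the identity (the names here are illustrative, not Mesa's):

#include <cassert>
#include <cstdint>

/* For power-of-two widths, masking with (width - 1) equals the index
 * modulo the width. */
static uint32_t
bound_invocation(uint32_t invocation, uint32_t dispatch_width)
{
   /* Dispatch widths are SIMD8/16/32, i.e. powers of two. */
   assert(dispatch_width != 0 &&
          (dispatch_width & (dispatch_width - 1)) == 0);
   return invocation & (dispatch_width - 1);
}

int main()
{
   /* A NIR subgroup_size of 32 with a SIMD16 dispatch: lane 19 wraps
    * to 19 % 16 == 3 instead of indexing past the dispatch width. */
   assert(bound_invocation(19, 16) == 3);
   assert(bound_invocation(7, 8) == 7);
   return 0;
}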