Diffstat (limited to 'lib/mesa/src/intel/compiler/brw_fs_nir.cpp')
 lib/mesa/src/intel/compiler/brw_fs_nir.cpp | 29 +++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/lib/mesa/src/intel/compiler/brw_fs_nir.cpp b/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
index 769077473..9065fd39d 100644
--- a/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
+++ b/lib/mesa/src/intel/compiler/brw_fs_nir.cpp
@@ -4608,6 +4608,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
assert(fence_regs_count <= ARRAY_SIZE(fence_regs));
+ /* Be conservative on Gen11+ and always stall in a fence, since
+ * there are two different fences and the shader might want to
+ * synchronize between them.
+ *
+ * TODO: Use scope and visibility information for the barriers from NIR
+ * to make a better decision on whether we need to stall.
+ */
+ bool force_stall = devinfo->ver >= 11;
+
/* There are four cases where we want to insert a stall:
*
* 1. If we're a nir_intrinsic_end_invocation_interlock. This is
@@ -4623,10 +4632,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* scheduling barrier to keep the compiler from moving things
* around in an invalid way.
*
- * 4. On platforms with LSC.
+ * 4. On Gen11+ and on platforms with LSC we have multiple fence
+ * types; without further information about the fence, we need to
+ * force a stall.
*/
if (instr->intrinsic == nir_intrinsic_end_invocation_interlock ||
- fence_regs_count != 1 || devinfo->has_lsc) {
+ fence_regs_count != 1 || devinfo->has_lsc || force_stall) {
ubld.exec_all().group(1, 0).emit(
FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(),
fence_regs, fence_regs_count);
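The hunk above collapses the four stall cases into a single condition. A minimal standalone sketch of that predicate follows; the helper name needs_stall() and its flattened parameters are hypothetical, written only to restate the logic outside of fs_visitor::nir_emit_intrinsic():

  // Sketch only: mirrors the four stall cases listed in the comment above.
  static bool
  needs_stall(bool end_invocation_interlock, unsigned fence_regs_count,
              bool has_lsc, unsigned ver)
  {
     const bool force_stall = ver >= 11;   /* two fence types on Gen11+ */
     return end_invocation_interlock ||    /* case 1 */
            fence_regs_count != 1 ||       /* cases 2 and 3 */
            has_lsc ||                     /* LSC platforms */
            force_stall;                   /* case 4: Gen11+ */
  }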
@@ -5441,10 +5452,22 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_read_invocation: {
const fs_reg value = get_nir_src(instr->src[0]);
const fs_reg invocation = get_nir_src(instr->src[1]);
+
fs_reg tmp = bld.vgrf(value.type);
+ /* If for some reason the subgroup_size picked by NIR is larger than
+ * the dispatch size picked by the backend (this can happen in RT and
+ * FS shaders), bound the invocation to the dispatch size.
+ */
+ fs_reg bound_invocation;
+ if (bld.dispatch_width() < bld.shader->nir->info.subgroup_size) {
+ bound_invocation = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.AND(bound_invocation, invocation, brw_imm_ud(dispatch_width - 1));
+ } else {
+ bound_invocation = invocation;
+ }
bld.exec_all().emit(SHADER_OPCODE_BROADCAST, tmp, value,
- bld.emit_uniformize(invocation));
+ bld.emit_uniformize(bound_invocation));
bld.MOV(retype(dest, value.type), fs_reg(component(tmp, 0)));
break;
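The AND with brw_imm_ud(dispatch_width - 1) above works because Intel dispatch widths are powers of two (8, 16 or 32), so masking is equivalent to a modulo. A self-contained sketch of the arithmetic (plain C++, not backend code; bound_invocation() here is illustrative only):

  #include <cassert>
  #include <cstdint>

  // Wraps an invocation index into [0, dispatch_width), assuming
  // dispatch_width is a power of two, as it is on Intel hardware.
  static uint32_t
  bound_invocation(uint32_t invocation, uint32_t dispatch_width)
  {
     assert((dispatch_width & (dispatch_width - 1)) == 0); /* power of two */
     return invocation & (dispatch_width - 1);             /* == invocation % dispatch_width */
  }

  int main()
  {
     assert(bound_invocation(17, 16) == 1); /* out of range: wraps to 17 % 16 */
     assert(bound_invocation(5, 8) == 5);   /* in range: unchanged */
     return 0;
  }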