diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-12-01 07:56:05 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-12-01 07:56:05 +0000 |
commit | 8aee24721d38be66987137fa8df1d7af01b49d47 (patch) | |
tree | 4da44392fb7f189b37f95acc6a79275b2adc7692 /lib/mesa | |
parent | f94c8b00738e4837446c6b2f70213915b9d8d10c (diff) |
r600/sfn: Don't try to re-use iterators when the set is made empty
From Gert Wollny
c13de0509c43f9b9764dc939aa64fe70c6a80870 in mainline Mesa
fixes games/xonotic crash on r600 reported by edd@ on bugs@
Diffstat (limited to 'lib/mesa')
-rw-r--r-- | lib/mesa/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp | 276 |
1 files changed, 225 insertions, 51 deletions
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp index a3ad4922a..3929cf4c8 100644 --- a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_optimizer.cpp @@ -32,8 +32,11 @@ #include "sfn_instr_export.h" #include "sfn_instr_fetch.h" #include "sfn_instr_lds.h" +#include "sfn_instr_mem.h" #include "sfn_instr_tex.h" #include "sfn_peephole.h" +#include "sfn_valuefactory.h" +#include "sfn_virtualvalues.h" #include <sstream> @@ -235,7 +238,7 @@ DCEVisitor::visit(Block *block) class CopyPropFwdVisitor : public InstrVisitor { public: - CopyPropFwdVisitor(); + CopyPropFwdVisitor(ValueFactory& vf); void visit(AluInstr *instr) override; void visit(AluGroup *instr) override; @@ -249,7 +252,7 @@ public: void visit(StreamOutInstr *instr) override { (void)instr; } void visit(MemRingOutInstr *instr) override { (void)instr; } void visit(EmitVertexInstr *instr) override { (void)instr; } - void visit(GDSInstr *instr) override { (void)instr; }; + void visit(GDSInstr *instr) override; void visit(WriteTFInstr *instr) override { (void)instr; }; void visit(RatInstr *instr) override { (void)instr; }; @@ -258,7 +261,9 @@ public: void visit(LDSReadInstr *instr) override { (void)instr; }; void propagate_to(RegisterVec4& src, Instr *instr); + bool assigned_register_direct(PRegister reg); + ValueFactory& value_factory; bool progress; }; @@ -291,7 +296,7 @@ bool copy_propagation_fwd(Shader& shader) { auto& root = shader.func(); - CopyPropFwdVisitor copy_prop; + CopyPropFwdVisitor copy_prop(shader.value_factory()); do { copy_prop.progress = false; @@ -330,8 +335,9 @@ copy_propagation_backward(Shader& shader) return copy_prop.progress; } -CopyPropFwdVisitor::CopyPropFwdVisitor(): - progress(false) +CopyPropFwdVisitor::CopyPropFwdVisitor(ValueFactory& vf): + value_factory(vf), + progress(false) { } @@ -354,15 +360,29 @@ CopyPropFwdVisitor::visit(AluInstr *instr) auto src = instr->psrc(0); auto dest = instr->dest(); + /* Don't propagate an indirect load to more than one + * instruction, because we may have to split the address loads + * creating more instructions */ + if (dest->uses().size() > 1) { + auto [addr, is_for_dest, index] = instr->indirect_addr(); + if (addr && !is_for_dest) + return; + } + + auto ii = dest->uses().begin(); auto ie = dest->uses().end(); - while(ii != ie) { + /** libc++ seems to invalidate the end iterator too if a std::set is + * made empty by an erase operation, + * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7931 + */ + while(ii != ie && !dest->uses().empty()) { auto i = *ii; ++ii; /* SSA can always be propagated, registers only in the same block * and only if they are assigned in the same block */ - bool can_propagate = dest->is_ssa(); + bool can_propagate = dest->has_flag(Register::ssa); if (!can_propagate) { @@ -391,7 +411,11 @@ CopyPropFwdVisitor::visit(AluInstr *instr) if (can_propagate) { sfn_log << SfnLog::opt << " Try replace in " << i->block_id() << ":" << i->index() << *i << "\n"; - progress |= i->replace_source(dest, src); + + if (i->as_alu() && i->as_alu()->parent_group()) { + progress |= i->as_alu()->parent_group()->replace_source(dest, src); + } else + progress |= i->replace_source(dest, src); } } if (instr->dest()) { @@ -412,67 +436,190 @@ CopyPropFwdVisitor::visit(TexInstr *instr) propagate_to(instr->src(), instr); } +void CopyPropFwdVisitor::visit(GDSInstr *instr) +{ + propagate_to(instr->src(), instr); +} + void CopyPropFwdVisitor::visit(ExportInstr *instr) { propagate_to(instr->value(), instr); } +static bool register_sel_can_change(Pin pin) +{ + return pin == pin_free || pin == pin_none; +} + +static bool register_chan_is_pinned(Pin pin) +{ + return pin == pin_chan || + pin == pin_fully || + pin == pin_chgr; +} + + void -CopyPropFwdVisitor::propagate_to(RegisterVec4& src, Instr *instr) +CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr) { + /* Collect parent instructions - only ALU move without modifiers + * and without indirect access are allowed. */ AluInstr *parents[4] = {nullptr}; + bool have_candidates = false; for (int i = 0; i < 4; ++i) { - if (src[i]->chan() < 4 && src[i]->is_ssa()) { + if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) { /* We have a pre-define value, so we can't propagate a copy */ - if (src[i]->parents().empty()) + if (value[i]->parents().empty()) return; - assert(src[i]->parents().size() == 1); - parents[i] = (*src[i]->parents().begin())->as_alu(); + if (value[i]->uses().size() > 1) + return; + + assert(value[i]->parents().size() == 1); + parents[i] = (*value[i]->parents().begin())->as_alu(); + + /* Parent op is not an ALU instruction, so we can't + copy-propagate */ + if (!parents[i]) + return; + + + if ((parents[i]->opcode() != op1_mov) || + parents[i]->has_alu_flag(alu_src0_neg) || + parents[i]->has_alu_flag(alu_src0_abs) || + parents[i]->has_alu_flag(alu_dst_clamp) || + parents[i]->has_alu_flag(alu_src0_rel) || + std::get<0>(parents[i]->indirect_addr())) + return; + have_candidates = true; } } + + if (!have_candidates) + return; + + /* Collect the new source registers. We may have to move all registers + * to a new virtual sel index. */ + PRegister new_src[4] = {0}; + int new_chan[4] = {0,0,0,0}; + + uint8_t used_chan_mask = 0; + int new_sel = -1; + bool all_sel_can_change = true; + + bool is_ssa = true; - int sel = -1; for (int i = 0; i < 4; ++i) { + + /* No parent means we either ignore the channel or insert 0 or 1.*/ if (!parents[i]) continue; - if ((parents[i]->opcode() != op1_mov) || parents[i]->has_alu_flag(alu_src0_neg) || - parents[i]->has_alu_flag(alu_src0_abs) || - parents[i]->has_alu_flag(alu_dst_clamp) || - parents[i]->has_alu_flag(alu_src0_rel)) { + + unsigned allowed_mask = 0xf & ~used_chan_mask; + + auto src = parents[i]->src(0).as_register(); + if (!src) return; - } else { - auto src = parents[i]->src(0).as_register(); - if (!src) - return; - else if (!src->is_ssa()) - return; - else if (sel < 0) - sel = src->sel(); - else if (sel != src->sel()) + + /* Don't accept an array element for now, we would need extra checking + * that the value is not overwritten by an indirect access */ + if (src->pin() == pin_array) + return; + + /* Is this check still needed ? */ + if (!src->has_flag(Register::ssa) && + !assigned_register_direct(src)) { + return; + } + + /* If the channel chan't switch we have to update the channel mask + * TODO: assign channel pinned registers first might give more + * opportunities for this optimization */ + if (register_chan_is_pinned(src->pin())) + allowed_mask = 1 << src->chan(); + + /* Update the possible channel mask based on the sourcee's parent + * instruction(s) */ + for (auto p : src->parents()) { + auto alu = p->as_alu(); + if (alu) + allowed_mask &= alu->allowed_dest_chan_mask(); + } + + for (auto u : src->uses()) { + auto alu = u->as_alu(); + if (alu) + allowed_mask &= alu->allowed_src_chan_mask(); + } + + if (!allowed_mask) + return; + + /* Prefer keeping the channel, but if that's not possible + * i.e. if the sel has to change, then pick the next free channel + * (see below) */ + new_chan[i] = src->chan(); + + if (new_sel < 0) { + new_sel = src->sel(); + is_ssa = src->has_flag(Register::ssa); + } else if (new_sel != src->sel()) { + /* If we have to assign a new register sel index do so only + * if all already assigned source can get a new register index, + * and all registers are either SSA or registers. + * TODO: check whether this last restriction is required */ + if (all_sel_can_change && + register_sel_can_change(src->pin()) && + (is_ssa == src->has_flag(Register::ssa))) { + new_sel = value_factory.new_register_index(); + new_chan[i] = u_bit_scan(&allowed_mask); + } else /* Sources can't be combined to a vec4 so bail out */ return; - new_src[i] = src; } + + new_src[i] = src; + used_chan_mask |= 1 << new_chan[i]; + if (!register_sel_can_change(src->pin())) + all_sel_can_change = false; } + /* Apply the changes to the vec4 source */ + value.del_use(instr); for (int i = 0; i < 4; ++i) { if (parents[i]) { - src.del_use(instr); - src.set_value(i, new_src[i]); + new_src[i]->set_sel(new_sel); + if (is_ssa) + new_src[i]->set_flag(Register::ssa); + new_src[i]->set_chan(new_chan[i]); + + value.set_value(i, new_src[i]); + if (new_src[i]->pin() != pin_fully) { if (new_src[i]->pin() == pin_chan) new_src[i]->set_pin(pin_chgr); else new_src[i]->set_pin(pin_group); } - src.add_use(instr); progress |= true; } } + value.add_use(instr); if (progress) - src.validate(); + value.validate(); +} + +bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg) +{ + for (auto p: reg->parents()) { + if (p->as_alu()) { + auto [addr, is_regoffs, is_index] = p->as_alu()->indirect_addr(); + if (addr) + return false; + } + } + return true; } void @@ -518,7 +665,7 @@ CopyPropBackVisitor::visit(AluInstr *instr) return; } - if (!dest->is_ssa() && dest->parents().size() > 1) + if (!dest->has_flag(Register::ssa) && dest->parents().size() > 1) return; for (auto& i : src_reg->parents()) { @@ -600,6 +747,36 @@ public: bool progress; }; +class HasVecDestVisitor : public ConstInstrVisitor { +public: + HasVecDestVisitor(): + has_group_dest(false) + { + } + + void visit(const AluInstr& instr) override { (void)instr; } + void visit(const AluGroup& instr) override { (void)instr; } + void visit(const TexInstr& instr) override { (void)instr; has_group_dest = true; }; + void visit(const ExportInstr& instr) override { (void)instr; } + void visit(const FetchInstr& instr) override { (void)instr; has_group_dest = true; }; + void visit(const Block& instr) override { (void)instr; }; + void visit(const ControlFlowInstr& instr) override{ (void)instr; } + void visit(const IfInstr& instr) override{ (void)instr; } + void visit(const ScratchIOInstr& instr) override { (void)instr; }; + void visit(const StreamOutInstr& instr) override { (void)instr; } + void visit(const MemRingOutInstr& instr) override { (void)instr; } + void visit(const EmitVertexInstr& instr) override { (void)instr; } + void visit(const GDSInstr& instr) override { (void)instr; } + void visit(const WriteTFInstr& instr) override { (void)instr; }; + void visit(const LDSAtomicInstr& instr) override { (void)instr; }; + void visit(const LDSReadInstr& instr) override { (void)instr; }; + void visit(const RatInstr& instr) override { (void)instr; }; + + bool has_group_dest; +}; + + + bool simplify_source_vectors(Shader& sh) { @@ -625,6 +802,16 @@ SimplifySourceVecVisitor::visit(TexInstr *instr) if (nvals == 1) { for (int i = 0; i < 4; ++i) if (src[i]->chan() < 4) { + HasVecDestVisitor check_dests; + for (auto p : src[i]->parents()) { + p->accept(check_dests); + if (check_dests.has_group_dest) + break; + } + + if (check_dests.has_group_dest) + break; + if (src[i]->pin() == pin_group) src[i]->set_pin(pin_free); else if (src[i]->pin() == pin_chgr) @@ -678,7 +865,7 @@ SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4) if (s->chan() > 3) continue; - if (!s->is_ssa()) + if (!s->has_flag(Register::ssa)) continue; /* Cayman trans ops have more then one parent for @@ -722,23 +909,10 @@ ReplaceConstSource::visit(AluInstr *alu) int override_chan = -1; - auto ic = src->as_inline_const(); - if (ic) { - if (ic->sel() == ALU_SRC_0) - override_chan = 4; - - if (ic->sel() == ALU_SRC_1) - override_chan = 5; - } - - auto literal = src->as_literal(); - if (literal) { - - if (literal->value() == 0) - override_chan = 4; - - if (literal->value() == 0x3F800000) - override_chan = 5; + if (value_is_const_uint(*src, 0)) { + override_chan = 4; + } else if (value_is_const_float(*src, 1.0f)) { + override_chan = 5; } if (override_chan >= 0) { |