diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2015-11-22 02:44:04 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2015-11-22 02:44:04 +0000 |
commit | a49bc27fea806457ac8ff093d9be74fd7ff3cdf7 (patch) | |
tree | be02e2962dcb74395545ecf2ea75dd6390fc51d3 /lib/mesa/src/gallium/drivers/r600/sb | |
parent | ef558f091c505ddc9ccc98e1f578db6a041f0db2 (diff) |
import Mesa 11.0.6
Diffstat (limited to 'lib/mesa/src/gallium/drivers/r600/sb')
16 files changed, 96 insertions, 706 deletions
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_builder.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_builder.cpp index f13f0d77a..55e2a8508 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_builder.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_builder.cpp @@ -129,11 +129,7 @@ int bc_builder::build_fetch_clause(cf_node* n) { I != E; ++I) { fetch_node *f = static_cast<fetch_node*>(*I); - if (f->bc.op_ptr->flags & FF_GDS) - build_fetch_gds(f); - else if (f->bc.op_ptr->flags & FF_MEM) - build_fetch_mem(f); - else if (f->bc.op_ptr->flags & FF_VTX) + if (f->bc.op_ptr->flags & FF_VTX) build_fetch_vtx(f); else build_fetch_tex(f); @@ -338,7 +334,7 @@ int bc_builder::build_cf_mem(cf_node* n) { if (!ctx.is_egcm()) bb << CF_ALLOC_EXPORT_WORD1_BUF_R6R7() - .ARR_SIZE(bc.array_size) + .ARRAY_SIZE(bc.array_size) .BARRIER(bc.barrier) .BURST_COUNT(bc.burst_count) .CF_INST(ctx.cf_opcode(bc.op)) @@ -349,7 +345,7 @@ int bc_builder::build_cf_mem(cf_node* n) { else if (ctx.is_evergreen()) bb << CF_ALLOC_EXPORT_WORD1_BUF_EG() - .ARR_SIZE(bc.array_size) + .ARRAY_SIZE(bc.array_size) .BARRIER(bc.barrier) .BURST_COUNT(bc.burst_count) .CF_INST(ctx.cf_opcode(bc.op)) @@ -360,7 +356,7 @@ int bc_builder::build_cf_mem(cf_node* n) { else // cayman bb << CF_ALLOC_EXPORT_WORD1_BUF_CM() - .ARR_SIZE(bc.array_size) + .ARRAY_SIZE(bc.array_size) .BARRIER(bc.barrier) .BURST_COUNT(bc.burst_count) .CF_INST(ctx.cf_opcode(bc.op)) @@ -375,37 +371,6 @@ int bc_builder::build_alu(alu_node* n) { const bc_alu &bc = n->bc; const alu_op_info *aop = bc.op_ptr; - if (n->bc.op_ptr->flags & AF_LDS) { - assert(ctx.is_egcm()); - bb << ALU_WORD0_LDS_IDX_OP_EGCM() - .SRC0_SEL(bc.src[0].sel) - .SRC0_REL(bc.src[0].rel) - .SRC0_CHAN(bc.src[0].chan) - .IDX_OFFSET_4((bc.lds_idx_offset >> 4) & 1) - .SRC1_SEL(bc.src[1].sel) - .SRC1_REL(bc.src[1].rel) - .SRC1_CHAN(bc.src[1].chan) - .IDX_OFFSET_5((bc.lds_idx_offset >> 5) & 1) - .INDEX_MODE(bc.index_mode) - .PRED_SEL(bc.pred_sel) - .LAST(bc.last); - - bb << ALU_WORD1_LDS_IDX_OP_EGCM() - .SRC2_SEL(bc.src[2].sel) - .SRC2_REL(bc.src[2].rel) - .SRC2_CHAN(bc.src[2].chan) - .IDX_OFFSET_1((bc.lds_idx_offset >> 1) & 1) - .ALU_INST(ctx.alu_opcode(ALU_OP3_LDS_IDX_OP)) - .BANK_SWIZZLE(bc.bank_swizzle) - .LDS_OP((bc.op_ptr->opcode[1] >> 8) & 0xff) - .IDX_OFFSET_0((bc.lds_idx_offset >> 0) & 1) - .IDX_OFFSET_2((bc.lds_idx_offset >> 2) & 1) - .DST_CHAN(bc.dst_chan) - .IDX_OFFSET_3((bc.lds_idx_offset >> 3) & 1); - - return 0; - } - bb << ALU_WORD0_ALL() .INDEX_MODE(bc.index_mode) .LAST(bc.last) @@ -500,7 +465,7 @@ int bc_builder::build_alu(alu_node* n) { int bc_builder::build_fetch_tex(fetch_node* n) { const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; + const fetch_op_info *fop = bc.op_ptr; assert(!(fop->flags & FF_VTX)); @@ -562,49 +527,9 @@ int bc_builder::build_fetch_tex(fetch_node* n) { return 0; } -int bc_builder::build_fetch_gds(fetch_node *n) { - const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; - unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f; - unsigned mem_op = 4; - assert(fop->flags & FF_GDS); - - if (bc.op == FETCH_OP_TF_WRITE) { - mem_op = 5; - gds_op = 0; - } - - bb << MEM_GDS_WORD0_EGCM() - .MEM_INST(2) - .MEM_OP(mem_op) - .SRC_GPR(bc.src_gpr) - .SRC_SEL_X(bc.src_sel[0]) - .SRC_SEL_Y(bc.src_sel[1]) - .SRC_SEL_Z(bc.src_sel[2]); - - bb << MEM_GDS_WORD1_EGCM() - .DST_GPR(bc.dst_gpr) - .DST_REL_MODE(bc.dst_rel) - .GDS_OP(gds_op) - .SRC_GPR(bc.src2_gpr) - .UAV_INDEX_MODE(bc.uav_index_mode) - .UAV_ID(bc.uav_id) - .ALLOC_CONSUME(bc.alloc_consume) - .BCAST_FIRST_REQ(bc.bcast_first_req); - - bb << MEM_GDS_WORD2_EGCM() - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]); - - bb << 0; - return 0; -} - int bc_builder::build_fetch_vtx(fetch_node* n) { const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; + const fetch_op_info *fop = bc.op_ptr; assert(fop->flags & FF_VTX); @@ -701,46 +626,4 @@ int bc_builder::build_fetch_vtx(fetch_node* n) { return 0; } -int bc_builder::build_fetch_mem(fetch_node* n) { - const bc_fetch &bc = n->bc; - ASSERTED const fetch_op_info *fop = bc.op_ptr; - - assert(fop->flags & FF_MEM); - - bb << MEM_RD_WORD0_R7EGCM() - .MEM_INST(2) - .ELEM_SIZE(bc.elem_size) - .FETCH_WHOLE_QUAD(bc.fetch_whole_quad) - .MEM_OP(0) - .UNCACHED(bc.uncached) - .INDEXED(bc.indexed) - .SRC_SEL_Y(bc.src_sel[1]) - .SRC_GPR(bc.src_gpr) - .SRC_REL(bc.src_rel) - .SRC_SEL_X(bc.src_sel[0]) - .BURST_COUNT(bc.burst_count) - .LDS_REQ(bc.lds_req) - .COALESCED_READ(bc.coalesced_read); - - bb << MEM_RD_WORD1_R7EGCM() - .DST_GPR(bc.dst_gpr) - .DST_REL(bc.dst_rel) - .DST_SEL_X(bc.dst_sel[0]) - .DST_SEL_Y(bc.dst_sel[1]) - .DST_SEL_Z(bc.dst_sel[2]) - .DST_SEL_W(bc.dst_sel[3]) - .DATA_FORMAT(bc.data_format) - .NUM_FORMAT_ALL(bc.num_format_all) - .FORMAT_COMP_ALL(bc.format_comp_all) - .SRF_MODE_ALL(bc.srf_mode_all); - - bb << MEM_RD_WORD2_R7EGCM() - .ARRAY_BASE(bc.array_base) - .ENDIAN_SWAP(bc.endian_swap) - .ARR_SIZE(bc.array_size); - - bb << 0; - return 0; -} - } diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp index b04cb73e2..5fe8f50aa 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp @@ -260,7 +260,7 @@ int bc_decoder::decode_cf_mem(unsigned & i, bc_cf& bc) { bc.end_of_program = w1.get_END_OF_PROGRAM(); bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); bc.mark = w1.get_MARK(); - bc.array_size = w1.get_ARR_SIZE(); + bc.array_size = w1.get_ARRAY_SIZE(); bc.comp_mask = w1.get_COMP_MASK(); } else if (ctx.is_cayman()) { @@ -269,7 +269,7 @@ int bc_decoder::decode_cf_mem(unsigned & i, bc_cf& bc) { bc.burst_count = w1.get_BURST_COUNT(); bc.mark = w1.get_MARK(); bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); - bc.array_size = w1.get_ARR_SIZE(); + bc.array_size = w1.get_ARRAY_SIZE(); bc.comp_mask = w1.get_COMP_MASK(); } else { // r67 @@ -279,7 +279,7 @@ int bc_decoder::decode_cf_mem(unsigned & i, bc_cf& bc) { bc.end_of_program = w1.get_END_OF_PROGRAM(); bc.valid_pixel_mode = w1.get_VALID_PIXEL_MODE(); bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); - bc.array_size = w1.get_ARR_SIZE(); + bc.array_size = w1.get_ARRAY_SIZE(); bc.comp_mask = w1.get_COMP_MASK(); bc.whole_quad_mode = w1.get_WHOLE_QUAD_MODE(); } @@ -310,53 +310,16 @@ int bc_decoder::decode_alu(unsigned & i, bc_alu& bc) { ALU_WORD1_OP3_ALL w1(dw1); bc.set_op(r600_isa_alu_by_opcode(ctx.isa, w1.get_ALU_INST(), 1)); - if (bc.op == ALU_OP3_LDS_IDX_OP) { - ALU_WORD0_LDS_IDX_OP_EGCM iw0(dw0); - ALU_WORD1_LDS_IDX_OP_EGCM iw1(dw1); - bc.index_mode = iw0.get_INDEX_MODE(); - bc.last = iw0.get_LAST(); - bc.pred_sel = iw0.get_PRED_SEL(); - bc.src[0].chan = iw0.get_SRC0_CHAN(); - bc.src[0].sel = iw0.get_SRC0_SEL(); - bc.src[0].rel = iw0.get_SRC0_REL(); - - bc.src[1].chan = iw0.get_SRC1_CHAN(); - bc.src[1].sel = iw0.get_SRC1_SEL(); - bc.src[1].rel = iw0.get_SRC1_REL(); - - bc.bank_swizzle = iw1.get_BANK_SWIZZLE(); - bc.src[2].chan = iw1.get_SRC2_CHAN(); - bc.src[2].sel = iw1.get_SRC2_SEL(); - bc.src[2].rel = iw1.get_SRC2_REL(); - bc.dst_chan = iw1.get_DST_CHAN(); - // TODO: clean up - for (size_t k = 0, e = r600_alu_op_table_size(); k != e; k++) { - if (((r600_alu_op_table[k].opcode[1] >> 8) & 0xff) == iw1.get_LDS_OP()) { - bc.op_ptr = &r600_alu_op_table[k]; - bc.op = k; - break; - } - } - bc.lds_idx_offset = - (iw0.get_IDX_OFFSET_4() << 4) | - (iw0.get_IDX_OFFSET_5() << 5) | - (iw1.get_IDX_OFFSET_1() << 1) | - (iw1.get_IDX_OFFSET_0() << 0) | - (iw1.get_IDX_OFFSET_2() << 2) | - (iw1.get_IDX_OFFSET_3() << 3); - } - else { - bc.bank_swizzle = w1.get_BANK_SWIZZLE(); - bc.clamp = w1.get_CLAMP(); - bc.dst_chan = w1.get_DST_CHAN(); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); + bc.bank_swizzle = w1.get_BANK_SWIZZLE(); + bc.clamp = w1.get_CLAMP(); + bc.dst_chan = w1.get_DST_CHAN(); + bc.dst_gpr = w1.get_DST_GPR(); + bc.dst_rel = w1.get_DST_REL(); - bc.src[2].chan = w1.get_SRC2_CHAN(); - bc.src[2].sel = w1.get_SRC2_SEL(); - bc.src[2].neg = w1.get_SRC2_NEG(); - bc.src[2].rel = w1.get_SRC2_REL(); - } + bc.src[2].chan = w1.get_SRC2_CHAN(); + bc.src[2].sel = w1.get_SRC2_SEL(); + bc.src[2].neg = w1.get_SRC2_NEG(); + bc.src[2].rel = w1.get_SRC2_REL(); } else { // op2 if (ctx.is_r600()) { @@ -410,28 +373,7 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) { unsigned fetch_opcode = dw0 & 0x1F; - if (fetch_opcode == 2) { // MEM_INST_MEM - unsigned mem_op = (dw0 >> 8) & 0x7; - unsigned gds_op; - if (mem_op == 0 || mem_op == 2) { - fetch_opcode = mem_op == 0 ? FETCH_OP_READ_SCRATCH : FETCH_OP_READ_MEM; - } else if (mem_op == 4) { - gds_op = (dw1 >> 9) & 0x1f; - if ((dw1 >> 9) & 0x20) - fetch_opcode = FETCH_OP_GDS_ADD_RET + gds_op; - else - fetch_opcode = FETCH_OP_GDS_ADD + gds_op; - } else if (mem_op == 5) - fetch_opcode = FETCH_OP_TF_WRITE; - bc.set_op(fetch_opcode); - } else - bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode)); - - if (bc.op_ptr->flags & FF_MEM) - return decode_fetch_mem(i, bc); - - if (bc.op_ptr->flags & FF_GDS) - return decode_fetch_gds(i, bc); + bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode)); if (bc.op_ptr->flags & FF_VTX) return decode_fetch_vtx(i, bc); @@ -497,85 +439,6 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) { return r; } -int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - uint32_t dw2 = dw[i+2]; - uint32_t tmp; - /* GDS instructions align to 4 words boundaries */ - i+= 4; - assert(i <= ndw); - - MEM_GDS_WORD0_EGCM w0(dw0); - bc.src_gpr = w0.get_SRC_GPR(); - tmp = w0.get_SRC_REL_MODE(); - bc.src_rel_global = (tmp == 2); - bc.src_sel[0] = w0.get_SRC_SEL_X(); - bc.src_sel[1] = w0.get_SRC_SEL_Y(); - bc.src_sel[2] = w0.get_SRC_SEL_Z(); - - MEM_GDS_WORD1_EGCM w1(dw1); - bc.dst_gpr = w1.get_DST_GPR(); - tmp = w1.get_DST_REL_MODE(); - bc.dst_rel_global = (tmp == 2); - bc.src2_gpr = w1.get_SRC_GPR(); - bc.alloc_consume = w1.get_ALLOC_CONSUME(); - bc.uav_id = w1.get_UAV_ID(); - bc.uav_index_mode = w1.get_UAV_INDEX_MODE(); - bc.bcast_first_req = w1.get_BCAST_FIRST_REQ(); - - MEM_GDS_WORD2_EGCM w2(dw2); - bc.dst_sel[0] = w2.get_DST_SEL_X(); - bc.dst_sel[1] = w2.get_DST_SEL_Y(); - bc.dst_sel[2] = w2.get_DST_SEL_Z(); - bc.dst_sel[3] = w2.get_DST_SEL_W(); - return r; -} - -int bc_decoder::decode_fetch_mem(unsigned & i, bc_fetch& bc) { - int r = 0; - uint32_t dw0 = dw[i]; - uint32_t dw1 = dw[i+1]; - uint32_t dw2 = dw[i+2]; - - i += 4; // MEM instructions align to 4 words boundaries - - assert(i <= ndw); - - MEM_RD_WORD0_R7EGCM w0(dw0); - bc.elem_size = w0.get_ELEM_SIZE(); - bc.fetch_whole_quad = w0.get_FETCH_WHOLE_QUAD(); - bc.uncached = w0.get_UNCACHED(); - bc.indexed = w0.get_INDEXED(); - bc.src_sel[1] = w0.get_SRC_SEL_Y(); - bc.src_gpr = w0.get_SRC_GPR(); - bc.src_rel = w0.get_SRC_REL(); - bc.src_sel[0] = w0.get_SRC_SEL_X(); - bc.burst_count = w0.get_BURST_COUNT(); - bc.lds_req = w0.get_LDS_REQ(); - bc.coalesced_read = w0.get_COALESCED_READ(); - - MEM_RD_WORD1_R7EGCM w1(dw1); - bc.dst_gpr = w1.get_DST_GPR(); - bc.dst_rel = w1.get_DST_REL(); - bc.dst_sel[0] = w1.get_DST_SEL_X(); - bc.dst_sel[1] = w1.get_DST_SEL_Y(); - bc.dst_sel[2] = w1.get_DST_SEL_Z(); - bc.dst_sel[3] = w1.get_DST_SEL_W(); - bc.data_format = w1.get_DATA_FORMAT(); - bc.num_format_all = w1.get_NUM_FORMAT_ALL(); - bc.format_comp_all = w1.get_FORMAT_COMP_ALL(); - bc.srf_mode_all = w1.get_SRF_MODE_ALL(); - - MEM_RD_WORD2_R7EGCM w2(dw2); - bc.array_base = w2.get_ARRAY_BASE(); - bc.endian_swap = w2.get_ENDIAN_SWAP(); - bc.array_size = w2.get_ARR_SIZE(); - - return r; -} - int bc_decoder::decode_fetch_vtx(unsigned & i, bc_fetch& bc) { int r = 0; uint32_t dw0 = dw[i]; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_dump.cpp index a7c4ccdad..523278279 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_dump.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_dump.cpp @@ -27,7 +27,6 @@ #include "sb_bc.h" #include "sb_shader.h" #include "sb_pass.h" -#include "eg_sq.h" // V_SQ_CF_INDEX_0/1 namespace r600_sb { @@ -141,14 +140,8 @@ void bc_dump::dump(cf_node& n) { } else if (n.bc.op_ptr->flags & CF_MEM) { static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", "WRITE_IND_ACK"}; - static const char *exp_type_r600[] = {"WRITE", "WRITE_IND", - "READ", "READ_IND"}; - fill_to(s, 18); - if (ctx.hw_class == HW_CLASS_R600 && n.bc.op == CF_OP_MEM_SCRATCH) - s << " " << exp_type_r600[n.bc.type] << " "; - else - s << " " << exp_type[n.bc.type] << " "; + s << " " << exp_type[n.bc.type] << " "; s.print_wl(n.bc.array_base, 5); s << " R" << n.bc.rw_gpr << "."; for (int k = 0; k < 4; ++k) @@ -163,11 +156,6 @@ void bc_dump::dump(cf_node& n) { s << " ES:" << n.bc.elem_size; - s << " OP:" << n.bc.rat_inst; - - if (n.bc.mark) - s << " MARK"; - } else { if (n.bc.op_ptr->flags & CF_CLAUSE) { @@ -194,9 +182,6 @@ void bc_dump::dump(cf_node& n) { if (n.bc.pop_count) s << " POP:" << n.bc.pop_count; - - if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT)) - s << " STREAM" << n.bc.count; } if (!n.bc.barrier) @@ -243,7 +228,7 @@ static void print_dst(sb_ostream &s, bc_alu &alu) reg_char = 'T'; } - if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) { + if (alu.write_mask || alu.op_ptr->src_count == 3) { s << reg_char; print_sel(s, sel, alu.dst_rel, alu.index_mode, 0); } else { @@ -291,28 +276,6 @@ static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx) need_sel = 0; need_chan = 0; switch (sel) { - case ALU_SRC_LDS_OQ_A: - s << "LDS_OQ_A"; - need_chan = 1; - break; - case ALU_SRC_LDS_OQ_B: - s << "LDS_OQ_B"; - need_chan = 1; - break; - case ALU_SRC_LDS_OQ_A_POP: - s << "LDS_OQ_A_POP"; - need_chan = 1; - break; - case ALU_SRC_LDS_OQ_B_POP: - s << "LDS_OQ_B_POP"; - need_chan = 1; - break; - case ALU_SRC_LDS_DIRECT_A: - s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]"; - break; - case ALU_SRC_LDS_DIRECT_B: - s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]"; - break; case ALU_SRC_PS: s << "PS"; break; @@ -341,27 +304,6 @@ static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx) case ALU_SRC_0: s << "0"; break; - case ALU_SRC_TIME_LO: - s << "TIME_LO"; - break; - case ALU_SRC_TIME_HI: - s << "TIME_HI"; - break; - case ALU_SRC_MASK_LO: - s << "MASK_LO"; - break; - case ALU_SRC_MASK_HI: - s << "MASK_HI"; - break; - case ALU_SRC_HW_WAVE_ID: - s << "HW_WAVE_ID"; - break; - case ALU_SRC_SIMD_ID: - s << "SIMD_ID"; - break; - case ALU_SRC_SE_ID: - s << "SE_ID"; - break; default: s << "??IMM_" << sel; break; @@ -409,18 +351,6 @@ void bc_dump::dump(alu_node& n) { s << " " << vec_bs[n.bc.bank_swizzle]; } - if (ctx.is_cayman()) { - if (n.bc.op == ALU_OP1_MOVA_INT) { - static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1", - " Unknown MOVA_INT dest" }; - s << mova_str[std::min(n.bc.dst_gpr, 4u)]; // CM_V_SQ_MOVA_DST_AR_* - } - } - - if (n.bc.lds_idx_offset) { - s << " IDX_OFFSET:" << n.bc.lds_idx_offset; - } - sblog << s.str() << "\n"; } @@ -483,29 +413,23 @@ bc_dump::bc_dump(shader& s, bytecode* bc) : void bc_dump::dump(fetch_node& n) { sb_ostringstream s; static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""}; - unsigned gds = n.bc.op_ptr->flags & FF_GDS; - bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET && - n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET; - bool show_dst = !gds || (gds && gds_has_ret); s << n.bc.op_ptr->name; fill_to(s, 20); - if (show_dst) { - s << "R"; - print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0); - s << "."; - for (int k = 0; k < 4; ++k) - s << chans[n.bc.dst_sel[k]]; - s << ", "; - } + s << "R"; + print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0); + s << "."; + for (int k = 0; k < 4; ++k) + s << chans[n.bc.dst_sel[k]]; + s << ", "; s << "R"; print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0); s << "."; unsigned vtx = n.bc.op_ptr->flags & FF_VTX; - unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4; + unsigned num_src_comp = vtx ? ctx.is_cayman() ? 2 : 1 : 4; for (unsigned k = 0; k < num_src_comp; ++k) s << chans[n.bc.src_sel[k]]; @@ -514,27 +438,18 @@ void bc_dump::dump(fetch_node& n) { s << " + " << n.bc.offset[0] << "b "; } - if (!gds) - s << ", RID:" << n.bc.resource_id; - - if (gds) { - s << " UAV:" << n.bc.uav_id; - if (n.bc.uav_index_mode) - s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0); - if (n.bc.bcast_first_req) - s << " BFQ"; - if (n.bc.alloc_consume) - s << " AC"; - } else if (vtx) { + s << ", RID:" << n.bc.resource_id; + + if (vtx) { s << " " << fetch_type[n.bc.fetch_type]; if (!ctx.is_cayman() && n.bc.mega_fetch_count) s << " MFC:" << n.bc.mega_fetch_count; if (n.bc.fetch_whole_quad) s << " FWQ"; if (ctx.is_egcm() && n.bc.resource_index_mode) - s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); + s << " RIM:SQ_CF_INDEX_" << n.bc.resource_index_mode; if (ctx.is_egcm() && n.bc.sampler_index_mode) - s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); + s << " SID:SQ_CF_INDEX_" << n.bc.sampler_index_mode; s << " UCF:" << n.bc.use_const_fields << " FMT(DTA:" << n.bc.data_format @@ -551,22 +466,6 @@ void bc_dump::dump(fetch_node& n) { for (unsigned k = 0; k < 3; ++k) if (n.bc.offset[k]) s << " O" << chans[k] << ":" << n.bc.offset[k]; - if (ctx.is_egcm() && n.bc.resource_index_mode) - s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0); - if (ctx.is_egcm() && n.bc.sampler_index_mode) - s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0); - } - - if (n.bc.op_ptr->flags & FF_MEM) { - s << ", ELEM_SIZE:" << n.bc.elem_size; - if (n.bc.uncached) - s << ", UNCACHED"; - if (n.bc.indexed) - s << ", INDEXED"; - if (n.bc.burst_count) - s << ", BURST_COUNT:" << n.bc.burst_count; - s << ", ARRAY_BASE:" << n.bc.array_base; - s << ", ARRAY_SIZE:" << n.bc.array_size; } sblog << s.str() << "\n"; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index da27c2005..c4799270d 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -34,14 +34,12 @@ #include "r600_pipe.h" #include "r600_shader.h" -#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1 #include <stack> #include "sb_bc.h" #include "sb_shader.h" #include "sb_pass.h" -#include "util/macros.h" namespace r600_sb { @@ -57,25 +55,23 @@ int bc_parser::decode() { if (pshader) { switch (bc->type) { - case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break; - case PIPE_SHADER_VERTEX: - t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS); + case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break; + case TGSI_PROCESSOR_VERTEX: + t = pshader->vs_as_es ? TARGET_ES : TARGET_VS; break; - case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break; - case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break; - case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break; - case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break; + case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break; + case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break; default: assert(!"unknown shader target"); return -1; break; } } else { - if (bc->type == PIPE_SHADER_COMPUTE) + if (bc->type == TGSI_PROCESSOR_COMPUTE) t = TARGET_COMPUTE; else t = TARGET_FETCH; } sh = new shader(ctx, t, bc->debug_id); - sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE || bc->precise); + sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE); int r = decode_shader(); @@ -125,7 +121,7 @@ int bc_parser::parse_decls() { return 0; } - if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) { + if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) { assert(pshader->num_arrays); @@ -149,14 +145,11 @@ int bc_parser::parse_decls() { } } - if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS || sh->target == TARGET_LS) + if (sh->target == TARGET_VS || sh->target == TARGET_ES) sh->add_input(0, 1, 0x0F); else if (sh->target == TARGET_GS) { sh->add_input(0, 1, 0x0F); sh->add_input(1, 1, 0x0F); - } else if (sh->target == TARGET_COMPUTE) { - sh->add_input(0, 1, 0x0F); - sh->add_input(1, 1, 0x0F); } bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN @@ -171,20 +164,15 @@ int bc_parser::parse_decls() { sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F); if (ps_interp && in.spi_sid) { int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location); - if (k >= 0) { + if (k >= 0) ij_interpolators[k] |= true; - if (in.uses_interpolate_at_centroid) { - k = eg_get_interpolator_index(in.interpolate, TGSI_INTERPOLATE_LOC_CENTROID); - ij_interpolators[k] |= true; - } - } } } if (ps_interp) { /* add the egcm ij interpolators to live inputs */ unsigned num_ij = 0; - for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) { + for (unsigned i = 0; i < Elements(ij_interpolators); i++) { num_ij += ij_interpolators[i]; } @@ -227,7 +215,7 @@ int bc_parser::decode_cf(unsigned &i, bool &eop) { return r; } else if (flags & CF_FETCH) { if ((r = decode_fetch_clause(cf))) - return r; + return r;; } else if (flags & CF_EXP) { if (cf->bc.rw_rel) gpr_reladdr = true; @@ -254,7 +242,7 @@ int bc_parser::decode_alu_clause(cf_node* cf) { cgroup = 0; memset(slots[0], 0, 5*sizeof(slots[0][0])); - UNUSED unsigned ng = 0; + unsigned ng = 0; do { decode_alu_group(cf, i, gcnt); @@ -340,29 +328,6 @@ int bc_parser::prepare_alu_clause(cf_node* cf) { return 0; } -void bc_parser::save_set_cf_index(value *val, unsigned idx) -{ - assert(idx <= 1); - assert(val); - cf_index_value[idx] = val; -} -value *bc_parser::get_cf_index_value(unsigned idx) -{ - assert(idx <= 1); - assert(cf_index_value[idx]); - return cf_index_value[idx]; -} -void bc_parser::save_mova(alu_node *mova) -{ - assert(mova); - this->mova = mova; -} -alu_node *bc_parser::get_mova() -{ - assert(mova); - return mova; -} - int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { alu_node *n; @@ -373,7 +338,6 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { n = static_cast<alu_node*>(*I); - bool ubo_indexing[2] = {}; if (!sh->assign_slot(n, slots[cgroup])) { assert(!"alu slot assignment failed"); @@ -385,47 +349,11 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { if (ctx.alu_slots(n->bc.op) & AF_4SLOT) n->flags |= NF_ALU_4SLOT; - if (ctx.alu_slots(n->bc.op) & AF_2SLOT) - n->flags |= NF_ALU_2SLOT; - n->src.resize(src_count); unsigned flags = n->bc.op_ptr->flags; - if (flags & AF_LDS) { - bool need_rw = false, need_oqa = false, need_oqb = false; - int ndst = 0, ncount = 0; - - /* all non-read operations have side effects */ - if (n->bc.op != LDS_OP2_LDS_READ2_RET && - n->bc.op != LDS_OP1_LDS_READ_REL_RET && - n->bc.op != LDS_OP1_LDS_READ_RET) { - n->flags |= NF_DONT_KILL; - ndst++; - need_rw = true; - } - - if (n->bc.op >= LDS_OP2_LDS_ADD_RET && n->bc.op <= LDS_OP1_LDS_USHORT_READ_RET) { - need_oqa = true; - ndst++; - } - - if (n->bc.op == LDS_OP2_LDS_READ2_RET || n->bc.op == LDS_OP1_LDS_READ_REL_RET) { - need_oqb = true; - ndst++; - } - - n->dst.resize(ndst); - if (need_oqa) - n->dst[ncount++] = sh->get_special_value(SV_LDS_OQA); - if (need_oqb) - n->dst[ncount++] = sh->get_special_value(SV_LDS_OQB); - if (need_rw) - n->dst[ncount++] = sh->get_special_value(SV_LDS_RW); - - n->flags |= NF_DONT_MOVE | NF_DONT_HOIST; - - } else if (flags & AF_PRED) { + if (flags & AF_PRED) { n->dst.resize(3); if (n->bc.update_pred) n->dst[1] = sh->get_special_value(SV_ALU_PRED); @@ -447,18 +375,13 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { n->dst.resize(1); } - if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) { - // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX - // DCE will kill this op - save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1); - } else if (flags & AF_MOVA) { + if (flags & AF_MOVA) { n->dst[0] = sh->get_special_value(SV_AR_INDEX); - save_mova(n); n->flags |= NF_DONT_HOIST; - } else if ((n->bc.op_ptr->src_count == 3 || n->bc.write_mask) && !(flags & AF_LDS)) { + } else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) { assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X); value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan, @@ -479,7 +402,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { n->src[s] = sh->get_const_value(src.value); } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) { unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ? - ((unsigned)SLOT_TRANS) : src.chan; + SLOT_TRANS : src.chan; // XXX shouldn't happen but llvm backend uses PS on cayman if (prev_slot == SLOT_TRANS && ctx.is_cayman()) @@ -509,12 +432,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { bc_kcache &kc = cf->bc.kc[kc_set]; kc_addr = (kc.addr << 4) + (sel & 0x1F); - n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode); - - if (kc.index_mode != KC_INDEX_NONE) { - assert(kc.index_mode != KC_LOCK_LOOP); - ubo_indexing[kc.index_mode - KC_INDEX_0] = true; - } + n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan); } else if (src.sel < MAX_GPR) { value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel); @@ -528,21 +446,6 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { // param index as equal instructions and leave only one of them n->src[s] = sh->get_special_ro_value(sel_chan(src.sel, n->bc.slot)); - } else if (ctx.is_lds_oq(src.sel)) { - switch (src.sel) { - case ALU_SRC_LDS_OQ_A: - case ALU_SRC_LDS_OQ_B: - assert(!"Unsupported LDS queue access in SB"); - break; - case ALU_SRC_LDS_OQ_A_POP: - n->src[s] = sh->get_special_value(SV_LDS_OQA); - break; - case ALU_SRC_LDS_OQ_B_POP: - n->src[s] = sh->get_special_value(SV_LDS_OQB); - break; - } - n->flags |= NF_DONT_HOIST | NF_DONT_MOVE; - } else { switch (src.sel) { case ALU_SRC_0: @@ -566,19 +469,6 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { } } } - - // add UBO index values if any as dependencies - if (ubo_indexing[0]) { - n->src.push_back(get_cf_index_value(0)); - } - if (ubo_indexing[1]) { - n->src.push_back(get_cf_index_value(1)); - } - - if ((flags & AF_MOVA) && (n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) && - ctx.is_cayman()) - // Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX - save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1); } // pack multislot instructions into alu_packed_node @@ -589,16 +479,12 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) { alu_node *a = static_cast<alu_node*>(*I); unsigned sflags = a->bc.slot_flags; - if (sflags == AF_4V || sflags == AF_2V || (ctx.is_cayman() && sflags == AF_S)) { + if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) { if (!p) p = sh->create_alu_packed(); a->remove(); p->push_back(a); - if (sflags == AF_2V && p->count() == 2) { - g->push_front(p); - p = NULL; - } } } @@ -629,10 +515,7 @@ int bc_parser::decode_fetch_clause(cf_node* cf) { int r; unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1; - if (cf->bc.op_ptr->flags & FF_GDS) - cf->subtype = NST_GDS_CLAUSE; - else - cf->subtype = NST_TEX_CLAUSE; + cf->subtype = NST_TEX_CLAUSE; while (cnt--) { fetch_node *n = sh->create_fetch(); @@ -658,14 +541,10 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) { unsigned flags = n->bc.op_ptr->flags; unsigned vtx = flags & FF_VTX; - unsigned gds = flags & FF_GDS; - unsigned num_src = gds ? 2 : vtx ? ctx.vtx_src_num : 4; + unsigned num_src = vtx ? ctx.vtx_src_num : 4; n->dst.resize(4); - if (gds) { - n->flags |= NF_DONT_HOIST | NF_DONT_MOVE | NF_DONT_KILL; - } if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) { sh->uses_gradients = true; } @@ -729,18 +608,6 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) { n->bc.src_sel[s], false); } - // Scheduler will emit the appropriate instructions to set CF_IDX0/1 - if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) { - n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1)); - } - if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) { - n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1)); - } - } - - if (n->bc.op == FETCH_OP_READ_SCRATCH) { - n->src.push_back(sh->get_special_value(SV_SCRATCH)); - n->dst.push_back(sh->get_special_value(SV_SCRATCH)); } } @@ -844,23 +711,12 @@ int bc_parser::prepare_ir() { do { - if (ctx.hw_class == HW_CLASS_R600 && c->bc.op == CF_OP_MEM_SCRATCH && - (c->bc.type == 2 || c->bc.type == 3)) { - c->dst.resize(4); - for(int s = 0; s < 4; ++s) { - if (c->bc.comp_mask & (1 << s)) - c->dst[s] = - sh->get_gpr_value(true, c->bc.rw_gpr, s, false); - } - } else { - c->src.resize(4); + c->src.resize(4); - - for(int s = 0; s < 4; ++s) { - if (c->bc.comp_mask & (1 << s)) - c->src[s] = + for(int s = 0; s < 4; ++s) { + if (c->bc.comp_mask & (1 << s)) + c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr, s, false); - } } if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write @@ -883,10 +739,6 @@ int bc_parser::prepare_ir() { c->flags |= NF_DONT_KILL; } } - else if (c->bc.op == CF_OP_MEM_SCRATCH) { - c->src.push_back(sh->get_special_value(SV_SCRATCH)); - c->dst.push_back(sh->get_special_value(SV_SCRATCH)); - } if (!burst_count--) break; @@ -905,25 +757,10 @@ int bc_parser::prepare_ir() { c->bc.end_of_program = eop; } else if (flags & CF_EMIT) { - /* quick peephole */ - cf_node *prev = static_cast<cf_node *>(c->prev); - if (c->bc.op == CF_OP_CUT_VERTEX && - prev && prev->is_valid() && - prev->bc.op == CF_OP_EMIT_VERTEX && - c->bc.count == prev->bc.count) { - prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX); - prev->bc.end_of_program = c->bc.end_of_program; - c->remove(); - } - else { - c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; + c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE; - c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); - c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); - } - } else if (c->bc.op == CF_OP_WAIT_ACK) { - c->src.push_back(sh->get_special_value(SV_SCRATCH)); - c->dst.push_back(sh->get_special_value(SV_SCRATCH)); + c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); + c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT)); } } diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_context.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_context.cpp index 2734b24fe..5dba85b86 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_context.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_context.cpp @@ -101,7 +101,7 @@ const char* sb_context::get_hw_class_name() { TRANSLATE_HW_CLASS(CAYMAN); #undef TRANSLATE_HW_CLASS default: - assert(!"unknown gfx level"); + assert(!"unknown chip class"); return "INVALID_CHIP_CLASS"; } } diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_core.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_core.cpp index 7c387d2a4..afea8188f 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_core.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_core.cpp @@ -26,7 +26,7 @@ #define SB_RA_SCHED_CHECK DEBUG -#include "util/os_time.h" +#include "os/os_time.h" #include "r600_pipe.h" #include "r600_shader.h" @@ -42,7 +42,7 @@ using namespace r600_sb; -static sb_hw_class translate_chip_class(enum amd_gfx_level cc); +static sb_hw_class translate_chip_class(enum chip_class cc); static sb_hw_chip translate_chip(enum radeon_family rf); sb_context *r600_sb_context_create(struct r600_context *rctx) { @@ -50,7 +50,7 @@ sb_context *r600_sb_context_create(struct r600_context *rctx) { sb_context *sctx = new sb_context(); if (sctx->init(rctx->isa, translate_chip(rctx->b.family), - translate_chip_class(rctx->b.gfx_level))) { + translate_chip_class(rctx->b.chip_class))) { delete sctx; sctx = NULL; } @@ -191,7 +191,7 @@ int r600_sb_bytecode_process(struct r600_context *rctx, // if conversion breaks the dependency tracking between CF_EMIT ops when it removes // the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS - if ((sh->target != TARGET_GS && sh->target != TARGET_HS) || pshader->needs_scratch_space) + if (sh->target != TARGET_GS) SB_RUN_PASS(if_conversion, 1); // if_conversion breaks info about uses, but next pass (peephole) @@ -324,7 +324,7 @@ static sb_hw_chip translate_chip(enum radeon_family rf) { } } -static sb_hw_class translate_chip_class(enum amd_gfx_level cc) { +static sb_hw_class translate_chip_class(enum chip_class cc) { switch(cc) { case R600: return HW_CLASS_R600; case R700: return HW_CLASS_R700; @@ -332,7 +332,7 @@ static sb_hw_class translate_chip_class(enum amd_gfx_level cc) { case CAYMAN: return HW_CLASS_CAYMAN; default: - assert(!"unknown gfx level"); + assert(!"unknown chip class"); return HW_CLASS_UNKNOWN; } } diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp index abae2bf69..79aef9106 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp @@ -30,18 +30,6 @@ namespace r600_sb { -int dce_cleanup::run() { - int r; - - // Run cleanup for as long as there are unused nodes. - do { - nodes_changed = false; - r = vpass::run(); - } while (r == 0 && nodes_changed); - - return r; -} - bool dce_cleanup::visit(node& n, bool enter) { if (enter) { } else { @@ -122,18 +110,7 @@ bool dce_cleanup::visit(region_node& n, bool enter) { void dce_cleanup::cleanup_dst(node& n) { if (!cleanup_dst_vec(n.dst) && remove_unused && !n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent) - { - // Delete use references to the removed node from the src values. - for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { - value* v = *I; - if (v && v->def && v->uses.size()) - { - v->remove_use(&n); - } - } n.remove(); - nodes_changed = true; - } } bool dce_cleanup::visit(container_node& n, bool enter) { @@ -153,7 +130,7 @@ bool dce_cleanup::cleanup_dst_vec(vvec& vv) { if (v->gvn_source && v->gvn_source->is_dead()) v->gvn_source = NULL; - if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size())) + if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses)) v = NULL; else alive = true; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp index 402ba357f..d6051704c 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_dump.cpp @@ -367,12 +367,7 @@ void dump::dump_op(node &n, const char *name) { sblog << ", "; } - if (n.subtype == NST_FETCH_INST) { - fetch_node *f = static_cast<fetch_node*>(&n); - if (f->bc.indexed) - dump_vec(n.src); - } else - dump_vec(n.src); + dump_vec(n.src); } void dump::dump_set(shader &sh, val_set& v) { @@ -396,8 +391,6 @@ void dump::dump_flags(node &n) { sblog << "CH_CONS "; if (n.flags & NF_ALU_4SLOT) sblog << "4S "; - if (n.flags & NF_ALU_2SLOT) - sblog << "2S "; } void dump::dump_val(value* v) { @@ -476,7 +469,6 @@ void dump::dump_op(node* n) { case NST_ALU_CLAUSE: case NST_TEX_CLAUSE: case NST_VTX_CLAUSE: - case NST_GDS_CLAUSE: dump_op(*n, static_cast<cf_node*>(n)->bc.op_ptr->name); break; case NST_ALU_PACKED_INST: diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_gvn.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_gvn.cpp index c994ff6a8..caea4ec66 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_gvn.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_gvn.cpp @@ -174,11 +174,10 @@ void gvn::process_alu_src_constants(node &n, value* &v) { } } - unsigned kcache_count = 0; for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { value *c = (*I); - if (c->is_kcache() && (!kc.try_reserve(c->select) || ++kcache_count == 2)) { + if (c->is_kcache() && !kc.try_reserve(c->select)) { process_src(v, false); return; } diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h index 179eab478..0346df1b1 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_pass.h @@ -124,9 +124,7 @@ class dce_cleanup : public vpass { public: dce_cleanup(shader &s) : vpass(s), - remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {} - - virtual int run(); + remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {} virtual bool visit(node &n, bool enter); virtual bool visit(alu_group_node &n, bool enter); @@ -142,8 +140,6 @@ private: void cleanup_dst(node &n); bool cleanup_dst_vec(vvec &vv); - // Did we alter/remove nodes during a single pass? - bool nodes_changed; }; @@ -223,7 +219,6 @@ class gcm : public pass { sched_queue ready; sched_queue ready_above; - unsigned outstanding_lds_oq; container_node pending; struct op_info { @@ -264,8 +259,7 @@ public: gcm(shader &sh) : pass(sh), bu_ready(), bu_ready_next(), bu_ready_early(), - ready(), outstanding_lds_oq(), - op_map(), uses(), nuc_stk(1), ucs_level(), + ready(), op_map(), uses(), nuc_stk(1), ucs_level(), bu_bb(), pending_defs(), pending_nodes(), cur_sq(), live(), live_count(), pending_exec_mask_update() {} @@ -546,10 +540,10 @@ private: void add_prev_chan(unsigned chan); unsigned get_preferable_chan_mask(); - bool ra_node(container_node *c); - bool process_op(node *n); + void ra_node(container_node *c); + void process_op(node *n); - bool color(value *v); + void color(value *v); void color_bs_constraint(ra_constraint *c); @@ -634,11 +628,7 @@ class ssa_rename : public vpass { typedef sb_map<value*, unsigned> def_map; def_map def_count; - def_map lds_oq_count; - def_map lds_rw_count; std::stack<def_map> rename_stack; - std::stack<def_map> rename_lds_oq_stack; - std::stack<def_map> rename_lds_rw_stack; typedef std::map<uint32_t, value*> val_map; val_map values; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp index 5e336310a..d4b97557d 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_peephole.cpp @@ -52,30 +52,11 @@ void peephole::run_on(container_node* c) { if (n->is_container()) run_on(static_cast<container_node*>(n)); else { - if (n->is_fetch_inst() && (n->fetch_op_flags() & FF_GDS)) { - fetch_node *f = static_cast<fetch_node*>(n); - bool has_dst = false; - - for(vvec::iterator I = f->dst.begin(), E = f->dst.end(); I != E; ++I) { - value *v = *I; - if (v) - has_dst = true; - } - if (!has_dst) - if (f->bc.op >= FETCH_OP_GDS_ADD_RET && f->bc.op <= FETCH_OP_GDS_USHORT_READ_RET) - f->bc.set_op(f->bc.op - FETCH_OP_GDS_ADD_RET + FETCH_OP_GDS_ADD); - } + if (n->is_alu_inst()) { alu_node *a = static_cast<alu_node*>(n); - if (a->bc.op_ptr->flags & AF_LDS) { - if (!a->dst[0]) { - if (a->bc.op >= LDS_OP2_LDS_ADD_RET && a->bc.op <= LDS_OP3_LDS_MSKOR_RET) - a->bc.set_op(a->bc.op - LDS_OP2_LDS_ADD_RET + LDS_OP2_LDS_ADD); - if (a->bc.op == LDS_OP1_LDS_READ_RET) - a->src[0] = sh.get_undef_value(); - } - } else if (a->bc.op_ptr->flags & + if (a->bc.op_ptr->flags & (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) { optimize_cc_op(a); } else if (a->bc.op == ALU_OP1_FLT_TO_INT) { @@ -131,8 +112,8 @@ void peephole::optimize_cc_op2(alu_node* a) { std::swap(a->src[0],a->src[1]); swapped = true; // clear modifiers - a->bc.src[0].clear(); - a->bc.src[1].clear(); + memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); + memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); } if (swapped || (a->src[1]->is_const() && @@ -268,12 +249,6 @@ void peephole::optimize_CNDcc_op(alu_node* a) { if (d->bc.src[nds].abs) return; - // Don't create an instruction that uses three kcache values - // chances are high that it can't be scheduled - if (d->src[0]->is_kcache() && a->src[1]->is_kcache() && - a->src[2]->is_kcache()) - return; - // TODO we can handle some cases for uint comparison if (dcmp_type == AF_UINT_CMP) return; diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_checker.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_checker.cpp index 6b5c8e450..9681e69f6 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_checker.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_checker.cpp @@ -259,7 +259,7 @@ void ra_checker::run_on(container_node* c) { check_phi_src(r->target->phi, r->dep_id); pop_stack(); } else if (c->is_repeat()) { - ASSERTED repeat_node *r = static_cast<repeat_node*>(c); + repeat_node *r = static_cast<repeat_node*>(c); assert (r->target->loop_phi); pop_stack(); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp index 4a9462d91..7f388af6e 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp @@ -273,7 +273,7 @@ void coalescer::build_constraint_queue() { } } -int coalescer::color_chunks() { +void coalescer::color_chunks() { for (chunk_queue::iterator I = chunks.begin(), E = chunks.end(); I != E; ++I) { @@ -333,13 +333,9 @@ int coalescer::color_chunks() { ++pass; } - if (!color) { - fprintf(stderr, "r600/SB: unable to color registers\n"); - return -1; - } + assert(color); color_chunk(c, color); } - return 0; } void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) { @@ -435,7 +431,9 @@ int coalescer::run() { return r; build_chunk_queue(); - return color_chunks(); + color_chunks(); + + return 0; } void coalescer::color_phi_constraint(ra_constraint* c) { diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.cpp index a01972d7d..f996c0786 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.cpp @@ -37,7 +37,7 @@ shader::shader(sb_context &sctx, shader_target t, unsigned id) pool(), all_nodes(), src_stats(), opt_stats(), errors(), optimized(), id(id), coal(*this), bbs(), - target(t), ex(*this), vt(ex), root(), + target(t), vt(ex), ex(*this), root(), compute_interferences(), has_alu_predication(), uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {} @@ -91,7 +91,6 @@ cf_node* shader::create_clause(node_subtype nst) { case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break; case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break; case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break; - case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break; default: assert(!"invalid clause type"); break; } @@ -189,9 +188,9 @@ value* shader::create_temp_value() { return get_value(VLK_TEMP, id, 0); } -value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) { +value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan) { return get_ro_value(kcache_values, VLK_KCACHE, - sel_chan(bank, index, chan, index_mode)); + sel_chan((bank << 12) | index, chan)); } void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) { @@ -216,7 +215,7 @@ void shader::init() { void shader::init_call_fs(cf_node* cf) { unsigned gpr = 0; - assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES); + assert(target == TARGET_VS || target == TARGET_ES); for(inputs_vec::const_iterator I = inputs.begin(), E = inputs.end(); I != E; ++I, ++gpr) { @@ -437,8 +436,6 @@ const char* shader::get_shader_target_name() { case TARGET_ES: return "ES"; case TARGET_PS: return "PS"; case TARGET_GS: return "GS"; - case TARGET_HS: return "HS"; - case TARGET_LS: return "LS"; case TARGET_COMPUTE: return "COMPUTE"; case TARGET_FETCH: return "FETCH"; default: @@ -598,8 +595,6 @@ sched_queue_id shader::get_queue_id(node* n) { fetch_node *f = static_cast<fetch_node*>(n); if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX)) return SQ_VTX; - if (f->bc.op_ptr->flags & FF_GDS) - return SQ_GDS; return SQ_TEX; } case NST_CF_INST: diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.h index ccc70fa8d..7955bba9b 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_shader.h @@ -209,7 +209,7 @@ public: void build_constraint_queue(); void build_chunk_queue(); int color_constraints(); - int color_chunks(); + void color_chunks(); ra_constraint* create_constraint(constraint_kind kind); @@ -293,9 +293,8 @@ public: const shader_target target; - expr_handler ex; - value_table vt; + expr_handler ex; container_node *root; @@ -324,7 +323,7 @@ public: value* get_special_ro_value(unsigned sel); - value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode); + value* get_kcache_value(unsigned bank, unsigned index, unsigned chan); value* get_value_version(value* v, unsigned ver); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp index 5cd41c2aa..3ad628bb6 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ssa_builder.cpp @@ -132,8 +132,6 @@ bool ssa_prepare::visit(depart_node& n, bool enter) { int ssa_rename::init() { rename_stack.push(def_map()); - rename_lds_oq_stack.push(def_map()); - rename_lds_rw_stack.push(def_map()); return 0; } @@ -289,16 +287,8 @@ void ssa_rename::pop() { value* ssa_rename::rename_use(node *n, value* v) { if (v->version) return v; - unsigned index; - if (v->is_lds_access()) { - index = get_index(rename_lds_rw_stack.top(), v); - } else if (v->is_lds_oq()) { - index = new_index(lds_oq_count, v); - set_index(rename_lds_oq_stack.top(), v, index); - } else { - index = get_index(rename_stack.top(), v); - } + unsigned index = get_index(rename_stack.top(), v); v = sh.get_value_version(v, index); // if (alu) instruction is predicated and source arg comes from psi node @@ -323,15 +313,8 @@ value* ssa_rename::rename_use(node *n, value* v) { } value* ssa_rename::rename_def(node *n, value* v) { - unsigned index; - - if (v->is_lds_access()) { - index = new_index(lds_rw_count, v); - set_index(rename_lds_rw_stack.top(), v, index); - } else { - index = new_index(def_count, v); - set_index(rename_stack.top(), v, index); - } + unsigned index = new_index(def_count, v); + set_index(rename_stack.top(), v, index); value *r = sh.get_value_version(v, index); return r; } |