diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:16:34 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2016-05-29 10:16:34 +0000 |
commit | a27a465cfc328a47052b6d70cdb000a5d7292f04 (patch) | |
tree | e64b2fed9d0240d52812cbbe125fd593afca43ec /lib/mesa | |
parent | a0e362bc327f26f7106d94700f05038ed585f53f (diff) |
Import Mesa 11.2.2
Diffstat (limited to 'lib/mesa')
-rw-r--r-- | lib/mesa/src/compiler/glsl/lower_discard.cpp | 4 | ||||
-rw-r--r-- | lib/mesa/src/compiler/nir/nir_lower_vec_to_movs.c | 192 |
2 files changed, 97 insertions, 99 deletions
diff --git a/lib/mesa/src/compiler/glsl/lower_discard.cpp b/lib/mesa/src/compiler/glsl/lower_discard.cpp index 203d9e3b9..b62eb20dc 100644 --- a/lib/mesa/src/compiler/glsl/lower_discard.cpp +++ b/lib/mesa/src/compiler/glsl/lower_discard.cpp @@ -158,7 +158,7 @@ replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir) ir_assignment *assignment = new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var), - condition); + condition, NULL); ir->replace_with(assignment); } @@ -180,7 +180,7 @@ lower_discard_visitor::visit_leave(ir_if *ir) ir_var_temporary); ir_assignment *temp_initializer = new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp), - new(mem_ctx) ir_constant(false)); + new(mem_ctx) ir_constant(false), NULL); ir->insert_before(temp); ir->insert_before(temp_initializer); diff --git a/lib/mesa/src/compiler/nir/nir_lower_vec_to_movs.c b/lib/mesa/src/compiler/nir/nir_lower_vec_to_movs.c index 94bf29664..f51cede39 100644 --- a/lib/mesa/src/compiler/nir/nir_lower_vec_to_movs.c +++ b/lib/mesa/src/compiler/nir/nir_lower_vec_to_movs.c @@ -19,21 +19,24 @@ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * */ #include "nir.h" -#include "nir_builder.h" - -struct vec_to_movs_data { - nir_instr_writemask_filter_cb cb; - const void *data; -}; /* * Implements a simple pass that lowers vecN instructions to a series of * moves with partial writes. */ +struct vec_to_movs_state { + nir_function_impl *impl; + bool progress; +}; + static bool src_matches_dest_reg(nir_dest *dest, nir_src *src) { @@ -59,11 +62,7 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) { assert(start_idx < nir_op_infos[vec->op].num_inputs); - /* No sense generating a MOV from undef, we can just leave the dst channel undef. */ - if (nir_src_is_undef(vec->src[start_idx].src)) - return 1 << start_idx; - - nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov); + nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov); nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov); nir_alu_dest_copy(&mov->dest, &vec->dest, mov); @@ -103,7 +102,7 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) if (mov->dest.write_mask) { nir_instr_insert_before(&vec->instr, &mov->instr); } else { - nir_instr_free(&mov->instr); + ralloc_free(mov); } return channels_handled; @@ -112,9 +111,9 @@ insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) static bool has_replicated_dest(nir_alu_instr *alu) { - return alu->op == nir_op_fdot2_replicated || - alu->op == nir_op_fdot3_replicated || - alu->op == nir_op_fdot4_replicated || + return alu->op == nir_op_fdot_replicated2 || + alu->op == nir_op_fdot_replicated3 || + alu->op == nir_op_fdot_replicated4 || alu->op == nir_op_fdph_replicated; } @@ -124,10 +123,8 @@ has_replicated_dest(nir_alu_instr *alu) * can then call insert_mov as normal. */ static unsigned -try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data) +try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) { - struct vec_to_movs_data *data = _data; - assert(start_idx < nir_op_infos[vec->op].num_inputs); /* We will only even try if the source is SSA */ @@ -139,10 +136,7 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data) /* If we are going to do a reswizzle, then the vecN operation must be the * only use of the source value. We also can't have any source modifiers. */ - nir_foreach_use_including_if(src, vec->src[start_idx].src.ssa) { - if (src->is_if) - return 0; - + nir_foreach_use(vec->src[start_idx].src.ssa, src) { if (src->parent_instr != &vec->instr) return 0; @@ -151,6 +145,9 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data) return 0; } + if (!list_empty(&vec->src[start_idx].src.ssa->if_uses)) + return 0; + if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu) return 0; @@ -185,7 +182,6 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data) for (unsigned i = 0; i < 4; i++) swizzles[j][i] = src_alu->src[j].swizzle[i]; - /* Generate the final write mask */ unsigned write_mask = 0; for (unsigned i = start_idx; i < 4; i++) { if (!(vec->dest.write_mask & (1 << i))) @@ -195,21 +191,10 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data) vec->src[i].src.ssa != &src_alu->dest.dest.ssa) continue; - write_mask |= 1 << i; - } - - /* If the instruction would be vectorized but the backend - * doesn't support vectorizing this op, abort. */ - if (data->cb && !data->cb(&src_alu->instr, write_mask, data->data)) - return 0; - - for (unsigned i = start_idx; i < 4; i++) { - if (!(write_mask & (1 << i))) - continue; - - /* At this point, the given vec source matches up with the ALU + /* At this point, the give vec source matchese up with the ALU * instruction so we can re-swizzle that component to match. */ + write_mask |= 1 << i; if (has_replicated_dest(src_alu)) { /* Since the destination is a single replicated value, we don't need * to do any reswizzling @@ -230,85 +215,98 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data) } static bool -nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data) +lower_vec_to_movs_block(nir_block *block, void *void_state) { - if (instr->type != nir_instr_type_alu) - return false; + struct vec_to_movs_state *state = void_state; + nir_function_impl *impl = state->impl; + nir_shader *shader = impl->function->shader; - nir_alu_instr *vec = nir_instr_as_alu(instr); + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; - switch (vec->op) { - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - break; - default: - return false; - } + nir_alu_instr *vec = nir_instr_as_alu(instr); - bool vec_had_ssa_dest = vec->dest.dest.is_ssa; - if (vec->dest.dest.is_ssa) { - /* Since we insert multiple MOVs, we have a register destination. */ - nir_register *reg = nir_local_reg_create(b->impl); - reg->num_components = vec->dest.dest.ssa.num_components; - reg->bit_size = vec->dest.dest.ssa.bit_size; + switch (vec->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + break; + default: + continue; /* The loop */ + } - nir_ssa_def_rewrite_uses_src(&vec->dest.dest.ssa, nir_src_for_reg(reg)); + if (vec->dest.dest.is_ssa) { + /* Since we insert multiple MOVs, we have a register destination. */ + nir_register *reg = nir_local_reg_create(impl); + reg->num_components = vec->dest.dest.ssa.num_components; - nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest, - nir_dest_for_reg(reg)); - } + nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg)); - unsigned finished_write_mask = 0; + nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest, + nir_dest_for_reg(reg)); + } - /* First, emit a MOV for all the src channels that are in the - * destination reg, in case other values we're populating in the dest - * might overwrite them. - */ - for (unsigned i = 0; i < 4; i++) { - if (!(vec->dest.write_mask & (1 << i))) - continue; + unsigned finished_write_mask = 0; - if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) { - finished_write_mask |= insert_mov(vec, i, b->shader); - break; + /* First, emit a MOV for all the src channels that are in the + * destination reg, in case other values we're populating in the dest + * might overwrite them. + */ + for (unsigned i = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) { + finished_write_mask |= insert_mov(vec, i, shader); + break; + } } - } - /* Now, emit MOVs for all the other src channels. */ - for (unsigned i = 0; i < 4; i++) { - if (!(vec->dest.write_mask & (1 << i))) - continue; + /* Now, emit MOVs for all the other src channels. */ + for (unsigned i = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; - /* Coalescing moves the register writes from the vec up to the ALU - * instruction in the source. We can only do this if the original - * vecN had an SSA destination. - */ - if (vec_had_ssa_dest && !(finished_write_mask & (1 << i))) - finished_write_mask |= try_coalesce(vec, i, data); + if (!(finished_write_mask & (1 << i))) + finished_write_mask |= try_coalesce(vec, i, shader); - if (!(finished_write_mask & (1 << i))) - finished_write_mask |= insert_mov(vec, i, b->shader); - } + if (!(finished_write_mask & (1 << i))) + finished_write_mask |= insert_mov(vec, i, shader); + } - nir_instr_remove(&vec->instr); - nir_instr_free(&vec->instr); + nir_instr_remove(&vec->instr); + ralloc_free(vec); + state->progress = true; + } return true; } +static bool +nir_lower_vec_to_movs_impl(nir_function_impl *impl) +{ + struct vec_to_movs_state state = { impl, false }; + + nir_foreach_block(impl, lower_vec_to_movs_block, &state); + + if (state.progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return state.progress; +} + bool -nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb, - const void *_data) +nir_lower_vec_to_movs(nir_shader *shader) { - struct vec_to_movs_data data = { - .cb = cb, - .data = _data, - }; - - return nir_shader_instructions_pass(shader, - nir_lower_vec_to_movs_instr, - nir_metadata_block_index | - nir_metadata_dominance, - &data); + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = nir_lower_vec_to_movs_impl(function->impl) || progress; + } + + return progress; } |