diff options
Diffstat (limited to 'lib/mesa/src/compiler')
11 files changed, 477 insertions, 729 deletions
diff --git a/lib/mesa/src/compiler/glsl/loop_analysis.cpp b/lib/mesa/src/compiler/glsl/loop_analysis.cpp index c8db6f63b..096a80abb 100644 --- a/lib/mesa/src/compiler/glsl/loop_analysis.cpp +++ b/lib/mesa/src/compiler/glsl/loop_analysis.cpp @@ -25,235 +25,13 @@ #include "loop_analysis.h" #include "ir_hierarchical_visitor.h" -static void try_add_loop_terminator(loop_variable_state *ls, ir_if *ir); +static bool is_loop_terminator(ir_if *ir); static bool all_expression_operands_are_loop_constant(ir_rvalue *, hash_table *); static ir_rvalue *get_basic_induction_increment(ir_assignment *, hash_table *); -/** - * Find an initializer of a variable outside a loop - * - * Works backwards from the loop to find the pre-loop value of the variable. - * This is used, for example, to find the initial value of loop induction - * variables. - * - * \param loop Loop where \c var is an induction variable - * \param var Variable whose initializer is to be found - * - * \return - * The \c ir_rvalue assigned to the variable outside the loop. May return - * \c NULL if no initializer can be found. - */ -static ir_rvalue * -find_initial_value(ir_loop *loop, ir_variable *var) -{ - for (exec_node *node = loop->prev; !node->is_head_sentinel(); - node = node->prev) { - ir_instruction *ir = (ir_instruction *) node; - - switch (ir->ir_type) { - case ir_type_call: - case ir_type_loop: - case ir_type_loop_jump: - case ir_type_return: - case ir_type_if: - return NULL; - - case ir_type_function: - case ir_type_function_signature: - assert(!"Should not get here."); - return NULL; - - case ir_type_assignment: { - ir_assignment *assign = ir->as_assignment(); - ir_variable *assignee = assign->lhs->whole_variable_referenced(); - - if (assignee == var) - return assign->rhs; - - break; - } - - default: - break; - } - } - - return NULL; -} - - -static int -calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, - enum ir_expression_operation op, bool continue_from_then, - bool swap_compare_operands, bool inc_before_terminator) -{ - if (from == NULL || to == NULL || increment == NULL) - return -1; - - void *mem_ctx = ralloc_context(NULL); - - ir_expression *const sub = - new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from); - - ir_expression *const div = - new(mem_ctx) ir_expression(ir_binop_div, sub->type, sub, increment); - - ir_constant *iter = div->constant_expression_value(mem_ctx); - if (iter == NULL) { - ralloc_free(mem_ctx); - return -1; - } - - if (!iter->type->is_integer()) { - const ir_expression_operation op = iter->type->is_double() - ? ir_unop_d2i : ir_unop_f2i; - ir_rvalue *cast = - new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL); - - iter = cast->constant_expression_value(mem_ctx); - } - - int64_t iter_value = iter->get_int64_component(0); - - /* Code after this block works under assumption that iterator will be - * incremented or decremented until it hits the limit, - * however the loop condition can be false on the first iteration. - * Handle such loops first. - */ - { - ir_rvalue *first_value = from; - if (inc_before_terminator) { - first_value = - new(mem_ctx) ir_expression(ir_binop_add, from->type, from, increment); - } - - ir_expression *cmp = swap_compare_operands - ? new(mem_ctx) ir_expression(op, glsl_type::bool_type, to, first_value) - : new(mem_ctx) ir_expression(op, glsl_type::bool_type, first_value, to); - if (continue_from_then) - cmp = new(mem_ctx) ir_expression(ir_unop_logic_not, cmp); - - ir_constant *const cmp_result = cmp->constant_expression_value(mem_ctx); - assert(cmp_result != NULL); - if (cmp_result->get_bool_component(0)) { - ralloc_free(mem_ctx); - return 0; - } - } - - /* Make sure that the calculated number of iterations satisfies the exit - * condition. This is needed to catch off-by-one errors and some types of - * ill-formed loops. For example, we need to detect that the following - * loop does not have a maximum iteration count. - * - * for (float x = 0.0; x != 0.9; x += 0.2) - * ; - */ - const int bias[] = { -1, 0, 1 }; - bool valid_loop = false; - - for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) { - /* Increment may be of type int, uint or float. */ - switch (increment->type->base_type) { - case GLSL_TYPE_INT: - iter = new(mem_ctx) ir_constant(int32_t(iter_value + bias[i])); - break; - case GLSL_TYPE_INT16: - iter = new(mem_ctx) ir_constant(int16_t(iter_value + bias[i])); - break; - case GLSL_TYPE_INT64: - iter = new(mem_ctx) ir_constant(int64_t(iter_value + bias[i])); - break; - case GLSL_TYPE_UINT: - iter = new(mem_ctx) ir_constant(unsigned(iter_value + bias[i])); - break; - case GLSL_TYPE_UINT16: - iter = new(mem_ctx) ir_constant(uint16_t(iter_value + bias[i])); - break; - case GLSL_TYPE_UINT64: - iter = new(mem_ctx) ir_constant(uint64_t(iter_value + bias[i])); - break; - case GLSL_TYPE_FLOAT: - iter = new(mem_ctx) ir_constant(float(iter_value + bias[i])); - break; - case GLSL_TYPE_FLOAT16: - iter = new(mem_ctx) ir_constant(float16_t(float(iter_value + bias[i]))); - break; - case GLSL_TYPE_DOUBLE: - iter = new(mem_ctx) ir_constant(double(iter_value + bias[i])); - break; - default: - unreachable("Unsupported type for loop iterator."); - } - - ir_expression *const mul = - new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter, - increment); - - ir_expression *const add = - new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from); - - ir_expression *cmp = swap_compare_operands - ? new(mem_ctx) ir_expression(op, glsl_type::bool_type, to, add) - : new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to); - if (continue_from_then) - cmp = new(mem_ctx) ir_expression(ir_unop_logic_not, cmp); - - ir_constant *const cmp_result = cmp->constant_expression_value(mem_ctx); - - assert(cmp_result != NULL); - if (cmp_result->get_bool_component(0)) { - iter_value += bias[i]; - valid_loop = true; - break; - } - } - - ralloc_free(mem_ctx); - - if (inc_before_terminator) { - iter_value--; - } - - return (valid_loop) ? iter_value : -1; -} - -static bool -incremented_before_terminator(ir_loop *loop, ir_variable *var, - ir_if *terminator) -{ - for (exec_node *node = loop->body_instructions.get_head(); - !node->is_tail_sentinel(); - node = node->get_next()) { - ir_instruction *ir = (ir_instruction *) node; - - switch (ir->ir_type) { - case ir_type_if: - if (ir->as_if() == terminator) - return false; - break; - - case ir_type_assignment: { - ir_assignment *assign = ir->as_assignment(); - ir_variable *assignee = assign->lhs->whole_variable_referenced(); - - if (assignee == var) { - return true; - } - - break; - } - - default: - break; - } - } - - unreachable("Unable to find induction variable"); -} /** * Record the fact that the given loop variable was referenced inside the loop. @@ -274,7 +52,8 @@ loop_variable::record_reference(bool in_assignee, if (in_assignee) { assert(current_assignment != NULL); - if (in_conditional_code_or_nested_loop) { + if (in_conditional_code_or_nested_loop || + current_assignment->condition != NULL) { this->conditional_or_nested_assignment = true; } @@ -296,7 +75,8 @@ loop_variable::record_reference(bool in_assignee, loop_state::loop_state() { - this->ht = _mesa_pointer_hash_table_create(NULL); + this->ht = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); this->mem_ctx = ralloc_context(NULL); this->loop_found = false; } @@ -304,7 +84,7 @@ loop_state::loop_state() loop_state::~loop_state() { - _mesa_hash_table_destroy(this->ht, NULL); + hash_table_dtor(this->ht); ralloc_free(this->mem_ctx); } @@ -314,7 +94,7 @@ loop_state::insert(ir_loop *ir) { loop_variable_state *ls = new(this->mem_ctx) loop_variable_state; - _mesa_hash_table_insert(this->ht, ir, ls); + hash_table_insert(this->ht, ls, ir); this->loop_found = true; return ls; @@ -324,19 +104,14 @@ loop_state::insert(ir_loop *ir) loop_variable_state * loop_state::get(const ir_loop *ir) { - hash_entry *entry = _mesa_hash_table_search(this->ht, ir); - return entry ? (loop_variable_state *) entry->data : NULL; + return (loop_variable_state *) hash_table_find(this->ht, ir); } loop_variable * loop_variable_state::get(const ir_variable *ir) { - if (ir == NULL) - return NULL; - - hash_entry *entry = _mesa_hash_table_search(this->var_hash, ir); - return entry ? (loop_variable *) entry->data : NULL; + return (loop_variable *) hash_table_find(this->var_hash, ir); } @@ -348,7 +123,7 @@ loop_variable_state::insert(ir_variable *var) lv->var = var; - _mesa_hash_table_insert(this->var_hash, lv->var, lv); + hash_table_insert(this->var_hash, lv, lv->var); this->variables.push_tail(lv); return lv; @@ -356,12 +131,12 @@ loop_variable_state::insert(ir_variable *var) loop_terminator * -loop_variable_state::insert(ir_if *if_stmt, bool continue_from_then) +loop_variable_state::insert(ir_if *if_stmt) { void *mem_ctx = ralloc_parent(this); - loop_terminator *t = new(mem_ctx) loop_terminator(if_stmt, - continue_from_then); + loop_terminator *t = new(mem_ctx) loop_terminator(); + t->ir = if_stmt; this->terminators.push_tail(t); return t; @@ -518,8 +293,10 @@ loop_analysis::visit_leave(ir_loop *ir) ir_if *if_stmt = ((ir_instruction *) node)->as_if(); - if (if_stmt != NULL) - try_add_loop_terminator(ls, if_stmt); + if ((if_stmt != NULL) && is_loop_terminator(if_stmt)) + ls->insert(if_stmt); + else + break; } @@ -628,6 +405,8 @@ loop_analysis::visit_leave(ir_loop *ir) switch (cond->operation) { case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: case ir_binop_gequal: { /* The expressions that we care about will either be of the form * 'counter < limit' or 'limit < counter'. Figure out which is @@ -636,12 +415,18 @@ loop_analysis::visit_leave(ir_loop *ir) ir_rvalue *counter = cond->operands[0]->as_dereference_variable(); ir_constant *limit = cond->operands[1]->as_constant(); enum ir_expression_operation cmp = cond->operation; - bool swap_compare_operands = false; if (limit == NULL) { counter = cond->operands[1]->as_dereference_variable(); limit = cond->operands[0]->as_constant(); - swap_compare_operands = true; + + switch (cmp) { + case ir_binop_less: cmp = ir_binop_greater; break; + case ir_binop_greater: cmp = ir_binop_less; break; + case ir_binop_lequal: cmp = ir_binop_gequal; break; + case ir_binop_gequal: cmp = ir_binop_lequal; break; + default: assert(!"Should not get here."); + } } if ((counter == NULL) || (limit == NULL)) @@ -653,13 +438,8 @@ loop_analysis::visit_leave(ir_loop *ir) loop_variable *lv = ls->get(var); if (lv != NULL && lv->is_induction_var()) { - bool inc_before_terminator = - incremented_before_terminator(ir, var, t->ir); - t->iterations = calculate_iterations(init, limit, lv->increment, - cmp, t->continue_from_then, - swap_compare_operands, - inc_before_terminator); + cmp); if (t->iterations >= 0 && (ls->limiting_terminator == NULL || @@ -738,9 +518,8 @@ public: virtual ir_visitor_status visit(ir_dereference_variable *ir) { - hash_entry *entry = _mesa_hash_table_search(this->loop_variables, - ir->var); - loop_variable *lv = entry ? (loop_variable *) entry->data : NULL; + loop_variable *lv = + (loop_variable *) hash_table_find(this->loop_variables, ir->var); assert(lv != NULL); @@ -797,8 +576,8 @@ get_basic_induction_increment(ir_assignment *ir, hash_table *var_hash) if (inc->as_constant() == NULL) { ir_variable *const inc_var = inc->variable_referenced(); if (inc_var != NULL) { - hash_entry *entry = _mesa_hash_table_search(var_hash, inc_var); - loop_variable *lv = entry ? (loop_variable *) entry->data : NULL; + loop_variable *lv = + (loop_variable *) hash_table_find(var_hash, inc_var); if (lv == NULL || !lv->is_loop_constant()) { assert(lv != NULL); @@ -822,26 +601,31 @@ get_basic_induction_increment(ir_assignment *ir, hash_table *var_hash) /** - * Detect whether an if-statement is a loop terminating condition, if so - * add it to the list of loop terminators. + * Detect whether an if-statement is a loop terminating condition * * Detects if-statements of the form * - * (if (expression bool ...) (...then_instrs...break)) - * - * or - * - * (if (expression bool ...) ... (...else_instrs...break)) + * (if (expression bool ...) (break)) */ -void -try_add_loop_terminator(loop_variable_state *ls, ir_if *ir) +bool +is_loop_terminator(ir_if *ir) { - ir_instruction *inst = (ir_instruction *) ir->then_instructions.get_tail(); - ir_instruction *else_inst = - (ir_instruction *) ir->else_instructions.get_tail(); + if (!ir->else_instructions.is_empty()) + return false; + + ir_instruction *const inst = + (ir_instruction *) ir->then_instructions.get_head(); + if (inst == NULL) + return false; + + if (inst->ir_type != ir_type_loop_jump) + return false; + + ir_loop_jump *const jump = (ir_loop_jump *) inst; + if (jump->mode != ir_loop_jump::jump_break) + return false; - if (is_break(inst) || is_break(else_inst)) - ls->insert(ir, is_break(else_inst)); + return true; } diff --git a/lib/mesa/src/compiler/glsl/loop_unroll.cpp b/lib/mesa/src/compiler/glsl/loop_unroll.cpp index 04b8b4f49..aea2743cd 100644 --- a/lib/mesa/src/compiler/glsl/loop_unroll.cpp +++ b/lib/mesa/src/compiler/glsl/loop_unroll.cpp @@ -24,7 +24,8 @@ #include "compiler/glsl_types.h" #include "loop_analysis.h" #include "ir_hierarchical_visitor.h" -#include "main/consts_exts.h" + +#include "main/mtypes.h" namespace { @@ -41,9 +42,7 @@ public: virtual ir_visitor_status visit_leave(ir_loop *ir); void simple_unroll(ir_loop *ir, int iterations); void complex_unroll(ir_loop *ir, int iterations, - bool continue_from_then_branch, - bool limiting_term_first, - bool lt_continue_from_then_branch); + bool continue_from_then_branch); void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); loop_state *state; @@ -54,6 +53,13 @@ public: } /* anonymous namespace */ +static bool +is_break(ir_instruction *ir) +{ + return ir != NULL && ir->ir_type == ir_type_loop_jump + && ((ir_loop_jump *) ir)->is_break(); +} + class loop_unroll_count : public ir_hierarchical_visitor { public: int nodes; @@ -100,7 +106,7 @@ public: if (options->EmitNoIndirectSampler) { if ((ir->array->type->is_array() && ir->array->type->contains_sampler()) && - !ir->array_index->constant_expression_value(ralloc_parent(ir))) { + !ir->array_index->constant_expression_value()) { unsupported_variable_indexing = true; return visit_continue; } @@ -177,57 +183,6 @@ void loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) { void *const mem_ctx = ralloc_parent(ir); - loop_variable_state *const ls = this->state->get(ir); - - /* If there are no terminators, then the loop iteration count must be 1. - * This is the 'do { } while (false);' case. - */ - assert(!ls->terminators.is_empty() || iterations == 1); - - ir_instruction *first_ir = - (ir_instruction *) ir->body_instructions.get_head(); - - if (!first_ir) { - /* The loop is empty remove it and return */ - ir->remove(); - return; - } - - ir_if *limit_if = NULL; - bool exit_branch_has_instructions = false; - if (ls->limiting_terminator) { - limit_if = ls->limiting_terminator->ir; - ir_instruction *ir_if_last = (ir_instruction *) - limit_if->then_instructions.get_tail(); - - if (is_break(ir_if_last)) { - if (ir_if_last != limit_if->then_instructions.get_head()) - exit_branch_has_instructions = true; - - splice_post_if_instructions(limit_if, &limit_if->else_instructions); - ir_if_last->remove(); - } else { - ir_if_last = (ir_instruction *) - limit_if->else_instructions.get_tail(); - assert(is_break(ir_if_last)); - - if (ir_if_last != limit_if->else_instructions.get_head()) - exit_branch_has_instructions = true; - - splice_post_if_instructions(limit_if, &limit_if->then_instructions); - ir_if_last->remove(); - } - } - - /* Because 'iterations' is the number of times we pass over the *entire* - * loop body before hitting the first break, we need to bump the number of - * iterations if the limiting terminator is not the first instruction in - * the loop, or it the exit branch contains instructions. This ensures we - * execute any instructions before the terminator or in its exit branch. - */ - if (!ls->terminators.is_empty() && - (limit_if != first_ir->as_if() || exit_branch_has_instructions)) - iterations++; for (int i = 0; i < iterations; i++) { exec_list copy_list; @@ -279,22 +234,11 @@ loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) */ void loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, - bool second_term_then_continue, - bool extra_iteration_required, - bool first_term_then_continue) + bool continue_from_then_branch) { void *const mem_ctx = ralloc_parent(ir); ir_instruction *ir_to_replace = ir; - /* Because 'iterations' is the number of times we pass over the *entire* - * loop body before hitting the first break, we need to bump the number of - * iterations if the limiting terminator is not the first instruction in - * the loop, or it the exit branch contains instructions. This ensures we - * execute any instructions before the terminator or in its exit branch. - */ - if (extra_iteration_required) - iterations++; - for (int i = 0; i < iterations; i++) { exec_list copy_list; @@ -304,10 +248,6 @@ loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); assert(ir_if != NULL); - exec_list *const first_list = first_term_then_continue - ? &ir_if->then_instructions : &ir_if->else_instructions; - ir_if = ((ir_instruction *) first_list->get_tail())->as_if(); - ir_to_replace->insert_before(©_list); ir_to_replace->remove(); @@ -315,10 +255,10 @@ loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, ir_to_replace = new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); - exec_list *const second_term_continue_list = second_term_then_continue + exec_list *const list = (continue_from_then_branch) ? &ir_if->then_instructions : &ir_if->else_instructions; - second_term_continue_list->push_tail(ir_to_replace); + list->push_tail(ir_to_replace); } ir_to_replace->remove(); @@ -360,26 +300,12 @@ loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, } } -static bool -exit_branch_has_instructions(ir_if *term_if, bool lt_then_continue) -{ - if (lt_then_continue) { - if (term_if->else_instructions.get_head() == - term_if->else_instructions.get_tail()) - return false; - } else { - if (term_if->then_instructions.get_head() == - term_if->then_instructions.get_tail()) - return false; - } - - return true; -} ir_visitor_status loop_unroll_visitor::visit_leave(ir_loop *ir) { loop_variable_state *const ls = this->state->get(ir); + int iterations; /* If we've entered a loop that hasn't been analyzed, something really, * really bad has happened. @@ -389,80 +315,13 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) return visit_continue; } - /* Limiting terminator may have iteration count of zero, - * this is a valid case because the loop may break during - * the first iteration. - */ - - /* Remove the conditional break statements associated with all terminators - * that are associated with a fixed iteration count, except for the one - * associated with the limiting terminator--that one needs to stay, since - * it terminates the loop. Exception: if the loop still has a normative - * bound, then that terminates the loop, so we don't even need the limiting - * terminator. + /* Don't try to unroll loops where the number of iterations is not known + * at compile-time. */ - foreach_in_list_safe(loop_terminator, t, &ls->terminators) { - if (t->iterations < 0) - continue; - - exec_list *branch_instructions; - if (t != ls->limiting_terminator) { - ir_instruction *ir_if_last = (ir_instruction *) - t->ir->then_instructions.get_tail(); - if (is_break(ir_if_last)) { - branch_instructions = &t->ir->else_instructions; - } else { - branch_instructions = &t->ir->then_instructions; - assert(is_break((ir_instruction *) - t->ir->else_instructions.get_tail())); - } - - exec_list copy_list; - copy_list.make_empty(); - clone_ir_list(ir, ©_list, branch_instructions); - - t->ir->insert_before(©_list); - t->ir->remove(); - - assert(ls->num_loop_jumps > 0); - ls->num_loop_jumps--; - - /* Also remove it from the terminator list */ - t->remove(); - - this->progress = true; - } - } - - if (ls->limiting_terminator == NULL) { - ir_instruction *last_ir = - (ir_instruction *) ir->body_instructions.get_tail(); - - /* If a loop has no induction variable and the last instruction is - * a break, unroll the loop with a count of 1. This is the classic - * - * do { - * // ... - * } while (false) - * - * that is used to wrap multi-line macros. - * - * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to - * be at least num_loop_jumps instructions in the loop. - */ - if (ls->num_loop_jumps == 1 && is_break(last_ir)) { - last_ir->remove(); - - simple_unroll(ir, 1); - } - - /* Don't try to unroll loops where the number of iterations is not known - * at compile-time. - */ + if (ls->limiting_terminator == NULL) return visit_continue; - } - int iterations = ls->limiting_terminator->iterations; + iterations = ls->limiting_terminator->iterations; const int max_iterations = options->MaxUnrollIterations; @@ -492,6 +351,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) return visit_continue; if (predicted_num_loop_jumps == 0) { + ls->limiting_terminator->ir->remove(); simple_unroll(ir, iterations); return visit_continue; } @@ -506,69 +366,51 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) */ last_ir->remove(); + ls->limiting_terminator->ir->remove(); simple_unroll(ir, 1); return visit_continue; } - /* Complex unrolling can only handle two terminators. One with an unknown - * iteration count and one with a known iteration count. We have already - * made sure we have a known iteration count above and removed any - * unreachable terminators with a known count. Here we make sure there - * isn't any additional unknown terminators, or any other jumps nested - * inside futher ifs. - */ - if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2) - return visit_continue; - - ir_instruction *first_ir = - (ir_instruction *) ir->body_instructions.get_head(); - - unsigned term_count = 0; - bool first_term_then_continue = false; - foreach_in_list(loop_terminator, t, &ls->terminators) { - ir_if *ir_if = t->ir->as_if(); - assert(ir_if != NULL); + /* recognize loops in the form produced by ir_lower_jumps */ + foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) { + /* Skip the limiting terminator, since it will go away when we + * unroll. + */ + if (cur_ir == ls->limiting_terminator->ir) + continue; - ir_instruction *ir_if_last = - (ir_instruction *) ir_if->then_instructions.get_tail(); - - if (is_break(ir_if_last)) { - splice_post_if_instructions(ir_if, &ir_if->else_instructions); - ir_if_last->remove(); - if (term_count == 1) { - bool ebi = - exit_branch_has_instructions(ls->limiting_terminator->ir, - first_term_then_continue); - complex_unroll(ir, iterations, false, - first_ir->as_if() != ls->limiting_terminator->ir || - ebi, - first_term_then_continue); - return visit_continue; - } - } else { - ir_if_last = - (ir_instruction *) ir_if->else_instructions.get_tail(); + ir_if *ir_if = cur_ir->as_if(); + if (ir_if != NULL) { + /* Determine which if-statement branch, if any, ends with a + * break. The branch that did *not* have the break will get a + * temporary continue inserted in each iteration of the loop + * unroll. + * + * Note that since ls->num_loop_jumps is <= 1, it is impossible + * for both branches to end with a break. + */ + ir_instruction *ir_if_last = + (ir_instruction *) ir_if->then_instructions.get_tail(); - assert(is_break(ir_if_last)); if (is_break(ir_if_last)) { - splice_post_if_instructions(ir_if, &ir_if->then_instructions); + ls->limiting_terminator->ir->remove(); + splice_post_if_instructions(ir_if, &ir_if->else_instructions); ir_if_last->remove(); - if (term_count == 1) { - bool ebi = - exit_branch_has_instructions(ls->limiting_terminator->ir, - first_term_then_continue); - complex_unroll(ir, iterations, true, - first_ir->as_if() != ls->limiting_terminator->ir || - ebi, - first_term_then_continue); + complex_unroll(ir, iterations, false); + return visit_continue; + } else { + ir_if_last = + (ir_instruction *) ir_if->else_instructions.get_tail(); + + if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); + splice_post_if_instructions(ir_if, &ir_if->then_instructions); + ir_if_last->remove(); + complex_unroll(ir, iterations, true); return visit_continue; - } else { - first_term_then_continue = true; } } } - - term_count++; } /* Did not find the break statement. It must be in a complex if-nesting, diff --git a/lib/mesa/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp b/lib/mesa/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp index dbca6321b..2d024d4b7 100644 --- a/lib/mesa/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp +++ b/lib/mesa/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp @@ -45,13 +45,9 @@ namespace { class lower_const_array_visitor : public ir_rvalue_visitor { public: - lower_const_array_visitor(exec_list *insts, unsigned s, - unsigned available_uni_components) + lower_const_array_visitor(exec_list *insts) { instructions = insts; - stage = s; - const_count = 0; - free_uni_components = available_uni_components; progress = false; } @@ -61,54 +57,30 @@ public: return progress; } - ir_visitor_status visit_enter(ir_texture *); void handle_rvalue(ir_rvalue **rvalue); private: exec_list *instructions; - unsigned stage; - unsigned const_count; - unsigned free_uni_components; bool progress; }; -ir_visitor_status -lower_const_array_visitor::visit_enter(ir_texture *) -{ - return visit_continue_with_parent; -} - void lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) { if (!*rvalue) return; - ir_constant *con = (*rvalue)->as_constant(); - if (!con || !con->type->is_array()) + ir_dereference_array *dra = (*rvalue)->as_dereference_array(); + if (!dra) return; - /* How many uniform component slots are required? */ - unsigned component_slots = con->type->component_slots(); - - /* We would utilize more than is available, bail out. */ - if (component_slots > free_uni_components) + ir_constant *con = dra->array->as_constant(); + if (!con || !con->type->is_array()) return; - free_uni_components -= component_slots; - void *mem_ctx = ralloc_parent(con); - /* In the very unlikely event of 4294967295 constant arrays in a single - * shader, don't promote this to a uniform. - */ - unsigned limit = ~0; - if (const_count == limit) - return; - - char *uniform_name = ralloc_asprintf(mem_ctx, "constarray_%x_%u", - const_count, stage); - const_count++; + char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra); ir_variable *uni = new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform); @@ -121,37 +93,17 @@ lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) uni->data.max_array_access = uni->type->length - 1; instructions->push_head(uni); - *rvalue = new(mem_ctx) ir_dereference_variable(uni); + ir_dereference_variable *varref = new(mem_ctx) ir_dereference_variable(uni); + *rvalue = new(mem_ctx) ir_dereference_array(varref, dra->array_index); progress = true; } } /* anonymous namespace */ - -static unsigned -count_uniforms(exec_list *instructions) -{ - unsigned total = 0; - - foreach_in_list(ir_instruction, node, instructions) { - ir_variable *const var = node->as_variable(); - - if (!var || var->data.mode != ir_var_uniform) - continue; - - total += var->type->component_slots(); - } - return total; -} - bool -lower_const_arrays_to_uniforms(exec_list *instructions, unsigned stage, - unsigned max_uniform_components) +lower_const_arrays_to_uniforms(exec_list *instructions) { - unsigned uniform_components = count_uniforms(instructions); - unsigned free_uniform_slots = max_uniform_components - uniform_components; - - lower_const_array_visitor v(instructions, stage, free_uniform_slots); + lower_const_array_visitor v(instructions); return v.run(); } diff --git a/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp b/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp index fc2aaed4a..124996958 100644 --- a/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp +++ b/lib/mesa/src/compiler/glsl/lower_shared_reference.cpp @@ -33,11 +33,8 @@ #include "lower_buffer_access.h" #include "ir_builder.h" -#include "linker.h" #include "main/macros.h" #include "util/list.h" -#include "main/consts_exts.h" -#include "main/shader_types.h" #include "glsl_parser_extras.h" using namespace ir_builder; @@ -54,10 +51,8 @@ class lower_shared_reference_visitor : public lower_buffer_access::lower_buffer_access { public: - lower_shared_reference_visitor(struct gl_linked_shader *shader) - : buffer_access_type(shared_load_access), - list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u), - progress(false) + lower_shared_reference_visitor(struct gl_shader *shader) + : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) { list_inithead(&var_offsets); } @@ -93,7 +88,7 @@ public: unsigned write_mask); void *list_ctx; - struct gl_linked_shader *shader; + struct gl_shader *shader; struct list_head var_offsets; unsigned shared_size; bool progress; @@ -141,13 +136,13 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) ir_rvalue *offset = NULL; unsigned const_offset = get_shared_offset(var); bool row_major; - const glsl_type *matrix_type; + int matrix_columns; assert(var->get_interface_type() == NULL); - const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430; + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; - setup_buffer_access(mem_ctx, deref, + setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset, - &row_major, &matrix_type, NULL, packing); + &row_major, &matrix_columns, NULL, packing); /* Now that we've calculated the offset to the start of the * dereference, walk over the type and emit loads into a temporary. @@ -167,7 +162,7 @@ lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) deref = new(mem_ctx) ir_dereference_variable(load_var); emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, - matrix_type, packing, 0); + matrix_columns, packing, 0); *rvalue = deref; @@ -209,13 +204,13 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) ir_rvalue *offset = NULL; unsigned const_offset = get_shared_offset(var); bool row_major; - const glsl_type *matrix_type; + int matrix_columns; assert(var->get_interface_type() == NULL); - const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430; + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; - setup_buffer_access(mem_ctx, deref, + setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset, - &row_major, &matrix_type, NULL, packing); + &row_major, &matrix_columns, NULL, packing); deref = new(mem_ctx) ir_dereference_variable(store_var); @@ -227,7 +222,7 @@ lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) /* Now we have to write the value assigned to the temporary back to memory */ emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, - matrix_type, packing, ir->write_mask); + matrix_columns, packing, ir->write_mask); progress = true; } @@ -245,7 +240,7 @@ lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, const glsl_type *type, ir_rvalue *offset, unsigned mask, - int /* channel */) + int channel) { if (buffer_access_type == shared_store_access) { ir_call *store = shared_store(mem_ctx, deref, offset, mask); @@ -289,7 +284,7 @@ lower_shared_reference_visitor::shared_store(void *mem_ctx, ir_function_signature(glsl_type::void_type, compute_shader_enabled); assert(sig); sig->replace_parameters(&sig_params); - sig->intrinsic_id = ir_intrinsic_shared_store; + sig->is_intrinsic = true; ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); f->add_signature(sig); @@ -316,7 +311,7 @@ lower_shared_reference_visitor::shared_load(void *mem_ctx, new(mem_ctx) ir_function_signature(type, compute_shader_enabled); assert(sig); sig->replace_parameters(&sig_params); - sig->intrinsic_id = ir_intrinsic_shared_load; + sig->is_intrinsic = true; ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); f->add_signature(sig); @@ -356,8 +351,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) inst->ir_type == ir_type_swizzle); ir_rvalue *deref = (ir_rvalue *) inst; - assert(deref->type->is_scalar() && - (deref->type->is_integer_32_64() || deref->type->is_float())); + assert(deref->type->is_scalar() && deref->type->is_integer()); ir_variable *var = deref->variable_referenced(); assert(var); @@ -369,18 +363,18 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) ir_rvalue *offset = NULL; unsigned const_offset = get_shared_offset(var); bool row_major; - const glsl_type *matrix_type; + int matrix_columns; assert(var->get_interface_type() == NULL); - const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430; + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; buffer_access_type = shared_atomic_access; - setup_buffer_access(mem_ctx, deref, + setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset, - &row_major, &matrix_type, NULL, packing); + &row_major, &matrix_columns, NULL, packing); assert(offset); assert(!row_major); - assert(matrix_type == NULL); + assert(matrix_columns == 1); ir_rvalue *deref_offset = add(offset, new(mem_ctx) ir_constant(const_offset)); @@ -393,7 +387,8 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); sig_params.push_tail(sig_param); - const glsl_type *type = deref->type->get_scalar_type(); + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? + glsl_type::int_type : glsl_type::uint_type; sig_param = new(mem_ctx) ir_variable(type, "data1", ir_var_function_in); sig_params.push_tail(sig_param); @@ -409,10 +404,7 @@ lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) compute_shader_enabled); assert(sig); sig->replace_parameters(&sig_params); - - assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load); - assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap); - sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared); + sig->is_intrinsic = true; char func_name[64]; sprintf(func_name, "%s_shared", ir->callee_name()); @@ -452,15 +444,15 @@ lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) if (!var || var->data.mode != ir_var_shader_shared) return ir; - const enum ir_intrinsic_id id = ir->callee->intrinsic_id; - if (id == ir_intrinsic_generic_atomic_add || - id == ir_intrinsic_generic_atomic_min || - id == ir_intrinsic_generic_atomic_max || - id == ir_intrinsic_generic_atomic_and || - id == ir_intrinsic_generic_atomic_or || - id == ir_intrinsic_generic_atomic_xor || - id == ir_intrinsic_generic_atomic_exchange || - id == ir_intrinsic_generic_atomic_comp_swap) { + const char *callee = ir->callee_name(); + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { return lower_shared_atomic_intrinsic(ir); } @@ -483,9 +475,7 @@ lower_shared_reference_visitor::visit_enter(ir_call *ir) } /* unnamed namespace */ void -lower_shared_reference(const struct gl_constants *consts, - struct gl_shader_program *prog, - struct gl_linked_shader *shader) +lower_shared_reference(struct gl_shader *shader, unsigned *shared_size) { if (shader->Stage != MESA_SHADER_COMPUTE) return; @@ -502,19 +492,5 @@ lower_shared_reference(const struct gl_constants *consts, visit_list_elements(&v, shader->ir); } while (v.progress); - prog->Comp.SharedSize = v.shared_size; - - /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile) - * specification says: - * - * "There is a limit to the total size of all variables declared as - * shared in a single program object. This limit, expressed in units of - * basic machine units, may be queried as the value of - * MAX_COMPUTE_SHARED_MEMORY_SIZE." - */ - if (prog->Comp.SharedSize > consts->MaxComputeSharedMemorySize) { - linker_error(prog, "Too much shared memory used (%u/%u)\n", - prog->Comp.SharedSize, - consts->MaxComputeSharedMemorySize); - } + *shared_size = v.shared_size; } diff --git a/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp index 4e7761e75..fcb12d1b7 100644 --- a/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp +++ b/lib/mesa/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp @@ -51,10 +51,70 @@ #include "ir_optimization.h" #include "compiler/glsl_types.h" #include "main/macros.h" -#include "program/prog_instruction.h" /* For SWIZZLE_XXXX */ -#include "ir_builder.h" -using namespace ir_builder; +/** + * Generate a comparison value for a block of indices + * + * Lowering passes for non-constant indexing of arrays, matrices, or vectors + * can use this to generate blocks of index comparison values. + * + * \param instructions List where new instructions will be appended + * \param index \c ir_variable containing the desired index + * \param base Base value for this block of comparisons + * \param components Number of unique index values to compare. This must + * be on the range [1, 4]. + * \param mem_ctx ralloc memory context to be used for all allocations. + * + * \returns + * An \c ir_rvalue that \b must be cloned for each use in conditional + * assignments, etc. + */ +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx) +{ + ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); + + assert(index->type->is_scalar()); + assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); + assert(components >= 1 && components <= 4); + + if (components > 1) { + const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; + broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); + } + + /* Compare the desired index value with the next block of four indices. + */ + ir_constant_data test_indices_data; + memset(&test_indices_data, 0, sizeof(test_indices_data)); + test_indices_data.i[0] = base; + test_indices_data.i[1] = base + 1; + test_indices_data.i[2] = base + 2; + test_indices_data.i[3] = base + 3; + + ir_constant *const test_indices = + new(mem_ctx) ir_constant(broadcast_index->type, + &test_indices_data); + + ir_rvalue *const condition_val = + new(mem_ctx) ir_expression(ir_binop_equal, + glsl_type::bvec(components), + broadcast_index, + test_indices); + + ir_variable *const condition = + new(mem_ctx) ir_variable(condition_val->type, + "dereference_condition", + ir_var_temporary); + instructions->push_tail(condition); + + ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(condition); + instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); + + return cond_deref; +} static inline bool is_array_or_matrix(const ir_rvalue *ir) @@ -73,7 +133,7 @@ class deref_replacer : public ir_rvalue_visitor { public: deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) : variable_to_replace(variable_to_replace), value(value), - progress(false) + progress(false) { assert(this->variable_to_replace != NULL); assert(this->value != NULL); @@ -83,9 +143,9 @@ public: { ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); - if (dv != NULL && dv->var == this->variable_to_replace) { - this->progress = true; - *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); + if ((dv != NULL) && (dv->var == this->variable_to_replace)) { + this->progress = true; + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); } } @@ -107,10 +167,10 @@ public: virtual ir_visitor_status visit_enter(ir_dereference_array *ir) { - if (is_array_or_matrix(ir->array) && - ir->array_index->as_constant() == NULL) { - this->deref = ir; - return visit_stop; + if (is_array_or_matrix(ir->array) + && (ir->array_index->as_constant() == NULL)) { + this->deref = ir; + return visit_stop; } return visit_continue; @@ -141,22 +201,31 @@ struct assignment_generator { } - void generate(unsigned i, ir_factory &body) const + void generate(unsigned i, ir_rvalue* condition, exec_list *list) const { + /* Just clone the rest of the deref chain when trying to get at the + * underlying variable. + */ + void *mem_ctx = ralloc_parent(base_ir); + /* Clone the old r-value in its entirety. Then replace any occurances of * the old variable index with the new constant index. */ - ir_dereference *element = this->rvalue->clone(body.mem_ctx, NULL); - ir_constant *const index = body.constant(i); + ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); + ir_constant *const index = new(mem_ctx) ir_constant(i); deref_replacer r(this->old_index, index); element->accept(&r); assert(r.progress); + /* Generate a conditional assignment to (or from) the constant indexed + * array dereference. + */ + ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); ir_assignment *const assignment = (is_write) - ? assign(element, this->var, write_mask) - : assign(this->var, element); + ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) + : new(mem_ctx) ir_assignment(variable, element, condition); - body.emit(assignment); + list->push_tail(assignment); } }; @@ -167,45 +236,96 @@ struct switch_generator const TFunction& generator; ir_variable* index; + unsigned linear_sequence_max_length; + unsigned condition_components; void *mem_ctx; - switch_generator(const TFunction& generator, ir_variable *index) - : generator(generator), index(index) + switch_generator(const TFunction& generator, ir_variable *index, + unsigned linear_sequence_max_length, + unsigned condition_components) + : generator(generator), index(index), + linear_sequence_max_length(linear_sequence_max_length), + condition_components(condition_components) { this->mem_ctx = ralloc_parent(index); } - void bisect(unsigned begin, unsigned end, ir_factory &body) + void linear_sequence(unsigned begin, unsigned end, exec_list *list) + { + if (begin == end) + return; + + /* If the array access is a read, read the first element of this subregion + * unconditionally. The remaining tests will possibly overwrite this + * value with one of the other array elements. + * + * This optimization cannot be done for writes because it will cause the + * first element of the subregion to be written possibly *in addition* to + * one of the other elements. + */ + unsigned first; + if (!this->generator.is_write) { + this->generator.generate(begin, 0, list); + first = begin + 1; + } else { + first = begin; + } + + for (unsigned i = first; i < end; i += 4) { + const unsigned comps = MIN2(condition_components, end - i); + + ir_rvalue *const cond_deref = + compare_index_block(list, index, i, comps, this->mem_ctx); + + if (comps == 1) { + this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), + list); + } else { + for (unsigned j = 0; j < comps; j++) { + ir_rvalue *const cond_swiz = + new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), + j, 0, 0, 0, 1); + + this->generator.generate(i + j, cond_swiz, list); + } + } + } + } + + void bisect(unsigned begin, unsigned end, exec_list *list) { unsigned middle = (begin + end) >> 1; - assert(index->type->is_integer_32()); + assert(index->type->is_integer()); ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) - ? new(body.mem_ctx) ir_constant((unsigned)middle) - : new(body.mem_ctx) ir_constant((int)middle); + ? new(this->mem_ctx) ir_constant((unsigned)middle) + : new(this->mem_ctx) ir_constant((int)middle); + + + ir_dereference_variable *deref = + new(this->mem_ctx) ir_dereference_variable(this->index); - ir_if *if_less = new(body.mem_ctx) ir_if(less(this->index, middle_c)); + ir_expression *less = + new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, + deref, middle_c); - ir_factory then_body(&if_less->then_instructions, body.mem_ctx); - ir_factory else_body(&if_less->else_instructions, body.mem_ctx); - generate(begin, middle, then_body); - generate(middle, end, else_body); + ir_if *if_less = new(this->mem_ctx) ir_if(less); - body.emit(if_less); + generate(begin, middle, &if_less->then_instructions); + generate(middle, end, &if_less->else_instructions); + + list->push_tail(if_less); } - void generate(unsigned begin, unsigned end, ir_factory &body) + void generate(unsigned begin, unsigned end, exec_list *list) { - if (begin == end) - return; - unsigned length = end - begin; - if (length == 1) - generator.generate(begin, body); + if (length <= this->linear_sequence_max_length) + return linear_sequence(begin, end, list); else - bisect(begin, end, body); + return bisect(begin, end, list); } }; @@ -220,11 +340,13 @@ public: bool lower_output, bool lower_temp, bool lower_uniform) - : progress(false), stage(stage), lower_inputs(lower_input), - lower_outputs(lower_output), lower_temps(lower_temp), - lower_uniforms(lower_uniform) { - /* empty */ + this->progress = false; + this->stage = stage; + this->lower_inputs = lower_input; + this->lower_outputs = lower_output; + this->lower_temps = lower_temp; + this->lower_uniforms = lower_uniform; } bool progress; @@ -245,19 +367,19 @@ public: */ const ir_variable *const var = deref->array->variable_referenced(); if (var == NULL) - return this->lower_temps; + return this->lower_temps; switch (var->data.mode) { case ir_var_auto: case ir_var_temporary: - return this->lower_temps; + return this->lower_temps; case ir_var_uniform: case ir_var_shader_storage: - return this->lower_uniforms; + return this->lower_uniforms; case ir_var_shader_shared: - return false; + return false; case ir_var_function_in: case ir_var_const_in: @@ -313,7 +435,7 @@ public: return this->lower_outputs; case ir_var_function_inout: - return this->lower_temps; + return this->lower_temps; } assert(!"Should not get here."); @@ -322,27 +444,25 @@ public: bool needs_lowering(ir_dereference_array *deref) const { - if (deref == NULL || deref->array_index->as_constant() || - !is_array_or_matrix(deref->array)) - return false; + if (deref == NULL || deref->array_index->as_constant() + || !is_array_or_matrix(deref->array)) + return false; return this->storage_type_needs_lowering(deref); } ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, - ir_assignment* orig_assign, - ir_dereference *orig_base) + ir_assignment* orig_assign, + ir_dereference *orig_base) { - void *const mem_ctx = ralloc_parent(base_ir); - exec_list list; - ir_factory body(&list, mem_ctx); - assert(is_array_or_matrix(orig_deref->array)); const unsigned length = (orig_deref->array->type->is_array()) ? orig_deref->array->type->length : orig_deref->array->type->matrix_columns; + void *const mem_ctx = ralloc_parent(base_ir); + /* Temporary storage for either the result of the dereference of * the array, or the RHS that's being assigned into the * dereference of the array. @@ -350,22 +470,36 @@ public: ir_variable *var; if (orig_assign) { - var = body.make_temp(orig_assign->rhs->type, - "dereference_array_value"); + var = new(mem_ctx) ir_variable(orig_assign->rhs->type, + "dereference_array_value", + ir_var_temporary); + base_ir->insert_before(var); - body.emit(assign(var, orig_assign->rhs)); + ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var); + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, + orig_assign->rhs, + NULL); + + base_ir->insert_before(assign); } else { - var = body.make_temp(orig_deref->type, - "dereference_array_value"); + var = new(mem_ctx) ir_variable(orig_deref->type, + "dereference_array_value", + ir_var_temporary); + base_ir->insert_before(var); } /* Store the index to a temporary to avoid reusing its tree. */ - ir_variable *index = body.make_temp(orig_deref->array_index->type, - "dereference_array_index"); + ir_variable *index = + new(mem_ctx) ir_variable(orig_deref->array_index->type, + "dereference_array_index", ir_var_temporary); + base_ir->insert_before(index); - body.emit(assign(index, orig_deref->array_index)); + ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index); + ir_assignment *assign = + new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); + base_ir->insert_before(assign); - orig_deref->array_index = deref(index).val; + orig_deref->array_index = lhs->clone(mem_ctx, NULL); assignment_generator ag; ag.rvalue = orig_base; @@ -373,24 +507,40 @@ public: ag.old_index = index; ag.var = var; if (orig_assign) { - ag.is_write = true; - ag.write_mask = orig_assign->write_mask; + ag.is_write = true; + ag.write_mask = orig_assign->write_mask; } else { - ag.is_write = false; + ag.is_write = false; } - switch_generator sg(ag, index); + switch_generator sg(ag, index, 4, 4); + + /* If the original assignment has a condition, respect that original + * condition! This is acomplished by wrapping the new conditional + * assignments in an if-statement that uses the original condition. + */ + if ((orig_assign != NULL) && (orig_assign->condition != NULL)) { + /* No need to clone the condition because the IR that it hangs on is + * going to be removed from the instruction sequence. + */ + ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); + + sg.generate(0, length, &if_stmt->then_instructions); + base_ir->insert_before(if_stmt); + } else { + exec_list list; - sg.generate(0, length, body); + sg.generate(0, length, &list); + base_ir->insert_before(&list); + } - base_ir->insert_before(&list); return var; } virtual void handle_rvalue(ir_rvalue **pir) { if (this->in_assignee) - return; + return; if (!*pir) return; @@ -398,7 +548,7 @@ public: ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); if (needs_lowering(orig_deref)) { ir_variable *var = - convert_dereference_array(orig_deref, NULL, orig_deref); + convert_dereference_array(orig_deref, NULL, orig_deref); assert(var); *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); this->progress = true; @@ -413,7 +563,7 @@ public: find_variable_index f; ir->lhs->accept(&f); - if (f.deref != NULL && storage_type_needs_lowering(f.deref)) { + if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { convert_dereference_array(f.deref, ir, ir->lhs); ir->remove(); this->progress = true; diff --git a/lib/mesa/src/compiler/glsl/lower_vec_index_to_swizzle.cpp b/lib/mesa/src/compiler/glsl/lower_vec_index_to_swizzle.cpp index fdbad16a3..b49255e05 100644 --- a/lib/mesa/src/compiler/glsl/lower_vec_index_to_swizzle.cpp +++ b/lib/mesa/src/compiler/glsl/lower_vec_index_to_swizzle.cpp @@ -63,12 +63,11 @@ ir_vec_index_to_swizzle_visitor::handle_rvalue(ir_rvalue **rv) if (expr == NULL || expr->operation != ir_binop_vector_extract) return; - void *mem_ctx = ralloc_parent(expr); - ir_constant *const idx = - expr->operands[1]->constant_expression_value(mem_ctx); + ir_constant *const idx = expr->operands[1]->constant_expression_value(); if (idx == NULL) return; + void *ctx = ralloc_parent(expr); this->progress = true; /* Page 40 of the GLSL 1.20 spec says: @@ -88,7 +87,7 @@ ir_vec_index_to_swizzle_visitor::handle_rvalue(ir_rvalue **rv) const int i = CLAMP(idx->value.i[0], 0, (int) expr->operands[0]->type->vector_elements - 1); - *rv = new(mem_ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1); + *rv = new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1); } bool diff --git a/lib/mesa/src/compiler/glsl/lower_vector.cpp b/lib/mesa/src/compiler/glsl/lower_vector.cpp index 7b0883fdf..a658410ae 100644 --- a/lib/mesa/src/compiler/glsl/lower_vector.cpp +++ b/lib/mesa/src/compiler/glsl/lower_vector.cpp @@ -35,18 +35,87 @@ namespace { class lower_vector_visitor : public ir_rvalue_visitor { public: - lower_vector_visitor() : progress(false) + lower_vector_visitor() : dont_lower_swz(false), progress(false) { /* empty */ } void handle_rvalue(ir_rvalue **rvalue); + /** + * Should SWZ-like expressions be lowered? + */ + bool dont_lower_swz; + bool progress; }; } /* anonymous namespace */ +/** + * Determine if an IR expression tree looks like an extended swizzle + * + * Extended swizzles consist of access of a single vector source (with possible + * per component negation) and the constants -1, 0, or 1. + */ +bool +is_extended_swizzle(ir_expression *ir) +{ + /* Track any variables that are accessed by this expression. + */ + ir_variable *var = NULL; + + assert(ir->operation == ir_quadop_vector); + + for (unsigned i = 0; i < ir->type->vector_elements; i++) { + ir_rvalue *op = ir->operands[i]; + + while (op != NULL) { + switch (op->ir_type) { + case ir_type_constant: { + const ir_constant *const c = op->as_constant(); + + if (!c->is_one() && !c->is_zero() && !c->is_negative_one()) + return false; + + op = NULL; + break; + } + + case ir_type_dereference_variable: { + ir_dereference_variable *const d = (ir_dereference_variable *) op; + + if ((var != NULL) && (var != d->var)) + return false; + + var = d->var; + op = NULL; + break; + } + + case ir_type_expression: { + ir_expression *const ex = (ir_expression *) op; + + if (ex->operation != ir_unop_neg) + return false; + + op = ex->operands[0]; + break; + } + + case ir_type_swizzle: + op = ((ir_swizzle *) op)->val; + break; + + default: + return false; + } + } + } + + return true; +} + void lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) { @@ -57,11 +126,14 @@ lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) if ((expr == NULL) || (expr->operation != ir_quadop_vector)) return; + if (this->dont_lower_swz && is_extended_swizzle(expr)) + return; + /* FINISHME: Is this the right thing to use for the ralloc context? */ void *const mem_ctx = expr; - assert(expr->type->vector_elements == expr->num_operands); + assert(expr->type->vector_elements == expr->get_num_operands()); /* Generate a temporary with the same type as the ir_quadop_operation. */ @@ -119,7 +191,7 @@ lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) &d); ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); ir_assignment *const assign = - new(mem_ctx) ir_assignment(lhs, c, write_mask); + new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask); this->base_ir->insert_before(assign); } @@ -132,7 +204,7 @@ lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); ir_assignment *const assign = - new(mem_ctx) ir_assignment(lhs, expr->operands[i], 1U << i); + new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i)); this->base_ir->insert_before(assign); assigned++; @@ -145,10 +217,11 @@ lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) } bool -lower_quadop_vector(exec_list *instructions) +lower_quadop_vector(exec_list *instructions, bool dont_lower_swz) { lower_vector_visitor v; + v.dont_lower_swz = dont_lower_swz; visit_list_elements(&v, instructions); return v.progress; diff --git a/lib/mesa/src/compiler/glsl/opt_conditional_discard.cpp b/lib/mesa/src/compiler/glsl/opt_conditional_discard.cpp index 6d8a23460..1ca8803f6 100644 --- a/lib/mesa/src/compiler/glsl/opt_conditional_discard.cpp +++ b/lib/mesa/src/compiler/glsl/opt_conditional_discard.cpp @@ -65,21 +65,14 @@ opt_conditional_discard_visitor::visit_leave(ir_if *ir) { /* Look for "if (...) discard" with no else clause or extra statements. */ if (ir->then_instructions.is_empty() || - !ir->then_instructions.get_head_raw()->next->is_tail_sentinel() || - !((ir_instruction *) ir->then_instructions.get_head_raw())->as_discard() || + !ir->then_instructions.head->next->is_tail_sentinel() || + !((ir_instruction *) ir->then_instructions.head)->as_discard() || !ir->else_instructions.is_empty()) return visit_continue; /* Move the condition and replace the ir_if with the ir_discard. */ - ir_discard *discard = (ir_discard *) ir->then_instructions.get_head_raw(); - if (!discard->condition) - discard->condition = ir->condition; - else { - void *ctx = ralloc_parent(ir); - discard->condition = new(ctx) ir_expression(ir_binop_logic_and, - ir->condition, - discard->condition); - } + ir_discard *discard = (ir_discard *) ir->then_instructions.head; + discard->condition = ir->condition; ir->replace_with(discard); progress = true; diff --git a/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp b/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp index 981cedf0a..37bcbccf0 100644 --- a/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp +++ b/lib/mesa/src/compiler/glsl/opt_dead_builtin_varyings.cpp @@ -46,15 +46,13 @@ * The same is done for the gl_FragData fragment shader output. */ +#include "main/core.h" /* for snprintf and ARRAY_SIZE */ #include "ir.h" #include "ir_rvalue_visitor.h" #include "ir_optimization.h" #include "ir_print_visitor.h" #include "compiler/glsl_types.h" #include "link_varyings.h" -#include "main/consts_exts.h" -#include "main/shader_types.h" -#include "util/u_string.h" namespace { @@ -87,14 +85,10 @@ public: { ir_variable *var = ir->variable_referenced(); - if (!var || var->data.mode != this->mode || !var->type->is_array() || - !is_gl_identifier(var->name)) + if (!var || var->data.mode != this->mode || !var->type->is_array()) return visit_continue; - /* Only match gl_FragData[], not gl_SecondaryFragDataEXT[] or - * gl_LastFragData[]. - */ - if (this->find_frag_outputs && strcmp(var->name, "gl_FragData") == 0) { + if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { this->fragdata_array = var; ir_constant *index = ir->array_index->as_constant(); @@ -149,8 +143,7 @@ public: if (var->data.mode != this->mode || !var->type->is_array()) return visit_continue; - if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0 && - var->data.index == 0) { + if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { /* This is a whole array dereference. */ this->fragdata_usage |= (1 << var->type->array_size()) - 1; this->lower_fragdata_array = false; @@ -276,7 +269,7 @@ public: */ class replace_varyings_visitor : public ir_rvalue_visitor { public: - replace_varyings_visitor(struct gl_linked_shader *sha, + replace_varyings_visitor(struct gl_shader *sha, const varying_info_visitor *info, unsigned external_texcoord_usage, unsigned external_color_usage, @@ -382,7 +375,7 @@ public: new_var[i]->data.explicit_index = 0; } - ir->get_head_raw()->insert_before(new_var[i]); + ir->head->insert_before(new_var[i]); } } } @@ -489,6 +482,7 @@ public: virtual ir_visitor_status visit_leave(ir_assignment *ir) { handle_rvalue(&ir->rhs); + handle_rvalue(&ir->condition); /* We have to use set_lhs when changing the LHS of an assignment. */ ir_rvalue *lhs = ir->lhs; @@ -502,7 +496,7 @@ public: } private: - struct gl_linked_shader *shader; + struct gl_shader *shader; const varying_info_visitor *info; ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; @@ -514,7 +508,7 @@ private: } /* anonymous namespace */ static void -lower_texcoord_array(struct gl_linked_shader *shader, const varying_info_visitor *info) +lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info) { replace_varyings_visitor(shader, info, (1 << MAX_TEXTURE_COORD_UNITS) - 1, @@ -522,34 +516,31 @@ lower_texcoord_array(struct gl_linked_shader *shader, const varying_info_visitor } static void -lower_fragdata_array(struct gl_linked_shader *shader) +lower_fragdata_array(struct gl_shader *shader) { varying_info_visitor info(ir_var_shader_out, true); info.get(shader->ir, 0, NULL); - replace_varyings_visitor(shader, &info, 0, 0, false); + replace_varyings_visitor(shader, &info, 0, 0, 0); } void -do_dead_builtin_varyings(const struct gl_constants *consts, - gl_api api, - gl_linked_shader *producer, - gl_linked_shader *consumer, +do_dead_builtin_varyings(struct gl_context *ctx, + gl_shader *producer, gl_shader *consumer, unsigned num_tfeedback_decls, tfeedback_decl *tfeedback_decls) { /* Lower the gl_FragData array to separate variables. */ - if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT && - !consts->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { + if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) { lower_fragdata_array(consumer); } /* Lowering of built-in varyings has no effect with the core context and * GLES2, because they are not available there. */ - if (api == API_OPENGL_CORE || - api == API_OPENGLES2) { + if (ctx->API == API_OPENGL_CORE || + ctx->API == API_OPENGLES2) { return; } @@ -560,9 +551,6 @@ do_dead_builtin_varyings(const struct gl_constants *consts, if (producer) { producer_info.get(producer->ir, num_tfeedback_decls, tfeedback_decls); - if (producer->Stage == MESA_SHADER_TESS_CTRL) - producer_info.lower_texcoord_array = false; - if (!consumer) { /* At least eliminate unused gl_TexCoord elements. */ if (producer_info.lower_texcoord_array) { @@ -575,9 +563,6 @@ do_dead_builtin_varyings(const struct gl_constants *consts, if (consumer) { consumer_info.get(consumer->ir, 0, NULL); - if (consumer->Stage != MESA_SHADER_FRAGMENT) - consumer_info.lower_texcoord_array = false; - if (!producer) { /* At least eliminate unused gl_TexCoord elements. */ if (consumer_info.lower_texcoord_array) { diff --git a/lib/mesa/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp b/lib/mesa/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp index f2d2af646..cd48bc523 100644 --- a/lib/mesa/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp +++ b/lib/mesa/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp @@ -21,7 +21,8 @@ * DEALINGS IN THE SOFTWARE. */ #include <gtest/gtest.h> -#include "util/compiler.h" +#include "main/compiler.h" +#include "main/mtypes.h" #include "main/macros.h" #include "util/ralloc.h" #include "uniform_initializer_utils.h" @@ -53,8 +54,6 @@ public: void copy_constant_to_storage::SetUp() { - glsl_type_singleton_init_or_ref(); - this->mem_ctx = ralloc_context(NULL); } @@ -63,8 +62,6 @@ copy_constant_to_storage::TearDown() { ralloc_free(this->mem_ctx); this->mem_ctx = NULL; - - glsl_type_singleton_decref(); } void diff --git a/lib/mesa/src/compiler/glsl/tests/invalidate_locations_test.cpp b/lib/mesa/src/compiler/glsl/tests/invalidate_locations_test.cpp index 7ed7f6a81..ba94d7e3a 100644 --- a/lib/mesa/src/compiler/glsl/tests/invalidate_locations_test.cpp +++ b/lib/mesa/src/compiler/glsl/tests/invalidate_locations_test.cpp @@ -21,7 +21,8 @@ * DEALINGS IN THE SOFTWARE. */ #include <gtest/gtest.h> -#include "util/compiler.h" +#include "main/compiler.h" +#include "main/mtypes.h" #include "main/macros.h" #include "util/ralloc.h" #include "ir.h" @@ -45,8 +46,6 @@ public: void invalidate_locations::SetUp() { - glsl_type_singleton_init_or_ref(); - this->mem_ctx = ralloc_context(NULL); this->ir.make_empty(); } @@ -56,8 +55,6 @@ invalidate_locations::TearDown() { ralloc_free(this->mem_ctx); this->mem_ctx = NULL; - - glsl_type_singleton_decref(); } TEST_F(invalidate_locations, simple_vertex_in_generic) |