/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vc4_qir.h"
#include "tgsi/tgsi_info.h"
#include "glsl/nir/nir_builder.h"

/**
 * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
 * something amenable to the VC4 architecture.
 *
 * Currently, it splits inputs, outputs, and uniforms into scalars, drops any
 * outputs other than position and point size in coordinate shaders, and
 * fixes up the addressing on indirect uniform loads.
 */

/**
 * Returns the first variable in the list \p vars whose driver_location
 * matches \p driver_location, or NULL when no variable matches.
 */
static nir_variable *
vc4_nir_find_io_var(struct exec_list *vars, int driver_location)
{
        foreach_list_typed(nir_variable, var, node, vars) {
                if (var->data.driver_location == driver_location)
                        return var;
        }

        return NULL;
}

/**
 * Replaces every use of the destination of \p intr with a vec4 built from
 * the four scalar values in \p comps, then removes \p intr.
 *
 * The vec4 is emitted wherever the cursor of \p b currently points.
 */
static void
replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
                            nir_ssa_def **comps)
{
        /* Batch things back together into a vec4.  This will get split by
         * the later ALU scalarization pass.
         */
        nir_ssa_def *vec = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);

        /* Replace the old intrinsic with a reference to our reconstructed
         * vec4.
         */
        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec),
                                 ralloc_parent(b->impl));
        nir_instr_remove(&intr->instr);
}

/**
 * Lowers a vec4 load_input into four scalar load_input intrinsics.
 *
 * In fragment shaders, the FACE input and GENERIC inputs selected by the
 * point sprite mask additionally get some of their channels replaced with
 * computed values or constants.  The TLB color read input is left untouched.
 */
static void
vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
                    nir_intrinsic_instr *intr)
{
        if (c->stage == QSTAGE_FRAG &&
            intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
                /* This doesn't need any lowering. */
                return;
        }

        nir_variable *input_var =
                vc4_nir_find_io_var(&c->s->inputs, intr->const_index[0]);
        assert(input_var);
        int semantic_name = input_var->data.location;
        int semantic_index = input_var->data.index;

        /* All TGSI-to-NIR inputs are vec4. */
        assert(intr->num_components == 4);

        nir_builder_insert_before_instr(b, &intr->instr);

        /* Generate scalar loads equivalent to the original VEC4.  The
         * scalar index is the vec4 index times four plus the channel.
         */
        nir_ssa_def *dests[4];
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s,
                                                   nir_intrinsic_load_input);
                intr_comp->num_components = 1;
                intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1,
                                  NULL);
                nir_builder_instr_insert(b, &intr_comp->instr);

                dests[i] = &intr_comp->dest.ssa;
        }

        switch (c->stage) {
        case QSTAGE_FRAG:
                switch (semantic_name) {
                case TGSI_SEMANTIC_FACE:
                        /* X becomes 1.0 - 2.0 * float(loaded value); the
                         * remaining channels are the constants (0, 0, 1).
                         */
                        dests[0] = nir_fsub(b,
                                            nir_imm_float(b, 1.0),
                                            nir_fmul(b,
                                                     nir_i2f(b, dests[0]),
                                                     nir_imm_float(b, 2.0)));
                        dests[1] = nir_imm_float(b, 0.0);
                        dests[2] = nir_imm_float(b, 0.0);
                        dests[3] = nir_imm_float(b, 1.0);
                        break;

                case TGSI_SEMANTIC_GENERIC:
                        /* Use an unsigned constant so that the shift stays
                         * well-defined for every bit of the mask.
                         */
                        if (c->fs_key->point_sprite_mask &
                            (1u << semantic_index)) {
                                if (!c->fs_key->is_points) {
                                        dests[0] = nir_imm_float(b, 0.0);
                                        dests[1] = nir_imm_float(b, 0.0);
                                }
                                if (c->fs_key->point_coord_upper_left) {
                                        /* Flip Y: y = 1.0 - y. */
                                        dests[1] = nir_fsub(b,
                                                            nir_imm_float(b, 1.0),
                                                            dests[1]);
                                }
                                dests[2] = nir_imm_float(b, 0.0);
                                dests[3] = nir_imm_float(b, 1.0);
                        }
                        break;
                }
                break;

        case QSTAGE_COORD:
        case QSTAGE_VERT:
                break;
        }

        replace_intrinsic_with_vec4(b, intr, dests);
}

/**
 * Lowers a vec4 store_output into four scalar store_output intrinsics.
 *
 * In coordinate shaders, stores to anything other than POSITION or PSIZE are
 * removed outright.  Fragment shader COLOR stores only have their index
 * scaled to scalar units and are otherwise left alone.
 */
static void
vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                     nir_intrinsic_instr *intr)
{
        nir_variable *output_var =
                vc4_nir_find_io_var(&c->s->outputs, intr->const_index[0]);
        assert(output_var);
        unsigned semantic_name = output_var->data.location;

        if (c->stage == QSTAGE_COORD &&
            (semantic_name != TGSI_SEMANTIC_POSITION &&
             semantic_name != TGSI_SEMANTIC_PSIZE)) {
                nir_instr_remove(&intr->instr);
                return;
        }

        /* Color output is lowered by vc4_nir_lower_blend(). */
        if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
                intr->const_index[0] *= 4;
                return;
        }

        /* All TGSI-to-NIR outputs are VEC4. */
        assert(intr->num_components == 4);
        assert(intr->src[0].is_ssa);

        nir_builder_insert_before_instr(b, &intr->instr);

        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s,
                                                   nir_intrinsic_store_output);
                intr_comp->num_components = 1;
                intr_comp->const_index[0] = intr->const_index[0] * 4 + i;

                /* Store just channel i of the original vec4 source. */
                intr_comp->src[0] =
                        nir_src_for_ssa(nir_swizzle(b, intr->src[0].ssa,
                                                    &i, 1, false));
                nir_builder_instr_insert(b, &intr_comp->instr);
        }

        nir_instr_remove(&intr->instr);
}

/**
 * Lowers a vec4 uniform load (direct or indirect) into four scalar loads of
 * the same intrinsic kind.
 *
 * Direct loads get a dword index.  Indirect loads get both their constant
 * index and their variable offset converted to bytes.
 */
static void
vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
         * loads in our lowering passes.
         */
        if (intr->num_components == 1)
                return;
        assert(intr->num_components == 4);

        nir_builder_insert_before_instr(b, &intr->instr);

        /* Convert the variable TGSI register index to a byte offset (16
         * bytes per vec4 register).  The shift is the same for every
         * channel, so emit it once and share it between the scalar loads.
         */
        nir_ssa_def *byte_offset = NULL;
        if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
                assert(intr->src[0].is_ssa);
                byte_offset = nir_ishl(b, intr->src[0].ssa,
                                       nir_imm_int(b, 4));
        }

        /* Generate scalar loads equivalent to the original VEC4. */
        nir_ssa_def *dests[4];
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
                intr_comp->num_components = 1;
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1,
                                  NULL);

                if (byte_offset) {
                        intr_comp->src[0] = nir_src_for_ssa(byte_offset);

                        /* Convert the offset to be a byte index, too. */
                        intr_comp->const_index[0] =
                                (intr->const_index[0] * 16 + i * 4);
                } else {
                        /* We want a dword index for non-indirect uniform
                         * loads.
                         */
                        intr_comp->const_index[0] =
                                (intr->const_index[0] * 4 + i);
                }

                dests[i] = &intr_comp->dest.ssa;

                nir_builder_instr_insert(b, &intr_comp->instr);
        }

        replace_intrinsic_with_vec4(b, intr, dests);
}

/**
 * Dispatches a single instruction to the matching lowering helper.
 * Anything that is not an input load, output store or uniform load is left
 * untouched.
 */
static void
vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
                       struct nir_instr *instr)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                vc4_nir_lower_input(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                vc4_nir_lower_output(c, b, intr);
                break;

        case nir_intrinsic_load_uniform:
        case nir_intrinsic_load_uniform_indirect:
                vc4_nir_lower_uniform(c, b, intr);
                break;

        default:
                break;
        }
}

/**
 * Per-block callback for nir_foreach_block(): lowers every instruction in
 * \p block.  \p arg is the struct vc4_compile being processed.
 *
 * Always returns true.
 */
static bool
vc4_nir_lower_io_block(nir_block *block, void *arg)
{
        struct vc4_compile *c = arg;
        nir_function_impl *impl =
                nir_cf_node_get_function(&block->cf_node);

        nir_builder b;
        nir_builder_init(&b, impl);

        /* The lowering helpers remove the instruction they are given, hence
         * the _safe iterator.
         */
        nir_foreach_instr_safe(block, instr) {
                vc4_nir_lower_io_instr(c, &b, instr);
        }

        return true;
}

/**
 * Runs the lowering over every block of \p impl and marks block indices and
 * dominance information as preserved.
 *
 * Always returns true.
 */
static bool
vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
{
        nir_foreach_block(impl, vc4_nir_lower_io_block, c);

        nir_metadata_preserve(impl, nir_metadata_block_index |
                              nir_metadata_dominance);

        return true;
}

/**
 * Entry point: lowers the io intrinsics of every function overload in the
 * shader of \p c that has an implementation.
 */
void
vc4_nir_lower_io(struct vc4_compile *c)
{
        nir_foreach_overload(c->s, overload) {
                if (overload->impl)
                        vc4_nir_lower_io_impl(c, overload->impl);
        }
}