diff options
author | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:17:30 +0000 |
---|---|---|
committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2021-07-22 10:17:30 +0000 |
commit | ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (patch) | |
tree | 3e4691a396e6e54cd54224a190663d5cf976625b /lib/mesa/src/gallium/drivers/etnaviv | |
parent | 27c8a50e8bbde7d28b1fc46d715a4c469e24f2c4 (diff) |
Import Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/etnaviv')
19 files changed, 3156 insertions, 894 deletions
diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_blt.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_blt.c index 81217918d..ec7620245 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_blt.c +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_blt.c @@ -229,7 +229,7 @@ etna_blit_clear_color_blt(struct pipe_context *pctx, struct pipe_surface *dst, if (surf->surf.ts_size) { clr.dest.use_ts = 1; clr.dest.ts_addr.bo = res->ts_bo; - clr.dest.ts_addr.offset = 0; + clr.dest.ts_addr.offset = surf->level->ts_offset; clr.dest.ts_addr.flags = ETNA_RELOC_WRITE; clr.dest.ts_clear_value[0] = new_clear_value; clr.dest.ts_clear_value[1] = new_clear_value >> 32; @@ -308,7 +308,7 @@ etna_blit_clear_zs_blt(struct pipe_context *pctx, struct pipe_surface *dst, if (surf->surf.ts_size) { clr.dest.use_ts = 1; clr.dest.ts_addr.bo = res->ts_bo; - clr.dest.ts_addr.offset = 0; + clr.dest.ts_addr.offset = surf->level->ts_offset; clr.dest.ts_addr.flags = ETNA_RELOC_WRITE; clr.dest.ts_clear_value[0] = surf->level->clear_value; clr.dest.ts_clear_value[1] = surf->level->clear_value; @@ -339,7 +339,7 @@ etna_blit_clear_zs_blt(struct pipe_context *pctx, struct pipe_surface *dst, } static void -etna_clear_blt(struct pipe_context *pctx, unsigned buffers, +etna_clear_blt(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state, const union pipe_color_union *color, double depth, unsigned stencil) { struct etna_context *ctx = etna_context(pctx); @@ -531,46 +531,26 @@ etna_try_blt_blit(struct pipe_context *pctx, return true; } -static void +static bool etna_blit_blt(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) { - struct etna_context *ctx = etna_context(pctx); - struct pipe_blit_info info = *blit_info; - - if (info.src.resource->nr_samples > 1 && - info.dst.resource->nr_samples <= 1 && - !util_format_is_depth_or_stencil(info.src.resource->format) && - !util_format_is_pure_integer(info.src.resource->format)) { + if 
(blit_info->src.resource->nr_samples > 1 && + blit_info->dst.resource->nr_samples <= 1 && + !util_format_is_depth_or_stencil(blit_info->src.resource->format) && + !util_format_is_pure_integer(blit_info->src.resource->format)) { DBG("color resolve unimplemented"); - return; - } - - if (etna_try_blt_blit(pctx, blit_info)) - return; - - if (util_try_blit_via_copy_region(pctx, blit_info)) - return; - - if (info.mask & PIPE_MASK_S) { - DBG("cannot blit stencil, skipping"); - info.mask &= ~PIPE_MASK_S; - } - - if (!util_blitter_is_blit_supported(ctx->blitter, &info)) { - DBG("blit unsupported %s -> %s", - util_format_short_name(info.src.resource->format), - util_format_short_name(info.dst.resource->format)); - return; + return false; } - etna_blit_save_state(ctx); - util_blitter_blit(ctx->blitter, &info); + return etna_try_blt_blit(pctx, blit_info); } void etna_clear_blit_blt_init(struct pipe_context *pctx) { + struct etna_context *ctx = etna_context(pctx); + DBG("etnaviv: Using BLT blit engine"); pctx->clear = etna_clear_blt; - pctx->blit = etna_blit_blt; + ctx->blit = etna_blit_blt; } diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index 8a71f62a0..88c228191 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -27,10 +27,11 @@ */ #include "etnaviv_compiler.h" +#include "etnaviv_compiler_nir.h" #include "etnaviv_asm.h" #include "etnaviv_context.h" #include "etnaviv_debug.h" -#include "etnaviv_disasm.h" +#include "etnaviv_nir.h" #include "etnaviv_uniforms.h" #include "etnaviv_util.h" @@ -38,197 +39,10 @@ #include "util/u_memory.h" #include "util/register_allocate.h" #include "compiler/nir/nir_builder.h" -#include "compiler/nir/nir_worklist.h" #include "tgsi/tgsi_strings.h" -#include "util/u_half.h" - -struct etna_compile { - nir_shader *nir; -#define is_fs(c) ((c)->nir->info.stage == 
MESA_SHADER_FRAGMENT) - const struct etna_specs *specs; - struct etna_shader_variant *variant; - - /* block # to instr index */ - unsigned *block_ptr; - - /* Code generation */ - int inst_ptr; /* current instruction pointer */ - struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE]; - - /* constants */ - uint64_t consts[ETNA_MAX_IMM]; - - /* There was an error during compilation */ - bool error; -}; - -/* io related lowering - * run after lower_int_to_float because it adds i2f/f2i ops - */ -static void -etna_lower_io(nir_shader *shader, struct etna_shader_variant *v) -{ - nir_foreach_function(function, shader) { - nir_builder b; - nir_builder_init(&b, function->impl); - - nir_foreach_block(block, function->impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - switch (intr->intrinsic) { - case nir_intrinsic_load_front_face: { - /* HW front_face is 0.0/1.0, not 0/~0u for bool - * lower with a comparison with 0 - */ - intr->dest.ssa.bit_size = 32; - - b.cursor = nir_after_instr(instr); - - nir_ssa_def *ssa = nir_ine(&b, &intr->dest.ssa, nir_imm_int(&b, 0)); - if (v->key.front_ccw) - nir_instr_as_alu(ssa->parent_instr)->op = nir_op_ieq; - - nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, - nir_src_for_ssa(ssa), - ssa->parent_instr); - } break; - case nir_intrinsic_store_deref: { - nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); - if (shader->info.stage != MESA_SHADER_FRAGMENT || !v->key.frag_rb_swap) - break; - - assert(deref->deref_type == nir_deref_type_var); - - if (deref->var->data.location != FRAG_RESULT_COLOR && - deref->var->data.location != FRAG_RESULT_DATA0) - break; - - b.cursor = nir_before_instr(instr); - - nir_ssa_def *ssa = nir_mov(&b, intr->src[1].ssa); - nir_alu_instr *alu = nir_instr_as_alu(ssa->parent_instr); - alu->src[0].swizzle[0] = 2; - alu->src[0].swizzle[2] = 0; - nir_instr_rewrite_src(instr, &intr->src[1], 
nir_src_for_ssa(ssa)); - } break; - case nir_intrinsic_load_uniform: { - /* convert indirect load_uniform to load_ubo when possible - * this is required on HALTI5+ because address register is not implemented - * address register loads also arent done optimally - */ - if (v->shader->specs->halti < 2 || nir_src_is_const(intr->src[0])) - break; - - nir_intrinsic_instr *load_ubo = - nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo); - load_ubo->num_components = intr->num_components; - nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest, - load_ubo->num_components, 32, NULL); - - b.cursor = nir_before_instr(instr); - load_ubo->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); - load_ubo->src[1] = nir_src_for_ssa(nir_iadd(&b, - nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)), - nir_imm_int(&b, nir_intrinsic_base(intr) * 16))); - nir_builder_instr_insert(&b, &load_ubo->instr); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, - nir_src_for_ssa(&load_ubo->dest.ssa)); - nir_instr_remove(&intr->instr); - } break; - case nir_intrinsic_load_ubo: { - nir_const_value *idx = nir_src_as_const_value(intr->src[0]); - assert(idx); - /* offset index by 1, index 0 is used for converted load_uniform */ - b.cursor = nir_before_instr(instr); - nir_instr_rewrite_src(instr, &intr->src[0], - nir_src_for_ssa(nir_imm_int(&b, idx[0].u32 + 1))); - } break; - case nir_intrinsic_load_vertex_id: - case nir_intrinsic_load_instance_id: - /* detect use of vertex_id/instance_id */ - v->vs_id_in_reg = v->infile.num_reg; - break; - default: - break; - } - } - - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tex = nir_instr_as_tex(instr); - nir_src *coord = NULL; - nir_src *lod_bias = NULL; - unsigned lod_bias_idx; - - assert(tex->sampler_index == tex->texture_index); - - for (unsigned i = 0; i < tex->num_srcs; i++) { - switch (tex->src[i].src_type) { - case nir_tex_src_coord: - coord = &tex->src[i].src; - break; - case nir_tex_src_bias: - case nir_tex_src_lod: - 
assert(!lod_bias); - lod_bias = &tex->src[i].src; - lod_bias_idx = i; - break; - case nir_tex_src_comparator: - break; - default: - assert(0); - break; - } - } - - if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { - /* use a dummy load_uniform here to represent texcoord scale */ - b.cursor = nir_before_instr(instr); - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_uniform); - nir_intrinsic_set_base(load, ~tex->sampler_index); - load->num_components = 2; - load->src[0] = nir_src_for_ssa(nir_imm_float(&b, 0.0f)); - nir_ssa_dest_init(&load->instr, &load->dest, 2, 32, NULL); - nir_intrinsic_set_type(load, nir_type_float); - - nir_builder_instr_insert(&b, &load->instr); - - nir_ssa_def *new_coord = nir_fmul(&b, coord->ssa, &load->dest.ssa); - nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(new_coord)); - } - - /* pre HALTI5 needs texture sources in a single source */ - - if (!lod_bias || v->shader->specs->halti >= 5) - continue; - - assert(coord && lod_bias && tex->coord_components < 4); - - nir_alu_instr *vec = nir_alu_instr_create(shader, nir_op_vec4); - for (unsigned i = 0; i < tex->coord_components; i++) { - vec->src[i].src = nir_src_for_ssa(coord->ssa); - vec->src[i].swizzle[0] = i; - } - for (unsigned i = tex->coord_components; i < 4; i++) - vec->src[i].src = nir_src_for_ssa(lod_bias->ssa); - - vec->dest.write_mask = 0xf; - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, 4, 32, NULL); - - nir_tex_instr_remove_src(tex, lod_bias_idx); - nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(&vec->dest.dest.ssa)); - tex->coord_components = 4; - - nir_instr_insert_before(&tex->instr, &vec->instr); - } - } - } -} +#include "util/compiler.h" +#include "util/half_float.h" static bool etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) @@ -276,314 +90,12 @@ etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data) } static void -etna_lower_alu_impl(nir_function_impl *impl, struct 
etna_compile *c) -{ - nir_shader *shader = impl->function->shader; - - nir_builder b; - nir_builder_init(&b, impl); - - /* in a seperate loop so we can apply the multiple-uniform logic to the new fmul */ - nir_foreach_block(block, impl) { - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_alu) - continue; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - /* multiply sin/cos src by constant - * TODO: do this earlier (but it breaks const_prop opt) - */ - if (alu->op == nir_op_fsin || alu->op == nir_op_fcos) { - b.cursor = nir_before_instr(instr); - - nir_ssa_def *imm = c->specs->has_new_transcendentals ? - nir_imm_float(&b, 1.0 / M_PI) : - nir_imm_float(&b, 2.0 / M_PI); - - nir_instr_rewrite_src(instr, &alu->src[0].src, - nir_src_for_ssa(nir_fmul(&b, alu->src[0].src.ssa, imm))); - } - - /* change transcendental ops to vec2 and insert vec1 mul for the result - * TODO: do this earlier (but it breaks with optimizations) - */ - if (c->specs->has_new_transcendentals && ( - alu->op == nir_op_fdiv || alu->op == nir_op_flog2 || - alu->op == nir_op_fsin || alu->op == nir_op_fcos)) { - nir_ssa_def *ssa = &alu->dest.dest.ssa; - - assert(ssa->num_components == 1); - - nir_alu_instr *mul = nir_alu_instr_create(shader, nir_op_fmul); - mul->src[0].src = mul->src[1].src = nir_src_for_ssa(ssa); - mul->src[1].swizzle[0] = 1; - - mul->dest.write_mask = 1; - nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL); - - ssa->num_components = 2; - - mul->dest.saturate = alu->dest.saturate; - alu->dest.saturate = 0; - - nir_instr_insert_after(instr, &mul->instr); - - nir_ssa_def_rewrite_uses_after(ssa, nir_src_for_ssa(&mul->dest.dest.ssa), &mul->instr); - } - } - } -} - -static void etna_lower_alu(nir_shader *shader, struct etna_compile *c) -{ - nir_foreach_function(function, shader) { - if (function->impl) - etna_lower_alu_impl(function->impl, c); - } -} - -static void -emit_inst(struct etna_compile *c, struct etna_inst *inst) -{ - c->code[c->inst_ptr++] = 
*inst; -} - -/* to map nir srcs should to etna_inst srcs */ -enum { - SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4), - SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4), - SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4), - SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4), - SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4), - SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4), - SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4), -}; - -/* info to translate a nir op to etna_inst */ -struct etna_op_info { - uint8_t opcode; /* INST_OPCODE_ */ - uint8_t src; /* SRC_ enum */ - uint8_t cond; /* INST_CONDITION_ */ - uint8_t type; /* INST_TYPE_ */ -}; - -static const struct etna_op_info etna_ops[] = { - [0 ... nir_num_opcodes - 1] = {0xff}, -#undef TRUE -#undef FALSE -#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \ - INST_OPCODE_##op, \ - SRC_##src, \ - INST_CONDITION_##cond, \ - INST_TYPE_##type \ -} -#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32) -#define IOPC(nir, op, src, cond) OPCT(nir, op, src, cond, S32) -#define UOPC(nir, op, src, cond) OPCT(nir, op, src, cond, U32) -#define OP(nir, op, src) OPC(nir, op, src, TRUE) -#define IOP(nir, op, src) IOPC(nir, op, src, TRUE) -#define UOP(nir, op, src) UOPC(nir, op, src, TRUE) - OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0), - OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2), - OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X), - OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT), - OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0), - OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0), - OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0), - OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0), - OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT), - OPC(fcsel, SELECT, 0_1_2, NZ), - OP(fdiv, DIV, 0_1_X), - OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0), - - /* type 
convert */ - IOP(i2f32, I2F, 0_X_X), - UOP(u2f32, I2F, 0_X_X), - IOP(f2i32, F2I, 0_X_X), - UOP(f2u32, F2I, 0_X_X), - UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */ - UOP(b2i32, AND, 0_X_X), /* AND with 1 */ - OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */ - UOPC(i2b32, CMP, 0_X_X, NE), /* != 0 */ - - /* arithmetic */ - IOP(iadd, ADD, 0_X_1), - IOP(imul, IMULLO0, 0_1_X), - /* IOP(imad, IMADLO0, 0_1_2), */ - IOP(ineg, ADD, X_X_0), /* ADD 0, -x */ - IOP(iabs, IABS, X_X_0), - IOP(isign, SIGN, X_X_0), - IOPC(imin, SELECT, 0_1_0, GT), - IOPC(imax, SELECT, 0_1_0, LT), - UOPC(umin, SELECT, 0_1_0, GT), - UOPC(umax, SELECT, 0_1_0, LT), - - /* select */ - UOPC(b32csel, SELECT, 0_1_2, NZ), - - /* compare with int result */ - OPC(feq32, CMP, 0_1_X, EQ), - OPC(fne32, CMP, 0_1_X, NE), - OPC(fge32, CMP, 0_1_X, GE), - OPC(flt32, CMP, 0_1_X, LT), - IOPC(ieq32, CMP, 0_1_X, EQ), - IOPC(ine32, CMP, 0_1_X, NE), - IOPC(ige32, CMP, 0_1_X, GE), - IOPC(ilt32, CMP, 0_1_X, LT), - UOPC(uge32, CMP, 0_1_X, GE), - UOPC(ult32, CMP, 0_1_X, LT), - - /* bit ops */ - IOP(ior, OR, 0_X_1), - IOP(iand, AND, 0_X_1), - IOP(ixor, XOR, 0_X_1), - IOP(inot, NOT, X_X_0), - IOP(ishl, LSHIFT, 0_X_1), - IOP(ishr, RSHIFT, 0_X_1), - UOP(ushr, RSHIFT, 0_X_1), -}; - -static void etna_emit_block_start(struct etna_compile *c, unsigned block) { c->block_ptr[block] = c->inst_ptr; } static void -etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst, - struct etna_inst_src src[3], bool saturate) -{ - struct etna_op_info ei = etna_ops[op]; - unsigned swiz_scalar = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1); - - assert(ei.opcode != 0xff); - - struct etna_inst inst = { - .opcode = ei.opcode, - .type = ei.type, - .cond = ei.cond, - .dst = dst, - .sat = saturate, - }; - - switch (op) { - case nir_op_fdiv: - case nir_op_flog2: - case nir_op_fsin: - case nir_op_fcos: - if (c->specs->has_new_transcendentals) - inst.tex.amode = 1; - /* fall through */ - case nir_op_frsq: - case nir_op_frcp: - case nir_op_fexp2: - 
case nir_op_fsqrt: - case nir_op_imul: - /* scalar instructions we want src to be in x component */ - src[0].swiz = inst_swiz_compose(src[0].swiz, swiz_scalar); - src[1].swiz = inst_swiz_compose(src[1].swiz, swiz_scalar); - break; - /* deal with instructions which don't have 1:1 mapping */ - case nir_op_b2f32: - inst.src[2] = etna_immediate_float(1.0f); - break; - case nir_op_b2i32: - inst.src[2] = etna_immediate_int(1); - break; - case nir_op_f2b32: - inst.src[1] = etna_immediate_float(0.0f); - break; - case nir_op_i2b32: - inst.src[1] = etna_immediate_int(0); - break; - case nir_op_ineg: - inst.src[0] = etna_immediate_int(0); - src[0].neg = 1; - break; - default: - break; - } - - /* set the "true" value for CMP instructions */ - if (inst.opcode == INST_OPCODE_CMP) - inst.src[2] = etna_immediate_int(-1); - - for (unsigned j = 0; j < 3; j++) { - unsigned i = ((ei.src >> j*2) & 3); - if (i < 3) - inst.src[j] = src[i]; - } - - emit_inst(c, &inst); -} - -static void -etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz, - struct etna_inst_dst dst, struct etna_inst_src coord, - struct etna_inst_src lod_bias, struct etna_inst_src compare) -{ - struct etna_inst inst = { - .dst = dst, - .tex.id = texid + (is_fs(c) ? 
0 : c->specs->vertex_sampler_offset), - .tex.swiz = dst_swiz, - .src[0] = coord, - }; - - if (lod_bias.use) - inst.src[1] = lod_bias; - - if (compare.use) - inst.src[2] = compare; - - switch (op) { - case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break; - case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break; - case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break; - default: - assert(0); - } - - emit_inst(c, &inst); -} - -static void -etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition) -{ - if (!condition.use) { - emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block }); - return; - } - - struct etna_inst inst = { - .opcode = INST_OPCODE_BRANCH, - .cond = INST_CONDITION_NOT, - .type = INST_TYPE_U32, - .src[0] = condition, - .imm = block, - }; - inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); - emit_inst(c, &inst); -} - -static void -etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition) -{ - if (!condition.use) { - emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL }); - return; - } - - struct etna_inst inst = { - .opcode = INST_OPCODE_TEXKILL, - .cond = INST_CONDITION_NZ, - .type = (c->specs->halti < 2) ? INST_TYPE_F32 : INST_TYPE_U32, - .src[0] = condition, - }; - inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); - emit_inst(c, &inst); -} - -static void etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src src) { struct etna_shader_io_file *sf = &c->variant->outfile; @@ -624,7 +136,6 @@ etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ this_progress; \ }) -#define OPT_V(nir, pass, ...) 
NIR_PASS_V(nir, pass, ##__VA_ARGS__) static void etna_optimize_loop(nir_shader *s) @@ -633,8 +144,9 @@ etna_optimize_loop(nir_shader *s) do { progress = false; - OPT_V(s, nir_lower_vars_to_ssa); + NIR_PASS_V(s, nir_lower_vars_to_ssa); progress |= OPT(s, nir_opt_copy_prop_vars); + progress |= OPT(s, nir_opt_shrink_vectors, true); progress |= OPT(s, nir_copy_prop); progress |= OPT(s, nir_opt_dce); progress |= OPT(s, nir_opt_cse); @@ -671,19 +183,869 @@ copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, { struct etna_shader_uniform_info *uinfo = &sobj->uniforms; - uinfo->imm_count = count * 4; - uinfo->imm_data = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_data)); - uinfo->imm_contents = MALLOC(uinfo->imm_count * sizeof(*uinfo->imm_contents)); + uinfo->count = count * 4; + uinfo->data = MALLOC(uinfo->count * sizeof(*uinfo->data)); + uinfo->contents = MALLOC(uinfo->count * sizeof(*uinfo->contents)); - for (unsigned i = 0; i < uinfo->imm_count; i++) { - uinfo->imm_data[i] = consts[i]; - uinfo->imm_contents[i] = consts[i] >> 32; + for (unsigned i = 0; i < uinfo->count; i++) { + uinfo->data[i] = consts[i]; + uinfo->contents[i] = consts[i] >> 32; } etna_set_shader_uniforms_dirty_flags(sobj); } -#include "etnaviv_compiler_nir_emit.h" +#define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3]) +#define SRC_DISABLE ((hw_src){}) +#define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_UNIFORM_0, .reg=idx, .swiz=s}) +#define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = INST_RGROUP_TEMP, .reg=idx, .swiz=s}) + +typedef struct etna_inst_dst hw_dst; +typedef struct etna_inst_src hw_src; + +static inline hw_src +src_swizzle(hw_src src, unsigned swizzle) +{ + if (src.rgroup != INST_RGROUP_IMMEDIATE) + src.swiz = inst_swiz_compose(src.swiz, swizzle); + + return src; +} + +/* constants are represented as 64-bit ints + * 32-bit for the value and 32-bit for the type (imm, uniform, etc) + */ + +#define 
CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)} +#define CONST(x) CONST_VAL(ETNA_UNIFORM_CONSTANT, x) +#define UNIFORM(x) CONST_VAL(ETNA_UNIFORM_UNIFORM, x) +#define TEXSCALE(x, i) CONST_VAL(ETNA_UNIFORM_TEXRECT_SCALE_X + (i), x) + +static int +const_add(uint64_t *c, uint64_t value) +{ + for (unsigned i = 0; i < 4; i++) { + if (c[i] == value || !c[i]) { + c[i] = value; + return i; + } + } + return -1; +} + +static hw_src +const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components) +{ + /* use inline immediates if possible */ + if (c->specs->halti >= 2 && num_components == 1 && + value[0].u64 >> 32 == ETNA_UNIFORM_CONSTANT) { + uint32_t bits = value[0].u32; + + /* "float" - shifted by 12 */ + if ((bits & 0xfff) == 0) + return etna_immediate_src(0, bits >> 12); + + /* "unsigned" - raw 20 bit value */ + if (bits < (1 << 20)) + return etna_immediate_src(2, bits); + + /* "signed" - sign extended 20-bit (sign included) value */ + if (bits >= 0xfff80000) + return etna_immediate_src(1, bits); + } + + unsigned i; + int swiz = -1; + for (i = 0; swiz < 0; i++) { + uint64_t *a = &c->consts[i*4]; + uint64_t save[4]; + memcpy(save, a, sizeof(save)); + swiz = 0; + for (unsigned j = 0; j < num_components; j++) { + int c = const_add(a, value[j].u64); + if (c < 0) { + memcpy(a, save, sizeof(save)); + swiz = -1; + break; + } + swiz |= c << j * 2; + } + } + + assert(i <= ETNA_MAX_IMM / 4); + c->const_count = MAX2(c->const_count, i); + + return SRC_CONST(i - 1, swiz); +} + +/* how to swizzle when used as a src */ +static const uint8_t +reg_swiz[NUM_REG_TYPES] = { + [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y), + [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z), + [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, 
X, Z), + [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z), + [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z), + [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W), + [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W), + [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W), + [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X), + [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W), + [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W), + [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W), + [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X), + [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X), + [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X), +}; + +/* how to swizzle when used as a dest */ +static const uint8_t +reg_dst_swiz[NUM_REG_TYPES] = { + [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X), + [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X), + [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY, + [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X), + [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y), + [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z), + [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y), + [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y), + [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y), + [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z), + [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z), + [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z), +}; + +/* nir_src to allocated register */ +static hw_src +ra_src(struct etna_compile *c, nir_src *src) +{ + 
unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]); + return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]); +} + +static hw_src +get_src(struct etna_compile *c, nir_src *src) +{ + if (!src->is_ssa) + return ra_src(c, src); + + nir_instr *instr = src->ssa->parent_instr; + + if (instr->pass_flags & BYPASS_SRC) { + assert(instr->type == nir_instr_type_alu); + nir_alu_instr *alu = nir_instr_as_alu(instr); + assert(alu->op == nir_op_mov); + return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0])); + } + + switch (instr->type) { + case nir_instr_type_load_const: + return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components); + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ubo: + return ra_src(c, src); + case nir_intrinsic_load_front_face: + return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL }; + case nir_intrinsic_load_frag_coord: + return SRC_REG(0, INST_SWIZ_IDENTITY); + case nir_intrinsic_load_texture_rect_scaling: { + int sampler = nir_src_as_int(intr->src[0]); + nir_const_value values[] = { + TEXSCALE(sampler, 0), + TEXSCALE(sampler, 1), + }; + + return src_swizzle(const_src(c, values, 2), SWIZZLE(X,Y,X,X)); + } + default: + compile_error(c, "Unhandled NIR intrinsic type: %s\n", + nir_intrinsic_infos[intr->intrinsic].name); + break; + } + } break; + case nir_instr_type_alu: + case nir_instr_type_tex: + return ra_src(c, src); + case nir_instr_type_ssa_undef: { + /* return zero to deal with broken Blur demo */ + nir_const_value value = CONST(0); + return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X)); + } + default: + compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type); + break; + } + + return SRC_DISABLE; +} + +static bool 
+vec_dest_has_swizzle(nir_alu_instr *vec, nir_ssa_def *ssa) +{ + for (unsigned i = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i)) || vec->src[i].src.ssa != ssa) + continue; + + if (vec->src[i].swizzle[0] != i) + return true; + } + + /* don't deal with possible bypassed vec/mov chain */ + nir_foreach_use(use_src, ssa) { + nir_instr *instr = use_src->parent_instr; + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + return true; + default: + break; + } + } + return false; +} + +/* get allocated dest register for nir_dest + * *p_swiz tells how the components need to be placed into register + */ +static hw_dst +ra_dest(struct etna_compile *c, nir_dest *dest, unsigned *p_swiz) +{ + unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf; + dest = real_dest(dest, &swiz, &mask); + + unsigned r = ra_get_node_reg(c->g, c->live_map[dest_index(c->impl, dest)]); + unsigned t = reg_get_type(r); + + *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]); + + return (hw_dst) { + .use = 1, + .reg = reg_get_base(c, r), + .write_mask = inst_write_mask_compose(mask, reg_writemask[t]), + }; +} + +static void +emit_alu(struct etna_compile *c, nir_alu_instr * alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + + /* marked as dead instruction (vecN and other bypassed instr) */ + if (alu->instr.pass_flags) + return; + + assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4)); + + unsigned dst_swiz; + hw_dst dst = ra_dest(c, &alu->dest.dest, &dst_swiz); + + /* compose alu write_mask with RA write mask */ + if (!alu->dest.dest.is_ssa) + dst.write_mask = inst_write_mask_compose(alu->dest.write_mask, dst.write_mask); + + switch (alu->op) { + case nir_op_fdot2: + case nir_op_fdot3: + case nir_op_fdot4: + /* not per-component - don't compose dst_swiz */ + dst_swiz = INST_SWIZ_IDENTITY; + break; + default: + break; + } + + hw_src 
srcs[3]; + + for (int i = 0; i < info->num_inputs; i++) { + nir_alu_src *asrc = &alu->src[i]; + hw_src src; + + src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc)); + src = src_swizzle(src, dst_swiz); + + if (src.rgroup != INST_RGROUP_IMMEDIATE) { + src.neg = asrc->negate || (alu->op == nir_op_fneg); + src.abs = asrc->abs || (alu->op == nir_op_fabs); + } else { + assert(!asrc->negate && alu->op != nir_op_fneg); + assert(!asrc->abs && alu->op != nir_op_fabs); + } + + srcs[i] = src; + } + + etna_emit_alu(c, alu->op, dst, srcs, alu->dest.saturate || (alu->op == nir_op_fsat)); +} + +static void +emit_tex(struct etna_compile *c, nir_tex_instr * tex) +{ + unsigned dst_swiz; + hw_dst dst = ra_dest(c, &tex->dest, &dst_swiz); + nir_src *coord = NULL, *lod_bias = NULL, *compare = NULL; + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + coord = &tex->src[i].src; + break; + case nir_tex_src_bias: + case nir_tex_src_lod: + assert(!lod_bias); + lod_bias = &tex->src[i].src; + break; + case nir_tex_src_comparator: + compare = &tex->src[i].src; + break; + default: + compile_error(c, "Unhandled NIR tex src type: %d\n", + tex->src[i].src_type); + break; + } + } + + etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord), + lod_bias ? get_src(c, lod_bias) : SRC_DISABLE, + compare ? 
get_src(c, compare) : SRC_DISABLE); +} + +static void +emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_store_deref: + etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1])); + break; + case nir_intrinsic_discard_if: + etna_emit_discard(c, get_src(c, &intr->src[0])); + break; + case nir_intrinsic_discard: + etna_emit_discard(c, SRC_DISABLE); + break; + case nir_intrinsic_load_uniform: { + unsigned dst_swiz; + struct etna_inst_dst dst = ra_dest(c, &intr->dest, &dst_swiz); + + /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */ + emit_inst(c, &(struct etna_inst) { + .opcode = INST_OPCODE_MOVAR, + .dst.write_mask = 0x1, + .src[2] = get_src(c, &intr->src[0]), + }); + emit_inst(c, &(struct etna_inst) { + .opcode = INST_OPCODE_MOV, + .dst = dst, + .src[2] = { + .use = 1, + .rgroup = INST_RGROUP_UNIFORM_0, + .reg = nir_intrinsic_base(intr), + .swiz = dst_swiz, + .amode = INST_AMODE_ADD_A_X, + }, + }); + } break; + case nir_intrinsic_load_ubo: { + /* TODO: if offset is of the form (x + C) then add C to the base instead */ + unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32; + unsigned dst_swiz; + emit_inst(c, &(struct etna_inst) { + .opcode = INST_OPCODE_LOAD, + .type = INST_TYPE_U32, + .dst = ra_dest(c, &intr->dest, &dst_swiz), + .src[0] = get_src(c, &intr->src[1]), + .src[1] = const_src(c, &CONST_VAL(ETNA_UNIFORM_UBO0_ADDR + idx, 0), 1), + }); + } break; + case nir_intrinsic_load_front_face: + case nir_intrinsic_load_frag_coord: + assert(intr->dest.is_ssa); /* TODO - lower phis could cause this */ + break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_texture_rect_scaling: + break; + default: + compile_error(c, "Unhandled NIR intrinsic type: %s\n", + nir_intrinsic_infos[intr->intrinsic].name); + } +} + +static void +emit_instr(struct etna_compile *c, nir_instr * instr) +{ + switch 
(instr->type) { + case nir_instr_type_alu: + emit_alu(c, nir_instr_as_alu(instr)); + break; + case nir_instr_type_tex: + emit_tex(c, nir_instr_as_tex(instr)); + break; + case nir_instr_type_intrinsic: + emit_intrinsic(c, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_jump: + assert(nir_instr_is_last(instr)); + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + case nir_instr_type_deref: + break; + default: + compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type); + break; + } +} + +static void +emit_block(struct etna_compile *c, nir_block * block) +{ + etna_emit_block_start(c, block->index); + + nir_foreach_instr(instr, block) + emit_instr(c, instr); + + /* succs->index < block->index is for the loop case */ + nir_block *succs = block->successors[0]; + if (nir_block_ends_in_jump(block) || succs->index < block->index) + etna_emit_jump(c, succs->index, SRC_DISABLE); +} + +static void +emit_cf_list(struct etna_compile *c, struct exec_list *list); + +static void +emit_if(struct etna_compile *c, nir_if * nif) +{ + etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition)); + emit_cf_list(c, &nif->then_list); + + /* jump at end of then_list to skip else_list + * not needed if then_list already ends with a jump or else_list is empty + */ + if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) && + !nir_cf_list_is_empty_block(&nif->else_list)) + etna_emit_jump(c, nir_if_last_else_block(nif)->successors[0]->index, SRC_DISABLE); + + emit_cf_list(c, &nif->else_list); +} + +static void +emit_cf_list(struct etna_compile *c, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_block: + emit_block(c, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + emit_if(c, nir_cf_node_as_if(node)); + break; + case nir_cf_node_loop: + emit_cf_list(c, &nir_cf_node_as_loop(node)->body); + break; + default: + compile_error(c, "Unknown NIR 
node type\n"); + break; + } + } +} + +/* based on nir_lower_vec_to_movs */ +static unsigned +insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader) +{ + assert(start_idx < nir_op_infos[vec->op].num_inputs); + unsigned write_mask = (1u << start_idx); + + nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov); + nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov); + + mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0]; + mov->src[0].negate = vec->src[start_idx].negate; + mov->src[0].abs = vec->src[start_idx].abs; + + unsigned num_components = 1; + + for (unsigned i = start_idx + 1; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) && + vec->src[i].negate == vec->src[start_idx].negate && + vec->src[i].abs == vec->src[start_idx].abs) { + write_mask |= (1 << i); + mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0]; + num_components++; + } + } + + mov->dest.write_mask = (1 << num_components) - 1; + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, 32, NULL); + + /* replace vec srcs with inserted mov */ + for (unsigned i = 0, j = 0; i < 4; i++) { + if (!(write_mask & (1 << i))) + continue; + + nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, nir_src_for_ssa(&mov->dest.dest.ssa)); + vec->src[i].swizzle[0] = j++; + } + + nir_instr_insert_before(&vec->instr, &mov->instr); + + return write_mask; +} + +/* + * for vecN instructions: + * -merge constant sources into a single src + * -insert movs (nir_lower_vec_to_movs equivalent) + * for non-vecN instructions: + * -try to merge constants as single constant + * -insert movs for multiple constants (pre-HALTI5) + */ +static void +lower_alu(struct etna_compile *c, nir_alu_instr *alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + + nir_builder b; + nir_builder_init(&b, c->impl); + b.cursor = nir_before_instr(&alu->instr); + + switch (alu->op) { + case nir_op_vec2: + case 
nir_op_vec3: + case nir_op_vec4: + break; + default: + /* pre-GC7000L can only have 1 uniform src per instruction */ + if (c->specs->halti >= 5) + return; + + nir_const_value value[4] = {}; + uint8_t swizzle[4][4] = {}; + unsigned swiz_max = 0, num_const = 0; + + for (unsigned i = 0; i < info->num_inputs; i++) { + nir_const_value *cv = nir_src_as_const_value(alu->src[i].src); + if (!cv) + continue; + + unsigned num_components = info->input_sizes[i] ?: alu->dest.dest.ssa.num_components; + for (unsigned j = 0; j < num_components; j++) { + int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64); + swizzle[i][j] = idx; + swiz_max = MAX2(swiz_max, (unsigned) idx); + } + num_const++; + } + + /* nothing to do */ + if (num_const <= 1) + return; + + /* resolve with single combined const src */ + if (swiz_max < 4) { + nir_ssa_def *def = nir_build_imm(&b, swiz_max + 1, 32, value); + + for (unsigned i = 0; i < info->num_inputs; i++) { + nir_const_value *cv = nir_src_as_const_value(alu->src[i].src); + if (!cv) + continue; + + nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def)); + + for (unsigned j = 0; j < 4; j++) + alu->src[i].swizzle[j] = swizzle[i][j]; + } + return; + } + + /* resolve with movs */ + num_const = 0; + for (unsigned i = 0; i < info->num_inputs; i++) { + nir_const_value *cv = nir_src_as_const_value(alu->src[i].src); + if (!cv) + continue; + + num_const++; + if (num_const == 1) + continue; + + nir_ssa_def *mov = nir_mov(&b, alu->src[i].src.ssa); + nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(mov)); + } + return; + } + + nir_const_value value[4]; + unsigned num_components = 0; + + for (unsigned i = 0; i < info->num_inputs; i++) { + nir_const_value *cv = nir_src_as_const_value(alu->src[i].src); + if (cv) + value[num_components++] = cv[alu->src[i].swizzle[0]]; + } + + /* if there is more than one constant source to the vecN, combine them + * into a single load_const (removing the vecN completely if all 
components + * are constant) + */ + if (num_components > 1) { + nir_ssa_def *def = nir_build_imm(&b, num_components, 32, value); + + if (num_components == info->num_inputs) { + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, def); + nir_instr_remove(&alu->instr); + return; + } + + for (unsigned i = 0, j = 0; i < info->num_inputs; i++) { + nir_const_value *cv = nir_src_as_const_value(alu->src[i].src); + if (!cv) + continue; + + nir_instr_rewrite_src(&alu->instr, &alu->src[i].src, nir_src_for_ssa(def)); + alu->src[i].swizzle[0] = j++; + } + } + + unsigned finished_write_mask = 0; + for (unsigned i = 0; i < 4; i++) { + if (!(alu->dest.write_mask & (1 << i))) + continue; + + nir_ssa_def *ssa = alu->src[i].src.ssa; + + /* check that vecN instruction is only user of this */ + bool need_mov = list_length(&ssa->if_uses) != 0; + nir_foreach_use(use_src, ssa) { + if (use_src->parent_instr != &alu->instr) + need_mov = true; + } + + nir_instr *instr = ssa->parent_instr; + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_tex: + break; + case nir_instr_type_intrinsic: + if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) { + need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->dest.ssa); + break; + } + FALLTHROUGH; + default: + need_mov = true; + } + + if (need_mov && !(finished_write_mask & (1 << i))) + finished_write_mask |= insert_vec_mov(alu, i, c->nir); + } +} + +static bool +emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts) +{ + nir_shader *shader = c->nir; + c->impl = nir_shader_get_entrypoint(shader); + + bool have_indirect_uniform = false; + unsigned indirect_max = 0; + + nir_builder b; + nir_builder_init(&b, c->impl); + + /* convert non-dynamic uniform loads to constants, etc */ + nir_foreach_block(block, c->impl) { + nir_foreach_instr_safe(instr, block) { + switch(instr->type) { + case nir_instr_type_alu: + /* deals with vecN and const srcs */ + lower_alu(c, 
nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: { + nir_load_const_instr *load_const = nir_instr_as_load_const(instr); + for (unsigned i = 0; i < load_const->def.num_components; i++) + load_const->value[i] = CONST(load_const->value[i].u32); + } break; + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + /* TODO: load_ubo can also become a constant in some cases + * (at the moment it can end up emitting a LOAD with two + * uniform sources, which could be a problem on HALTI2) + */ + if (intr->intrinsic != nir_intrinsic_load_uniform) + break; + nir_const_value *off = nir_src_as_const_value(intr->src[0]); + if (!off || off[0].u64 >> 32 != ETNA_UNIFORM_CONSTANT) { + have_indirect_uniform = true; + indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr); + break; + } + + unsigned base = nir_intrinsic_base(intr); + /* pre halti2 uniform offset will be float */ + if (c->specs->halti < 2) + base += (unsigned) off[0].f32; + else + base += off[0].u32; + nir_const_value value[4]; + + for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) + value[i] = UNIFORM(base * 4 + i); + + b.cursor = nir_after_instr(instr); + nir_ssa_def *def = nir_build_imm(&b, intr->dest.ssa.num_components, 32, value); + + nir_ssa_def_rewrite_uses(&intr->dest.ssa, def); + nir_instr_remove(instr); + } break; + default: + break; + } + } + } + + /* TODO: only emit required indirect uniform ranges */ + if (have_indirect_uniform) { + for (unsigned i = 0; i < indirect_max * 4; i++) + c->consts[i] = UNIFORM(i).u64; + c->const_count = indirect_max; + } + + /* add mov for any store output using sysval/const */ + nir_foreach_block(block, c->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_store_deref: { + nir_src *src = &intr->src[1]; + if 
(nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr)) { + b.cursor = nir_before_instr(instr); + nir_instr_rewrite_src(instr, src, nir_src_for_ssa(nir_mov(&b, src->ssa))); + } + } break; + default: + break; + } + } + } + + /* call directly to avoid validation (load_const don't pass validation at this point) */ + nir_convert_from_ssa(shader, true); + nir_opt_dce(shader); + + etna_ra_assign(c, shader); + + emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body); + + *num_temps = etna_ra_finish(c); + *num_consts = c->const_count; + return true; +} + +static bool +etna_compile_check_limits(struct etna_shader_variant *v) +{ + const struct etna_specs *specs = v->shader->specs; + int max_uniforms = (v->stage == MESA_SHADER_VERTEX) + ? specs->max_vs_uniforms + : specs->max_ps_uniforms; + + if (!specs->has_icache && v->needs_icache) { + DBG("Number of instructions (%d) exceeds maximum %d", v->code_size / 4, + specs->max_instructions); + return false; + } + + if (v->num_temps > specs->max_registers) { + DBG("Number of registers (%d) exceeds maximum %d", v->num_temps, + specs->max_registers); + return false; + } + + if (v->uniforms.count / 4 > max_uniforms) { + DBG("Number of uniforms (%d) exceeds maximum %d", + v->uniforms.count / 4, max_uniforms); + return false; + } + + return true; +} + +static void +fill_vs_mystery(struct etna_shader_variant *v) +{ + const struct etna_specs *specs = v->shader->specs; + + v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */ + + /* fill in "mystery meat" load balancing value. This value determines how + * work is scheduled between VS and PS + * in the unified shader architecture. More precisely, it is determined from + * the number of VS outputs, as well as chip-specific + * vertex output buffer size, vertex cache size, and the number of shader + * cores. 
+ * + * XXX this is a conservative estimate, the "optimal" value is only known for + * sure at link time because some + * outputs may be unused and thus unmapped. Then again, in the general use + * case with GLSL the vertex and fragment + * shaders are linked already before submitting to Gallium, thus all outputs + * are used. + * + * note: TGSI compiler counts all outputs (including position and pointsize), here + * v->outfile.num_reg only counts varyings, +1 to compensate for the position output + * TODO: might have a problem that we don't count pointsize when it is used + */ + + int half_out = v->outfile.num_reg / 2 + 1; + assert(half_out); + + uint32_t b = ((20480 / (specs->vertex_output_buffer_size - + 2 * half_out * specs->vertex_cache_size)) + + 9) / + 10; + uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2; + v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) | + VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) | + VIVS_VS_LOAD_BALANCING_C(0x3f) | + VIVS_VS_LOAD_BALANCING_D(0x0f); +} bool etna_compile_shader_nir(struct etna_shader_variant *v) @@ -703,6 +1065,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v) const struct etna_specs *specs = c->specs; v->stage = s->info.stage; + v->uses_discard = s->info.fs.uses_discard; v->num_loops = 0; /* TODO */ v->vs_id_in_reg = -1; v->vs_pos_out_reg = -1; @@ -710,10 +1073,19 @@ etna_compile_shader_nir(struct etna_shader_variant *v) v->ps_color_out_reg = 0; /* 0 for shader that doesn't write fragcolor.. */ v->ps_depth_out_reg = -1; + /* + * Lower glTexCoord, fixes e.g. 
neverball point sprite (exit cylinder stars) + * and gl4es pointsprite.trace apitrace + */ + if (s->info.stage == MESA_SHADER_FRAGMENT && v->key.sprite_coord_enable) { + NIR_PASS_V(s, nir_lower_texcoord_replace, v->key.sprite_coord_enable, + false, v->key.sprite_coord_yinvert); + } + /* setup input linking */ struct etna_shader_io_file *sf = &v->infile; if (s->info.stage == MESA_SHADER_VERTEX) { - nir_foreach_variable(var, &s->inputs) { + nir_foreach_shader_in_variable(var, s) { unsigned idx = var->data.driver_location; sf->reg[idx].reg = idx; sf->reg[idx].slot = var->data.location; @@ -722,7 +1094,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v) } } else { unsigned count = 0; - nir_foreach_variable(var, &s->inputs) { + nir_foreach_shader_in_variable(var, s) { unsigned idx = var->data.driver_location; sf->reg[idx].reg = idx + 1; sf->reg[idx].slot = var->data.location; @@ -733,18 +1105,27 @@ etna_compile_shader_nir(struct etna_shader_variant *v) assert(sf->num_reg == count); } - NIR_PASS_V(s, nir_lower_io, ~nir_var_shader_out, etna_glsl_type_size, + NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_uniform, etna_glsl_type_size, (nir_lower_io_options)0); - OPT_V(s, nir_lower_regs_to_ssa); - OPT_V(s, nir_lower_vars_to_ssa); - OPT_V(s, nir_lower_indirect_derefs, nir_var_all); - OPT_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u }); - OPT_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); + NIR_PASS_V(s, nir_lower_regs_to_ssa); + NIR_PASS_V(s, nir_lower_vars_to_ssa); + NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX); + NIR_PASS_V(s, nir_lower_tex, &(struct nir_lower_tex_options) { .lower_txp = ~0u }); + NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = true, + .allow_fp16 = true, + }; + NIR_PASS_V(s, nir_lower_idiv, &idiv_options); etna_optimize_loop(s); - OPT_V(s, etna_lower_io, v); + /* TODO: 
remove this extra run if nir_opt_peephole_select is able to handle ubo's. */ + if (OPT(s, etna_nir_lower_ubo_to_uniform)) + etna_optimize_loop(s); + + NIR_PASS_V(s, etna_lower_io, v); if (v->shader->specs->vs_need_z_div) NIR_PASS_V(s, nir_lower_clip_halfz); @@ -754,23 +1135,17 @@ etna_compile_shader_nir(struct etna_shader_variant *v) /* use opt_algebraic between int_to_float and boot_to_float because * int_to_float emits ftrunc, and ftrunc lowering generates bool ops */ - OPT_V(s, nir_lower_int_to_float); - OPT_V(s, nir_opt_algebraic); - OPT_V(s, nir_lower_bool_to_float); + NIR_PASS_V(s, nir_lower_int_to_float); + NIR_PASS_V(s, nir_opt_algebraic); + NIR_PASS_V(s, nir_lower_bool_to_float); } else { - OPT_V(s, nir_lower_idiv, nir_lower_idiv_fast); - OPT_V(s, nir_lower_bool_to_int32); + NIR_PASS_V(s, nir_lower_bool_to_int32); } - etna_optimize_loop(s); - - if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) - nir_print_shader(s, stdout); - - while( OPT(s, nir_opt_vectorize) ); - OPT_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); + while( OPT(s, nir_opt_vectorize, NULL, NULL) ); + NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs); - NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp); + NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); NIR_PASS_V(s, nir_opt_algebraic_late); NIR_PASS_V(s, nir_move_vec_src_uses_to_dest); @@ -783,7 +1158,8 @@ etna_compile_shader_nir(struct etna_shader_variant *v) NIR_PASS_V(s, nir_opt_dce); - NIR_PASS_V(s, etna_lower_alu, c); + NIR_PASS_V(s, nir_lower_bool_to_bitsize); + NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals); if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS)) nir_print_shader(s, stdout); @@ -819,109 +1195,14 @@ etna_compile_shader_nir(struct etna_shader_variant *v) if (s->info.stage == MESA_SHADER_FRAGMENT) { v->input_count_unk8 = 31; /* XXX what is this */ assert(v->ps_depth_out_reg <= 0); - ralloc_free(c->nir); - FREE(c); - return true; + } else { + 
fill_vs_mystery(v); } - v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */ - - /* fill in "mystery meat" load balancing value. This value determines how - * work is scheduled between VS and PS - * in the unified shader architecture. More precisely, it is determined from - * the number of VS outputs, as well as chip-specific - * vertex output buffer size, vertex cache size, and the number of shader - * cores. - * - * XXX this is a conservative estimate, the "optimal" value is only known for - * sure at link time because some - * outputs may be unused and thus unmapped. Then again, in the general use - * case with GLSL the vertex and fragment - * shaders are linked already before submitting to Gallium, thus all outputs - * are used. - * - * note: TGSI compiler counts all outputs (including position and pointsize), here - * v->outfile.num_reg only counts varyings, +1 to compensate for the position output - * TODO: might have a problem that we don't count pointsize when it is used - */ - - int half_out = v->outfile.num_reg / 2 + 1; - assert(half_out); - - uint32_t b = ((20480 / (specs->vertex_output_buffer_size - - 2 * half_out * specs->vertex_cache_size)) + - 9) / - 10; - uint32_t a = (b + 256 / (specs->shader_core_count * half_out)) / 2; - v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) | - VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) | - VIVS_VS_LOAD_BALANCING_C(0x3f) | - VIVS_VS_LOAD_BALANCING_D(0x0f); - + bool result = etna_compile_check_limits(v); ralloc_free(c->nir); FREE(c); - return true; -} - -void -etna_destroy_shader_nir(struct etna_shader_variant *shader) -{ - assert(shader); - - FREE(shader->code); - FREE(shader->uniforms.imm_data); - FREE(shader->uniforms.imm_contents); - FREE(shader); -} - -extern const char *tgsi_swizzle_names[]; -void -etna_dump_shader_nir(const struct etna_shader_variant *shader) -{ - if (shader->stage == MESA_SHADER_VERTEX) - printf("VERT\n"); - else - printf("FRAG\n"); - - 
etna_disasm(shader->code, shader->code_size, PRINT_RAW); - - printf("num loops: %i\n", shader->num_loops); - printf("num temps: %i\n", shader->num_temps); - printf("immediates:\n"); - for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) { - printf(" [%i].%s = %f (0x%08x) (%d)\n", - idx / 4, - tgsi_swizzle_names[idx % 4], - *((float *)&shader->uniforms.imm_data[idx]), - shader->uniforms.imm_data[idx], - shader->uniforms.imm_contents[idx]); - } - printf("inputs:\n"); - for (int idx = 0; idx < shader->infile.num_reg; ++idx) { - printf(" [%i] name=%s comps=%i\n", shader->infile.reg[idx].reg, - (shader->stage == MESA_SHADER_VERTEX) ? - gl_vert_attrib_name(shader->infile.reg[idx].slot) : - gl_varying_slot_name(shader->infile.reg[idx].slot), - shader->infile.reg[idx].num_components); - } - printf("outputs:\n"); - for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { - printf(" [%i] name=%s comps=%i\n", shader->outfile.reg[idx].reg, - (shader->stage == MESA_SHADER_VERTEX) ? - gl_varying_slot_name(shader->outfile.reg[idx].slot) : - gl_frag_result_name(shader->outfile.reg[idx].slot), - shader->outfile.reg[idx].num_components); - } - printf("special:\n"); - if (shader->stage == MESA_SHADER_VERTEX) { - printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); - printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); - printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); - } else { - printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg); - printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg); - } - printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8); + return result; } static const struct etna_shader_inout * @@ -973,7 +1254,7 @@ etna_link_shader_nir(struct etna_shader_link_info *info, varying->use[2] = VARYING_COMPONENT_USE_UNUSED; varying->use[3] = VARYING_COMPONENT_USE_UNUSED; - /* point coord is an input to the PS without matching VS output, + /* point/tex coord is an input to the PS without matching VS output, * so it 
gets a varying slot without being assigned a VS register. */ if (fsio->slot == VARYING_SLOT_PNTC) { @@ -981,6 +1262,13 @@ etna_link_shader_nir(struct etna_shader_link_info *info, varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y; info->pcoord_varying_comp_ofs = comp_ofs; + } else if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) { + /* + * Do nothing, TexCoord is lowered to PointCoord above + * and the TexCoord here is just a remnant. This needs + * to be removed with some nir_remove_dead_variables(), + * but that one removes all FS inputs ... why? + */ } else { if (vsio == NULL) { /* not found -- link error */ BUG("Semantic value not found in vertex shader outputs\n"); diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h new file mode 100644 index 000000000..149532b64 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.h @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2020 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jonathan Marek <jonathan@marek.ca> + */ + +#ifndef H_ETNAVIV_COMPILER_NIR +#define H_ETNAVIV_COMPILER_NIR + +#include "compiler/nir/nir.h" +#include "etnaviv_asm.h" +#include "etnaviv_compiler.h" +#include "util/compiler.h" + +struct etna_compile { + nir_shader *nir; + nir_function_impl *impl; +#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT) + const struct etna_specs *specs; + struct etna_shader_variant *variant; + + /* block # to instr index */ + unsigned *block_ptr; + + /* Code generation */ + int inst_ptr; /* current instruction pointer */ + struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE]; + + /* constants */ + uint64_t consts[ETNA_MAX_IMM]; + unsigned const_count; + + /* ra state */ + struct ra_graph *g; + unsigned *live_map; + unsigned num_nodes; + + /* There was an error during compilation */ + bool error; +}; + +#define compile_error(ctx, args...) ({ \ + printf(args); \ + ctx->error = true; \ + assert(0); \ +}) + +enum { + BYPASS_DST = 1, + BYPASS_SRC = 2, +}; + +static inline bool is_sysval(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + return intr->intrinsic == nir_intrinsic_load_front_face || + intr->intrinsic == nir_intrinsic_load_frag_coord; +} + +/* get unique ssa/reg index for nir_src */ +static inline unsigned +src_index(nir_function_impl *impl, nir_src *src) +{ + return src->is_ssa ? src->ssa->index : (src->reg.reg->index + impl->ssa_alloc); +} + +/* get unique ssa/reg index for nir_dest */ +static inline unsigned +dest_index(nir_function_impl *impl, nir_dest *dest) +{ + return dest->is_ssa ? 
dest->ssa.index : (dest->reg.reg->index + impl->ssa_alloc); +} + +static inline void +update_swiz_mask(nir_alu_instr *alu, nir_dest *dest, unsigned *swiz, unsigned *mask) +{ + if (!swiz) + return; + + bool is_vec = dest != NULL; + unsigned swizzle = 0, write_mask = 0; + for (unsigned i = 0; i < 4; i++) { + /* channel not written */ + if (!(alu->dest.write_mask & (1 << i))) + continue; + /* src is different (only check for vecN) */ + if (is_vec && alu->src[i].src.ssa != &dest->ssa) + continue; + + unsigned src_swiz = is_vec ? alu->src[i].swizzle[0] : alu->src[0].swizzle[i]; + swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2; + /* this channel isn't written through this chain */ + if (*mask & (1 << src_swiz)) + write_mask |= 1 << i; + } + *swiz = swizzle; + *mask = write_mask; +} + +static nir_dest * +real_dest(nir_dest *dest, unsigned *swiz, unsigned *mask) +{ + if (!dest || !dest->is_ssa) + return dest; + + bool can_bypass_src = !list_length(&dest->ssa.if_uses); + nir_instr *p_instr = dest->ssa.parent_instr; + + /* if used by a vecN, the "real" destination becomes the vecN destination + * lower_alu guarantees that values used by a vecN are only used by that vecN + * we can apply the same logic to movs in a some cases too + */ + nir_foreach_use(use_src, &dest->ssa) { + nir_instr *instr = use_src->parent_instr; + + /* src bypass check: for now only deal with tex src mov case + * note: for alu don't bypass mov for multiple uniform sources + */ + switch (instr->type) { + case nir_instr_type_tex: + if (p_instr->type == nir_instr_type_alu && + nir_instr_as_alu(p_instr)->op == nir_op_mov) { + break; + } + FALLTHROUGH; + default: + can_bypass_src = false; + break; + } + + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + assert(list_length(&dest->ssa.if_uses) == 0); + nir_foreach_use(use_src, &dest->ssa) + assert(use_src->parent_instr == 
instr); + + update_swiz_mask(alu, dest, swiz, mask); + break; + case nir_op_mov: { + switch (dest->ssa.parent_instr->type) { + case nir_instr_type_alu: + case nir_instr_type_tex: + break; + default: + continue; + } + if (list_length(&dest->ssa.if_uses) || list_length(&dest->ssa.uses) > 1) + continue; + + update_swiz_mask(alu, NULL, swiz, mask); + break; + }; + default: + continue; + } + + assert(!(instr->pass_flags & BYPASS_SRC)); + instr->pass_flags |= BYPASS_DST; + return real_dest(&alu->dest.dest, swiz, mask); + } + + if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) { + p_instr->pass_flags |= BYPASS_SRC; + return NULL; + } + + return dest; +} + +/* if instruction dest needs a register, return nir_dest for it */ +static inline nir_dest * +dest_for_instr(nir_instr *instr) +{ + nir_dest *dest = NULL; + + switch (instr->type) { + case nir_instr_type_alu: + dest = &nir_instr_as_alu(instr)->dest.dest; + break; + case nir_instr_type_tex: + dest = &nir_instr_as_tex(instr)->dest; + break; + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_load_uniform || + intr->intrinsic == nir_intrinsic_load_ubo || + intr->intrinsic == nir_intrinsic_load_input || + intr->intrinsic == nir_intrinsic_load_instance_id || + intr->intrinsic == nir_intrinsic_load_texture_rect_scaling) + dest = &intr->dest; + } break; + case nir_instr_type_deref: + return NULL; + default: + break; + } + return real_dest(dest, NULL, NULL); +} + +struct live_def { + nir_instr *instr; + nir_dest *dest; /* cached dest_for_instr */ + unsigned live_start, live_end; /* live range */ +}; + +unsigned +etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map); + +/* Swizzles and write masks can be used to layer virtual non-interfering + * registers on top of the real VEC4 registers. 
For example, the virtual + * VEC3_XYZ register and the virtual SCALAR_W register that use the same + * physical VEC4 base register do not interfere. + */ +enum reg_class { + REG_CLASS_VIRT_SCALAR, + REG_CLASS_VIRT_VEC2, + REG_CLASS_VIRT_VEC3, + REG_CLASS_VEC4, + /* special vec2 class for fast transcendentals, limited to XY or ZW */ + REG_CLASS_VIRT_VEC2T, + /* special classes for LOAD - contiguous components */ + REG_CLASS_VIRT_VEC2C, + REG_CLASS_VIRT_VEC3C, + NUM_REG_CLASSES, +}; + +enum reg_type { + REG_TYPE_VEC4, + REG_TYPE_VIRT_VEC3_XYZ, + REG_TYPE_VIRT_VEC3_XYW, + REG_TYPE_VIRT_VEC3_XZW, + REG_TYPE_VIRT_VEC3_YZW, + REG_TYPE_VIRT_VEC2_XY, + REG_TYPE_VIRT_VEC2_XZ, + REG_TYPE_VIRT_VEC2_XW, + REG_TYPE_VIRT_VEC2_YZ, + REG_TYPE_VIRT_VEC2_YW, + REG_TYPE_VIRT_VEC2_ZW, + REG_TYPE_VIRT_SCALAR_X, + REG_TYPE_VIRT_SCALAR_Y, + REG_TYPE_VIRT_SCALAR_Z, + REG_TYPE_VIRT_SCALAR_W, + REG_TYPE_VIRT_VEC2T_XY, + REG_TYPE_VIRT_VEC2T_ZW, + REG_TYPE_VIRT_VEC2C_XY, + REG_TYPE_VIRT_VEC2C_YZ, + REG_TYPE_VIRT_VEC2C_ZW, + REG_TYPE_VIRT_VEC3C_XYZ, + REG_TYPE_VIRT_VEC3C_YZW, + NUM_REG_TYPES, +}; + +/* writemask when used as dest */ +static const uint8_t +reg_writemask[NUM_REG_TYPES] = { + [REG_TYPE_VEC4] = 0xf, + [REG_TYPE_VIRT_SCALAR_X] = 0x1, + [REG_TYPE_VIRT_SCALAR_Y] = 0x2, + [REG_TYPE_VIRT_VEC2_XY] = 0x3, + [REG_TYPE_VIRT_VEC2T_XY] = 0x3, + [REG_TYPE_VIRT_VEC2C_XY] = 0x3, + [REG_TYPE_VIRT_SCALAR_Z] = 0x4, + [REG_TYPE_VIRT_VEC2_XZ] = 0x5, + [REG_TYPE_VIRT_VEC2_YZ] = 0x6, + [REG_TYPE_VIRT_VEC2C_YZ] = 0x6, + [REG_TYPE_VIRT_VEC3_XYZ] = 0x7, + [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7, + [REG_TYPE_VIRT_SCALAR_W] = 0x8, + [REG_TYPE_VIRT_VEC2_XW] = 0x9, + [REG_TYPE_VIRT_VEC2_YW] = 0xa, + [REG_TYPE_VIRT_VEC3_XYW] = 0xb, + [REG_TYPE_VIRT_VEC2_ZW] = 0xc, + [REG_TYPE_VIRT_VEC2T_ZW] = 0xc, + [REG_TYPE_VIRT_VEC2C_ZW] = 0xc, + [REG_TYPE_VIRT_VEC3_XZW] = 0xd, + [REG_TYPE_VIRT_VEC3_YZW] = 0xe, + [REG_TYPE_VIRT_VEC3C_YZW] = 0xe, +}; + +static inline int reg_get_type(int virt_reg) +{ + return virt_reg % 
NUM_REG_TYPES; +} + +static inline int reg_get_base(struct etna_compile *c, int virt_reg) +{ + /* offset by 1 to avoid reserved position register */ + if (c->nir->info.stage == MESA_SHADER_FRAGMENT) + return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS; + return virt_reg / NUM_REG_TYPES; +} + +struct ra_regs * +etna_ra_setup(void *mem_ctx); + +void +etna_ra_assign(struct etna_compile *c, nir_shader *shader); + +unsigned +etna_ra_finish(struct etna_compile *c); + +static inline void +emit_inst(struct etna_compile *c, struct etna_inst *inst) +{ + c->code[c->inst_ptr++] = *inst; +} + +void +etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst, + struct etna_inst_src src[3], bool saturate); + +void +etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz, + struct etna_inst_dst dst, struct etna_inst_src coord, + struct etna_inst_src lod_bias, struct etna_inst_src compare); + +void +etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition); + +void +etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition); + +#endif diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c new file mode 100644 index 000000000..dbbc9d5a0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2019 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jonathan Marek <jonathan@marek.ca> + */ + +#include "etnaviv_compiler_nir.h" +#include "util/compiler.h" + +/* maps nir srcs to etna_inst srcs: 2 bits per etna src slot select the nir src index, 3 (X) leaves the slot unused */ +enum { + SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4), + SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4), + SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4), + SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4), + SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4), + SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4), + SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4), +}; + +/* info to translate a nir op to etna_inst */ +struct etna_op_info { + uint8_t opcode; /* INST_OPCODE_ */ + uint8_t src; /* SRC_ enum */ + uint8_t cond; /* INST_CONDITION_ */ + uint8_t type; /* INST_TYPE_ */ +}; + +static const struct etna_op_info etna_ops[] = { + [0 ... 
nir_num_opcodes - 1] = {0xff}, +#undef TRUE +#undef FALSE +#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \ + INST_OPCODE_##op, \ + SRC_##src, \ + INST_CONDITION_##cond, \ + INST_TYPE_##type \ +} +#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32) +#define IOPC(nir, op, src, cond) OPCT(nir, op, src, cond, S32) +#define UOPC(nir, op, src, cond) OPCT(nir, op, src, cond, U32) +#define OP(nir, op, src) OPC(nir, op, src, TRUE) +#define IOP(nir, op, src) IOPC(nir, op, src, TRUE) +#define UOP(nir, op, src) UOPC(nir, op, src, TRUE) + OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0), + OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2), + OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X), + OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT), + OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0), + OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0), + OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0), + OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0), + OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT), + OPC(fcsel, SELECT, 0_1_2, NZ), + OP(fdiv, DIV, 0_1_X), + OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0), + + /* type convert */ + IOP(i2f32, I2F, 0_X_X), + UOP(u2f32, I2F, 0_X_X), + IOP(f2i32, F2I, 0_X_X), + UOP(f2u32, F2I, 0_X_X), + UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */ + UOP(b2i32, AND, 0_X_X), /* AND with 1 */ + OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */ + UOPC(i2b32, CMP, 0_X_X, NE), /* != 0 */ + + /* arithmetic */ + IOP(iadd, ADD, 0_X_1), + IOP(imul, IMULLO0, 0_1_X), + /* IOP(imad, IMADLO0, 0_1_2), */ + IOP(ineg, ADD, X_X_0), /* ADD 0, -x */ + IOP(iabs, IABS, X_X_0), + IOP(isign, SIGN, X_X_0), + IOPC(imin, SELECT, 0_1_0, GT), + IOPC(imax, SELECT, 0_1_0, LT), + UOPC(umin, SELECT, 0_1_0, GT), + UOPC(umax, SELECT, 0_1_0, LT), + + /* select */ + UOPC(b32csel, SELECT, 
0_1_2, NZ), + + /* compare with int result */ + OPC(feq32, CMP, 0_1_X, EQ), + OPC(fneu32, CMP, 0_1_X, NE), + OPC(fge32, CMP, 0_1_X, GE), + OPC(flt32, CMP, 0_1_X, LT), + IOPC(ieq32, CMP, 0_1_X, EQ), + IOPC(ine32, CMP, 0_1_X, NE), + IOPC(ige32, CMP, 0_1_X, GE), + IOPC(ilt32, CMP, 0_1_X, LT), + UOPC(uge32, CMP, 0_1_X, GE), + UOPC(ult32, CMP, 0_1_X, LT), + + /* bit ops */ + IOP(ior, OR, 0_X_1), + IOP(iand, AND, 0_X_1), + IOP(ixor, XOR, 0_X_1), + IOP(inot, NOT, X_X_0), + IOP(ishl, LSHIFT, 0_X_1), + IOP(ishr, RSHIFT, 0_X_1), + UOP(ushr, RSHIFT, 0_X_1), +}; + +void +etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst, + struct etna_inst_src src[3], bool saturate) +{ + struct etna_op_info ei = etna_ops[op]; + unsigned swiz_scalar = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1); + + if (ei.opcode == 0xff) + compile_error(c, "Unhandled ALU op: %s\n", nir_op_infos[op].name); + + struct etna_inst inst = { + .opcode = ei.opcode, + .type = ei.type, + .cond = ei.cond, + .dst = dst, + .sat = saturate, + }; + + switch (op) { + case nir_op_fdiv: + case nir_op_flog2: + case nir_op_fsin: + case nir_op_fcos: + if (c->specs->has_new_transcendentals) + inst.tex.amode = 1; + FALLTHROUGH; + case nir_op_frsq: + case nir_op_frcp: + case nir_op_fexp2: + case nir_op_fsqrt: + case nir_op_imul: + /* scalar instructions we want src to be in x component */ + src[0].swiz = inst_swiz_compose(src[0].swiz, swiz_scalar); + src[1].swiz = inst_swiz_compose(src[1].swiz, swiz_scalar); + break; + /* deal with instructions which don't have 1:1 mapping */ + case nir_op_b2f32: + inst.src[2] = etna_immediate_float(1.0f); + break; + case nir_op_b2i32: + inst.src[2] = etna_immediate_int(1); + break; + case nir_op_f2b32: + inst.src[1] = etna_immediate_float(0.0f); + break; + case nir_op_i2b32: + inst.src[1] = etna_immediate_int(0); + break; + case nir_op_ineg: + inst.src[0] = etna_immediate_int(0); + src[0].neg = 1; + break; + default: + break; + } + + /* set the "true" value for CMP 
instructions */ + if (inst.opcode == INST_OPCODE_CMP) + inst.src[2] = etna_immediate_int(-1); + + for (unsigned j = 0; j < 3; j++) { + unsigned i = ((ei.src >> j*2) & 3); + if (i < 3) + inst.src[j] = src[i]; + } + + emit_inst(c, &inst); +} + +void +etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz, + struct etna_inst_dst dst, struct etna_inst_src coord, + struct etna_inst_src lod_bias, struct etna_inst_src compare) +{ + struct etna_inst inst = { + .dst = dst, + .tex.id = texid + (is_fs(c) ? 0 : c->specs->vertex_sampler_offset), + .tex.swiz = dst_swiz, + .src[0] = coord, + }; + + if (lod_bias.use) + inst.src[1] = lod_bias; + + if (compare.use) + inst.src[2] = compare; + + switch (op) { + case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break; + case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break; + case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break; + default: + compile_error(c, "Unhandled NIR tex type: %d\n", op); + } + + emit_inst(c, &inst); +} + +void +etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition) +{ + if (!condition.use) { + emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block }); + return; + } + + struct etna_inst inst = { + .opcode = INST_OPCODE_BRANCH, + .cond = INST_CONDITION_NOT, + .type = INST_TYPE_U32, + .src[0] = condition, + .imm = block, + }; + inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); + emit_inst(c, &inst); +} + +void +etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition) +{ + if (!condition.use) { + emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL }); + return; + } + + struct etna_inst inst = { + .opcode = INST_OPCODE_TEXKILL, + .cond = INST_CONDITION_NZ, + .type = (c->specs->halti < 2) ? 
INST_TYPE_F32 : INST_TYPE_U32, + .src[0] = condition, + }; + inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3); + emit_inst(c, &inst); +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c new file mode 100644 index 000000000..5ce12dce6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_liveness.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2019 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jonathan Marek <jonathan@marek.ca> + */ + +#include "etnaviv_compiler_nir.h" +#include "compiler/nir/nir_worklist.h" + +static void +range_include(struct live_def *def, unsigned index) +{ + if (def->live_start > index) + def->live_start = index; + if (def->live_end < index) + def->live_end = index; +} + +struct live_defs_state { + unsigned num_defs; + unsigned bitset_words; + + nir_function_impl *impl; + nir_block *block; /* current block pointer */ + unsigned index; /* current live index */ + + struct live_def *defs; + unsigned *live_map; /* to map ssa/reg index into defs array */ + + nir_block_worklist worklist; +}; + +static bool +init_liveness_block(nir_block *block, + struct live_defs_state *state) +{ + block->live_in = reralloc(block, block->live_in, BITSET_WORD, + state->bitset_words); + memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); + + block->live_out = reralloc(block, block->live_out, BITSET_WORD, + state->bitset_words); + memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); + + nir_block_worklist_push_head(&state->worklist, block); + + return true; +} + +static bool +set_src_live(nir_src *src, void *void_state) +{ + struct live_defs_state *state = void_state; + + if (src->is_ssa) { + nir_instr *instr = src->ssa->parent_instr; + + if (is_sysval(instr) || instr->type == nir_instr_type_deref) + return true; + + switch (instr->type) { + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + return true; + case nir_instr_type_alu: { + /* alu op bypass */ + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (instr->pass_flags & BYPASS_SRC) { + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) + set_src_live(&alu->src[i].src, state); + return true; + } + } break; + default: + break; + } + } + + unsigned i = state->live_map[src_index(state->impl, src)]; + assert(i != ~0u); + + BITSET_SET(state->block->live_in, i); + range_include(&state->defs[i], state->index); + + return 
true; +} + +static bool +propagate_across_edge(nir_block *pred, nir_block *succ, + struct live_defs_state *state) +{ + BITSET_WORD progress = 0; + for (unsigned i = 0; i < state->bitset_words; ++i) { + progress |= succ->live_in[i] & ~pred->live_out[i]; + pred->live_out[i] |= succ->live_in[i]; + } + return progress != 0; +} + +unsigned +etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map) +{ + struct live_defs_state state; + unsigned block_live_index[impl->num_blocks + 1]; + + state.impl = impl; + state.defs = defs; + state.live_map = live_map; + + state.num_defs = 0; + nir_foreach_block(block, impl) { + block_live_index[block->index] = state.num_defs; + nir_foreach_instr(instr, block) { + nir_dest *dest = dest_for_instr(instr); + if (!dest) + continue; + + unsigned idx = dest_index(impl, dest); + /* register is already in defs */ + if (live_map[idx] != ~0u) + continue; + + defs[state.num_defs] = (struct live_def) {instr, dest, state.num_defs, 0}; + + /* input live from the start */ + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_load_input || + intr->intrinsic == nir_intrinsic_load_instance_id) + defs[state.num_defs].live_start = 0; + } + + live_map[idx] = state.num_defs; + state.num_defs++; + } + } + block_live_index[impl->num_blocks] = state.num_defs; + + nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); + + /* We now know how many unique ssa definitions we have and we can go + * ahead and allocate live_in and live_out sets and add all of the + * blocks to the worklist. + */ + state.bitset_words = BITSET_WORDS(state.num_defs); + nir_foreach_block(block, impl) { + init_liveness_block(block, &state); + } + + /* We're now ready to work through the worklist and update the liveness + * sets of each of the blocks. 
By the time we get to this point, every + * block in the function implementation has been pushed onto the + * worklist in reverse order. As long as we keep the worklist + * up-to-date as we go, everything will get covered. + */ + while (!nir_block_worklist_is_empty(&state.worklist)) { + /* We pop them off in the reverse order we pushed them on. This way + * the first walk of the instructions is backwards so we only walk + * once in the case of no control flow. + */ + nir_block *block = nir_block_worklist_pop_head(&state.worklist); + state.block = block; + + memcpy(block->live_in, block->live_out, + state.bitset_words * sizeof(BITSET_WORD)); + + state.index = block_live_index[block->index + 1]; + + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) + set_src_live(&following_if->condition, &state); + + nir_foreach_instr_reverse(instr, block) { + /* when we come across the next "live" instruction, decrement index */ + if (state.index && instr == defs[state.index - 1].instr) { + state.index--; + /* the only source of writes to registers is phis: + * we don't expect any partial write_mask alus + * so clearing live_in here is OK + */ + BITSET_CLEAR(block->live_in, state.index); + } + + /* don't set_src_live for not-emitted instructions */ + if (instr->pass_flags) + continue; + + unsigned index = state.index; + + /* output live till the end */ + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_store_deref) + state.index = ~0u; + } + + nir_foreach_src(instr, set_src_live, &state); + + state.index = index; + } + assert(state.index == block_live_index[block->index]); + + /* Walk over all of the predecessors of the current block updating + * their live in with the live out of this one. If anything has + * changed, add the predecessor to the work list so that we ensure + * that the new information is used. 
+ */ + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *)entry->key; + if (propagate_across_edge(pred, block, &state)) + nir_block_worklist_push_tail(&state.worklist, pred); + } + } + + nir_block_worklist_fini(&state.worklist); + + /* apply live_in/live_out to ranges */ + + nir_foreach_block(block, impl) { + int i; + + BITSET_FOREACH_SET(i, block->live_in, state.num_defs) + range_include(&state.defs[i], block_live_index[block->index]); + + BITSET_FOREACH_SET(i, block->live_out, state.num_defs) + range_include(&state.defs[i], block_live_index[block->index + 1]); + } + + return state.num_defs; +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c new file mode 100644 index 000000000..fef982c9b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_ra.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2019 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jonathan Marek <jonathan@marek.ca> + */ + +#include "etnaviv_compiler_nir.h" +#include "util/register_allocate.h" + +/* use "r63.z" for depth reg, it will wrap around to r0.z by reg_get_base + * (fs registers are offset by 1 to avoid reserving r0) + */ +#define REG_FRAG_DEPTH ((ETNA_MAX_TEMPS - 1) * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Z) + +/* precomputed by register_allocate */ +static unsigned int *q_values[] = { + (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, }, + (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, }, + (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, }, + (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, }, + (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, }, + (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, }, + (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, }, +}; + +static inline int reg_get_class(int virt_reg) +{ + switch (reg_get_type(virt_reg)) { + case REG_TYPE_VEC4: + return REG_CLASS_VEC4; + case REG_TYPE_VIRT_VEC3_XYZ: + case REG_TYPE_VIRT_VEC3_XYW: + case REG_TYPE_VIRT_VEC3_XZW: + case REG_TYPE_VIRT_VEC3_YZW: + return REG_CLASS_VIRT_VEC3; + case REG_TYPE_VIRT_VEC2_XY: + case REG_TYPE_VIRT_VEC2_XZ: + case REG_TYPE_VIRT_VEC2_XW: + case REG_TYPE_VIRT_VEC2_YZ: + case REG_TYPE_VIRT_VEC2_YW: + case REG_TYPE_VIRT_VEC2_ZW: + return REG_CLASS_VIRT_VEC2; + case REG_TYPE_VIRT_SCALAR_X: + case REG_TYPE_VIRT_SCALAR_Y: + case REG_TYPE_VIRT_SCALAR_Z: + case REG_TYPE_VIRT_SCALAR_W: + return REG_CLASS_VIRT_SCALAR; + case REG_TYPE_VIRT_VEC2T_XY: + case REG_TYPE_VIRT_VEC2T_ZW: + return REG_CLASS_VIRT_VEC2T; + case REG_TYPE_VIRT_VEC2C_XY: + case REG_TYPE_VIRT_VEC2C_YZ: + case REG_TYPE_VIRT_VEC2C_ZW: + return REG_CLASS_VIRT_VEC2C; + case REG_TYPE_VIRT_VEC3C_XYZ: + case REG_TYPE_VIRT_VEC3C_YZW: + return REG_CLASS_VIRT_VEC3C; + } 
+ + assert(false); + return 0; +} + +struct ra_regs * +etna_ra_setup(void *mem_ctx) +{ + struct ra_regs *regs = ra_alloc_reg_set(mem_ctx, ETNA_MAX_TEMPS * + NUM_REG_TYPES, false); + + /* classes are always created starting from index 0, so they equal the class enum + * which represents a register with (c+1) components + */ + for (int c = 0; c < NUM_REG_CLASSES; c++) + ra_alloc_reg_class(regs); + /* add each register of each class */ + for (int r = 0; r < NUM_REG_TYPES * ETNA_MAX_TEMPS; r++) + ra_class_add_reg(regs, reg_get_class(r), r); + /* set conflicts */ + for (int r = 0; r < ETNA_MAX_TEMPS; r++) { + for (int i = 0; i < NUM_REG_TYPES; i++) { + for (int j = 0; j < i; j++) { + if (reg_writemask[i] & reg_writemask[j]) { + ra_add_reg_conflict(regs, NUM_REG_TYPES * r + i, + NUM_REG_TYPES * r + j); + } + } + } + } + ra_set_finalize(regs, q_values); + + return regs; +} + +void +etna_ra_assign(struct etna_compile *c, nir_shader *shader) +{ + struct etna_compiler *compiler = c->variant->shader->compiler; + struct ra_regs *regs = compiler->regs; + + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + + /* liveness and interference */ + + nir_index_blocks(impl); + nir_index_ssa_defs(impl); + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) + instr->pass_flags = 0; + } + + /* this gives an approximation/upper limit on how many nodes are needed + * (some ssa values do not represent an allocated register) + */ + unsigned max_nodes = impl->ssa_alloc + impl->reg_alloc; + unsigned *live_map = ralloc_array(NULL, unsigned, max_nodes); + memset(live_map, 0xff, sizeof(unsigned) * max_nodes); + struct live_def *defs = rzalloc_array(NULL, struct live_def, max_nodes); + + unsigned num_nodes = etna_live_defs(impl, defs, live_map); + struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes); + + /* set classes from num_components */ + for (unsigned i = 0; i < num_nodes; i++) { + nir_instr *instr = defs[i].instr; + nir_dest *dest = defs[i].dest; + unsigned comp = 
nir_dest_num_components(*dest) - 1; + + if (instr->type == nir_instr_type_alu && + c->specs->has_new_transcendentals) { + switch (nir_instr_as_alu(instr)->op) { + case nir_op_fdiv: + case nir_op_flog2: + case nir_op_fsin: + case nir_op_fcos: + assert(dest->is_ssa); + comp = REG_CLASS_VIRT_VEC2T; + default: + break; + } + } + + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + /* can't have dst swizzle or sparse writemask on UBO loads */ + if (intr->intrinsic == nir_intrinsic_load_ubo) { + assert(dest == &intr->dest); + if (dest->ssa.num_components == 2) + comp = REG_CLASS_VIRT_VEC2C; + if (dest->ssa.num_components == 3) + comp = REG_CLASS_VIRT_VEC3C; + } + } + + ra_set_node_class(g, i, comp); + } + + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_dest *dest = dest_for_instr(instr); + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + unsigned reg; + + switch (intr->intrinsic) { + case nir_intrinsic_store_deref: { + /* don't want outputs to be swizzled + * TODO: better would be to set the type to X/XY/XYZ/XYZW + * TODO: what if fragcoord.z is read after writing fragdepth? 
+ */ + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + unsigned index = live_map[src_index(impl, &intr->src[1])]; + + if (shader->info.stage == MESA_SHADER_FRAGMENT && + deref->var->data.location == FRAG_RESULT_DEPTH) { + ra_set_node_reg(g, index, REG_FRAG_DEPTH); + } else { + ra_set_node_class(g, index, REG_CLASS_VEC4); + } + } continue; + case nir_intrinsic_load_input: + reg = nir_intrinsic_base(intr) * NUM_REG_TYPES + (unsigned[]) { + REG_TYPE_VIRT_SCALAR_X, + REG_TYPE_VIRT_VEC2_XY, + REG_TYPE_VIRT_VEC3_XYZ, + REG_TYPE_VEC4, + }[nir_dest_num_components(*dest) - 1]; + break; + case nir_intrinsic_load_instance_id: + reg = c->variant->infile.num_reg * NUM_REG_TYPES + REG_TYPE_VIRT_SCALAR_Y; + break; + default: + continue; + } + + ra_set_node_reg(g, live_map[dest_index(impl, dest)], reg); + } + } + + /* add interference for intersecting live ranges */ + for (unsigned i = 0; i < num_nodes; i++) { + assert(defs[i].live_start < defs[i].live_end); + for (unsigned j = 0; j < i; j++) { + if (defs[i].live_start >= defs[j].live_end || defs[j].live_start >= defs[i].live_end) + continue; + ra_add_node_interference(g, i, j); + } + } + + ralloc_free(defs); + + /* Allocate registers */ + ASSERTED bool ok = ra_allocate(g); + assert(ok); + + c->g = g; + c->live_map = live_map; + c->num_nodes = num_nodes; +} + +unsigned +etna_ra_finish(struct etna_compile *c) +{ + /* TODO: better way to get number of registers used? 
*/ + unsigned j = 0; + for (unsigned i = 0; i < c->num_nodes; i++) { + j = MAX2(j, reg_get_base(c, ra_get_node_reg(c->g, i)) + 1); + } + + ralloc_free(c->g); + ralloc_free(c->live_map); + + return j; +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_tgsi.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_tgsi.c index f153f8049..7f7e0b670 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_tgsi.c +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_compiler_tgsi.c @@ -53,10 +53,10 @@ #include "etnaviv_asm.h" #include "etnaviv_context.h" #include "etnaviv_debug.h" -#include "etnaviv_disasm.h" #include "etnaviv_uniforms.h" #include "etnaviv_util.h" +#include "nir/tgsi_to_nir.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_iterate.h" @@ -160,7 +160,7 @@ struct etna_compile { bool dead_inst[ETNA_MAX_TOKENS]; /* Immediate data */ - enum etna_immediate_contents imm_contents[ETNA_MAX_IMM]; + enum etna_uniform_contents imm_contents[ETNA_MAX_IMM]; uint32_t imm_data[ETNA_MAX_IMM]; uint32_t imm_base; /* base of immediates (in 32 bit units) */ uint32_t imm_size; /* size of immediates (in 32 bit units) */ @@ -369,7 +369,7 @@ assign_inouts_to_temporaries(struct etna_compile *c, uint file) * there is already an immediate with that value, return that. 
*/ static struct etna_inst_src -alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents, +alloc_imm(struct etna_compile *c, enum etna_uniform_contents contents, uint32_t value) { int idx; @@ -383,7 +383,7 @@ alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents, /* look if there is an unused slot */ if (idx == c->imm_size) { for (idx = 0; idx < c->imm_size; ++idx) { - if (c->imm_contents[idx] == ETNA_IMMEDIATE_UNUSED) + if (c->imm_contents[idx] == ETNA_UNIFORM_UNUSED) break; } } @@ -411,11 +411,11 @@ alloc_imm(struct etna_compile *c, enum etna_immediate_contents contents, static struct etna_inst_src alloc_imm_u32(struct etna_compile *c, uint32_t value) { - return alloc_imm(c, ETNA_IMMEDIATE_CONSTANT, value); + return alloc_imm(c, ETNA_UNIFORM_CONSTANT, value); } static struct etna_inst_src -alloc_imm_vec4u(struct etna_compile *c, enum etna_immediate_contents contents, +alloc_imm_vec4u(struct etna_compile *c, enum etna_uniform_contents contents, const uint32_t *values) { struct etna_inst_src imm_src = { }; @@ -479,7 +479,7 @@ etna_imm_vec4f(struct etna_compile *c, const float *vec4) for (int i = 0; i < 4; i++) val[i] = fui(vec4[i]); - return alloc_imm_vec4u(c, ETNA_IMMEDIATE_CONSTANT, val); + return alloc_imm_vec4u(c, ETNA_UNIFORM_CONSTANT, val); } /* Pass -- check register file declarations and immediates */ @@ -504,7 +504,7 @@ etna_compile_parse_declarations(struct etna_compile *c) unsigned idx = c->imm_size++; c->imm_data[idx] = imm->u[i].Uint; - c->imm_contents[idx] = ETNA_IMMEDIATE_CONSTANT; + c->imm_contents[idx] = ETNA_UNIFORM_CONSTANT; } } break; @@ -1699,12 +1699,12 @@ trans_sampler(const struct instr_translater *t, struct etna_compile *c, ins[0].opcode = INST_OPCODE_MUL; ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X); ins[0].src[0] = src[0]; - ins[0].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_X, unit); + ins[0].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_X, unit); ins[1].opcode = INST_OPCODE_MUL; 
ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y); ins[1].src[0] = src[0]; - ins[1].src[1] = alloc_imm(c, ETNA_IMMEDIATE_TEXRECT_SCALE_Y, unit); + ins[1].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_Y, unit); emit_inst(c, &ins[0]); emit_inst(c, &ins[1]); @@ -1882,12 +1882,27 @@ etna_compile_pass_generate_code(struct etna_compile *c) for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) { const struct tgsi_full_src_register *reg = &inst->Src[i]; - const struct etna_native_reg *n = &etna_get_src_reg(c, reg->Register)->native; + const struct etna_reg_desc *srcreg = etna_get_src_reg(c, reg->Register); + const struct etna_native_reg *n = &srcreg->native; if (!n->valid || n->is_tex) continue; src[i] = etna_create_src(reg, n); + + /* + * Replace W=1.0 for point sprite coordinates, since hardware + * can only replace X,Y and leaves Z,W=0,0 instead of Z,W=0,1 + */ + if (srcreg && srcreg->has_semantic && + srcreg->semantic.Name == TGSI_SEMANTIC_TEXCOORD && + (c->key->sprite_coord_enable & BITFIELD_BIT(srcreg->semantic.Index))) { + emit_inst(c, &(struct etna_inst) { + .opcode = INST_OPCODE_SET, + .cond = INST_CONDITION_TRUE, + .dst = etna_native_to_dst(srcreg->native, INST_COMPS_W), + }); + } } const unsigned opc = inst->Instruction.Opcode; @@ -2084,6 +2099,7 @@ permute_ps_inputs(struct etna_compile *c) * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION * gl_FrontFacing VARYING_SLOT_FACE TGSI_SEMANTIC_FACE * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD + * gl_TexCoord VARYING_SLOT_TEX TGSI_SEMANTIC_TEXCOORD */ uint native_idx = 1; @@ -2108,6 +2124,11 @@ permute_ps_inputs(struct etna_compile *c) c->next_free_native = native_idx; } +static inline int sem2slot(const struct tgsi_declaration_semantic *semantic) +{ + return tgsi_varying_semantic_to_slot(semantic->Name, semantic->Index); +} + /* fill in ps inputs into shader object */ static void fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c) @@ -2122,7 +2143,7 @@ 
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c) if (reg->native.id > 0) { assert(sf->num_reg < ETNA_NUM_INPUTS); sf->reg[sf->num_reg].reg = reg->native.id; - sf->reg[sf->num_reg].semantic = reg->semantic; + sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic); /* convert usage mask to number of components (*=wildcard) * .r (0..1) -> 1 component * .*g (2..3) -> 2 component @@ -2176,7 +2197,7 @@ fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c) /* XXX exclude inputs with special semantics such as gl_frontFacing */ sf->reg[sf->num_reg].reg = reg->native.id; - sf->reg[sf->num_reg].semantic = reg->semantic; + sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic); sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask); sf->num_reg++; } @@ -2184,30 +2205,6 @@ fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c) sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */ } -/* build two-level output index [Semantic][Index] for fast linking */ -static void -build_output_index(struct etna_shader_variant *sobj) -{ - int total = 0; - int offset = 0; - - for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) - total += sobj->output_count_per_semantic[name]; - - sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *)); - - for (int name = 0; name < TGSI_SEMANTIC_COUNT; ++name) { - sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset]; - offset += sobj->output_count_per_semantic[name]; - } - - for (int idx = 0; idx < sobj->outfile.num_reg; ++idx) { - sobj->output_per_semantic[sobj->outfile.reg[idx].semantic.Name] - [sobj->outfile.reg[idx].semantic.Index] = - &sobj->outfile.reg[idx]; - } -} - /* fill in outputs for vs into shader object */ static void fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c) @@ -2228,18 +2225,12 @@ fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c) break; 
default: sf->reg[sf->num_reg].reg = reg->native.id; - sf->reg[sf->num_reg].semantic = reg->semantic; + sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic); sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components; sf->num_reg++; - sobj->output_count_per_semantic[reg->semantic.Name] = - MAX2(reg->semantic.Index + 1, - sobj->output_count_per_semantic[reg->semantic.Name]); } } - /* build two-level index for linking */ - build_output_index(sobj); - /* fill in "mystery meat" load balancing value. This value determines how * work is scheduled between VS and PS * in the unified shader architecture. More precisely, it is determined from @@ -2315,17 +2306,17 @@ copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant uint32_t count = c->imm_base + c->imm_size; struct etna_shader_uniform_info *uinfo = &sobj->uniforms; - uinfo->imm_count = count; + uinfo->count = count; - uinfo->imm_data = malloc(count * sizeof(*c->imm_data)); + uinfo->data = malloc(count * sizeof(*c->imm_data)); for (unsigned i = 0; i < c->imm_base; i++) - uinfo->imm_data[i] = i; - memcpy(&uinfo->imm_data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data)); + uinfo->data[i] = i; + memcpy(&uinfo->data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data)); - uinfo->imm_contents = malloc(count * sizeof(*c->imm_contents)); + uinfo->contents = malloc(count * sizeof(*c->imm_contents)); for (unsigned i = 0; i < c->imm_base; i++) - uinfo->imm_contents[i] = ETNA_IMMEDIATE_UNIFORM; - memcpy(&uinfo->imm_contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents)); + uinfo->contents[i] = ETNA_UNIFORM_UNIFORM; + memcpy(&uinfo->contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents)); etna_set_shader_uniforms_dirty_flags(sobj); } @@ -2496,6 +2487,7 @@ etna_compile_shader(struct etna_shader_variant *v) /* fill in output structure */ v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? 
MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX; + v->uses_discard = c->info.uses_kill; v->code_size = c->inst_ptr * 4; v->code = mem_dup(c->code, c->inst_ptr * 16); v->num_loops = c->num_loops; @@ -2526,73 +2518,13 @@ out: return ret; } -extern const char *tgsi_swizzle_names[]; -void -etna_dump_shader(const struct etna_shader_variant *shader) -{ - if (shader->stage == MESA_SHADER_VERTEX) - printf("VERT\n"); - else - printf("FRAG\n"); - - - etna_disasm(shader->code, shader->code_size, PRINT_RAW); - - printf("num loops: %i\n", shader->num_loops); - printf("num temps: %i\n", shader->num_temps); - printf("immediates:\n"); - for (int idx = 0; idx < shader->uniforms.imm_count; ++idx) { - printf(" [%i].%s = %f (0x%08x) (%d)\n", - idx / 4, - tgsi_swizzle_names[idx % 4], - *((float *)&shader->uniforms.imm_data[idx]), - shader->uniforms.imm_data[idx], - shader->uniforms.imm_contents[idx]); - } - printf("inputs:\n"); - for (int idx = 0; idx < shader->infile.num_reg; ++idx) { - printf(" [%i] name=%s index=%i comps=%i\n", shader->infile.reg[idx].reg, - tgsi_semantic_names[shader->infile.reg[idx].semantic.Name], - shader->infile.reg[idx].semantic.Index, - shader->infile.reg[idx].num_components); - } - printf("outputs:\n"); - for (int idx = 0; idx < shader->outfile.num_reg; ++idx) { - printf(" [%i] name=%s index=%i comps=%i\n", shader->outfile.reg[idx].reg, - tgsi_semantic_names[shader->outfile.reg[idx].semantic.Name], - shader->outfile.reg[idx].semantic.Index, - shader->outfile.reg[idx].num_components); - } - printf("special:\n"); - if (shader->stage == MESA_SHADER_VERTEX) { - printf(" vs_pos_out_reg=%i\n", shader->vs_pos_out_reg); - printf(" vs_pointsize_out_reg=%i\n", shader->vs_pointsize_out_reg); - printf(" vs_load_balancing=0x%08x\n", shader->vs_load_balancing); - } else { - printf(" ps_color_out_reg=%i\n", shader->ps_color_out_reg); - printf(" ps_depth_out_reg=%i\n", shader->ps_depth_out_reg); - } - printf(" input_count_unk8=0x%08x\n", shader->input_count_unk8); -} - -void 
-etna_destroy_shader(struct etna_shader_variant *shader) -{ - assert(shader); - - FREE(shader->code); - FREE(shader->uniforms.imm_data); - FREE(shader->uniforms.imm_contents); - FREE(shader->output_per_semantic_list); - FREE(shader); -} - static const struct etna_shader_inout * etna_shader_vs_lookup(const struct etna_shader_variant *sobj, const struct etna_shader_inout *in) { - if (in->semantic.Index < sobj->output_count_per_semantic[in->semantic.Name]) - return sobj->output_per_semantic[in->semantic.Name][in->semantic.Index]; + for (int i = 0; i < sobj->outfile.num_reg; i++) + if (sobj->outfile.reg[i].slot == in->slot) + return &sobj->outfile.reg[i]; return NULL; } @@ -2614,7 +2546,8 @@ etna_link_shader(struct etna_shader_link_info *info, const struct etna_shader_inout *fsio = &fs->infile.reg[idx]; const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio); struct etna_varying *varying; - bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR; + bool interpolate_always = ((fsio->slot != VARYING_SLOT_COL0) && + (fsio->slot != VARYING_SLOT_COL1)); assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings)); @@ -2634,17 +2567,17 @@ etna_link_shader(struct etna_shader_link_info *info, varying->use[2] = VARYING_COMPONENT_USE_UNUSED; varying->use[3] = VARYING_COMPONENT_USE_UNUSED; - /* point coord is an input to the PS without matching VS output, + /* point/tex coord is an input to the PS without matching VS output, * so it gets a varying slot without being assigned a VS register. 
*/ - if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) { + if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) { varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X; varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y; info->pcoord_varying_comp_ofs = comp_ofs; } else { if (vsio == NULL) { /* not found -- link error */ - BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index); + BUG("Semantic value not found in vertex shader outputs\n"); return true; } diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_disk_cache.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_disk_cache.c new file mode 100644 index 000000000..321cbe9c4 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_disk_cache.c @@ -0,0 +1,186 @@ +/* + * Copyright © 2020 Google, Inc. + * Copyright (c) 2020 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Christian Gmeiner <christian.gmeiner@gmail.com> + */ + +#include "etnaviv_debug.h" +#include "etnaviv_disk_cache.h" +#include "nir_serialize.h" + +#define debug 0 + +void +etna_disk_cache_init(struct etna_compiler *compiler, const char *renderer) +{ + if (!(etna_mesa_debug & ETNA_DBG_NIR)) + return; + + if (etna_mesa_debug & ETNA_DBG_NOCACHE) + return; + + const struct build_id_note *note = + build_id_find_nhdr_for_addr(etna_disk_cache_init); + assert(note && build_id_length(note) == 20); /* sha1 */ + + const uint8_t *id_sha1 = build_id_data(note); + assert(id_sha1); + + char timestamp[41]; + _mesa_sha1_format(timestamp, id_sha1); + + compiler->disk_cache = disk_cache_create(renderer, timestamp, etna_mesa_debug); +} + +void +etna_disk_cache_init_shader_key(struct etna_compiler *compiler, struct etna_shader *shader) +{ + if (!compiler->disk_cache) + return; + + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + + /* Serialize the NIR to a binary blob that we can hash for the disk + * cache. Drop unnecessary information (like variable names) + * so the serialized NIR is smaller, and also to let us detect more + * isomorphic shaders when hashing, increasing cache hits. 
+ */ + struct blob blob; + + blob_init(&blob); + nir_serialize(&blob, shader->nir, true); + _mesa_sha1_update(&ctx, blob.data, blob.size); + blob_finish(&blob); + + _mesa_sha1_final(&ctx, shader->cache_key); +} + +static void +compute_variant_key(struct etna_compiler *compiler, struct etna_shader_variant *v, + cache_key cache_key) +{ + struct blob blob; + + blob_init(&blob); + + blob_write_bytes(&blob, &v->shader->cache_key, sizeof(v->shader->cache_key)); + blob_write_bytes(&blob, &v->key, sizeof(v->key)); + + disk_cache_compute_key(compiler->disk_cache, blob.data, blob.size, cache_key); + + blob_finish(&blob); +} + +static void +retrieve_variant(struct blob_reader *blob, struct etna_shader_variant *v) +{ + blob_copy_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE); + + v->code = malloc(4 * v->code_size); + blob_copy_bytes(blob, v->code, 4 * v->code_size); + + blob_copy_bytes(blob, &v->uniforms.count, sizeof(v->uniforms.count)); + v->uniforms.contents = malloc(v->uniforms.count * sizeof(v->uniforms.contents)); + v->uniforms.data = malloc(v->uniforms.count * sizeof(v->uniforms.data)); + + blob_copy_bytes(blob, v->uniforms.contents, v->uniforms.count * sizeof(v->uniforms.contents)); + blob_copy_bytes(blob, v->uniforms.data, v->uniforms.count * sizeof(v->uniforms.data)); +} + +static void +store_variant(struct blob *blob, const struct etna_shader_variant *v) +{ + const uint32_t imm_count = v->uniforms.count; + + blob_write_bytes(blob, VARIANT_CACHE_PTR(v), VARIANT_CACHE_SIZE); + blob_write_bytes(blob, v->code, 4 * v->code_size); + + blob_write_bytes(blob, &v->uniforms.count, sizeof(v->uniforms.count)); + blob_write_bytes(blob, v->uniforms.contents, imm_count * sizeof(v->uniforms.contents)); + blob_write_bytes(blob, v->uniforms.data, imm_count * sizeof(v->uniforms.data)); +} + +bool +etna_disk_cache_retrieve(struct etna_compiler *compiler, struct etna_shader_variant *v) +{ + if (!compiler->disk_cache) + return false; + + cache_key cache_key; + + 
compute_variant_key(compiler, v, cache_key); + + if (debug) { + char sha1[41]; + + _mesa_sha1_format(sha1, cache_key); + fprintf(stderr, "[mesa disk cache] retrieving variant %s: ", sha1); + } + + size_t size; + void *buffer = disk_cache_get(compiler->disk_cache, cache_key, &size); + + if (debug) + fprintf(stderr, "%s\n", buffer ? "found" : "missing"); + + if (!buffer) + return false; + + struct blob_reader blob; + blob_reader_init(&blob, buffer, size); + + retrieve_variant(&blob, v); + + free(buffer); + + return true; +} + +void +etna_disk_cache_store(struct etna_compiler *compiler, struct etna_shader_variant *v) +{ + if (!compiler->disk_cache) + return; + + cache_key cache_key; + + compute_variant_key(compiler, v, cache_key); + + if (debug) { + char sha1[41]; + + _mesa_sha1_format(sha1, cache_key); + fprintf(stderr, "[mesa disk cache] storing variant %s\n", sha1); + } + + struct blob blob; + blob_init(&blob); + + store_variant(&blob, v); + + disk_cache_put(compiler->disk_cache, cache_key, blob.data, blob.size, NULL); + blob_finish(&blob); +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_disk_cache.h b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_disk_cache.h new file mode 100644 index 000000000..61fa302ff --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_disk_cache.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2020 Google, Inc. 
+ * Copyright (c) 2020 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Christian Gmeiner <christian.gmeiner@gmail.com> + */ + +#ifndef H_ETNAVIV_DISK_CACHE +#define H_ETNAVIV_DISK_CACHE + +#include "etnaviv_compiler.h" + +void +etna_disk_cache_init(struct etna_compiler *compiler, const char *renderer); + +void +etna_disk_cache_init_shader_key(struct etna_compiler *compiler, struct etna_shader *shader); + +bool +etna_disk_cache_retrieve(struct etna_compiler *compiler, struct etna_shader_variant *v); + +void +etna_disk_cache_store(struct etna_compiler *compiler, struct etna_shader_variant *v); + +#endif diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir.c new file mode 100644 index 000000000..a92f96972 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2019 Zodiac Inflight Innovations + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jonathan Marek <jonathan@marek.ca> + */ + +#include "etnaviv_nir.h" + +/* io related lowering + * run after lower_int_to_float because it adds i2f/f2i ops + */ +void +etna_lower_io(nir_shader *shader, struct etna_shader_variant *v) +{ + nir_foreach_function(function, shader) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_front_face: { + /* HW front_face is 0.0/1.0, not 0/~0u for bool + * lower with a comparison with 0 + */ + intr->dest.ssa.bit_size = 32; + + b.cursor = nir_after_instr(instr); + + nir_ssa_def *ssa = nir_ine(&b, &intr->dest.ssa, nir_imm_int(&b, 0)); + if (v->key.front_ccw) + nir_instr_as_alu(ssa->parent_instr)->op = nir_op_ieq; + + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, + ssa, + ssa->parent_instr); + } break; + case nir_intrinsic_store_deref: { + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + if (shader->info.stage != MESA_SHADER_FRAGMENT || !v->key.frag_rb_swap) + break; + + assert(deref->deref_type == nir_deref_type_var); + + if (deref->var->data.location != FRAG_RESULT_COLOR && + deref->var->data.location != FRAG_RESULT_DATA0) + break; + + b.cursor = nir_before_instr(instr); + + nir_ssa_def *ssa = nir_mov(&b, intr->src[1].ssa); + nir_alu_instr *alu = nir_instr_as_alu(ssa->parent_instr); + alu->src[0].swizzle[0] = 2; + alu->src[0].swizzle[2] = 0; + nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(ssa)); + } break; + case nir_intrinsic_load_vertex_id: + case nir_intrinsic_load_instance_id: + /* detect use of vertex_id/instance_id */ + v->vs_id_in_reg = v->infile.num_reg; + break; + default: + break; + } + } + + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + nir_src *coord = NULL; 
+ nir_src *lod_bias = NULL; + unsigned lod_bias_idx; + + assert(tex->sampler_index == tex->texture_index); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + coord = &tex->src[i].src; + break; + case nir_tex_src_bias: + case nir_tex_src_lod: + assert(!lod_bias); + lod_bias = &tex->src[i].src; + lod_bias_idx = i; + break; + case nir_tex_src_comparator: + break; + default: + assert(0); + break; + } + } + + /* pre HALTI5 needs texture sources in a single source */ + + if (!lod_bias || v->shader->specs->halti >= 5) + continue; + + assert(coord && lod_bias && tex->coord_components < 4); + + nir_alu_instr *vec = nir_alu_instr_create(shader, nir_op_vec4); + for (unsigned i = 0; i < tex->coord_components; i++) { + vec->src[i].src = nir_src_for_ssa(coord->ssa); + vec->src[i].swizzle[0] = i; + } + for (unsigned i = tex->coord_components; i < 4; i++) + vec->src[i].src = nir_src_for_ssa(lod_bias->ssa); + + vec->dest.write_mask = 0xf; + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, 4, 32, NULL); + + nir_tex_instr_remove_src(tex, lod_bias_idx); + nir_instr_rewrite_src(&tex->instr, coord, nir_src_for_ssa(&vec->dest.dest.ssa)); + tex->coord_components = 4; + + nir_instr_insert_before(&tex->instr, &vec->instr); + } + } + } +} + +static void +etna_lower_alu_impl(nir_function_impl *impl, bool has_new_transcendentals) +{ + nir_shader *shader = impl->function->shader; + + nir_builder b; + nir_builder_init(&b, impl); + + /* in a seperate loop so we can apply the multiple-uniform logic to the new fmul */ + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + /* multiply sin/cos src by constant + * TODO: do this earlier (but it breaks const_prop opt) + */ + if (alu->op == nir_op_fsin || alu->op == nir_op_fcos) { + b.cursor = nir_before_instr(instr); + + nir_ssa_def *imm = has_new_transcendentals ? 
+ nir_imm_float(&b, 1.0 / M_PI) : + nir_imm_float(&b, 2.0 / M_PI); + + nir_instr_rewrite_src(instr, &alu->src[0].src, + nir_src_for_ssa(nir_fmul(&b, alu->src[0].src.ssa, imm))); + } + + /* change transcendental ops to vec2 and insert vec1 mul for the result + * TODO: do this earlier (but it breaks with optimizations) + */ + if (has_new_transcendentals && ( + alu->op == nir_op_fdiv || alu->op == nir_op_flog2 || + alu->op == nir_op_fsin || alu->op == nir_op_fcos)) { + nir_ssa_def *ssa = &alu->dest.dest.ssa; + + assert(ssa->num_components == 1); + + nir_alu_instr *mul = nir_alu_instr_create(shader, nir_op_fmul); + mul->src[0].src = mul->src[1].src = nir_src_for_ssa(ssa); + mul->src[1].swizzle[0] = 1; + + mul->dest.write_mask = 1; + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL); + + ssa->num_components = 2; + + mul->dest.saturate = alu->dest.saturate; + alu->dest.saturate = 0; + + nir_instr_insert_after(instr, &mul->instr); + + nir_ssa_def_rewrite_uses_after(ssa, &mul->dest.dest.ssa, + &mul->instr); + } + } + } +} + +void +etna_lower_alu(nir_shader *shader, bool has_new_transcendentals) +{ + nir_foreach_function(function, shader) { + if (function->impl) + etna_lower_alu_impl(function->impl, has_new_transcendentals); + } +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir.h b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir.h new file mode 100644 index 000000000..bf7bba2e6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2012-2015 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef H_ETNA_NIR +#define H_ETNA_NIR + +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" +#include "etnaviv_compiler.h" + +void +etna_lower_io(nir_shader *shader, struct etna_shader_variant *v); + +void +etna_lower_alu(nir_shader *shader, bool has_new_transcendentals); + +bool +etna_nir_lower_ubo_to_uniform(nir_shader *shader); + +#endif diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir_lower_ubo_to_uniform.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir_lower_ubo_to_uniform.c new file mode 100644 index 000000000..2b66dc087 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_nir_lower_ubo_to_uniform.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in 
all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Christian Gmeiner <christian.gmeiner@gmail.com> + */ + +#include "etnaviv_nir.h" + +/* + * Pass to lower the load_ubo intrinsics for block 0 back to load_uniform intrinsics. + */ + +static bool +is_const_ubo(const nir_instr *instr, const void *_data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_ubo) + return false; + + if (!nir_src_is_const(intr->src[0]) || !nir_src_is_const(intr->src[1])) + return false; + + const uint32_t block = nir_src_as_uint(intr->src[0]); + if (block > 0) + return false; + + return true; +} + +static nir_ssa_def * +lower_ubo_to_uniform(nir_builder *b, nir_instr *instr, void *_data) +{ + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + b->cursor = nir_before_instr(instr); + + /* Undo the operations done in nir_lower_uniforms_to_ubo. 
*/ + nir_ssa_def *ubo_offset = nir_ssa_for_src(b, intr->src[1], 1); + nir_ssa_def *range_base = nir_imm_int(b, nir_intrinsic_range_base(intr)); + + nir_ssa_def *uniform_offset = + nir_ushr(b, nir_isub(b, ubo_offset, range_base), nir_imm_int(b, 4)); + + nir_ssa_def *uniform = + nir_load_uniform(b, intr->num_components, intr->dest.ssa.bit_size, uniform_offset, + .base = nir_intrinsic_range_base(intr) / 16, + .range = nir_intrinsic_range(intr) / 16, + .dest_type = nir_type_float32); + + nir_ssa_def_rewrite_uses(&intr->dest.ssa, uniform); + + return uniform; +} + +bool +etna_nir_lower_ubo_to_uniform(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + is_const_ubo, + lower_ubo_to_uniform, + NULL); + +} diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c index fc5d799d9..b97152161 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c +++ b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_desc.c @@ -47,6 +47,7 @@ struct etna_sampler_state_desc { uint32_t SAMP_CTRL1; uint32_t SAMP_LOD_MINMAX; uint32_t SAMP_LOD_BIAS; + uint32_t SAMP_ANISOTROPY; }; static inline struct etna_sampler_state_desc * @@ -77,10 +78,13 @@ etna_create_sampler_state_desc(struct pipe_context *pipe, const struct pipe_sampler_state *ss) { struct etna_sampler_state_desc *cs = CALLOC_STRUCT(etna_sampler_state_desc); + const bool ansio = ss->max_anisotropy > 1; if (!cs) return NULL; + cs->base = *ss; + cs->SAMP_CTRL0 = VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_UWRAP(translate_texture_wrapmode(ss->wrap_s)) | VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_VWRAP(translate_texture_wrapmode(ss->wrap_t)) | @@ -107,6 +111,7 @@ etna_create_sampler_state_desc(struct pipe_context *pipe, cs->SAMP_LOD_BIAS = VIVS_NTE_DESCRIPTOR_SAMP_LOD_BIAS_BIAS(etna_float_to_fixp88(ss->lod_bias)) | COND(ss->lod_bias != 0.0, VIVS_NTE_DESCRIPTOR_SAMP_LOD_BIAS_ENABLE); + cs->SAMP_ANISOTROPY = COND(ansio, 
etna_log2_fixp88(ss->max_anisotropy)); return cs; } @@ -158,9 +163,6 @@ etna_create_sampler_view_desc(struct pipe_context *pctx, struct pipe_resource *p if (util_format_is_srgb(so->format)) sv->SAMP_CTRL1 |= VIVS_NTE_DESCRIPTOR_SAMP_CTRL1_SRGB; - if (texture_use_int_filter(so, true)) - sv->SAMP_CTRL0 |= VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_INT_FILTER; - /* Create texture descriptor */ sv->bo = etna_bo_new(ctx->screen->dev, 0x100, DRM_ETNA_GEM_CACHE_WC); if (!sv->bo) @@ -290,14 +292,21 @@ etna_emit_texture_desc(struct etna_context *ctx) if ((1 << x) & active_samplers) { struct etna_sampler_state_desc *ss = etna_sampler_state_desc(ctx->sampler[x]); struct etna_sampler_view_desc *sv = etna_sampler_view_desc(ctx->sampler_view[x]); + uint32_t SAMP_CTRL0 = ss->SAMP_CTRL0 | sv->SAMP_CTRL0; + + if (texture_use_int_filter(&sv->base, &ss->base, true)) + SAMP_CTRL0 |= VIVS_NTE_DESCRIPTOR_SAMP_CTRL0_INT_FILTER; + etna_set_state(stream, VIVS_NTE_DESCRIPTOR_TX_CTRL(x), COND(sv->ts.enable, VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_ENABLE) | VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_MODE(sv->ts.mode) | - VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_INDEX(x)); - etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_CTRL0(x), ss->SAMP_CTRL0 | sv->SAMP_CTRL0); + VIVS_NTE_DESCRIPTOR_TX_CTRL_TS_INDEX(x)| + COND(sv->ts.comp, VIVS_NTE_DESCRIPTOR_TX_CTRL_COMPRESSION)); + etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_CTRL0(x), SAMP_CTRL0); etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_CTRL1(x), ss->SAMP_CTRL1 | sv->SAMP_CTRL1); etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_LOD_MINMAX(x), ss->SAMP_LOD_MINMAX); etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_LOD_BIAS(x), ss->SAMP_LOD_BIAS); + etna_set_state(stream, VIVS_NTE_DESCRIPTOR_SAMP_ANISOTROPY(x), ss->SAMP_ANISOTROPY); } } } diff --git a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_state.c b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_state.c index 5c50f1efe..e9e930033 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_state.c +++ 
b/lib/mesa/src/gallium/drivers/etnaviv/etnaviv_texture_state.c @@ -43,11 +43,11 @@ struct etna_sampler_state { struct pipe_sampler_state base; /* sampler offset +4*sampler, interleave when committing state */ - uint32_t TE_SAMPLER_CONFIG0; - uint32_t TE_SAMPLER_CONFIG1; - uint32_t TE_SAMPLER_LOD_CONFIG; - uint32_t TE_SAMPLER_3D_CONFIG; - uint32_t NTE_SAMPLER_BASELOD; + uint32_t config0; + uint32_t config1; + uint32_t config_lod; + uint32_t config_3d; + uint32_t baselod; unsigned min_lod, max_lod, max_lod_min; }; @@ -61,15 +61,15 @@ struct etna_sampler_view { struct pipe_sampler_view base; /* sampler offset +4*sampler, interleave when committing state */ - uint32_t TE_SAMPLER_CONFIG0; - uint32_t TE_SAMPLER_CONFIG0_MASK; - uint32_t TE_SAMPLER_CONFIG1; - uint32_t TE_SAMPLER_3D_CONFIG; - uint32_t TE_SAMPLER_SIZE; - uint32_t TE_SAMPLER_LOG_SIZE; - uint32_t TE_SAMPLER_ASTC0; - uint32_t TE_SAMPLER_LINEAR_STRIDE[VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN]; - struct etna_reloc TE_SAMPLER_LOD_ADDR[VIVS_TE_SAMPLER_LOD_ADDR__LEN]; + uint32_t config0; + uint32_t config0_mask; + uint32_t config1; + uint32_t config_3d; + uint32_t size; + uint32_t log_size; + uint32_t astc0; + uint32_t linear_stride; /* only LOD0 */ + struct etna_reloc lod_addr[VIVS_TE_SAMPLER_LOD_ADDR__LEN]; unsigned min_lod, max_lod; /* 5.5 fixp */ struct etna_sampler_ts ts; @@ -89,40 +89,44 @@ etna_create_sampler_state_state(struct pipe_context *pipe, struct etna_context *ctx = etna_context(pipe); struct etna_screen *screen = ctx->screen; const bool ansio = ss->max_anisotropy > 1; + const bool mipmap = ss->min_mip_filter != PIPE_TEX_MIPFILTER_NONE; if (!cs) return NULL; - cs->TE_SAMPLER_CONFIG0 = + cs->base = *ss; + + cs->config0 = VIVS_TE_SAMPLER_CONFIG0_UWRAP(translate_texture_wrapmode(ss->wrap_s)) | VIVS_TE_SAMPLER_CONFIG0_VWRAP(translate_texture_wrapmode(ss->wrap_t)) | VIVS_TE_SAMPLER_CONFIG0_MIN(translate_texture_filter(ss->min_img_filter)) | 
VIVS_TE_SAMPLER_CONFIG0_MIP(translate_texture_mipfilter(ss->min_mip_filter)) | - VIVS_TE_SAMPLER_CONFIG0_MAG(translate_texture_filter(ss->mag_img_filter)); + VIVS_TE_SAMPLER_CONFIG0_MAG(translate_texture_filter(ss->mag_img_filter)) | + VIVS_TE_SAMPLER_CONFIG0_ANISOTROPY(COND(ansio, etna_log2_fixp55(ss->max_anisotropy))); /* ROUND_UV improves precision - but not compatible with NEAREST filter */ if (ss->min_img_filter != PIPE_TEX_FILTER_NEAREST && ss->mag_img_filter != PIPE_TEX_FILTER_NEAREST) { - cs->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_ROUND_UV; + cs->config0 |= VIVS_TE_SAMPLER_CONFIG0_ROUND_UV; } - cs->TE_SAMPLER_CONFIG1 = screen->specs.seamless_cube_map ? + cs->config1 = screen->specs.seamless_cube_map ? COND(ss->seamless_cube_map, VIVS_TE_SAMPLER_CONFIG1_SEAMLESS_CUBE_MAP) : 0; - cs->TE_SAMPLER_LOD_CONFIG = - COND(ss->lod_bias != 0.0, VIVS_TE_SAMPLER_LOD_CONFIG_BIAS_ENABLE) | + cs->config_lod = + COND(ss->lod_bias != 0.0 && mipmap, VIVS_TE_SAMPLER_LOD_CONFIG_BIAS_ENABLE) | VIVS_TE_SAMPLER_LOD_CONFIG_BIAS(etna_float_to_fixp55(ss->lod_bias)); - cs->TE_SAMPLER_3D_CONFIG = + cs->config_3d = VIVS_TE_SAMPLER_3D_CONFIG_WRAP(translate_texture_wrapmode(ss->wrap_r)); - if (ss->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + if (mipmap) { cs->min_lod = etna_float_to_fixp55(ss->min_lod); cs->max_lod = etna_float_to_fixp55(ss->max_lod); } else { /* when not mipmapping, we need to set max/min lod so that always * lowest LOD is selected */ - cs->min_lod = cs->max_lod = etna_float_to_fixp55(ss->min_lod); + cs->min_lod = cs->max_lod = etna_float_to_fixp55(0.0f); } /* if max_lod is 0, MIN filter will never be used (GC3000) @@ -131,7 +135,7 @@ etna_create_sampler_state_state(struct pipe_context *pipe, */ cs->max_lod_min = (ss->min_img_filter != ss->mag_img_filter) ? 
1 : 0; - cs->NTE_SAMPLER_BASELOD = + cs->baselod = COND(ss->compare_mode, VIVS_NTE_SAMPLER_BASELOD_COMPARE_ENABLE) | VIVS_NTE_SAMPLER_BASELOD_COMPARE_FUNC(translate_texture_compare(ss->compare_func)); @@ -150,6 +154,7 @@ etna_create_sampler_view_state(struct pipe_context *pctx, struct pipe_resource * { struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view); struct etna_context *ctx = etna_context(pctx); + struct etna_screen *screen = ctx->screen; const uint32_t format = translate_texture_format(so->format); const bool ext = !!(format & EXT_FORMAT); const bool astc = !!(format & ASTC_FORMAT); @@ -174,10 +179,10 @@ etna_create_sampler_view_state(struct pipe_context *pctx, struct pipe_resource * sv->base.context = pctx; /* merged with sampler state */ - sv->TE_SAMPLER_CONFIG0 = + sv->config0 = VIVS_TE_SAMPLER_CONFIG0_TYPE(translate_texture_target(sv->base.target)) | COND(!ext && !astc, VIVS_TE_SAMPLER_CONFIG0_FORMAT(format)); - sv->TE_SAMPLER_CONFIG0_MASK = 0xffffffff; + sv->config0_mask = 0xffffffff; uint32_t base_height = res->base.height0; uint32_t base_depth = res->base.depth0; @@ -188,9 +193,9 @@ etna_create_sampler_view_state(struct pipe_context *pctx, struct pipe_resource * /* use 2D texture with T wrap to repeat for 1D texture * TODO: check if old HW supports 1D texture */ - sv->TE_SAMPLER_CONFIG0_MASK = ~VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK; - sv->TE_SAMPLER_CONFIG0 &= ~VIVS_TE_SAMPLER_CONFIG0_TYPE__MASK; - sv->TE_SAMPLER_CONFIG0 |= + sv->config0_mask = ~VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK; + sv->config0 &= ~VIVS_TE_SAMPLER_CONFIG0_TYPE__MASK; + sv->config0 |= VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_2D) | VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_REPEAT); break; @@ -207,54 +212,52 @@ etna_create_sampler_view_state(struct pipe_context *pctx, struct pipe_resource * } if (res->layout == ETNA_LAYOUT_LINEAR && !util_format_is_compressed(so->format)) { - sv->TE_SAMPLER_CONFIG0 |= 
VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_LINEAR); - - for (int lod = 0; lod <= res->base.last_level; ++lod) - sv->TE_SAMPLER_LINEAR_STRIDE[lod] = res->levels[lod].stride; + sv->config0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_LINEAR); + assert(res->base.last_level == 0); + sv->linear_stride = res->levels[0].stride; } else { - sv->TE_SAMPLER_CONFIG0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_TILED); - memset(&sv->TE_SAMPLER_LINEAR_STRIDE, 0, sizeof(sv->TE_SAMPLER_LINEAR_STRIDE)); + sv->config0 |= VIVS_TE_SAMPLER_CONFIG0_ADDRESSING_MODE(TEXTURE_ADDRESSING_MODE_TILED); + sv->linear_stride = 0; } - sv->TE_SAMPLER_CONFIG1 |= COND(ext, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) | - COND(astc, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(TEXTURE_FORMAT_EXT_ASTC)) | - COND(is_array, VIVS_TE_SAMPLER_CONFIG1_TEXTURE_ARRAY) | - VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign) | swiz; - sv->TE_SAMPLER_ASTC0 = COND(astc, VIVS_NTE_SAMPLER_ASTC0_ASTC_FORMAT(format)) | - COND(astc && srgb, VIVS_NTE_SAMPLER_ASTC0_ASTC_SRGB) | - VIVS_NTE_SAMPLER_ASTC0_UNK8(0xc) | - VIVS_NTE_SAMPLER_ASTC0_UNK16(0xc) | - VIVS_NTE_SAMPLER_ASTC0_UNK24(0xc); - sv->TE_SAMPLER_SIZE = VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0) | - VIVS_TE_SAMPLER_SIZE_HEIGHT(base_height); - sv->TE_SAMPLER_LOG_SIZE = + sv->config1 |= COND(ext, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(format)) | + COND(astc, VIVS_TE_SAMPLER_CONFIG1_FORMAT_EXT(TEXTURE_FORMAT_EXT_ASTC)) | + COND(is_array, VIVS_TE_SAMPLER_CONFIG1_TEXTURE_ARRAY) | + VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign) | swiz; + sv->astc0 = COND(astc, VIVS_NTE_SAMPLER_ASTC0_ASTC_FORMAT(format)) | + COND(astc && srgb, VIVS_NTE_SAMPLER_ASTC0_ASTC_SRGB) | + VIVS_NTE_SAMPLER_ASTC0_UNK8(0xc) | + VIVS_NTE_SAMPLER_ASTC0_UNK16(0xc) | + VIVS_NTE_SAMPLER_ASTC0_UNK24(0xc); + sv->size = VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0) | + VIVS_TE_SAMPLER_SIZE_HEIGHT(base_height); + sv->log_size = 
VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(etna_log2_fixp55(res->base.width0)) | VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(etna_log2_fixp55(base_height)) | COND(util_format_is_srgb(so->format) && !astc, VIVS_TE_SAMPLER_LOG_SIZE_SRGB) | - COND(astc, VIVS_TE_SAMPLER_LOG_SIZE_ASTC) | - COND(texture_use_int_filter(so, false), VIVS_TE_SAMPLER_LOG_SIZE_INT_FILTER); - sv->TE_SAMPLER_3D_CONFIG = + COND(astc, VIVS_TE_SAMPLER_LOG_SIZE_ASTC); + sv->config_3d = VIVS_TE_SAMPLER_3D_CONFIG_DEPTH(base_depth) | VIVS_TE_SAMPLER_3D_CONFIG_LOG_DEPTH(etna_log2_fixp55(base_depth)); /* Set up levels-of-detail */ for (int lod = 0; lod <= res->base.last_level; ++lod) { - sv->TE_SAMPLER_LOD_ADDR[lod].bo = res->bo; - sv->TE_SAMPLER_LOD_ADDR[lod].offset = res->levels[lod].offset; - sv->TE_SAMPLER_LOD_ADDR[lod].flags = ETNA_RELOC_READ; + sv->lod_addr[lod].bo = res->bo; + sv->lod_addr[lod].offset = res->levels[lod].offset; + sv->lod_addr[lod].flags = ETNA_RELOC_READ; } sv->min_lod = sv->base.u.tex.first_level << 5; sv->max_lod = MIN2(sv->base.u.tex.last_level, res->base.last_level) << 5; /* Workaround for npot textures -- it appears that only CLAMP_TO_EDGE is * supported when the appropriate capability is not set. 
*/ - if (!ctx->specs.npot_tex_any_wrap && + if (!screen->specs.npot_tex_any_wrap && (!util_is_power_of_two_or_zero(res->base.width0) || !util_is_power_of_two_or_zero(res->base.height0))) { - sv->TE_SAMPLER_CONFIG0_MASK = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK | - VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK); - sv->TE_SAMPLER_CONFIG0 |= + sv->config0_mask = ~(VIVS_TE_SAMPLER_CONFIG0_UWRAP__MASK | + VIVS_TE_SAMPLER_CONFIG0_VWRAP__MASK); + sv->config0 |= VIVS_TE_SAMPLER_CONFIG0_UWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE) | VIVS_TE_SAMPLER_CONFIG0_VWRAP(TEXTURE_WRAPMODE_CLAMP_TO_EDGE); } @@ -279,9 +282,8 @@ etna_sampler_view_state_destroy(struct pipe_context *pctx, #define EMIT_STATE_RELOC(state_name, src_value) \ etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value) -/* Emit plain (non-descriptor) texture state */ static void -etna_emit_texture_state(struct etna_context *ctx) +etna_emit_ts_state(struct etna_context *ctx) { struct etna_cmd_stream *stream = ctx->stream; uint32_t active_samplers = active_samplers_bits(ctx); @@ -316,6 +318,155 @@ etna_emit_texture_state(struct etna_context *ctx) } } } + + etna_coalesce_end(stream, &coalesce); +} + +static void +etna_emit_new_texture_state(struct etna_context *ctx) +{ + struct etna_cmd_stream *stream = ctx->stream; + struct etna_screen *screen = ctx->screen; + uint32_t active_samplers = active_samplers_bits(ctx); + uint32_t dirty = ctx->dirty; + struct etna_coalesce coalesce; + + etna_emit_ts_state(ctx); + + etna_coalesce_start(stream, &coalesce); + + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) { + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + uint32_t val = 0; /* 0 == sampler inactive */ + + /* set active samplers to their configuration value (determined by both + * the sampler state and sampler view) */ + if ((1 << x) & active_samplers) { + struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]); + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); 
+ + val = (ss->config0 & sv->config0_mask) | sv->config0; + } + + /*10000*/ EMIT_STATE(NTE_SAMPLER_CONFIG0(x), val); + } + } + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + struct etna_sampler_state *ss; + struct etna_sampler_view *sv; + + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + sv = etna_sampler_view(ctx->sampler_view[x]); + /*10080*/ EMIT_STATE(NTE_SAMPLER_SIZE(x), sv->size); + } + } + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + ss = etna_sampler_state(ctx->sampler[x]); + sv = etna_sampler_view(ctx->sampler_view[x]); + uint32_t log_size = sv->log_size; + + if (texture_use_int_filter(&sv->base, &ss->base, false)) + log_size |= VIVS_TE_SAMPLER_LOG_SIZE_INT_FILTER; + + /*10100*/ EMIT_STATE(NTE_SAMPLER_LOG_SIZE(x), log_size); + } + } + } + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) { + struct etna_sampler_state *ss; + struct etna_sampler_view *sv; + + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + ss = etna_sampler_state(ctx->sampler[x]); + sv = etna_sampler_view(ctx->sampler_view[x]); + + unsigned max_lod = MAX2(MIN2(ss->max_lod + sv->min_lod, sv->max_lod), ss->max_lod_min); + unsigned min_lod = MIN2(MAX2(ss->min_lod + sv->min_lod, sv->min_lod), max_lod); + + /* min and max lod is determined both by the sampler and the view */ + /*10180*/ EMIT_STATE(NTE_SAMPLER_LOD_CONFIG(x), + ss->config_lod | + VIVS_TE_SAMPLER_LOD_CONFIG_MAX(max_lod) | + VIVS_TE_SAMPLER_LOD_CONFIG_MIN(min_lod)); + } + } + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + /* only LOD0 is valid for this register */ + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); + /*10280*/ EMIT_STATE(NTE_SAMPLER_LINEAR_STRIDE(0, x), sv->linear_stride); + } + } + } + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & 
active_samplers) { + ss = etna_sampler_state(ctx->sampler[x]); + sv = etna_sampler_view(ctx->sampler_view[x]); + + /*10300*/ EMIT_STATE(NTE_SAMPLER_3D_CONFIG(x), ss->config_3d | + sv->config_3d); + } + } + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + ss = etna_sampler_state(ctx->sampler[x]); + sv = etna_sampler_view(ctx->sampler_view[x]); + + /*10380*/ EMIT_STATE(NTE_SAMPLER_CONFIG1(x), ss->config1 | + sv->config1 | + COND(sv->ts.enable, VIVS_TE_SAMPLER_CONFIG1_USE_TS)); + } + } + } + if (unlikely(screen->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); + /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x), sv->astc0); + } + } + } + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]); + /*10700*/ EMIT_STATE(NTE_SAMPLER_BASELOD(x), ss->baselod); + } + } + } + + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + for (int x = 0; x < VIVS_NTE_SAMPLER__LEN; ++x) { + if ((1 << x) & active_samplers) { + for (int y = 0; y < VIVS_NTE_SAMPLER_ADDR_LOD__LEN; ++y) { + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); + /*10800*/ EMIT_STATE_RELOC(NTE_SAMPLER_ADDR_LOD(x, y), &sv->lod_addr[y]); + } + } + } + } + + etna_coalesce_end(stream, &coalesce); +} + +/* Emit plain (non-descriptor) texture state */ +static void +etna_emit_texture_state(struct etna_context *ctx) +{ + struct etna_cmd_stream *stream = ctx->stream; + struct etna_screen *screen = ctx->screen; + uint32_t active_samplers = active_samplers_bits(ctx); + uint32_t dirty = ctx->dirty; + struct etna_coalesce coalesce; + + etna_emit_ts_state(ctx); + + etna_coalesce_start(stream, &coalesce); + if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | 
ETNA_DIRTY_SAMPLERS))) { for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { uint32_t val = 0; /* 0 == sampler inactive */ @@ -326,26 +477,32 @@ etna_emit_texture_state(struct etna_context *ctx) struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]); struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); - val = (ss->TE_SAMPLER_CONFIG0 & sv->TE_SAMPLER_CONFIG0_MASK) | - sv->TE_SAMPLER_CONFIG0; + val = (ss->config0 & sv->config0_mask) | sv->config0; } /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), val); } } if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { + struct etna_sampler_state *ss; struct etna_sampler_view *sv; for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { if ((1 << x) & active_samplers) { sv = etna_sampler_view(ctx->sampler_view[x]); - /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->TE_SAMPLER_SIZE); + /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), sv->size); } } for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { if ((1 << x) & active_samplers) { + ss = etna_sampler_state(ctx->sampler[x]); sv = etna_sampler_view(ctx->sampler_view[x]); - /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), sv->TE_SAMPLER_LOG_SIZE); + uint32_t log_size = sv->log_size; + + if (texture_use_int_filter(&sv->base, &ss->base, false)) + log_size |= VIVS_TE_SAMPLER_LOG_SIZE_INT_FILTER; + + /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), log_size); } } } @@ -358,13 +515,14 @@ etna_emit_texture_state(struct etna_context *ctx) ss = etna_sampler_state(ctx->sampler[x]); sv = etna_sampler_view(ctx->sampler_view[x]); - unsigned max_lod = MAX2(MIN2(ss->max_lod, sv->max_lod), ss->max_lod_min); + unsigned max_lod = MAX2(MIN2(ss->max_lod + sv->min_lod, sv->max_lod), ss->max_lod_min); + unsigned min_lod = MIN2(MAX2(ss->min_lod + sv->min_lod, sv->min_lod), max_lod); /* min and max lod is determined both by the sampler and the view */ /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x), - ss->TE_SAMPLER_LOD_CONFIG | + ss->config_lod | VIVS_TE_SAMPLER_LOD_CONFIG_MAX(max_lod) | - 
VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(ss->min_lod, sv->min_lod))); + VIVS_TE_SAMPLER_LOD_CONFIG_MIN(min_lod)); } } for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { @@ -372,8 +530,8 @@ etna_emit_texture_state(struct etna_context *ctx) ss = etna_sampler_state(ctx->sampler[x]); sv = etna_sampler_view(ctx->sampler_view[x]); - /*02180*/ EMIT_STATE(TE_SAMPLER_3D_CONFIG(x), ss->TE_SAMPLER_3D_CONFIG | - sv->TE_SAMPLER_3D_CONFIG); + /*02180*/ EMIT_STATE(TE_SAMPLER_3D_CONFIG(x), ss->config_3d | + sv->config_3d); } } for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { @@ -381,8 +539,8 @@ etna_emit_texture_state(struct etna_context *ctx) ss = etna_sampler_state(ctx->sampler[x]); sv = etna_sampler_view(ctx->sampler_view[x]); - /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->TE_SAMPLER_CONFIG1 | - sv->TE_SAMPLER_CONFIG1 | + /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), ss->config1 | + sv->config1 | COND(sv->ts.enable, VIVS_TE_SAMPLER_CONFIG1_USE_TS)); } } @@ -392,37 +550,29 @@ etna_emit_texture_state(struct etna_context *ctx) for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { if ((1 << x) & active_samplers) { struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); - /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y),&sv->TE_SAMPLER_LOD_ADDR[y]); + /*02400*/ EMIT_STATE_RELOC(TE_SAMPLER_LOD_ADDR(x, y), &sv->lod_addr[y]); } } } } if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS))) { - for (int y = 0; y < VIVS_TE_SAMPLER_LINEAR_STRIDE__LEN; ++y) { - for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { - if ((1 << x) & active_samplers) { - struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); - /*02C00*/ EMIT_STATE(TE_SAMPLER_LINEAR_STRIDE(x, y), sv->TE_SAMPLER_LINEAR_STRIDE[y]); - } - } - } - } - if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { + /* only LOD0 is valid for this register */ for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { if ((1 << x) & active_samplers) { struct etna_sampler_view *sv = 
etna_sampler_view(ctx->sampler_view[x]); - /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x), sv->TE_SAMPLER_ASTC0); + /*02C00*/ EMIT_STATE(TE_SAMPLER_LINEAR_STRIDE(0, x), sv->linear_stride); } } } - if (unlikely(ctx->specs.halti >= 1 && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { + if (unlikely(screen->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { if ((1 << x) & active_samplers) { - struct etna_sampler_state *ss = etna_sampler_state(ctx->sampler[x]); - /*10700*/ EMIT_STATE(NTE_SAMPLER_BASELOD(x), ss->NTE_SAMPLER_BASELOD); + struct etna_sampler_view *sv = etna_sampler_view(ctx->sampler_view[x]); + /*10500*/ EMIT_STATE(NTE_SAMPLER_ASTC0(x), sv->astc0); } } } + etna_coalesce_end(stream, &coalesce); } @@ -446,6 +596,12 @@ etna_texture_state_init(struct pipe_context *pctx) ctx->base.delete_sampler_state = etna_delete_sampler_state_state; ctx->base.create_sampler_view = etna_create_sampler_view_state; ctx->base.sampler_view_destroy = etna_sampler_view_state_destroy; - ctx->emit_texture_state = etna_emit_texture_state; ctx->ts_for_sampler_view = etna_ts_for_sampler_view_state; + + STATIC_ASSERT(VIVS_TE_SAMPLER_LOD_ADDR__LEN == VIVS_NTE_SAMPLER_ADDR_LOD__LEN); + + if (ctx->screen->specs.halti >= 1) + ctx->emit_texture_state = etna_emit_new_texture_state; + else + ctx->emit_texture_state = etna_emit_texture_state; } diff --git a/lib/mesa/src/gallium/drivers/etnaviv/hw/common_3d.xml.h b/lib/mesa/src/gallium/drivers/etnaviv/hw/common_3d.xml.h index 8308bffa9..c8e03646b 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/hw/common_3d.xml.h +++ b/lib/mesa/src/gallium/drivers/etnaviv/hw/common_3d.xml.h @@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2019-08-09 17:33:50) -- copyright.xml ( 1597 bytes, from 2019-08-09 17:34:08) -- common.xml ( 35468 bytes, from 2019-08-09 17:16:20) 
-- common_3d.xml ( 14991 bytes, from 2019-09-12 20:32:47) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) +- common_3d.xml ( 15058 bytes, from 2020-04-17 16:31:50) -Copyright (C) 2012-2019 by the following authors: +Copyright (C) 2012-2020 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de> diff --git a/lib/mesa/src/gallium/drivers/etnaviv/hw/state_blt.xml.h b/lib/mesa/src/gallium/drivers/etnaviv/hw/state_blt.xml.h index d01a5fc4d..060a56bcc 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/hw/state_blt.xml.h +++ b/lib/mesa/src/gallium/drivers/etnaviv/hw/state_blt.xml.h @@ -8,17 +8,17 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- state.xml ( 26666 bytes, from 2019-08-19 14:35:07) -- common.xml ( 35468 bytes, from 2019-01-07 09:52:31) -- common_3d.xml ( 14322 bytes, from 2019-08-19 14:35:07) -- state_hi.xml ( 30232 bytes, from 2019-01-07 09:52:31) -- copyright.xml ( 1597 bytes, from 2019-01-07 09:52:31) -- state_2d.xml ( 51552 bytes, from 2019-01-07 09:52:31) -- state_3d.xml ( 83505 bytes, from 2019-08-19 14:46:17) -- state_blt.xml ( 14252 bytes, from 2019-08-19 14:35:07) -- state_vg.xml ( 5975 bytes, from 2019-01-07 09:52:31) - -Copyright (C) 2012-2019 by the following authors: +- state.xml ( 26877 bytes, from 2020-02-14 10:19:56) +- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) +- common_3d.xml ( 15058 bytes, from 2020-04-17 16:31:50) +- state_hi.xml ( 34851 bytes, from 2020-04-17 16:25:34) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- state_2d.xml ( 51552 bytes, from 2018-02-10 13:09:26) +- state_3d.xml ( 83771 bytes, from 2020-04-17 17:15:55) +- state_blt.xml ( 14252 bytes, from 2020-01-10 14:36:29) +- 
state_vg.xml ( 5975 bytes, from 2018-02-10 13:09:26) + +Copyright (C) 2012-2020 by the following authors: - Wladimir J. van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de> diff --git a/lib/mesa/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h b/lib/mesa/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h index 0226c2e3c..7c1a1cfab 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h +++ b/lib/mesa/src/gallium/drivers/etnaviv/hw/texdesc_3d.xml.h @@ -8,12 +8,12 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- texdesc_3d.xml ( 3183 bytes, from 2019-08-09 17:33:50) -- copyright.xml ( 1597 bytes, from 2019-08-09 17:34:08) -- common.xml ( 35468 bytes, from 2019-08-09 17:16:20) -- common_3d.xml ( 14991 bytes, from 2019-09-12 20:32:47) +- texdesc_3d.xml ( 3183 bytes, from 2018-02-10 13:09:26) +- copyright.xml ( 1597 bytes, from 2018-02-10 13:09:26) +- common.xml ( 35468 bytes, from 2020-01-04 20:02:31) +- common_3d.xml ( 15058 bytes, from 2020-04-17 16:31:50) -Copyright (C) 2012-2019 by the following authors: +Copyright (C) 2012-2018 by the following authors: - Wladimir J. 
van der Laan <laanwj@gmail.com> - Christian Gmeiner <christian.gmeiner@gmail.com> - Lucas Stach <l.stach@pengutronix.de> diff --git a/lib/mesa/src/gallium/drivers/etnaviv/meson.build b/lib/mesa/src/gallium/drivers/etnaviv/meson.build index e62a37e58..f8dcba887 100644 --- a/lib/mesa/src/gallium/drivers/etnaviv/meson.build +++ b/lib/mesa/src/gallium/drivers/etnaviv/meson.build @@ -35,15 +35,20 @@ files_etnaviv = files( 'etnaviv_blt.h', 'etnaviv_clear_blit.c', 'etnaviv_clear_blit.h', + 'etnaviv_compiler.c', 'etnaviv_compiler.h', 'etnaviv_compiler_nir.c', - 'etnaviv_compiler_nir_emit.h', + 'etnaviv_compiler_nir_emit.c', + 'etnaviv_compiler_nir_liveness.c', + 'etnaviv_compiler_nir_ra.c', 'etnaviv_compiler_tgsi.c', 'etnaviv_context.c', 'etnaviv_context.h', 'etnaviv_debug.h', 'etnaviv_disasm.c', 'etnaviv_disasm.h', + 'etnaviv_disk_cache.c', + 'etnaviv_disk_cache.h', 'etnaviv_emit.c', 'etnaviv_emit.h', 'etnaviv_etc2.c', @@ -53,14 +58,19 @@ files_etnaviv = files( 'etnaviv_format.c', 'etnaviv_format.h', 'etnaviv_internal.h', + 'etnaviv_nir_lower_ubo_to_uniform.c', + 'etnaviv_nir.c', + 'etnaviv_nir.h', + 'etnaviv_perfmon.c', + 'etnaviv_perfmon.h', 'etnaviv_query.c', 'etnaviv_query.h', - 'etnaviv_query_hw.c', - 'etnaviv_query_hw.h', + 'etnaviv_query_acc_occlusion.c', + 'etnaviv_query_acc_perfmon.c', + 'etnaviv_query_acc.c', + 'etnaviv_query_acc.h', 'etnaviv_query_sw.c', 'etnaviv_query_sw.h', - 'etnaviv_query_pm.c', - 'etnaviv_query_pm.h', 'etnaviv_rasterizer.c', 'etnaviv_rasterizer.h', 'etnaviv_resource.c', @@ -96,12 +106,12 @@ files_etnaviv = files( libetnaviv = static_library( 'etnaviv', files_etnaviv, - c_args : [c_vis_args], + gnu_symbol_visibility : 'hidden', include_directories : [ inc_include, inc_src, inc_gallium, inc_gallium_aux, inc_etnaviv, ], link_with: libetnaviv_drm, - dependencies : [dep_libdrm, idep_nir_headers], + dependencies : [dep_libdrm, idep_nir_headers, idep_mesautil], ) etnaviv_compiler = executable( @@ -121,3 +131,19 @@ driver_etnaviv = 
declare_dependency( link_with : [libetnaviv, libetnavivdrm], dependencies : idep_nir, ) + +if with_tests + test( + 'lower_ubo', + executable( + 'nir_lower_ubo_test', + files('tests/lower_ubo_tests.cpp'), + cpp_args : [cpp_msvc_compat_args], + gnu_symbol_visibility : 'hidden', + include_directories : [inc_include, inc_src, inc_gallium], + link_with : [libetnaviv], + dependencies : [idep_gtest, idep_nir], + ), + suite : ['compiler', 'etnaviv'], + ) +endif diff --git a/lib/mesa/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp b/lib/mesa/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp new file mode 100644 index 000000000..24342b288 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/etnaviv/tests/lower_ubo_tests.cpp @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2020 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Christian Gmeiner <christian.gmeiner@gmail.com> + */ + +#include <gtest/gtest.h> + +#include "nir.h" +#include "nir_builder.h" + +extern "C" { + /* we really do not want to include etnaviv_nir.h as it makes it + * harder to get this test compiling. as we are only working with + * nir we do not need any etnaviv specifc stuff here. */ + + extern bool + etna_nir_lower_ubo_to_uniform(nir_shader *shader); +} + +class nir_lower_ubo_test : public ::testing::Test { +protected: + nir_lower_ubo_test(); + ~nir_lower_ubo_test(); + + nir_intrinsic_instr *intrinsic(nir_intrinsic_op op); + unsigned count_intrinsic(nir_intrinsic_op op); + + nir_builder b; +}; + +nir_lower_ubo_test::nir_lower_ubo_test() +{ + glsl_type_singleton_init_or_ref(); + + static const nir_shader_compiler_options options = { }; + b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, &options, "ubo lowering tests"); +} + +nir_lower_ubo_test::~nir_lower_ubo_test() +{ + if (HasFailure()) { + printf("\nShader from the failed test:\n\n"); + nir_print_shader(b.shader, stdout); + } + + ralloc_free(b.shader); + + glsl_type_singleton_decref(); +} + +nir_intrinsic_instr * +nir_lower_ubo_test::intrinsic(nir_intrinsic_op op) +{ + nir_foreach_block(block, b.impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == op) + return intr; + } + } + return NULL; +} + +unsigned +nir_lower_ubo_test::count_intrinsic(nir_intrinsic_op op) +{ + unsigned count = 0; + + nir_foreach_block(block, b.impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == op) + count++; + } + } + return count; +} + +TEST_F(nir_lower_ubo_test, nothing_to_lower) +{ + nir_ssa_def *offset = nir_imm_int(&b, 4); + + nir_load_uniform(&b, 1, 32, offset); + + 
nir_validate_shader(b.shader, NULL); + + ASSERT_FALSE(etna_nir_lower_ubo_to_uniform(b.shader)); + nir_validate_shader(b.shader, NULL); + + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_ubo), 0); + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_uniform), 1); +} + +TEST_F(nir_lower_ubo_test, basic) +{ + nir_ssa_def *offset = nir_imm_int(&b, 4); + nir_load_uniform(&b, 1, 32, offset); + + nir_lower_uniforms_to_ubo(b.shader, false, false); + nir_opt_constant_folding(b.shader); + + ASSERT_TRUE(etna_nir_lower_ubo_to_uniform(b.shader)); + nir_validate_shader(b.shader, NULL); + nir_opt_constant_folding(b.shader); + + nir_intrinsic_instr *load_uniform = intrinsic(nir_intrinsic_load_uniform); + ASSERT_EQ(nir_src_as_uint(load_uniform->src[0]), 4); + ASSERT_EQ(intrinsic(nir_intrinsic_load_ubo), nullptr); +} + +TEST_F(nir_lower_ubo_test, index_not_null) +{ + nir_ssa_def *index = nir_imm_int(&b, 1); + nir_ssa_def *offset = nir_imm_int(&b, 4); + + nir_load_ubo(&b, 1, 32, index, offset, .align_mul = 16, .align_offset = 0, .range_base = 0, .range = 8); + + nir_validate_shader(b.shader, NULL); + + ASSERT_FALSE(etna_nir_lower_ubo_to_uniform(b.shader)); + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_ubo), 1); + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_uniform), 0); +} + +TEST_F(nir_lower_ubo_test, indirect_index) +{ + nir_ssa_def *one = nir_imm_int(&b, 1); + nir_ssa_def *index = nir_fadd(&b, one, one); + nir_ssa_def *offset = nir_imm_int(&b, 4); + + nir_load_ubo(&b, 1, 32, index, offset, .align_mul = 16, .align_offset = 0, .range_base = 0, .range = 8); + + nir_validate_shader(b.shader, NULL); + + ASSERT_FALSE(etna_nir_lower_ubo_to_uniform(b.shader)); + nir_validate_shader(b.shader, NULL); + + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_ubo), 1); + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_uniform), 0); +} + +TEST_F(nir_lower_ubo_test, indirect_offset) +{ + nir_ssa_def *one = nir_imm_int(&b, 1); + nir_ssa_def *index = nir_imm_int(&b, 0); + nir_ssa_def *offset = nir_fadd(&b, one, 
one); + + nir_load_ubo(&b, 1, 32, index, offset, .align_mul = 16, .align_offset = 0, .range_base = 0, .range = 8); + + nir_validate_shader(b.shader, NULL); + + ASSERT_FALSE(etna_nir_lower_ubo_to_uniform(b.shader)); + nir_validate_shader(b.shader, NULL); + + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_ubo), 1); + ASSERT_EQ(count_intrinsic(nir_intrinsic_load_uniform), 0); +} |