author     Jonathan Gray <jsg@cvs.openbsd.org>    2021-07-22 10:50:50 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2021-07-22 10:50:50 +0000
commit     9130ec005fbc78a62420643414d8354d0929ca50 (patch)
tree       6762777acdd2d4eee17ef87290e80dc7afe2b73d /lib/mesa/src/gallium/drivers/r600
parent     ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (diff)
Merge Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/r600')
96 files changed, 21571 insertions, 382 deletions
diff --git a/lib/mesa/src/gallium/drivers/r600/Makefile.sources b/lib/mesa/src/gallium/drivers/r600/Makefile.sources
index 763a49a07..52563b257 100644
--- a/lib/mesa/src/gallium/drivers/r600/Makefile.sources
+++ b/lib/mesa/src/gallium/drivers/r600/Makefile.sources
@@ -87,7 +87,81 @@ CXX_SOURCES = \
 	sb/sb_shader.cpp \
 	sb/sb_shader.h \
 	sb/sb_ssa_builder.cpp \
-	sb/sb_valtable.cpp
+	sb/sb_valtable.cpp \
+	sfn/sfn_alu_defines.cpp \
+	sfn/sfn_alu_defines.h \
+	sfn/sfn_callstack.cpp \
+	sfn/sfn_callstack.h \
+	sfn/sfn_conditionaljumptracker.cpp \
+	sfn/sfn_conditionaljumptracker.h \
+	sfn/sfn_defines.h \
+	sfn/sfn_debug.cpp \
+	sfn/sfn_debug.h \
+	sfn/sfn_emitaluinstruction.cpp \
+	sfn/sfn_emitaluinstruction.h \
+	sfn/sfn_emitinstruction.cpp \
+	sfn/sfn_emitinstruction.h \
+	sfn/sfn_emitssboinstruction.cpp \
+	sfn/sfn_emitssboinstruction.h \
+	sfn/sfn_emittexinstruction.cpp \
+	sfn/sfn_emittexinstruction.h \
+	sfn/sfn_emitinstruction.h \
+	sfn/sfn_instruction_alu.cpp \
+	sfn/sfn_instruction_alu.h \
+	sfn/sfn_instruction_base.cpp \
+	sfn/sfn_instruction_base.h \
+	sfn/sfn_instruction_block.cpp \
+	sfn/sfn_instruction_block.h \
+	sfn/sfn_instruction_cf.cpp \
+	sfn/sfn_instruction_cf.h \
+	sfn/sfn_instruction_export.cpp \
+	sfn/sfn_instruction_export.h \
+	sfn/sfn_instruction_fetch.cpp \
+	sfn/sfn_instruction_fetch.h \
+	sfn/sfn_instruction_lds.cpp \
+	sfn/sfn_instruction_lds.h \
+	sfn/sfn_instruction_gds.cpp \
+	sfn/sfn_instruction_gds.h \
+	sfn/sfn_instruction_misc.cpp \
+	sfn/sfn_instruction_misc.h \
+	sfn/sfn_instruction_tex.cpp \
+	sfn/sfn_instruction_tex.h \
+	sfn/sfn_ir_to_assembly.cpp \
+	sfn/sfn_ir_to_assembly.h \
+	sfn/sfn_liverange.cpp \
+	sfn/sfn_liverange.h \
+	sfn/sfn_nir.cpp \
+	sfn/sfn_nir.h \
+	sfn/sfn_nir_lower_64bit.cpp \
+	sfn/sfn_nir_lower_fs_out_to_vector.cpp \
+	sfn/sfn_nir_lower_fs_out_to_vector.h \
+	sfn/sfn_nir_lower_tess_io.cpp \
+	sfn/sfn_nir_vectorize_vs_inputs.c \
+	sfn/sfn_shader_base.cpp \
+	sfn/sfn_shader_base.h \
+	sfn/sfn_shader_compute.cpp \
+	sfn/sfn_shader_compute.h \
+	sfn/sfn_shader_fragment.cpp \
+	sfn/sfn_shader_fragment.h \
+	sfn/sfn_shader_geometry.cpp \
+	sfn/sfn_shader_geometry.h \
+	sfn/sfn_shader_tcs.cpp \
+	sfn/sfn_shader_tcs.h \
+	sfn/sfn_shader_tess_eval.cpp \
+	sfn/sfn_shader_tess_eval.h \
+	sfn/sfn_shader_vertex.cpp \
+	sfn/sfn_shader_vertex.h \
+	sfn/sfn_shaderio.cpp \
+	sfn/sfn_shaderio.h \
+	sfn/sfn_value.cpp \
+	sfn/sfn_value.h \
+	sfn/sfn_value_gpr.cpp \
+	sfn/sfn_value_gpr.h \
+	sfn/sfn_valuepool.cpp \
+	sfn/sfn_valuepool.h \
+	sfn/sfn_vertexstageexport.cpp \
+	sfn/sfn_vertexstageexport.h
 
 R600_GENERATED_FILES = \
-	egd_tables.h
\ No newline at end of file
+	egd_tables.h \
+	sfn_nir_algebraic.c
diff --git a/lib/mesa/src/gallium/drivers/r600/eg_asm.c b/lib/mesa/src/gallium/drivers/r600/eg_asm.c
index acf3fd374..9468e4b01 100644
--- a/lib/mesa/src/gallium/drivers/r600/eg_asm.c
+++ b/lib/mesa/src/gallium/drivers/r600/eg_asm.c
@@ -189,7 +189,7 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
 	memset(&alu, 0, sizeof(alu));
 	alu.op = ALU_OP1_MOVA_INT;
 	alu.src[0].sel = bc->index_reg[id];
-	alu.src[0].chan = 0;
+	alu.src[0].chan = bc->index_reg_chan[id];
 	if (bc->chip_class == CAYMAN)
 		alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
diff --git a/lib/mesa/src/gallium/drivers/r600/eg_debug.c b/lib/mesa/src/gallium/drivers/r600/eg_debug.c
index 56195df29..853b61044 100644
--- a/lib/mesa/src/gallium/drivers/r600/eg_debug.c
+++ b/lib/mesa/src/gallium/drivers/r600/eg_debug.c
@@ -256,7 +256,7 @@ static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
 				COLOR_RESET "\n");
 			break;
 		}
-		/* fall through, print all dwords */
+		FALLTHROUGH; /* print all dwords */
 	default:
 		for (i = 0; i < count+1; i++) {
 			print_spaces(f, INDENT_PKT);
@@ -305,7 +305,7 @@ static void eg_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
 			num_dw--;
 			break;
 		}
-		/* fall through */
+		FALLTHROUGH;
 	default:
 		fprintf(f, "Unknown packet type %i\n", type);
 		return;
@@ -332,10 +332,10 @@ static void eg_dump_last_ib(struct r600_context *rctx, FILE *f)
 	 * waited for the context, so this buffer should be idle.
 	 * If the GPU is hung, there is no point in waiting for it.
 	 */
-	uint32_t *map = rctx->b.ws->buffer_map(rctx->last_trace_buf->buf,
+	uint32_t *map = rctx->b.ws->buffer_map(rctx->b.ws, rctx->last_trace_buf->buf,
 					       NULL,
-					       PIPE_TRANSFER_UNSYNCHRONIZED |
-					       PIPE_TRANSFER_READ);
+					       PIPE_MAP_UNSYNCHRONIZED |
+					       PIPE_MAP_READ);
 	if (map)
 		last_trace_id = *map;
 }
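The eg_debug.c hunks above show two API migrations this merge applies throughout the driver: radeon_winsys::buffer_map/buffer_unmap now take the winsys itself as their first argument, and the PIPE_TRANSFER_* usage flags are renamed to PIPE_MAP_*. A minimal before/after sketch of the calling convention (the ws/buf variable names are illustrative, not from the diff):

	/* Mesa <= 20.x style */
	uint32_t *map = ws->buffer_map(buf, NULL,
	                               PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ);
	/* ... use map ... */
	ws->buffer_unmap(buf);

	/* Mesa 21.1 style, as used in the hunks above and below */
	uint32_t *map = ws->buffer_map(ws, buf, NULL,
	                               PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ);
	/* ... use map ... */
	ws->buffer_unmap(ws, buf);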
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c b/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
index 419738eec..0602a54dc 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
@@ -193,7 +193,7 @@ static void evergreen_cs_set_constant_buffer(struct r600_context *rctx,
 	cb.buffer = buffer;
 	cb.user_buffer = NULL;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, &cb);
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, false, &cb);
 }
 
 /* We need to define these R600 registers here, because we can't include
@@ -441,8 +441,9 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
 	shader->ir_type = cso->ir_type;
 
-	if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
-		shader->sel = r600_create_shader_state_tokens(ctx, cso->prog, PIPE_SHADER_COMPUTE);
+	if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    shader->ir_type == PIPE_SHADER_IR_NIR) {
+		shader->sel = r600_create_shader_state_tokens(ctx, cso->prog, cso->ir_type, PIPE_SHADER_COMPUTE);
 		return shader;
 	}
 #ifdef HAVE_OPENCL
@@ -457,10 +458,10 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
 					shader->bc.ndw * 4);
 	p = r600_buffer_map_sync_with_rings(
 		&rctx->b, shader->code_bo,
-		PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+		PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
 	//TODO: use util_memcpy_cpu_to_le32 ?
 	memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
-	rctx->b.ws->buffer_unmap(shader->code_bo->buf);
+	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->code_bo->buf);
 #endif
 
 	return shader;
@@ -476,7 +477,8 @@ static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state
 	if (!shader)
 		return;
 
-	if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    shader->ir_type == PIPE_SHADER_IR_NIR) {
 		r600_delete_shader_selector(ctx, shader->sel);
 	} else {
 #ifdef HAVE_OPENCL
@@ -500,12 +502,14 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx, void *state)
 		return;
 	}
 
-	if (cstate->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (cstate->ir_type == PIPE_SHADER_IR_TGSI ||
+	    cstate->ir_type == PIPE_SHADER_IR_NIR) {
 		bool compute_dirty;
-
-		r600_shader_select(ctx, cstate->sel, &compute_dirty);
+		cstate->sel->ir_type = cstate->ir_type;
+		if (r600_shader_select(ctx, cstate->sel, &compute_dirty))
+			R600_ERR("Failed to select compute shader\n");
 	}
-
+
 	rctx->cs_shader_state.shader = (struct r600_pipe_compute *)state;
 }
@@ -553,7 +557,7 @@ static void evergreen_compute_upload_input(struct pipe_context *ctx,
 	u_box_1d(0, input_size, &box);
 	num_work_groups_start = ctx->transfer_map(ctx,
 			(struct pipe_resource*)shader->kernel_param,
-			0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
+			0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
 			&box, &transfer);
 	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
 	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
@@ -594,7 +598,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
 				    uint32_t indirect_grid[3])
 {
 	int i;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
 	bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
 	unsigned num_waves;
@@ -604,9 +608,10 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
 	int grid_size = 1;
 	unsigned lds_size = shader->local_size / 4;
 
-	if (shader->ir_type != PIPE_SHADER_IR_TGSI)
+	if (shader->ir_type != PIPE_SHADER_IR_TGSI &&
+	    shader->ir_type != PIPE_SHADER_IR_NIR)
 		lds_size += shader->bc.nlds_dw;
-
+
 	/* Calculate group_size/grid_size */
 	for (i = 0; i < 3; i++) {
 		group_size *= info->block[i];
@@ -673,7 +678,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
 
 static void compute_setup_cbs(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned i;
 
 	/* Emit colorbuffers. */
@@ -715,7 +720,7 @@ static void compute_setup_cbs(struct r600_context *rctx)
 static void compute_emit_cs(struct r600_context *rctx,
 			    const struct pipe_grid_info *info)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	bool compute_dirty = false;
 	struct r600_pipe_shader *current;
 	struct r600_shader_atomic combined_atomics[8];
@@ -723,7 +728,7 @@ static void compute_emit_cs(struct r600_context *rctx,
 	uint32_t indirect_grid[3] = { 0, 0, 0 };
 
 	/* make sure that the gfx ring is only one active */
-	if (radeon_emitted(rctx->b.dma.cs, 0)) {
+	if (radeon_emitted(&rctx->b.dma.cs, 0)) {
 		rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
 	}
 
@@ -734,8 +739,13 @@ static void compute_emit_cs(struct r600_context *rctx,
 		rctx->cmd_buf_is_compute = true;
 	}
 
-	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
-		r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
+	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI||
+	    rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR) {
+		if (r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty)) {
+			R600_ERR("Failed to select compute shader\n");
+			return;
+		}
+
 		current = rctx->cs_shader_state.shader->sel->current;
 		if (compute_dirty) {
 			rctx->cs_shader_state.atom.num_dw = current->command_buffer.num_dw;
@@ -748,7 +758,7 @@ static void compute_emit_cs(struct r600_context *rctx,
 
 	if (info->indirect) {
 		struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect;
-		unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_TRANSFER_READ);
+		unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_MAP_READ);
 		unsigned offset = info->indirect_offset / 4;
 		indirect_grid[0] = data[offset];
 		indirect_grid[1] = data[offset + 1];
@@ -786,7 +796,8 @@ static void compute_emit_cs(struct r600_context *rctx,
 
 	/* emit config state */
 	if (rctx->b.chip_class == EVERGREEN) {
-		if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
+		if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI||
+		    rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR) {
 			radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
 			radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
 			radeon_emit(cs, 0);
@@ -799,7 +810,8 @@ static void compute_emit_cs(struct r600_context *rctx,
 	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
 	r600_flush_emit(rctx);
 
-	if (rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_TGSI) {
+	if (rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_TGSI &&
+	    rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_NIR) {
 
 		compute_setup_cbs(rctx);
 
@@ -855,7 +867,8 @@ static void compute_emit_cs(struct r600_context *rctx,
 		radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0));
 		radeon_emit(cs, 0);
 	}
-	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI)
+	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR)
 		evergreen_emit_atomic_buffer_save(rctx, true, combined_atomics, &atomic_used_mask);
 
 #if 0
@@ -877,12 +890,13 @@ void evergreen_emit_cs_shader(struct r600_context *rctx,
 	struct r600_cs_shader_state *state =
 		(struct r600_cs_shader_state*)atom;
 	struct r600_pipe_compute *shader = state->shader;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint64_t va;
 	struct r600_resource *code_bo;
 	unsigned ngpr, nstack;
 
-	if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    shader->ir_type == PIPE_SHADER_IR_NIR) {
 		code_bo = shader->sel->current->bo;
 		va = shader->sel->current->bo->gpu_address;
 		ngpr = shader->sel->current->shader.bc.ngpr;
@@ -916,7 +930,8 @@ static void evergreen_launch_grid(struct pipe_context *ctx,
 	struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
 	boolean use_kill;
 
-	if (shader->ir_type != PIPE_SHADER_IR_TGSI) {
+	if (shader->ir_type != PIPE_SHADER_IR_TGSI &&
+	    shader->ir_type != PIPE_SHADER_IR_NIR) {
 		rctx->cs_shader_state.pc = info->pc;
 		/* Get the config information for this kernel. */
 		r600_shader_binary_read_config(&shader->binary, &shader->bc,
@@ -1243,7 +1258,7 @@ static void *r600_compute_global_transfer_map(struct pipe_context *ctx,
 
 	dst = (struct pipe_resource*)item->real_buffer;
 
-	if (usage & PIPE_TRANSFER_READ)
+	if (usage & PIPE_MAP_READ)
 		buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
 
 	COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
@@ -1273,7 +1288,7 @@ static void r600_compute_global_transfer_unmap(struct pipe_context *ctx,
 	 * to an offset within the compute memory pool. The function
 	 * r600_compute_global_transfer_map() maps the memory pool
 	 * resource rather than the struct r600_resource_global passed to
-	 * it as an argument and then initalizes ptransfer->resource with
+	 * it as an argument and then initializes ptransfer->resource with
 	 * the memory pool resource (via pipe_buffer_map_range).
 	 * When transfer_unmap is called it uses the memory pool's
 	 * vtable which calls r600_buffer_transfer_map() rather than
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c b/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
index da8553886..54bd19fbc 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 			       uint64_t src_offset,
 			       uint64_t size)
 {
-	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
 	unsigned i, ncopy, csize, sub_cmd, shift;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -85,7 +85,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 				   unsigned size, uint32_t clear_value,
 				   enum r600_coherency coher)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 
 	assert(size);
 	assert(rctx->screen->b.has_cp_dma);
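A pattern recurring in the files above and below: the gfx and dma command buffers are now embedded in r600_common_context rather than held as pointers, so `cs = rctx->b.gfx.cs` becomes `cs = &rctx->b.gfx.cs`, and "is there a DMA ring" is tested via the embedded struct (`rctx->b.dma.cs.priv == NULL`). A self-contained toy model of the pattern (all types and names here are illustrative, not Mesa's):

	struct cmdbuf { void *priv; unsigned cdw; };

	struct context_old { struct cmdbuf *cs; };   /* 20.x: pointer, returned by cs_create() */
	struct context_new { struct cmdbuf cs; };    /* 21.1: embedded, initialized in place  */

	static void emit(struct cmdbuf *cs) { cs->cdw++; }

	static void example(struct context_new *rctx)
	{
	        struct cmdbuf *cs = &rctx->cs;       /* was: rctx->cs          */
	        if (cs->priv == NULL)                /* was: rctx->cs == NULL  */
	                return;
	        emit(cs);
	}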
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
index 9c103c590..f76b1e331 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
@@ -427,11 +427,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	dsa->valuemask[1] = state->stencil[1].valuemask;
 	dsa->writemask[0] = state->stencil[0].writemask;
 	dsa->writemask[1] = state->stencil[1].writemask;
-	dsa->zwritemask = state->depth.writemask;
+	dsa->zwritemask = state->depth_writemask;
 
-	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
-		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
-		S_028800_ZFUNC(state->depth.func);
+	db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) |
+		S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
+		S_028800_ZFUNC(state->depth_func);
 
 	/* stencil */
 	if (state->stencil[0].enabled) {
@@ -453,10 +453,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	/* alpha */
 	alpha_test_control = 0;
 	alpha_ref = 0;
-	if (state->alpha.enabled) {
-		alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
+	if (state->alpha_enabled) {
+		alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func);
 		alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
-		alpha_ref = fui(state->alpha.ref_value);
+		alpha_ref = fui(state->alpha_ref_value);
 	}
 	dsa->sx_alpha_test_control = alpha_test_control & 0xff;
 	dsa->alpha_ref = alpha_ref;
@@ -514,15 +514,13 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
 	}
 
 	spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
-	if (state->sprite_coord_enable) {
-		spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
-			S_0286D4_PNT_SPRITE_OVRD_X(2) |
-			S_0286D4_PNT_SPRITE_OVRD_Y(3) |
-			S_0286D4_PNT_SPRITE_OVRD_Z(0) |
-			S_0286D4_PNT_SPRITE_OVRD_W(1);
-		if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
-			spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
-		}
+	spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
+		S_0286D4_PNT_SPRITE_OVRD_X(2) |
+		S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+		S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+		S_0286D4_PNT_SPRITE_OVRD_W(1);
+	if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+		spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
 	}
 
 	r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3);
@@ -576,6 +574,8 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
 	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
 						       : state->max_anisotropy;
 	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
+	bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+			   state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
 	float max_lod = state->max_lod;
 
 	if (!ss) {
@@ -610,6 +610,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
 	ss->tex_sampler_words[2] =
 		S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
 		(state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
+		S_03C008_TRUNCATE_COORD(trunc_coord) |
 		S_03C008_TYPE(1);
 
 	if (ss->border_color_use) {
@@ -755,7 +756,7 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
 	case PIPE_FORMAT_X32_S8X24_UINT:
 		params->pipe_format = PIPE_FORMAT_S8_UINT;
 		tile_split = tmp->surface.u.legacy.stencil_tile_split;
-		surflevel = tmp->surface.u.legacy.stencil_level;
+		surflevel = tmp->surface.u.legacy.zs.stencil_level;
 		break;
 	default:;
 	}
@@ -846,7 +847,7 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
 	tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
 				 S_030004_TEX_DEPTH(depth - 1) |
 				 S_030004_ARRAY_MODE(array_mode));
-	tex_resource_words[2] = (surflevel[base_level].offset + va) >> 8;
+	tex_resource_words[2] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
 
 	*skip_mip_address_reloc = false;
 	/* TEX_RESOURCE_WORD3.MIP_ADDRESS */
@@ -860,9 +861,9 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
 			tex_resource_words[3] = (tmp->fmask.offset + va) >> 8;
 		}
 	} else if (last_level && texture->nr_samples <= 1) {
-		tex_resource_words[3] = (surflevel[1].offset + va) >> 8;
+		tex_resource_words[3] = ((uint64_t)surflevel[1].offset_256B * 256 + va) >> 8;
 	} else {
-		tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
+		tex_resource_words[3] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
 	}
 
 	last_layer = params->last_layer;
@@ -974,7 +975,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
 static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_config_state *a = (struct r600_config_state*)atom;
 
 	radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
@@ -1001,7 +1002,7 @@ static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_a
 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
@@ -1123,7 +1124,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
 	bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;
 	int i;
 
-	color->offset = rtex->surface.u.legacy.level[level].offset;
+	color->offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
 	color->view = S_028C6C_SLICE_START(first_layer) |
 		      S_028C6C_SLICE_MAX(last_layer);
@@ -1251,7 +1252,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
 		color->info |= S_028C70_COMPRESSION(1);
 	}
 
-	/* EXPORT_NORM is an optimzation that can be enabled for better
+	/* EXPORT_NORM is an optimization that can be enabled for better
 	 * performance in certain cases.
 	 * EXPORT_NORM can be enabled if:
 	 * - 11-bit or smaller UNORM/SNORM/SRGB
@@ -1281,7 +1282,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
 }
 
 /**
- * This function intializes the CB* register values for RATs.  It is meant
+ * This function initializes the CB* register values for RATs.  It is meant
  * to be used for 1D aligned buffers that do not have an associated
  * radeon_surf.
 */
@@ -1360,7 +1361,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
 	assert(format != ~0);
 	offset = rtex->resource.gpu_address;
-	offset += rtex->surface.u.legacy.level[level].offset;
+	offset += (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
 
 	switch (rtex->surface.u.legacy.level[level].mode) {
 	case RADEON_SURF_MODE_2D:
@@ -1410,7 +1411,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
 		stile_split = eg_tile_split(stile_split);
 
-		stencil_offset = rtex->surface.u.legacy.stencil_level[level].offset;
+		stencil_offset = (uint64_t)rtex->surface.u.legacy.zs.stencil_level[level].offset_256B * 256;
 		stencil_offset += rtex->resource.gpu_address;
 
 		surf->db_stencil_base = stencil_offset >> 8;
@@ -1657,7 +1658,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
 
 static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned max_dist = 0;
 
 	switch (nr_samples) {
@@ -1706,7 +1707,7 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at
 {
 	struct r600_image_state *state = (struct r600_image_state *)atom;
 	struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_texture *rtex;
 	struct r600_resource *resource;
 	int i;
@@ -1833,7 +1834,7 @@ static void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struc
 
 static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
 	unsigned nr_cbufs = state->nr_cbufs;
 	unsigned i, tl, br;
@@ -1972,7 +1973,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
 	float offset_units = state->offset_units;
 	float offset_scale = state->offset_scale;
@@ -2030,7 +2031,7 @@ uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_
 
 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 	unsigned fb_colormask = a->bound_cbufs_target_mask;
 	unsigned ps_colormask = a->ps_color_export_mask;
@@ -2045,7 +2046,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_db_state *a = (struct r600_db_state*)atom;
 
 	if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -2068,7 +2069,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
 	unsigned db_count_control = 0;
@@ -2123,7 +2124,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 					  unsigned resource_offset,
 					  unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2182,7 +2183,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 					    unsigned reg_alu_const_cache,
 					    unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2334,7 +2335,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 					 struct r600_samplerview_state *state,
 					 unsigned resource_id_base, unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2443,7 +2444,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 				unsigned border_index_reg,
 				unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
 	union pipe_color_union border_color = {{0,0,0,1}};
 	union pipe_color_union *border_color_ptr = &border_color;
@@ -2527,14 +2528,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+	radeon_set_context_reg(&rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
 static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
 {
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint16_t mask = s->sample_mask;
 
 	radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -2544,7 +2545,7 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
@@ -2561,7 +2562,7 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
 
 static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
 	uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0;
 
@@ -2665,7 +2666,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
 	struct r600_resource *rbuffer;
 
@@ -3389,8 +3390,9 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 			tmp |= S_028644_FLAT_SHADE(1);
 		}
 
-		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
-		    (sprite_coord_enable & (1 << rshader->input[i].sid))) {
+		if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD ||
+		    (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD &&
+		     (sprite_coord_enable & (1 << rshader->input[i].sid)))) {
 			tmp |= S_028644_PT_SPRITE_TEX(1);
 		}
 
@@ -3712,7 +3714,7 @@ void *evergreen_create_fastclear_blend(struct r600_context *rctx)
 
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 {
-	struct pipe_depth_stencil_alpha_state dsa = {{0}};
+	struct pipe_depth_stencil_alpha_state dsa = {{{0}}};
 
 	return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa);
 }
@@ -3774,7 +3776,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 				    unsigned pitch,
 				    unsigned bpp)
 {
-	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -3811,8 +3813,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 		x = src_x;
 		y = src_y;
 		z = src_z;
-		base = rsrc->surface.u.legacy.level[src_level].offset;
-		addr = rdst->surface.u.legacy.level[dst_level].offset;
+		base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
+		addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
 		addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
 		addr += dst_y * pitch + dst_x * bpp;
 		bank_h = eg_bank_wh(rsrc->surface.u.legacy.bankh);
@@ -3836,8 +3838,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 		x = dst_x;
 		y = dst_y;
 		z = dst_z;
-		base = rdst->surface.u.legacy.level[dst_level].offset;
-		addr = rsrc->surface.u.legacy.level[src_level].offset;
+		base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
+		addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
 		addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z;
 		addr += src_y * pitch + src_x * bpp;
 		bank_h = eg_bank_wh(rdst->surface.u.legacy.bankh);
@@ -3896,7 +3898,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (rctx->b.dma.cs == NULL) {
+	if (rctx->b.dma.cs.priv == NULL) {
 		goto fallback;
 	}
 
@@ -3959,10 +3961,10 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
 		 * dst_x/y == 0
 		 * dst_pitch == src_pitch
 		 */
-		src_offset= rsrc->surface.u.legacy.level[src_level].offset;
+		src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
 		src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z;
 		src_offset += src_y * src_pitch + src_x * bpp;
-		dst_offset = rdst->surface.u.legacy.level[dst_level].offset;
+		dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
 		dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
 		dst_offset += dst_y * dst_pitch + dst_x * bpp;
 		evergreen_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset,
@@ -4148,7 +4150,7 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
 
 static void evergreen_set_shader_images(struct pipe_context *ctx,
 				enum pipe_shader_type shader, unsigned start_slot,
-				unsigned count,
+				unsigned count, unsigned unbind_num_trailing_slots,
 				const struct pipe_image_view *images)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
@@ -4162,7 +4164,9 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
 	unsigned old_mask;
 	struct r600_image_state *istate = NULL;
 	int idx;
-	if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE && count == 0)
+	if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE)
+		return;
+	if (!count && !unbind_num_trailing_slots)
 		return;
 
 	if (shader == PIPE_SHADER_FRAGMENT)
@@ -4305,6 +4309,16 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
 		istate->enabled_mask |= (1 << i);
 	}
 
+	for (i = start_slot + count, idx = 0;
+	     i < start_slot + count + unbind_num_trailing_slots; i++, idx++) {
+		rview = &istate->views[i];
+
+		pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
+		istate->enabled_mask &= ~(1 << i);
+		istate->compressed_colortex_mask &= ~(1 << i);
+		istate->compressed_depthtex_mask &= ~(1 << i);
+	}
+
 	istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
 	istate->dirty_buffer_constants = TRUE;
 	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
@@ -4523,11 +4537,11 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 	if (!rctx->tes_shader) {
 		rctx->lds_alloc = 0;
 		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
-					      R600_LDS_INFO_CONST_BUFFER, NULL);
+					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
 		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
-					      R600_LDS_INFO_CONST_BUFFER, NULL);
+					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
 		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
-					      R600_LDS_INFO_CONST_BUFFER, NULL);
+					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
 		return;
 	}
 
@@ -4587,12 +4601,11 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 	constbuf.buffer_size = 8 * 4;
 
 	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
-				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+				      R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
 	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
-				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+				      R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
 	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
-				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
-	pipe_resource_reference(&constbuf.buffer, NULL);
+				      R600_LDS_INFO_CONST_BUFFER, true, &constbuf);
 }
 
 uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
@@ -4750,7 +4763,7 @@ bool evergreen_adjust_gprs(struct r600_context *rctx)
 
 void eg_trace_emit(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned reloc;
 
 	if (rctx->b.chip_class < EVERGREEN)
@@ -4780,7 +4793,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
 					  struct r600_resource *resource,
 					  uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 						   resource,
 						   RADEON_USAGE_READ,
@@ -4803,7 +4816,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx,
 					   struct r600_resource *resource,
 					   uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t event = EVENT_TYPE_PS_DONE;
 	uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0;
 	uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
@@ -4830,7 +4843,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx,
 					struct r600_resource *resource,
 					uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t event = EVENT_TYPE_PS_DONE;
 	uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						    resource,
@@ -4856,7 +4869,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
 				      struct r600_resource *resource,
 				      uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						    resource,
						    RADEON_USAGE_READ,
@@ -4951,7 +4964,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
 				       struct r600_shader_atomic *combined_atomics,
 				       uint8_t *atomic_used_mask_p)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
 	uint32_t pkt_flags = 0;
 	uint32_t event = EVENT_TYPE_PS_DONE;
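In the tessellation-constant hunks just above, pipe_context::set_constant_buffer gains a take_ownership parameter. Passing false keeps the caller's reference to constbuf.buffer; passing true on the final bind hands the reference to the driver, which is why the trailing pipe_resource_reference(&constbuf.buffer, NULL) is deleted. A sketch of the pattern (assuming a filled-in pipe_constant_buffer and a slot variable, as in the code above):

	/* old: driver takes its own reference, caller must drop its own */
	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_EVAL, slot, &constbuf);
	pipe_resource_reference(&constbuf.buffer, NULL);

	/* new: earlier binds keep the reference, the last bind gives it away */
	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL, slot, false, &constbuf);
	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_EVAL, slot, true, &constbuf);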
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_asm.c b/lib/mesa/src/gallium/drivers/r600/r600_asm.c
index 6affa3d3a..6a9690f69 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_asm.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_asm.c
@@ -362,7 +362,7 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
 		}
 		assignment[4] = alu;
 	} else {
-		if (assignment[chan]) {
+		if (assignment[chan]) {
 			assert(0); /* ALU.chan has already been allocated. */
 			return -1;
 		}
@@ -686,7 +686,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 	return 0;
 }
 
-void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
 {
 	switch(value) {
 	case 0:
@@ -704,14 +704,6 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne
 	case 0x3F000000: /* 0.5f */
 		*sel = V_SQ_ALU_SRC_0_5;
 		break;
-	case 0xBF800000: /* -1.0f */
-		*sel = V_SQ_ALU_SRC_1;
-		*neg ^= !abs;
-		break;
-	case 0xBF000000: /* -0.5f */
-		*sel = V_SQ_ALU_SRC_0_5;
-		*neg ^= !abs;
-		break;
 	default:
 		*sel = V_SQ_ALU_SRC_LITERAL;
 		break;
@@ -1232,7 +1224,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 	/* Load index register if required */
 	if (bc->chip_class >= EVERGREEN) {
 		for (i = 0; i < 3; i++)
-			if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
+			if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
 				egcm_load_index_reg(bc, 0, true);
 	}
@@ -1261,7 +1253,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 			r600_bytecode_special_constants(nalu->src[i].value,
-				&nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
+				&nalu->src[i].sel);
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
 		bc->ngpr = nalu->dst.sel + 1;
@@ -1450,7 +1442,9 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t
 	    bc->cf_last->op == CF_OP_TEX) {
 		struct r600_bytecode_tex *ttex;
 		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
-			if (ttex->dst_gpr == ntex->src_gpr) {
+			if (ttex->dst_gpr == ntex->src_gpr &&
+			    (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
+			     ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
 				bc->force_add_cf = 1;
 				break;
 			}
@@ -2638,7 +2632,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	uint32_t *bytecode;
 	int i, j, r, fs_size;
 	struct r600_fetch_shader *shader;
-	unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB;
+	unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB ||
+		(rctx->screen->b.debug_flags & DBG_NIR);
 	unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
 
 	assert(count < 32);
@@ -2763,7 +2758,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
+	u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
 			     &shader->offset,
 			     (struct pipe_resource**)&shader->buffer);
 	if (!shader->buffer) {
@@ -2774,7 +2769,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 
 	bytecode = r600_buffer_map_sync_with_rings
 		(&rctx->b, shader->buffer,
-		PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
+		PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
 	bytecode += shader->offset / 4;
 
 	if (R600_BIG_ENDIAN) {
@@ -2784,7 +2779,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	} else {
 		memcpy(bytecode, bc.bytecode, fs_size);
 	}
-	rctx->b.ws->buffer_unmap(shader->buffer->buf);
+	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);
 
 	r600_bytecode_clear(&bc);
 	return shader;
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_asm.h b/lib/mesa/src/gallium/drivers/r600/r600_asm.h
index 71a3ae1ba..a526993b3 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_asm.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_asm.h
@@ -214,6 +214,8 @@ struct r600_bytecode_cf {
 	struct r600_bytecode_alu *prev_bs_head;
 	struct r600_bytecode_alu *prev2_bs_head;
 	unsigned isa[2];
+	unsigned nlds_read;
+	unsigned nqueue_read;
 };
 
 #define FC_NONE 0
@@ -276,6 +278,7 @@ struct r600_bytecode {
 	unsigned r6xx_nop_after_rel_dst;
 	bool index_loaded[2];
 	unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
+	unsigned index_reg_chan[2]; /* indexing register chanel CF_INDEX_[01] */
 	unsigned debug_id;
 	struct r600_isa* isa;
 	struct r600_bytecode_output pending_outputs[5];
@@ -318,8 +321,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
 		unsigned op);
 int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		const struct r600_bytecode_alu *alu, unsigned type);
-void r600_bytecode_special_constants(uint32_t value,
-		unsigned *sel, unsigned *neg, unsigned abs);
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
 void r600_bytecode_disasm(struct r600_bytecode *bc);
 void r600_bytecode_alu_read(struct r600_bytecode *bc,
 		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_blit.c b/lib/mesa/src/gallium/drivers/r600/r600_blit.c
index 606b3892e..b8924f826 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_blit.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_blit.c
@@ -463,6 +463,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
 }
 
 static void r600_clear(struct pipe_context *ctx, unsigned buffers,
+		       const struct pipe_scissor_state *scissor_state,
 		       const union pipe_color_union *color,
 		       double depth, unsigned stencil)
 {
@@ -660,7 +661,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds
 		r600_blitter_end(ctx);
 	} else {
 		uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst),
-								PIPE_TRANSFER_WRITE);
+								PIPE_MAP_WRITE);
 		map += offset / 4;
 		size /= 4;
 		for (unsigned i = 0; i < size; i++)
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c b/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
index 494b7ed69..de032c6dc 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
@@ -34,17 +34,17 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 			boolean count_draw_in, unsigned num_atomics)
 {
 	/* Flush the DMA IB if it's not empty. */
-	if (radeon_emitted(ctx->b.dma.cs, 0))
+	if (radeon_emitted(&ctx->b.dma.cs, 0))
 		ctx->b.dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 
-	if (!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs,
+	if (!radeon_cs_memory_below_limit(ctx->b.screen, &ctx->b.gfx.cs,
 					  ctx->b.vram, ctx->b.gtt)) {
 		ctx->b.gtt = 0;
 		ctx->b.vram = 0;
 		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 		return;
 	}
-	/* all will be accounted once relocation are emited */
+	/* all will be accounted once relocation are emitted */
 	ctx->b.gtt = 0;
 	ctx->b.vram = 0;
 
@@ -84,14 +84,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 		num_dw += 10;
 
 	/* Flush if there's not enough space. */
-	if (!ctx->b.ws->cs_check_space(ctx->b.gfx.cs, num_dw, false)) {
+	if (!ctx->b.ws->cs_check_space(&ctx->b.gfx.cs, num_dw, false)) {
 		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 	}
 }
 
 void r600_flush_emit(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned cp_coher_cntl = 0;
 	unsigned wait_until = 0;
 
@@ -260,7 +260,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 			    struct pipe_fence_handle **fence)
 {
 	struct r600_context *ctx = context;
-	struct radeon_cmdbuf *cs = ctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &ctx->b.gfx.cs;
 	struct radeon_winsys *ws = ctx->b.ws;
 
 	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
@@ -345,7 +345,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->b.vram = 0;
 
 	/* Begin a new CS. */
-	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);
+	r600_emit_command_buffer(&ctx->b.gfx.cs, &ctx->start_cs_cmd);
 
 	/* Re-emit states. */
 	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
@@ -430,13 +430,13 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->last_rast_prim = -1;
 	ctx->current_rast_prim = -1;
 
-	assert(!ctx->b.gfx.cs->prev_dw);
-	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
+	assert(!ctx->b.gfx.cs.prev_dw);
+	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs.current.cdw;
 }
 
 void r600_emit_pfp_sync_me(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 
 	if (rctx->b.chip_class >= EVERGREEN &&
 	    rctx->b.screen->info.drm_minor >= 46) {
@@ -451,7 +451,7 @@ void r600_emit_pfp_sync_me(struct r600_context *rctx)
 		uint64_t va;
 
 		/* 16-byte address alignment is required by WAIT_REG_MEM. */
-		u_suballocator_alloc(rctx->b.allocator_zeroed_memory, 4, 16,
+		u_suballocator_alloc(&rctx->b.allocator_zeroed_memory, 4, 16,
 				     &offset, (struct pipe_resource**)&buf);
 		if (!buf) {
 			/* This is too heavyweight, but will work. */
@@ -502,7 +502,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 			     struct pipe_resource *src, uint64_t src_offset,
 			     unsigned size)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 
 	assert(size);
 	assert(rctx->screen->b.has_cp_dma);
@@ -584,7 +584,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 			  uint64_t src_offset,
 			  uint64_t size)
 {
-	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
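The r600_pipe.c diff below converts the fetch-shader suballocator from a heap-allocated object to one embedded in r600_context, mirroring the command-buffer change. A schematic before/after, using only calls that appear in this merge (the size/offset/buf variables are illustrative):

	/* old */
	rctx->allocator_fetch_shader =
	        u_suballocator_create(&rctx->b.b, 64 * 1024, 0, PIPE_USAGE_DEFAULT, 0, FALSE);
	u_suballocator_alloc(rctx->allocator_fetch_shader, size, 256, &offset, &buf);
	u_suballocator_destroy(rctx->allocator_fetch_shader);

	/* new: r600_context holds "struct u_suballocator allocator_fetch_shader;" */
	u_suballocator_init(&rctx->allocator_fetch_shader, &rctx->b.b, 64 * 1024,
	                    0, PIPE_USAGE_DEFAULT, 0, FALSE);
	u_suballocator_alloc(&rctx->allocator_fetch_shader, size, 256, &offset, &buf);
	u_suballocator_destroy(&rctx->allocator_fetch_shader);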
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe.c b/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
index f26da31d2..9e11c7442 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
@@ -55,6 +55,7 @@ static const struct debug_named_value r600_debug_options[] = {
 	{ "sbnofallback", DBG_SB_NO_FALLBACK, "Abort on errors instead of fallback" },
 	{ "sbdisasm", DBG_SB_DISASM, "Use sb disassembler for shader dumps" },
 	{ "sbsafemath", DBG_SB_SAFEMATH, "Disable unsafe math optimizations" },
+	{ "nirsb", DBG_NIR_SB, "Enable NIR with SB optimizer"},
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
@@ -81,7 +82,7 @@ static void r600_destroy_context(struct pipe_context *context)
 	if (rctx->append_fence)
 		pipe_resource_reference((struct pipe_resource**)&rctx->append_fence, NULL);
 	for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
-		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, NULL);
+		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, NULL);
 		free(rctx->driver_consts[sh].constants);
 	}
 
@@ -113,14 +114,12 @@ static void r600_destroy_context(struct pipe_context *context)
 
 	for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh)
 		for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i)
-			rctx->b.b.set_constant_buffer(context, sh, i, NULL);
+			rctx->b.b.set_constant_buffer(context, sh, i, false, NULL);
 
 	if (rctx->blitter) {
 		util_blitter_destroy(rctx->blitter);
 	}
-	if (rctx->allocator_fetch_shader) {
-		u_suballocator_destroy(rctx->allocator_fetch_shader);
-	}
+	u_suballocator_destroy(&rctx->allocator_fetch_shader);
 
 	r600_release_command_buffer(&rctx->start_cs_cmd);
 
@@ -211,15 +210,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
 		goto fail;
 	}
 
-	rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
-				       r600_context_gfx_flush, rctx, false);
+	ws->cs_create(&rctx->b.gfx.cs, rctx->b.ctx, RING_GFX,
+		      r600_context_gfx_flush, rctx, false);
 	rctx->b.gfx.flush = r600_context_gfx_flush;
 
-	rctx->allocator_fetch_shader =
-		u_suballocator_create(&rctx->b.b, 64 * 1024,
-				      0, PIPE_USAGE_DEFAULT, 0, FALSE);
-	if (!rctx->allocator_fetch_shader)
-		goto fail;
+	u_suballocator_init(&rctx->allocator_fetch_shader, &rctx->b.b, 64 * 1024,
+			    0, PIPE_USAGE_DEFAULT, 0, FALSE);
 
 	rctx->isa = calloc(1, sizeof(struct r600_isa));
 	if (!rctx->isa || r600_isa_init(rctx, rctx->isa))
@@ -249,6 +245,12 @@ fail:
 	return NULL;
 }
 
+static bool is_nir_enabled(struct r600_common_screen *screen) {
+	return ((screen->debug_flags & DBG_NIR_PREFERRED) &&
+		screen->family >= CHIP_CEDAR &&
+		screen->family < CHIP_CAYMAN);
+}
+
 /*
  * pipe_screen
 */
@@ -282,6 +284,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_SHADER_SATURATE:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP:
 	case PIPE_CAP_PRIMITIVE_RESTART:
+	case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
 	case PIPE_CAP_CONDITIONAL_RENDER:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
@@ -317,8 +320,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
 	case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 	case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+	case PIPE_CAP_NIR_ATOMICS_AS_DEREF:
 		return 1;
 
+	case PIPE_CAP_SHAREABLE_SHADERS:
+		return 0;
+
 	case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
 		/* Optimal number for good TexSubImage performance on Polaris10. */
 		return 64 * 1024 * 1024;
@@ -333,8 +340,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return rscreen->b.chip_class > R700;
 
 	case PIPE_CAP_TGSI_TEXCOORD:
-		return 0;
+		return 1;
 
+	case PIPE_CAP_NIR_IMAGES_AS_DEREF:
 	case PIPE_CAP_FAKE_SW_MSAA:
 		return 0;
 
@@ -348,11 +356,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 256;
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-		return 1;
+		return 4;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
 		if (family >= CHIP_CEDAR)
-			return 430;
+			return is_nir_enabled(&rscreen->b) ? 450 : 430;
 		/* pre-evergreen geom shaders need newer kernel */
 		if (rscreen->b.info.drm_minor >= 37)
 			return 330;
@@ -403,13 +411,21 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
 		return 0;
 
+	case PIPE_CAP_INT64:
 	case PIPE_CAP_DOUBLES:
 		if (rscreen->b.family == CHIP_ARUBA ||
 		    rscreen->b.family == CHIP_CAYMAN ||
 		    rscreen->b.family == CHIP_CYPRESS ||
 		    rscreen->b.family == CHIP_HEMLOCK)
 			return 1;
+		if (is_nir_enabled(&rscreen->b))
+			return 1;
 		return 0;
+	case PIPE_CAP_INT64_DIVMOD:
+		/* it is actually not supported, but the nir lowering hdanles this corectly wheras
+		 * the glsl lowering path seems to not initialize the buildins correctly.
+		 */
+		return is_nir_enabled(&rscreen->b);
 
 	case PIPE_CAP_CULL_DISTANCE:
 		return 1;
@@ -542,7 +558,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	{
 	case PIPE_SHADER_FRAGMENT:
 	case PIPE_SHADER_VERTEX:
-	case PIPE_SHADER_COMPUTE:
 		break;
 	case PIPE_SHADER_GEOMETRY:
 		if (rscreen->b.family >= CHIP_CEDAR)
@@ -553,8 +568,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 		return 0;
 	case PIPE_SHADER_TESS_CTRL:
 	case PIPE_SHADER_TESS_EVAL:
+	case PIPE_SHADER_COMPUTE:
 		if (rscreen->b.family >= CHIP_CEDAR)
 			break;
+		FALLTHROUGH;
 	default:
 		return 0;
 	}
@@ -576,9 +593,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
 		if (shader == PIPE_SHADER_COMPUTE) {
 			uint64_t max_const_buffer_size;
-			pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
-						   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
-						   &max_const_buffer_size);
+			enum pipe_shader_ir ir_type = is_nir_enabled(&rscreen->b) ?
+				PIPE_SHADER_IR_NIR: PIPE_SHADER_IR_TGSI;
+			pscreen->get_compute_param(pscreen, ir_type,
+						   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+						   &max_const_buffer_size);
 			return MIN2(max_const_buffer_size, INT_MAX);
 		} else {
@@ -598,6 +617,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_SUBROUTINES:
 	case PIPE_SHADER_CAP_INT64_ATOMICS:
 	case PIPE_SHADER_CAP_FP16:
+	case PIPE_SHADER_CAP_FP16_DERIVATIVES:
+	case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
+	case PIPE_SHADER_CAP_INT16:
+	case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
 		return 0;
 	case PIPE_SHADER_CAP_INTEGERS:
 	case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
@@ -605,14 +628,19 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
 	case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
 		return 16;
-	case PIPE_SHADER_CAP_PREFERRED_IR:
+	case PIPE_SHADER_CAP_PREFERRED_IR:
+		if (is_nir_enabled(&rscreen->b))
+			return PIPE_SHADER_IR_NIR;
 		return PIPE_SHADER_IR_TGSI;
 	case PIPE_SHADER_CAP_SUPPORTED_IRS: {
 		int ir = 0;
 		if (shader == PIPE_SHADER_COMPUTE)
 			ir = 1 << PIPE_SHADER_IR_NATIVE;
-		if (rscreen->b.family >= CHIP_CEDAR)
+		if (rscreen->b.family >= CHIP_CEDAR) {
 			ir |= 1 << PIPE_SHADER_IR_TGSI;
+			if (is_nir_enabled(&rscreen->b))
+				ir |= 1 << PIPE_SHADER_IR_NIR;
+		}
 		return ir;
 	}
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
@@ -791,7 +819,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
 		templ.usage = PIPE_USAGE_DEFAULT;
 
 		struct r600_resource *res = r600_resource(rscreen->screen.resource_create(&rscreen->screen, &templ));
-		unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_TRANSFER_WRITE);
+		unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_MAP_WRITE);
 
 		memset(map, 0, 256);
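r600_pipe.c above gates the new NIR backend on the DBG_NIR_PREFERRED debug bits and the Evergreen GPU range; r600_pipe.h below defines those bits. A user would opt in through the existing R600_DEBUG mechanism (the invocations are an assumed example, not part of this diff):

	/* from the diff: NIR is only preferred on Evergreen, not Cayman */
	static bool is_nir_enabled(struct r600_common_screen *screen) {
	        return ((screen->debug_flags & DBG_NIR_PREFERRED) &&
	                screen->family >= CHIP_CEDAR &&
	                screen->family < CHIP_CAYMAN);
	}

	$ R600_DEBUG=nir   <app>   # NIR without the sb optimizer
	$ R600_DEBUG=nirsb <app>   # NIR followed by sb (DBG_NIR_SB)

Note that DBG_NIR_SB is defined below as (1 << 28), the same bit as DBG_SB_SAFEMATH, so the two options alias in this snapshot.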
*/ boolean has_vertex_cache; @@ -1055,7 +1060,8 @@ void eg_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags); struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx, - const struct tgsi_token *tokens, + const void *tokens, + enum pipe_shader_ir, unsigned pipe_shader_type); int r600_shader_select(struct pipe_context *ctx, struct r600_pipe_shader_selector* sel, diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.c b/lib/mesa/src/gallium/drivers/r600/r600_shader.c index 85e584baf..c23adf2ea 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_shader.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.c @@ -24,7 +24,9 @@ #include "r600_formats.h" #include "r600_opcodes.h" #include "r600_shader.h" +#include "r600_dump.h" #include "r600d.h" +#include "sfn/sfn_nir.h" #include "sb/sb_public.h" @@ -33,6 +35,10 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_from_mesa.h" +#include "nir/tgsi_to_nir.h" +#include "nir/nir_to_tgsi_info.h" +#include "compiler/nir/nir.h" #include "util/u_bitcast.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -143,7 +149,7 @@ static int store_shader(struct pipe_context *ctx, } ptr = r600_buffer_map_sync_with_rings( &rctx->b, shader->bo, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); if (R600_BIG_ENDIAN) { for (i = 0; i < shader->shader.bc.ndw; ++i) { ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); @@ -151,12 +157,14 @@ static int store_shader(struct pipe_context *ctx, } else { memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); } - rctx->b.ws->buffer_unmap(shader->bo->buf); + rctx->b.ws->buffer_unmap(rctx->b.ws, shader->bo->buf); } return 0; } +extern const struct nir_shader_compiler_options r600_nir_options; +static int nshader = 0; int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, union r600_shader_key key) @@ -164,27 +172,76 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; int r; - bool dump = r600_can_dump_shader(&rctx->screen->b, - tgsi_get_processor_type(sel->tokens)); - unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); + struct r600_screen *rscreen = (struct r600_screen *)ctx->screen; + + int processor = sel->ir_type == PIPE_SHADER_IR_TGSI ? 
+ tgsi_get_processor_type(sel->tokens): + pipe_shader_type_from_mesa(sel->nir->info.stage); + + bool dump = r600_can_dump_shader(&rctx->screen->b, processor); + unsigned use_sb = !(rctx->screen->b.debug_flags & (DBG_NO_SB | DBG_NIR)) || + (rctx->screen->b.debug_flags & DBG_NIR_SB); unsigned sb_disasm; unsigned export_shader; - + shader->shader.bc.isa = rctx->isa; + + if (!(rscreen->b.debug_flags & DBG_NIR_PREFERRED)) { + assert(sel->ir_type == PIPE_SHADER_IR_TGSI); + r = r600_shader_from_tgsi(rctx, shader, key); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + goto error; + } + } else { + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + sel->nir = tgsi_to_nir(sel->tokens, ctx->screen, true); + const nir_shader_compiler_options *nir_options = + (const nir_shader_compiler_options *) + ctx->screen->get_compiler_options(ctx->screen, + PIPE_SHADER_IR_NIR, + shader->shader.processor_type); + /* Lower int64 ops because we have some r600 built-in shaders that use them */ + if (nir_options->lower_int64_options) { + NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS_V(sel->nir, nir_lower_int64); + NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL); + } + NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false); + } + nir_tgsi_scan_shader(sel->nir, &sel->info, true); + r = r600_shader_from_nir(rctx, shader, &key); + if (r) { + fprintf(stderr, "--Failed shader--------------------------------------------------\n"); + + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(stderr, "--TGSI--------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); + } + + if (rscreen->b.debug_flags & (DBG_NIR_PREFERRED)) { + fprintf(stderr, "--NIR --------------------------------------------------------\n"); + nir_print_shader(sel->nir, stderr); + } + + R600_ERR("translation from NIR failed !\n"); + goto error; + } + } + if (dump) { - fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(sel->tokens, 0); - + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(stderr, "--TGSI--------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); + } + if (sel->so.num_outputs) { r600_dump_streamout(&sel->so); } } - r = r600_shader_from_tgsi(rctx, shader, key); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - goto error; - } + if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { /* only disable for vertex shaders in tess paths */ if (key.vs.as_ls) @@ -216,7 +273,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_bytecode_disasm(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } else if ((dump && sb_disasm) || use_sb) { - r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, + r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, dump, use_sb); if (r) { R600_ERR("r600_sb_bytecode_process failed !\n"); @@ -224,6 +281,30 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } + if (dump) { + FILE *f; + char fname[1024]; + snprintf(fname, 1024, "shader_from_%s_%d.cpp", + (sel->ir_type == PIPE_SHADER_IR_TGSI ? + (rscreen->b.debug_flags & DBG_NIR_PREFERRED ?
"tgsi-nir" : "tgsi") + : "nir"), nshader); + f = fopen(fname, "w"); + print_shader_info(f, nshader++, &shader->shader); + print_shader_info(stderr, nshader++, &shader->shader); + print_pipe_info(stderr, &sel->info); + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(f, "/****TGSI**********************************\n"); + tgsi_dump_to_file(sel->tokens, 0, f); + } + + if (rscreen->b.debug_flags & DBG_NIR_PREFERRED){ + fprintf(f, "/****NIR **********************************\n"); + nir_print_shader(sel->nir, f); + } + fprintf(f, "******************************************/\n"); + fclose(f); + } + if (shader->gs_copy_shader) { if (dump) { // dump copy shader @@ -301,7 +382,8 @@ error: void r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader) { r600_resource_reference(&shader->bo, NULL); - r600_bytecode_clear(&shader->shader.bc); + if (list_is_linked(&shader->shader.bc.cf)) + r600_bytecode_clear(&shader->shader.bc); r600_release_command_buffer(&shader->command_buffer); } @@ -433,24 +515,26 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Dimension) { - switch (i->Src[j].Register.File) { - case TGSI_FILE_CONSTANT: - case TGSI_FILE_HW_ATOMIC: - break; - case TGSI_FILE_INPUT: - if (ctx->type == PIPE_SHADER_GEOMETRY || - ctx->type == PIPE_SHADER_TESS_CTRL || - ctx->type == PIPE_SHADER_TESS_EVAL) - break; - case TGSI_FILE_OUTPUT: - if (ctx->type == PIPE_SHADER_TESS_CTRL) - break; - default: - R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, - i->Src[j].Register.File, - i->Src[j].Register.Dimension); - return -EINVAL; - } + switch (i->Src[j].Register.File) { + case TGSI_FILE_CONSTANT: + case TGSI_FILE_HW_ATOMIC: + break; + case TGSI_FILE_INPUT: + if (ctx->type == PIPE_SHADER_GEOMETRY || + ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_TESS_EVAL) + break; + FALLTHROUGH; + case TGSI_FILE_OUTPUT: + if (ctx->type == PIPE_SHADER_TESS_CTRL) + break; + FALLTHROUGH; + default: + R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, + i->Src[j].Register.File, + i->Src[j].Register.Dimension); + return -EINVAL; + } } } for (j = 0; j < i->Instruction.NumDstRegs; j++) { @@ -620,6 +704,8 @@ static int r600_spi_sid(struct r600_shader_io * io) else { if (name == TGSI_SEMANTIC_GENERIC) { /* For generic params simply use sid from tgsi */ + index = 9 + io->sid; + } else if (name == TGSI_SEMANTIC_TEXCOORD) { index = io->sid; } else { /* For non-generic params - pack name and sid into 8 bits */ @@ -646,9 +732,11 @@ int r600_get_lds_unique_index(unsigned semantic_name, unsigned index) case TGSI_SEMANTIC_CLIPDIST: assert(index <= 1); return 2 + index; + case TGSI_SEMANTIC_TEXCOORD: + return 4 + index; case TGSI_SEMANTIC_GENERIC: if (index <= 63-4) - return 4 + index - 9; + return 4 + index; else /* same explanation as in the default statement, * the only user hitting this is st/nine. 
@@ -1614,7 +1702,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } @@ -2469,9 +2557,9 @@ static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) r600_bytecode_add_alu(ctx->bc, &alu); } -static int generate_gs_copy_shader(struct r600_context *rctx, - struct r600_pipe_shader *gs, - struct pipe_stream_output_info *so) +int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs, + struct pipe_stream_output_info *so) { struct r600_shader_ctx ctx = {}; struct r600_shader *gs_shader = &gs->shader; @@ -2969,7 +3057,8 @@ static int emit_lds_vs_writes(struct r600_shader_ctx *ctx) for (i = 0; i < ctx->shader->noutput; i++) { struct r600_bytecode_alu alu; - int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid); + int param = r600_get_lds_unique_index(ctx->shader->output[i].name, + ctx->shader->output[i].sid); if (param) { r = single_alu_op2(ctx, ALU_OP2_ADD_INT, @@ -4625,6 +4714,14 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) op = ALU_OP2_MUL; + /* nir_to_tgsi lowers nir_op_isub to UADD + negate, since r600 doesn't support + * source modifiers with integer ops we switch back to SUB_INT */ + bool src1_neg = ctx->src[1].neg; + if (op == ALU_OP2_ADD_INT && src1_neg) { + src1_neg = false; + op = ALU_OP2_SUB_INT; + } + for (i = 0; i <= lasti; i++) { if (!(write_mask & (1 << i))) continue; @@ -4642,6 +4739,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } + alu.src[1].neg = src1_neg; } else { r600_bytecode_src(&alu.src[0], &ctx->src[1], i); r600_bytecode_src(&alu.src[1], &ctx->src[0], i); @@ -8090,7 +8188,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - /* fall through */ + FALLTHROUGH; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -8111,7 +8209,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - /* fall through */ + FALLTHROUGH; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -8135,7 +8233,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) switch (inst->Texture.Texture) { case TGSI_TEXTURE_3D: offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; - /* fallthrough */ + FALLTHROUGH; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_RECT: @@ -8143,7 +8241,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; - /* fallthrough */ + FALLTHROUGH; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: @@ -10346,7 +10444,7 @@ static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, * elements */ elements += 2; - /* fallthrough */ + FALLTHROUGH; /* FIXME: do the two elements added above cover the cases for the * r8xx+ below? 
*/ @@ -11050,6 +11148,76 @@ static int egcm_u64add(struct r600_shader_ctx *ctx) return 0; } + +static int egcm_i64neg(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int treg = ctx->temp_reg; + const int op = ALU_OP2_SUB_INT; + const int opc = ALU_OP2_SUBB_UINT; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.src[0].sel = V_SQ_ALU_SRC_0; + r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.src[0].sel = V_SQ_ALU_SRC_0; + r600_bytecode_src(&alu.src[1], &ctx->src[0], 1); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = opc; + alu.dst.sel = treg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + alu.src[0].sel = V_SQ_ALU_SRC_0; + r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 1; + alu.src[1].sel = treg; + alu.src[1].chan = 2; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + /* result.y = mul_high a, b result.x = mul a,b result.y += a.x * b.y + a.y * b.x; @@ -12007,6 +12175,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, + [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg }, [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; @@ -12233,5 +12402,6 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, + [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg }, [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.h b/lib/mesa/src/gallium/drivers/r600/r600_shader.h index 7dffd592a..8acd9a3af 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_shader.h +++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.h @@ -72,8 +72,8 @@ struct r600_shader { unsigned nhwatomic; unsigned nlds; unsigned nsys_inputs; - struct r600_shader_io input[64]; - struct r600_shader_io output[64]; + struct r600_shader_io input[PIPE_MAX_SHADER_INPUTS]; + struct r600_shader_io output[PIPE_MAX_SHADER_OUTPUTS]; struct r600_shader_atomic atomics[8]; unsigned nhwatomic_ranges; boolean uses_kill; @@ -136,6 +136,8 @@ union r600_shader_key { unsigned image_size_const_offset:5; unsigned color_two_side:1; unsigned alpha_to_one:1; + unsigned apply_sample_id_mask:1; + unsigned dual_source_blend:1; } ps; struct { unsigned prim_id_out:8;
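The egcm_i64neg() handler added above open-codes a 64-bit integer negate out of 32-bit ALU operations: SUB_INT computes 0 minus each dword, SUBB_UINT recovers the borrow out of the low dword, and a final SUB_INT folds that borrow into the high dword before the results are moved to the destination. A host-side model of the same arithmetic, given only to illustrate the expansion (i64neg_model is a hypothetical name, not driver code):

#include <stdint.h>

static uint64_t i64neg_model(uint64_t x)
{
	uint32_t lo = (uint32_t)x;
	uint32_t hi = (uint32_t)(x >> 32);

	uint32_t neg_lo = 0u - lo;      /* SUB_INT   0, lo */
	uint32_t neg_hi = 0u - hi;      /* SUB_INT   0, hi */
	uint32_t borrow = lo != 0;      /* SUBB_UINT 0, lo: borrow out of the low half */

	return ((uint64_t)(neg_hi - borrow) << 32) | neg_lo;
}

For instance, i64neg_model(1) returns 0xffffffffffffffff, the two's-complement encoding of -1.
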
@@ -191,6 +193,10 @@ int eg_get_interpolator_index(unsigned interpolate, unsigned location); int r600_get_lds_unique_index(unsigned semantic_name, unsigned index); +int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs, + struct pipe_stream_output_info *so); + #ifdef __cplusplus } // extern "C" #endif diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state.c b/lib/mesa/src/gallium/drivers/r600/r600_state.c index b20a9d2a2..6eb2bd42b 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_state.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_state.c @@ -246,7 +246,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -415,11 +415,11 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, dsa->valuemask[1] = state->stencil[1].valuemask; dsa->writemask[0] = state->stencil[0].writemask; dsa->writemask[1] = state->stencil[1].writemask; - dsa->zwritemask = state->depth.writemask; + dsa->zwritemask = state->depth_writemask; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); + db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) | + S_028800_Z_WRITE_ENABLE(state->depth_writemask) | + S_028800_ZFUNC(state->depth_func); /* stencil */ if (state->stencil[0].enabled) { @@ -441,10 +441,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, /* alpha */ alpha_test_control = 0; alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + if (state->alpha_enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func); alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); + alpha_ref = fui(state->alpha_ref_value); } dsa->sx_alpha_test_control = alpha_test_control & 0xff; dsa->alpha_ref = alpha_ref; @@ -520,15 +520,13 @@ static void *r600_create_rs_state(struct pipe_context *ctx, } spi_interp = S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1); - } + spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1); } r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3); @@ -757,11 +755,11 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset >> 8; + view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset_256B; if (offset_level >= tmp->resource.b.b.last_level) { - view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset >> 8; 
+ view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset_256B; } else { - view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset >> 8; + view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset_256B; } view->tex_resource_words[4] = (word4 | S_038010_REQUEST_SIZE(1) | @@ -792,7 +790,7 @@ r600_create_sampler_view(struct pipe_context *ctx, static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4); @@ -826,7 +824,7 @@ static void r600_init_color_surface(struct r600_context *rctx, assert(rtex); } - offset = rtex->surface.u.legacy.level[level].offset; + offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; color_view = S_028080_SLICE_START(surf->base.u.tex.first_layer) | S_028080_SLICE_MAX(surf->base.u.tex.last_layer); @@ -910,7 +908,7 @@ static void r600_init_color_surface(struct r600_context *rctx, S_0280A0_NUMBER_TYPE(ntype) | S_0280A0_ENDIAN(endian); - /* EXPORT_NORM is an optimzation that can be enabled for better + /* EXPORT_NORM is an optimization that can be enabled for better * performance in certain cases */ if (rctx->b.chip_class == R600) { @@ -984,7 +982,7 @@ static void r600_init_color_surface(struct r600_context *rctx, /* CMASK. */ if (!rctx->dummy_cmask || rctx->dummy_cmask->b.b.width0 < cmask.size || - rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) { + (1 << rctx->dummy_cmask->buf->alignment_log2) % cmask.alignment != 0) { struct pipe_transfer *transfer; void *ptr; @@ -1000,7 +998,7 @@ static void r600_init_color_surface(struct r600_context *rctx, } /* Set the contents to 0xCC. */ - ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_TRANSFER_WRITE, &transfer); + ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_MAP_WRITE, &transfer); memset(ptr, 0xCC, cmask.size); pipe_buffer_unmap(&rctx->b.b, transfer); } @@ -1009,7 +1007,7 @@ static void r600_init_color_surface(struct r600_context *rctx, /* FMASK. 
*/ if (!rctx->dummy_fmask || rctx->dummy_fmask->b.b.width0 < fmask.size || - rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) { + (1 << rctx->dummy_fmask->buf->alignment_log2) % fmask.alignment != 0) { r600_resource_reference(&rctx->dummy_fmask, NULL); rctx->dummy_fmask = (struct r600_resource*) r600_aligned_buffer_create(&rscreen->b.b, 0, @@ -1043,7 +1041,7 @@ static void r600_init_depth_surface(struct r600_context *rctx, unsigned level, pitch, slice, format, offset, array_mode; level = surf->base.u.tex.level; - offset = rtex->surface.u.legacy.level[level].offset; + offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; pitch = rtex->surface.u.legacy.level[level].nblk_x / 8 - 1; slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64; if (slice) { @@ -1284,7 +1282,7 @@ static void r600_get_sample_position(struct pipe_context *ctx, static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned max_dist = 0; if (rctx->b.family == CHIP_R600) { @@ -1351,7 +1349,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0]; @@ -1517,7 +1515,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { @@ -1547,7 +1545,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->db_htile_surface) { @@ -1568,7 +1566,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_render_override = @@ -1653,7 +1651,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_config_state *a = (struct r600_config_state*)atom; radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1); @@ -1662,7 +1660,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom * static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = 
rctx->vertex_buffer_state.dirty_mask; while (dirty_mask) { @@ -1702,7 +1700,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_constbuf_size, unsigned reg_alu_const_cache) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1776,7 +1774,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1823,7 +1821,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx, unsigned resource_id_base, unsigned border_color_reg) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -1883,7 +1881,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned tmp; tmp = S_009508_DISABLE_CUBE_ANISO(1) | @@ -1901,13 +1899,13 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, + radeon_set_context_reg(&rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; @@ -1923,7 +1921,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; uint32_t v2 = 0, primid = 0; @@ -1958,7 +1956,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; struct r600_resource *rbuffer; @@ -2474,8 +2472,9 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha rctx->rasterizer && rctx->rasterizer->flatshade)) tmp |= S_028644_FLAT_SHADE(1); - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - sprite_coord_enable & (1 << rshader->input[i].sid)) { + if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD || + (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD && + sprite_coord_enable & (1 << rshader->input[i].sid))) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -2777,8 +2776,8 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx) memset(&dsa, 0, sizeof(dsa)); if (quirk) { - dsa.depth.enabled = 1; - dsa.depth.func = PIPE_FUNC_LEQUAL; + dsa.depth_enabled = 1; + dsa.depth_func = PIPE_FUNC_LEQUAL; dsa.stencil[0].enabled = 1; 
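/* Note on the dsa hunks in this file: they track the flattened
 * pipe_depth_stencil_alpha_state, where the old nested accessors
 * state->depth.enabled, depth.writemask, depth.func, alpha.enabled,
 * alpha.func and alpha.ref_value become the depth_enabled,
 * depth_writemask, depth_func, alpha_enabled, alpha_func and
 * alpha_ref_value scalars used here. */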
dsa.stencil[0].func = PIPE_FUNC_ALWAYS; dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; @@ -2855,7 +2854,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, unsigned pitch, unsigned bpp) { - struct radeon_cmdbuf *cs = rctx->b.dma.cs; + struct radeon_cmdbuf *cs = &rctx->b.dma.cs; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; @@ -2885,8 +2884,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, x = src_x; y = src_y; z = src_z; - base = rsrc->surface.u.legacy.level[src_level].offset; - addr = rdst->surface.u.legacy.level[dst_level].offset; + base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; + addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; addr += dst_y * pitch + dst_x * bpp; } else { @@ -2904,8 +2903,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, x = dst_x; y = dst_y; z = dst_z; - base = rdst->surface.u.legacy.level[dst_level].offset; - addr = rsrc->surface.u.legacy.level[src_level].offset; + base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; + addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z; addr += src_y * pitch + src_x * bpp; } @@ -2959,7 +2958,7 @@ static void r600_dma_copy(struct pipe_context *ctx, unsigned src_x, src_y; unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; - if (rctx->b.dma.cs == NULL) { + if (rctx->b.dma.cs.priv == NULL) { goto fallback; } @@ -3008,10 +3007,10 @@ static void r600_dma_copy(struct pipe_context *ctx, * dst_x/y == 0 * dst_pitch == src_pitch */ - src_offset= rsrc->surface.u.legacy.level[src_level].offset; + src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z; src_offset += src_y * src_pitch + src_x * bpp; - dst_offset = rdst->surface.u.legacy.level[dst_level].offset; + dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; dst_offset += dst_y * dst_pitch + dst_x * bpp; size = src_box->height * src_pitch; diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c index 4718286bd..2ded6c822 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c @@ -29,6 +29,7 @@ #include "r600d.h" #include "util/format/u_format_s3tc.h" +#include "util/u_draw.h" #include "util/u_index_modify.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -37,6 +38,10 @@ #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_ureg.h" +#include "nir.h" +#include "nir/nir_to_tgsi_info.h" +#include "tgsi/tgsi_from_mesa.h" + void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) { assert(!cb->buf); @@ -72,12 +77,12 @@ void r600_init_atom(struct r600_context *rctx, void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) { - r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); + r600_emit_command_buffer(&rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); } void r600_emit_alphatest_state(struct r600_context 
*rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; unsigned alpha_ref = a->sx_alpha_ref; @@ -245,7 +250,7 @@ static void r600_set_blend_color(struct pipe_context *ctx, void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_blend_color *state = &rctx->blend_color.state; radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); @@ -257,7 +262,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_vgt_state *a = (struct r600_vgt_state *)atom; radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en); @@ -281,17 +286,17 @@ static void r600_set_clip_state(struct pipe_context *ctx, } static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct r600_stencil_ref *state) + const struct r600_stencil_ref state) { struct r600_context *rctx = (struct r600_context *)ctx; - rctx->stencil_ref.state = *state; + rctx->stencil_ref.state = state; r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom); } void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom; radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); @@ -306,25 +311,25 @@ void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) } static void r600_set_pipe_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *state) + const struct pipe_stencil_ref state) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_dsa_state *dsa = (struct r600_dsa_state*)rctx->dsa_state.cso; struct r600_stencil_ref ref; - rctx->stencil_ref.pipe_state = *state; + rctx->stencil_ref.pipe_state = state; if (!dsa) return; - ref.ref_value[0] = state->ref_value[0]; - ref.ref_value[1] = state->ref_value[1]; + ref.ref_value[0] = state.ref_value[0]; + ref.ref_value[1] = state.ref_value[1]; ref.valuemask[0] = dsa->valuemask[0]; ref.valuemask[1] = dsa->valuemask[1]; ref.writemask[0] = dsa->writemask[0]; ref.writemask[1] = dsa->writemask[1]; - r600_set_stencil_ref(ctx, &ref); + r600_set_stencil_ref(ctx, ref); } static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) @@ -357,7 +362,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) } } - r600_set_stencil_ref(ctx, &ref); + r600_set_stencil_ref(ctx, ref); /* Update alphatest state. 
*/ if (rctx->alphatest_state.sx_alpha_test_control != dsa->sx_alpha_test_control || @@ -562,6 +567,8 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx) static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, const struct pipe_vertex_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -582,7 +589,13 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, if (input[i].buffer.resource) { vb[i].stride = input[i].stride; vb[i].buffer_offset = input[i].buffer_offset; - pipe_resource_reference(&vb[i].buffer.resource, input[i].buffer.resource); + if (take_ownership) { + pipe_resource_reference(&vb[i].buffer.resource, NULL); + vb[i].buffer.resource = input[i].buffer.resource; + } else { + pipe_resource_reference(&vb[i].buffer.resource, + input[i].buffer.resource); + } new_buffer_mask |= 1 << i; r600_context_add_resource_size(ctx, input[i].buffer.resource); } else { @@ -598,6 +611,11 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, disable_mask = ((1ull << count) - 1); } + for (i = 0; i < unbind_num_trailing_slots; i++) { + pipe_resource_reference(&vb[count + i].buffer.resource, NULL); + } + disable_mask |= ((1ull << unbind_num_trailing_slots) - 1) << count; + disable_mask <<= start_slot; new_buffer_mask <<= start_slot; @@ -622,6 +640,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, static void r600_set_sampler_views(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, struct pipe_sampler_view **views) { struct r600_context *rctx = (struct r600_context *) pipe; @@ -815,9 +834,12 @@ static inline void r600_shader_selector_key(const struct pipe_context *ctx, rctx->rasterizer && rctx->rasterizer->multisample_enable && !rctx->framebuffer.cb0_is_integer; key->ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs; + key->ps.apply_sample_id_mask = (rctx->ps_iter_samples > 1) || !rctx->rasterizer->multisample_enable; /* Dual-source blending only makes sense with nr_cbufs == 1. 
*/ - if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) + if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) { key->ps.nr_cbufs = 2; + key->ps.dual_source_blend = 1; + } break; } case PIPE_SHADER_TESS_EVAL: @@ -906,14 +928,19 @@ int r600_shader_select(struct pipe_context *ctx, } struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx, - const struct tgsi_token *tokens, + const void *prog, enum pipe_shader_ir ir, unsigned pipe_shader_type) { struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector); sel->type = pipe_shader_type; - sel->tokens = tgsi_dup_tokens(tokens); - tgsi_scan_shader(tokens, &sel->info); + if (ir == PIPE_SHADER_IR_TGSI) { + sel->tokens = tgsi_dup_tokens((const struct tgsi_token *)prog); + tgsi_scan_shader(sel->tokens, &sel->info); + } else if (ir == PIPE_SHADER_IR_NIR){ + sel->nir = nir_shader_clone(NULL, (const nir_shader *)prog); + nir_tgsi_scan_shader(sel->nir, &sel->info, true); + } return sel; } @@ -922,8 +949,16 @@ static void *r600_create_shader_state(struct pipe_context *ctx, unsigned pipe_shader_type) { int i; - struct r600_pipe_shader_selector *sel = r600_create_shader_state_tokens(ctx, state->tokens, pipe_shader_type); - + struct r600_pipe_shader_selector *sel; + + if (state->type == PIPE_SHADER_IR_TGSI) + sel = r600_create_shader_state_tokens(ctx, state->tokens, state->type, pipe_shader_type); + else if (state->type == PIPE_SHADER_IR_NIR) { + sel = r600_create_shader_state_tokens(ctx, state->ir.nir, state->type, pipe_shader_type); + } else + assert(0 && "Unknown shader type\n"); + + sel->ir_type = state->type; sel->so = state->stream_output; switch (pipe_shader_type) { @@ -1082,7 +1117,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx, p = c; } - free(sel->tokens); + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + free(sel->tokens); + /* We might have converted the TGSI shader to a NIR shader */ + if (sel->nir) + ralloc_free(sel->nir); + } + else if (sel->ir_type == PIPE_SHADER_IR_NIR) + ralloc_free(sel->nir); free(sel); } @@ -1159,6 +1201,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf static void r600_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, uint index, + bool take_ownership, const struct pipe_constant_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -1166,7 +1209,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, struct pipe_constant_buffer *cb; const uint8_t *ptr; - /* Note that the state tracker can unbind constant buffers by + /* Note that the gallium frontend can unbind constant buffers by * passing NULL here. */ if (unlikely(!input || (!input->buffer && !input->user_buffer))) { @@ -1209,7 +1252,12 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, } else { /* Setup the hw buffer. 
*/ cb->buffer_offset = input->buffer_offset; - pipe_resource_reference(&cb->buffer, input->buffer); + if (take_ownership) { + pipe_resource_reference(&cb->buffer, NULL); + cb->buffer = input->buffer; + } else { + pipe_resource_reference(&cb->buffer, input->buffer); + } r600_context_add_resource_size(ctx, input->buffer); } @@ -1315,7 +1363,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on cb.user_buffer = ptr; cb.buffer_offset = 0; cb.buffer_size = size; - rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb); + rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, &cb); pipe_resource_reference(&cb.buffer, NULL); } } @@ -1504,21 +1552,21 @@ static void update_gs_block_state(struct r600_context *rctx, unsigned enable) if (enable) { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, - R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring); + R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.esgs_ring); if (rctx->tes_shader) { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, - R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring); + R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring); } else { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, - R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring); + R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring); } } else { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, - R600_GS_RING_CONST_BUFFER, NULL); + R600_GS_RING_CONST_BUFFER, false, NULL); r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, - R600_GS_RING_CONST_BUFFER, NULL); + R600_GS_RING_CONST_BUFFER, false, NULL); r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, - R600_GS_RING_CONST_BUFFER, NULL); + R600_GS_RING_CONST_BUFFER, false, NULL); } } } @@ -1638,7 +1686,7 @@ void r600_setup_scratch_area_for_shader(struct r600_context *rctx, if (scratch->dirty || unlikely(shader->scratch_space_needed != scratch->item_size || size > scratch->size)) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; scratch->dirty = false; @@ -1846,7 +1894,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) * to LS slots and won't reflect what is dirty as VS stage even if the * TES didn't overwrite it. The story for re-enabled TES is similar. * In any case, we're not allowed to submit any TES state when - * TES is disabled (the state tracker may not do this but this looks + * TES is disabled (the gallium frontend may not do this but this looks * like an optimization to me, not something which can be relied on). */ @@ -1982,7 +2030,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_clip_misc_state *state = &rctx->clip_misc_state; radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, @@ -2002,7 +2050,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom /* rast_prim is the primitive type after GS. */ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; enum pipe_prim_type rast_prim = rctx->current_rast_prim; /* Skip this if not rendering lines. 
*/ @@ -2025,21 +2073,35 @@ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx) rctx->last_rast_prim = rast_prim; } -static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws) { + if (num_draws > 1) { + util_draw_multi(ctx, info, indirect, draws, num_draws); + return; + } + struct r600_context *rctx = (struct r600_context *)ctx; - struct pipe_resource *indexbuf = info->has_user_indices ? NULL : info->index.resource; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct pipe_resource *indexbuf = !info->index_size || info->has_user_indices ? NULL : info->index.resource; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off; - bool has_user_indices = info->has_user_indices; + bool has_user_indices = info->index_size && info->has_user_indices; uint64_t mask; unsigned num_patches, dirty_tex_counter, index_offset = 0; unsigned index_size = info->index_size; int index_bias; struct r600_shader_atomic combined_atomics[8]; - uint8_t atomic_used_mask; + uint8_t atomic_used_mask = 0; + struct pipe_stream_output_target *count_from_so = NULL; + + if (indirect && indirect->count_from_stream_output) { + count_from_so = indirect->count_from_stream_output; + indirect = NULL; + } - if (!info->indirect && !info->count && (index_size || !info->count_from_stream_output)) { + if (!indirect && !draws[0].count && (index_size || !count_from_so)) { return; } @@ -2054,7 +2116,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* make sure that the gfx ring is only one active */ - if (radeon_emitted(rctx->b.dma.cs, 0)) { + if (radeon_emitted(&rctx->b.dma.cs, 0)) { rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL); } @@ -2101,7 +2163,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } if (index_size) { - index_offset += info->start * index_size; + index_offset += draws[0].start * index_size; /* Translate 8-bit indices to 16-bit. */ if (unlikely(index_size == 1)) { @@ -2110,17 +2172,17 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info void *ptr; unsigned start, count; - if (likely(!info->indirect)) { + if (likely(!indirect)) { start = 0; - count = info->count; + count = draws[0].count; } else { /* Have to get start/count from indirect buffer, slow path ahead... */ - struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect->buffer; + struct r600_resource *indirect_resource = (struct r600_resource *)indirect->buffer; unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, - PIPE_TRANSFER_READ); + PIPE_MAP_READ); if (data) { - data += info->indirect->offset / sizeof(unsigned); + data += indirect->offset / sizeof(unsigned); start = data[2] * index_size; count = data[0]; } @@ -2149,25 +2211,28 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info * and the indices are emitted via PKT3_DRAW_INDEX_IMMD. * Indirect draws never use immediate indices. * Note: Instanced rendering in combination with immediate indices hangs. 
*/ - if (has_user_indices && (R600_BIG_ENDIAN || info->indirect || + if (has_user_indices && (R600_BIG_ENDIAN || indirect || info->instance_count > 1 || - info->count*index_size > 20)) { + draws[0].count*index_size > 20)) { + unsigned start_offset = draws[0].start * index_size; indexbuf = NULL; - u_upload_data(ctx->stream_uploader, 0, - info->count * index_size, 256, - info->index.user, &index_offset, &indexbuf); + u_upload_data(ctx->stream_uploader, start_offset, + draws[0].count * index_size, 256, + (char*)info->index.user + start_offset, + &index_offset, &indexbuf); + index_offset -= start_offset; has_user_indices = false; } index_bias = info->index_bias; } else { - index_bias = info->start; + index_bias = indirect ? 0 : draws[0].start; } /* Set the index offset and primitive restart. */ if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info->primitive_restart || rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info->restart_index || rctx->vgt_state.vgt_indx_offset != index_bias || - (rctx->vgt_state.last_draw_was_indirect && !info->indirect)) { + (rctx->vgt_state.last_draw_was_indirect && !indirect)) { rctx->vgt_state.vgt_multi_prim_ib_reset_en = info->primitive_restart; rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info->restart_index; rctx->vgt_state.vgt_indx_offset = index_bias; @@ -2247,7 +2312,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* Update start instance. */ - if (!info->indirect && rctx->last_start_instance != info->start_instance) { + if (!indirect && rctx->last_start_instance != info->start_instance) { radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info->start_instance); rctx->last_start_instance = info->start_instance; } @@ -2262,11 +2327,11 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* Draw packets. */ - if (likely(!info->indirect)) { + if (likely(!indirect)) { radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cs, info->instance_count); } else { - uint64_t va = r600_resource(info->indirect->buffer)->gpu_address; + uint64_t va = r600_resource(indirect->buffer)->gpu_address; assert(rctx->b.chip_class >= EVERGREEN); // Invalidate so non-indirect draw calls reset this state @@ -2280,7 +2345,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, - (struct r600_resource*)info->indirect->buffer, + (struct r600_resource*)indirect->buffer, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT)); } @@ -2292,20 +2357,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info (VGT_INDEX_16 | (R600_BIG_ENDIAN ? 
VGT_DMA_SWAP_16_BIT : 0))); if (has_user_indices) { - unsigned size_bytes = info->count*index_size; + unsigned size_bytes = draws[0].count*index_size; unsigned size_dw = align(size_bytes, 4) / 4; radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit)); - radeon_emit(cs, info->count); + radeon_emit(cs, draws[0].count); radeon_emit(cs, V_0287F0_DI_SRC_SEL_IMMEDIATE); - radeon_emit_array(cs, info->index.user, size_dw); + radeon_emit_array(cs, info->index.user + draws[0].start * index_size, size_dw); } else { uint64_t va = r600_resource(indexbuf)->gpu_address + index_offset; - if (likely(!info->indirect)) { + if (likely(!indirect)) { radeon_emit(cs, PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit)); radeon_emit(cs, va); radeon_emit(cs, (va >> 32UL) & 0xFF); - radeon_emit(cs, info->count); + radeon_emit(cs, draws[0].count); radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, @@ -2330,13 +2395,13 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, max_size); radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit)); - radeon_emit(cs, info->indirect->offset); + radeon_emit(cs, indirect->offset); radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); } } } else { - if (unlikely(info->count_from_stream_output)) { - struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output; + if (unlikely(count_from_so)) { + struct r600_so_target *t = (struct r600_so_target*)count_from_so; uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset; radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); @@ -2354,16 +2419,16 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info RADEON_PRIO_SO_FILLED_SIZE)); } - if (likely(!info->indirect)) { + if (likely(!indirect)) { radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); - radeon_emit(cs, info->count); + radeon_emit(cs, draws[0].count); } else { radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit)); - radeon_emit(cs, info->indirect->offset); + radeon_emit(cs, indirect->offset); } radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | - (info->count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0)); + (count_from_so ? 
S_0287F0_USE_OPAQUE(1) : 0)); } /* SMX returns CONTEXT_DONE too early workaround */ @@ -2549,7 +2614,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader; if (!shader) @@ -2757,6 +2822,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_LATC1_SNORM: word4 |= sign_bit[0]; + FALLTHROUGH; case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_LATC1_UNORM: result = FMT_BC4; @@ -2764,6 +2830,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, case PIPE_FORMAT_RGTC2_SNORM: case PIPE_FORMAT_LATC2_SNORM: word4 |= sign_bit[0] | sign_bit[1]; + FALLTHROUGH; case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_LATC2_UNORM: result = FMT_BC5; @@ -2809,7 +2876,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, goto out_word4; case PIPE_FORMAT_BPTC_RGB_FLOAT: word4 |= sign_bit[0] | sign_bit[1] | sign_bit[2]; - /* fall through */ + FALLTHROUGH; case PIPE_FORMAT_BPTC_RGB_UFLOAT: result = FMT_BC6; goto out_word4; diff --git a/lib/mesa/src/gallium/drivers/r600/r600_uvd.c b/lib/mesa/src/gallium/drivers/r600/r600_uvd.c index 2e7d7ee4d..18ac073da 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_uvd.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_uvd.c @@ -66,6 +66,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, struct pipe_video_buffer template; struct pipe_resource templ; unsigned i, array_size; + enum pipe_video_chroma_format chroma_format = + pipe_format_to_chroma_format(tmpl->buffer_format); assert(pipe); @@ -77,7 +79,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH); template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT); - vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_DEFAULT, 0); + vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, + PIPE_USAGE_DEFAULT, 0, chroma_format); if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING) templ.bind = PIPE_BIND_LINEAR; resources[0] = (struct r600_texture *) @@ -86,7 +89,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, goto error; if (resource_formats[1] != PIPE_FORMAT_NONE) { - vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_DEFAULT, 1); + vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, + PIPE_USAGE_DEFAULT, 1, chroma_format); if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING) templ.bind = PIPE_BIND_LINEAR; resources[1] = (struct r600_texture *) @@ -96,7 +100,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, } if (resource_formats[2] != PIPE_FORMAT_NONE) { - vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_DEFAULT, 2); + vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, + PIPE_USAGE_DEFAULT, 2, chroma_format); if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING) templ.bind = PIPE_BIND_LINEAR; resources[2] = (struct r600_texture *) diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h 
b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h index e7231702d..ef2f39855 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h @@ -495,6 +495,15 @@ struct bc_alu_src { unsigned abs:1; unsigned rel:1; literal value; + + void clear() { + sel = 0; + chan = 0; + neg = 0; + abs = 0; + rel = 0; + value = 0; + } }; struct bc_alu { @@ -529,6 +538,31 @@ struct bc_alu { this->op = op; op_ptr = r600_isa_alu(op); } + void clear() { + op_ptr = nullptr; + op = 0; + for (int i = 0; i < 3; ++i) + src[i].clear(); + dst_gpr = 0; + dst_chan = 0; + dst_rel = 0; + clamp = 0; + omod = 0; + bank_swizzle = 0; + index_mode = 0; + last = 0; + pred_sel = 0; + fog_merge = 0; + write_mask = 0; + update_exec_mask = 0; + update_pred = 0; + slot = 0; + lds_idx_offset = 0; + slot_flags = AF_NONE; + } + bc_alu() { + clear(); + } }; struct bc_fetch { @@ -658,7 +692,12 @@ public: static unsigned dskip_mode; sb_context() : src_stats(), opt_stats(), isa(0), - hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {} + hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN), + alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0), + num_slots(0), uses_mova_gpr(false), + r6xx_gpr_index_workaround(false), stack_workaround_8xx(false), + stack_workaround_9xx(false), wavefront_size(0), + stack_entry_size(0) {} int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp index 05674ff24..36361a251 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -326,7 +326,7 @@ void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src, const bc_alu_src &s = bc.src[src]; if (s.abs) - v = fabs(v.f); + v = fabsf(v.f); if (s.neg) v = -v.f; } @@ -424,21 +424,21 @@ bool expr_handler::fold_alu_op1(alu_node& n) { apply_alu_src_mod(n.bc, 0, cv); switch (n.bc.op) { - case ALU_OP1_CEIL: dv = ceil(cv.f); break; + case ALU_OP1_CEIL: dv = ceilf(cv.f); break; case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break; - case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break; - case ALU_OP1_FLOOR: dv = floor(cv.f); break; + case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break; + case ALU_OP1_FLOOR: dv = floorf(cv.f); break; case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ???? 
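// The expr_handler constant-folding hunks around here move the libm calls
// from double to single precision (fabs->fabsf, ceil->ceilf, floor->floorf,
// trunc->truncf, sqrt->sqrtf, exp2->exp2f, log2->log2f), presumably so the
// compile-time fold stays in the same 32-bit float domain as the hardware
// ALU results it replaces, rather than taking a float -> double -> float
// round trip through the default promotions.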
- case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break; - case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break; - case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break; + case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break; + case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break; + case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break; case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break; - case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break; + case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break; case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break; case ALU_OP1_LOG_CLAMPED: case ALU_OP1_LOG_IEEE: if (cv.f != 0.0f) - dv = log2(cv.f); + dv = log2f(cv.f); else // don't fold to NAN, let the GPU handle it for now // (prevents degenerate LIT tests from failing) @@ -454,7 +454,7 @@ bool expr_handler::fold_alu_op1(alu_node& n) { case ALU_OP1_PRED_SET_RESTORE: dv = cv; break; case ALU_OP1_RECIPSQRT_CLAMPED: case ALU_OP1_RECIPSQRT_FF: - case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break; + case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break; case ALU_OP1_RECIP_CLAMPED: case ALU_OP1_RECIP_FF: case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break; @@ -462,8 +462,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) { case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break; // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break; case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break; - case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break; - case ALU_OP1_TRUNC: dv = trunc(cv.f); break; + case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break; + case ALU_OP1_TRUNC: dv = truncf(cv.f); break; default: return false; @@ -719,7 +719,7 @@ bool expr_handler::fold_assoc(alu_node *n) { n->src[0] = n->src[2]; n->bc.src[0] = n->bc.src[2]; n->src[1] = sh.get_const_value(cr); - memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); + n->bc.src[1].clear(); n->src.resize(2); n->bc.set_op(ALU_OP2_ADD); @@ -729,7 +729,7 @@ bool expr_handler::fold_assoc(alu_node *n) { n->bc.src[0] = a->bc.src[last_arg]; n->bc.src[0].neg ^= cur_neg; n->src[1] = sh.get_const_value(cr); - memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); + n->bc.src[1].clear(); } return false; @@ -770,7 +770,7 @@ bool expr_handler::fold_alu_op2(alu_node& n) { case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) if (!sh.safe_math) { n.src[1] = sh.get_const_value(2.0f); - memset(&n.bc.src[1], 0, sizeof(bc_alu_src)); + n.bc.src[1].clear(); n.bc.set_op(ALU_OP2_MUL); return fold_alu_op2(n); } @@ -1070,7 +1070,7 @@ bool expr_handler::fold_alu_op3(alu_node& n) { } n.src[1] = t; - memset(&n.bc.src[1], 0, sizeof(bc_alu_src)); + n.bc.src[1].clear(); n.src.resize(2); @@ -1101,7 +1101,7 @@ bool expr_handler::fold_alu_op3(alu_node& n) { dv = cv0.f * cv1.f; n.bc.set_op(ALU_OP2_ADD); n.src[0] = sh.get_const_value(dv); - memset(&n.bc.src[0], 0, sizeof(bc_alu_src)); + n.bc.src[0].clear(); n.src[1] = n.src[2]; n.bc.src[1] = n.bc.src[2]; n.src.resize(2); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp index 017153434..48355e8d6 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp @@ -99,8 +99,8 @@ void if_conversion::convert_kill_instructions(region_node *r, a->src[0] = cnd; a->src[1] = sh.get_const_value(0); // clear modifiers - memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); - memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); + a->bc.src[0].clear(); + 
a->bc.src[1].clear(); } else { // kill with constant 'false' condition, this shouldn't happen // but remove it anyway diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h index ef0fbd4e6..eecf17d28 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h @@ -713,7 +713,8 @@ enum node_flags { NF_SCHEDULE_EARLY = (1 << 9), // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU - NF_ALU_STACK_WORKAROUND = (1 << 10) + NF_ALU_STACK_WORKAROUND = (1 << 10), + NF_ALU_2SLOT = (1 << 11), }; inline node_flags operator |(node_flags l, node_flags r) { @@ -929,7 +930,7 @@ public: bool empty() { assert(first != NULL || first == last); return !first; } unsigned count(); - // used with node containers that represent shceduling queues + // used with node containers that represent scheduling queues // ignores copies and takes into account alu_packed_node items unsigned real_alu_count(); @@ -1012,7 +1013,7 @@ public: class alu_node : public node { protected: - alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); } + alu_node() : node(NT_OP, NST_ALU_INST) { } public: bc_alu bc; @@ -1021,8 +1022,9 @@ public: virtual bool fold_dispatch(expr_handler *ex); unsigned forced_bank_swizzle() { - return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ? - VEC_210 : 0; + return ((bc.op_ptr->flags & AF_INTERP) && + ((bc.slot_flags == AF_4V) || + (bc.slot_flags == AF_2V))) ? VEC_210 : 0; } // return param index + 1 if instruction references interpolation param, diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp index c557b8687..e14b187de 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp @@ -313,24 +313,26 @@ int ra_init::run() { alloc_arrays(); - ra_node(sh.root); - return 0; + return ra_node(sh.root) ? 
0 : 1; } -void ra_init::ra_node(container_node* c) { +bool ra_init::ra_node(container_node* c) { for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->type == NT_OP) { - process_op(n); + if (!process_op(n)) + return false; } if (n->is_container() && !n->is_alu_packed()) { - ra_node(static_cast<container_node*>(n)); + if (!ra_node(static_cast<container_node*>(n))) + return false; } } + return true; } -void ra_init::process_op(node* n) { +bool ra_init::process_op(node* n) { bool copy = n->is_copy_mov(); @@ -355,7 +357,8 @@ void ra_init::process_op(node* n) { for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { value *v = *I; if (v && v->is_sgpr()) - color(v); + if (!color(v)) + return false; } } @@ -372,10 +375,12 @@ void ra_init::process_op(node* n) { assign_color(v, s->gpr); } } else - color(v); + if (!color(v)) + return false; } } } + return true; } void ra_init::color_bs_constraint(ra_constraint* c) { @@ -476,15 +481,15 @@ void ra_init::color_bs_constraint(ra_constraint* c) { } } -void ra_init::color(value* v) { +bool ra_init::color(value* v) { if (v->constraint && v->constraint->kind == CK_PACKED_BS) { color_bs_constraint(v->constraint); - return; + return true; } if (v->chunk && v->chunk->is_fixed()) - return; + return true; RA_DUMP( sblog << "coloring "; @@ -497,24 +502,24 @@ void ra_init::color(value* v) { if (v->is_reg_pinned()) { assert(v->is_chan_pinned()); assign_color(v, v->pin_gpr); - return; + return true; } regbits rb(sh, v->interferences); sel_chan c; if (v->is_chan_pinned()) { - RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << " "; ); unsigned mask = 1 << v->pin_gpr.chan(); c = rb.find_free_chans(mask) + v->pin_gpr.chan(); } else { unsigned cm = get_preferable_chan_mask(); - RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; ); c = rb.find_free_chan_by_mask(cm); - } + } - assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed"); + if (!c || c.sel() >= 128 - ctx.alu_temp_gprs) + return false; assign_color(v, c); + return true; } void ra_init::assign_color(value* v, sel_chan c) { diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp index fe887c84c..2d5fbfdb2 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -1950,7 +1950,10 @@ void post_scheduler::release_src_vec(vvec& vv, bool src) { } void literal_tracker::reset() { - memset(lt, 0, sizeof(lt)); + lt[0].u = 0; + lt[1].u = 0; + lt[2].u = 0; + lt[3].u = 0; memset(uc, 0, sizeof(uc)); } diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig b/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig new file mode 100644 index 000000000..9cb67618b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig @@ -0,0 +1,2 @@ +[*.{cpp,c,h}] +indent_style = space diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp new file mode 100644 index 000000000..8690fc269 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp @@ -0,0 +1,325 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, 
modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_alu_defines.h" + +namespace r600 { + +const std::map<EAluOp, AluOp> alu_ops = { + {op0_nop ,AluOp(0, 0, AluOp::a,"NOP")}, + {op0_group_barrier ,AluOp(0, 0, AluOp::a,"GROUP_BARRIER")}, + {op0_group_seq_begin ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_BEGIN")}, + {op0_group_seq_end ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_END")}, + {op0_pred_set_clr ,AluOp(0, 1, AluOp::a,"PRED_SET_CLR")}, + {op0_store_flags ,AluOp(0, 0, AluOp::v,"STORE_FLAGS")}, + {op0_lds_1a ,AluOp(0, 0, AluOp::v,"LDS_1A")}, + {op0_lds_1a1d ,AluOp(0, 0, AluOp::v,"LDS_1A1D")}, + {op0_lds_2a ,AluOp(0, 0, AluOp::v,"LDS_2A")}, + + {op1_bcnt_int ,AluOp(1, 0, AluOp::v,"BCNT_INT")}, + {op1_bcnt_accum_prev_int ,AluOp(1, 0, AluOp::v,"BCNT_ACCUM_PREV_INT")}, + {op1_bfrev_int ,AluOp(1, 0, AluOp::a,"BFREV_INT")}, + {op1_ceil ,AluOp(1, 1, AluOp::a,"CEIL")}, + {op1_cos ,AluOp(1, 1, AluOp::t,"COS")}, + {op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")}, + {op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")}, + {op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")}, + {op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")}, + {op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")}, + {op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")}, + {op1_flt16_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT16_TO_FLT32")}, + {op1_flt32_to_flt16 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT16")}, + {op1_flt32_to_flt64 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT64")}, + {op1_flt64_to_flt32 ,AluOp(1, 1, AluOp::a,"FLT64_TO_FLT32")}, + {op1_fract ,AluOp(1, 1, AluOp::a,"FRACT")}, + {op1_fract_64 ,AluOp(1, 1, AluOp::v,"FRACT_64")}, + {op1_frexp_64 ,AluOp(1, 1, AluOp::v,"FREXP_64")}, + {op1_int_to_flt ,AluOp(1, 0, AluOp::t,"INT_TO_FLT")}, + {op1_ldexp_64 ,AluOp(1, 1, AluOp::v,"LDEXP_64")}, + {op1_interp_load_p0 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P0")}, + {op1_interp_load_p10 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P10")}, + {op1_interp_load_p20 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P20")}, + {op1_load_store_flags ,AluOp(1, 0, AluOp::v,"LOAD_STORE_FLAGS")}, + {op1_log_clamped ,AluOp(1, 1, AluOp::t,"LOG_CLAMPED")}, + {op1_log_ieee ,AluOp(1, 1, AluOp::t,"LOG_IEEE")}, + {op1_max4 ,AluOp(1, 1, AluOp::v,"MAX4")}, + {op1_mbcnt_32hi_int ,AluOp(1, 0, AluOp::v,"MBCNT_32HI_INT")}, + {op1_mbcnt_32lo_accum_prev_int ,AluOp(1, 0, AluOp::v,"MBCNT_32LO_ACCUM_PREV_INT")}, + {op1_mov ,AluOp(1, 0, AluOp::a,"MOV")}, + {op1_mova_int ,AluOp(1, 0, AluOp::v,"MOVA_INT")}, + {op1_not_int ,AluOp(1, 0, AluOp::a,"NOT_INT")}, + {op1_offset_to_flt ,AluOp(1, 0, AluOp::v,"OFFSET_TO_FLT")}, + {op1_pred_set_inv ,AluOp(1, 1, AluOp::a,"PRED_SET_INV")}, + {op1_pred_set_restore ,AluOp(1, 1, AluOp::a,"PRED_SET_RESTORE")}, + {op1_set_cf_idx0 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX0")}, 
/* Reads from AR register? */ + {op1_set_cf_idx1 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX1")}, /* Reads from AR register? */ + {op1_recip_clamped ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED")}, + {op1_recip_ff ,AluOp(1, 1, AluOp::t,"RECIP_FF")}, + {op1_recip_ieee ,AluOp(1, 1, AluOp::t,"RECIP_IEEE")}, + {op1_recipsqrt_clamped ,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED")}, + {op1_recipsqrt_ff ,AluOp(1, 1, AluOp::t,"RECIPSQRT_FF")}, + {op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")}, + {op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")}, + {op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")}, + {op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")}, + {op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")}, + {op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")}, + {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")}, + {op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")}, + {op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")}, + {op1_sin ,AluOp(1, 1, AluOp::t,"SIN")}, + {op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")}, + {op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")}, + {op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")}, + {op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")}, + {op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")}, + {op1_ubyte3_flt ,AluOp(1, 1, AluOp::v,"UBYTE3_FLT")}, + {op1_uint_to_flt ,AluOp(1, 0, AluOp::t,"UINT_TO_FLT")}, + {op1_ffbh_uint ,AluOp(1, 0, AluOp::v,"FFBH_UINT")}, + {op1_ffbl_int ,AluOp(1, 0, AluOp::v,"FFBL_INT")}, + {op1_ffbh_int ,AluOp(1, 0, AluOp::v,"FFBH_INT")}, + {op1_flt_to_uint4 ,AluOp(1, 1, AluOp::v,"FLT_TO_UINT4")}, + {op1v_flt32_to_flt64 ,AluOp(1, 1, AluOp::a,"FLT32_TO_FLT64")}, + {op1v_flt64_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT64_TO_FLT32")}, + + {op2_add ,AluOp(2, 1, AluOp::a,"ADD")}, + {op2_bfm_int ,AluOp(2, 0, AluOp::v,"BFM_INT")}, + {op2_mul ,AluOp(2, 1, AluOp::a,"MUL")}, + {op2_mul_ieee ,AluOp(2, 1, AluOp::a,"MUL_IEEE")}, + {op2_max ,AluOp(2, 1, AluOp::a,"MAX")}, + {op2_min ,AluOp(2, 1, AluOp::a,"MIN")}, + {op2_max_dx10 ,AluOp(2, 1, AluOp::a,"MAX_DX10")}, + {op2_min_dx10 ,AluOp(2, 1, AluOp::a,"MIN_DX10")}, + {op2_sete ,AluOp(2, 1, AluOp::a,"SETE")}, + {op2_setgt ,AluOp(2, 1, AluOp::a,"SETGT")}, + {op2_setge ,AluOp(2, 1, AluOp::a,"SETGE")}, + {op2_setne ,AluOp(2, 1, AluOp::a,"SETNE")}, + {op2_sete_dx10 ,AluOp(2, 1, AluOp::a,"SETE_DX10")}, + {op2_setgt_dx10 ,AluOp(2, 1, AluOp::a,"SETGT_DX10")}, + {op2_setge_dx10 ,AluOp(2, 1, AluOp::a,"SETGE_DX10")}, + {op2_setne_dx10 ,AluOp(2, 1, AluOp::a,"SETNE_DX10")}, + {op2_ashr_int ,AluOp(2, 0, AluOp::a,"ASHR_INT")}, + {op2_lshr_int ,AluOp(2, 0, AluOp::a,"LSHR_INT")}, + {op2_lshl_int ,AluOp(2, 0, AluOp::a,"LSHL_INT")}, + {op2_mul_64 ,AluOp(2, 1, AluOp::a,"MUL_64")}, + {op2_pred_setgt_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGT_UINT")}, + {op2_pred_setge_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGE_UINT")}, + {op2_pred_sete ,AluOp(2, 1, AluOp::a,"PRED_SETE")}, + {op2_pred_setgt ,AluOp(2, 1, AluOp::a,"PRED_SETGT")}, + {op2_pred_setge ,AluOp(2, 1, AluOp::a,"PRED_SETGE")}, + {op2_pred_setne ,AluOp(2, 1, AluOp::a,"PRED_SETNE")}, + {op2_pred_set_pop ,AluOp(2, 1, AluOp::a,"PRED_SET_POP")}, + {op2_pred_sete_push ,AluOp(2, 1, AluOp::a,"PRED_SETE_PUSH")}, + {op2_pred_setgt_push ,AluOp(2, 1, AluOp::a,"PRED_SETGT_PUSH")}, + {op2_pred_setge_push ,AluOp(2, 1, AluOp::a,"PRED_SETGE_PUSH")}, + {op2_pred_setne_push ,AluOp(2, 1, AluOp::a,"PRED_SETNE_PUSH")}, + {op2_kille ,AluOp(2, 1, AluOp::a,"KILLE")}, + {op2_killgt ,AluOp(2, 1, AluOp::a,"KILLGT")}, + {op2_killge ,AluOp(2, 1, AluOp::a,"KILLGE")}, + {op2_killne ,AluOp(2, 1, 
AluOp::a,"KILLNE")}, + {op2_and_int ,AluOp(2, 0, AluOp::a,"AND_INT")}, + {op2_or_int ,AluOp(2, 0, AluOp::a,"OR_INT")}, + {op2_xor_int ,AluOp(2, 0, AluOp::a,"XOR_INT")}, + {op2_add_int ,AluOp(2, 0, AluOp::a,"ADD_INT")}, + {op2_sub_int ,AluOp(2, 0, AluOp::a,"SUB_INT")}, + {op2_max_int ,AluOp(2, 0, AluOp::a,"MAX_INT")}, + {op2_min_int ,AluOp(2, 0, AluOp::a,"MIN_INT")}, + {op2_max_uint ,AluOp(2, 0, AluOp::a,"MAX_UINT")}, + {op2_min_uint ,AluOp(2, 0, AluOp::a,"MIN_UINT")}, + {op2_sete_int ,AluOp(2, 0, AluOp::a,"SETE_INT")}, + {op2_setgt_int ,AluOp(2, 0, AluOp::a,"SETGT_INT")}, + {op2_setge_int ,AluOp(2, 0, AluOp::a,"SETGE_INT")}, + {op2_setne_int ,AluOp(2, 0, AluOp::a,"SETNE_INT")}, + {op2_setgt_uint ,AluOp(2, 0, AluOp::a,"SETGT_UINT")}, + {op2_setge_uint ,AluOp(2, 0, AluOp::a,"SETGE_UINT")}, + {op2_killgt_uint ,AluOp(2, 0, AluOp::a,"KILLGT_UINT")}, + {op2_killge_uint ,AluOp(2, 0, AluOp::a,"KILLGE_UINT")}, + {op2_prede_int ,AluOp(2, 0, AluOp::a,"PREDE_INT")}, + {op2_pred_setgt_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_INT")}, + {op2_pred_setge_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_INT")}, + {op2_pred_setne_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_INT")}, + {op2_kille_int ,AluOp(2, 0, AluOp::a,"KILLE_INT")}, + {op2_killgt_int ,AluOp(2, 0, AluOp::a,"KILLGT_INT")}, + {op2_killge_int ,AluOp(2, 0, AluOp::a,"KILLGE_INT")}, + {op2_killne_int ,AluOp(2, 0, AluOp::a,"KILLNE_INT")}, + {op2_pred_sete_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETE_PUSH_INT")}, + {op2_pred_setgt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_PUSH_INT")}, + {op2_pred_setge_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_PUSH_INT")}, + {op2_pred_setne_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_PUSH_INT")}, + {op2_pred_setlt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLT_PUSH_INT")}, + {op2_pred_setle_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLE_PUSH_INT")}, + {op2_addc_uint ,AluOp(2, 0, AluOp::a,"ADDC_UINT")}, + {op2_subb_uint ,AluOp(2, 0, AluOp::a,"SUBB_UINT")}, + {op2_set_mode ,AluOp(2, 0, AluOp::a,"SET_MODE")}, + {op2_set_lds_size ,AluOp(2, 0, AluOp::a,"SET_LDS_SIZE")}, + {op2_mullo_int ,AluOp(2, 0, AluOp::t,"MULLO_INT")}, + {op2_mulhi_int ,AluOp(2, 0, AluOp::t,"MULHI_INT")}, + {op2_mullo_uint ,AluOp(2, 0, AluOp::t,"MULLO_UINT")}, + {op2_mulhi_uint ,AluOp(2, 0, AluOp::t,"MULHI_UINT")}, + {op2_dot_ieee ,AluOp(2, 1, AluOp::v,"DOT_IEEE")}, + {op2_mulhi_uint24 ,AluOp(2, 0, AluOp::v,"MULHI_UINT24")}, + {op2_mul_uint24 ,AluOp(2, 0, AluOp::v,"MUL_UINT24")}, + {op2_sete_64 ,AluOp(2, 1, AluOp::v,"SETE_64")}, + {op2_setne_64 ,AluOp(2, 1, AluOp::v,"SETNE_64")}, + {op2_setgt_64 ,AluOp(2, 1, AluOp::v,"SETGT_64")}, + {op2_setge_64 ,AluOp(2, 1, AluOp::v,"SETGE_64")}, + {op2_min_64 ,AluOp(2, 1, AluOp::v,"MIN_64")}, + {op2_max_64 ,AluOp(2, 1, AluOp::v,"MAX_64")}, + {op2_dot4 ,AluOp(2, 1, AluOp::v,"DOT4")}, + {op2_dot4_ieee ,AluOp(2, 1, AluOp::v,"DOT4_IEEE")}, + {op2_cube ,AluOp(2, 1, AluOp::v,"CUBE")}, + {op2_pred_setgt_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGT_64")}, + {op2_pred_sete_64 ,AluOp(2, 1, AluOp::v,"PRED_SETE_64")}, + {op2_pred_setge_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGE_64")}, + {OP2V_MUL_64 ,AluOp(2, 1, AluOp::v,"MUL_64")}, + {op2_add_64 ,AluOp(2, 1, AluOp::v,"ADD_64")}, + {op2_sad_accum_prev_uint ,AluOp(2, 0, AluOp::v,"SAD_ACCUM_PREV_UINT")}, + {op2_dot ,AluOp(2, 1, AluOp::v,"DOT")}, + {op2_mul_prev ,AluOp(2, 1, AluOp::v,"MUL_PREV")}, + {op2_mul_ieee_prev ,AluOp(2, 1, AluOp::v,"MUL_IEEE_PREV")}, + {op2_add_prev ,AluOp(2, 1, AluOp::v,"ADD_PREV")}, + {op2_muladd_prev ,AluOp(2, 1, AluOp::v,"MULADD_PREV")}, + {op2_muladd_ieee_prev ,AluOp(2, 1, 
AluOp::v,"MULADD_IEEE_PREV")}, + {op2_interp_xy ,AluOp(2, 1, AluOp::v,"INTERP_XY")}, + {op2_interp_zw ,AluOp(2, 1, AluOp::v,"INTERP_ZW")}, + {op2_interp_x ,AluOp(2, 1, AluOp::v,"INTERP_X")}, + {op2_interp_z ,AluOp(2, 1, AluOp::v,"INTERP_Z")}, + + {op3_bfe_uint ,AluOp(3, 0, AluOp::v,"BFE_UINT")}, + {op3_bfe_int ,AluOp(3, 0, AluOp::v,"BFE_INT")}, + {op3_bfi_int ,AluOp(3, 0, AluOp::v,"BFI_INT")}, + {op3_fma ,AluOp(3, 1, AluOp::v,"FMA")}, + {op3_cndne_64 ,AluOp(3, 1, AluOp::v,"CNDNE_64")}, + {op3_fma_64 ,AluOp(3, 1, AluOp::v,"FMA_64")}, + {op3_lerp_uint ,AluOp(3, 0, AluOp::v,"LERP_UINT")}, + {op3_bit_align_int ,AluOp(3, 0, AluOp::v,"BIT_ALIGN_INT")}, + {op3_byte_align_int ,AluOp(3, 0, AluOp::v,"BYTE_ALIGN_INT")}, + {op3_sad_accum_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_UINT")}, + {op3_sad_accum_hi_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_HI_UINT")}, + {op3_muladd_uint24 ,AluOp(3, 0, AluOp::v,"MULADD_UINT24")}, + {op3_lds_idx_op ,AluOp(3, 0, AluOp::x,"LDS_IDX_OP")}, + {op3_muladd ,AluOp(3, 1, AluOp::a,"MULADD")}, + {op3_muladd_m2 ,AluOp(3, 1, AluOp::a,"MULADD_M2")}, + {op3_muladd_m4 ,AluOp(3, 1, AluOp::a,"MULADD_M4")}, + {op3_muladd_d2 ,AluOp(3, 1, AluOp::a,"MULADD_D2")}, + {op3_muladd_ieee ,AluOp(3, 1, AluOp::a,"MULADD_IEEE")}, + {op3_cnde ,AluOp(3, 1, AluOp::a,"CNDE")}, + {op3_cndgt ,AluOp(3, 1, AluOp::a,"CNDGT")}, + {op3_cndge ,AluOp(3, 1, AluOp::a,"CNDGE")}, + {op3_cnde_int ,AluOp(3, 0, AluOp::a,"CNDE_INT")}, + {op3_cndgt_int ,AluOp(3, 0, AluOp::a,"CNDGT_INT")}, + {op3_cndge_int ,AluOp(3, 0, AluOp::a,"CNDGE_INT")}, + {op3_mul_lit ,AluOp(3, 1, AluOp::t,"MUL_LIT")} +}; + +const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = { + {ALU_SRC_LDS_OQ_A, {false, "LDS_OQ_A"}}, + {ALU_SRC_LDS_OQ_B, {false, "LDS_OQ_B"}}, + {ALU_SRC_LDS_OQ_A_POP, {false, "LDS_OQ_A_POP"}}, + {ALU_SRC_LDS_OQ_B_POP, {false, "LDS_OQ_B_POP"}}, + {ALU_SRC_LDS_DIRECT_A, {false, "LDS_DIRECT_A"}}, + {ALU_SRC_LDS_DIRECT_B, {false, "LDS_DIRECT_B"}}, + {ALU_SRC_TIME_HI, {false, "TIME_HI"}}, + {ALU_SRC_TIME_LO, {false, "TIME_LO"}}, + {ALU_SRC_MASK_HI, {false, "MASK_HI"}}, + {ALU_SRC_MASK_LO, {false, "MASK_LO"}}, + {ALU_SRC_HW_WAVE_ID, {false, "HW_WAVE_ID"}}, + {ALU_SRC_SIMD_ID, {false, "SIMD_ID"}}, + {ALU_SRC_SE_ID, {false, "SE_ID"}}, + {ALU_SRC_HW_THREADGRP_ID, {false, "HW_THREADGRP_ID"}}, + {ALU_SRC_WAVE_ID_IN_GRP, {false, "WAVE_ID_IN_GRP"}}, + {ALU_SRC_NUM_THREADGRP_WAVES, {false, "NUM_THREADGRP_WAVES"}}, + {ALU_SRC_HW_ALU_ODD, {false, "HW_ALU_ODD"}}, + {ALU_SRC_LOOP_IDX, {false, "LOOP_IDX"}}, + {ALU_SRC_PARAM_BASE_ADDR, {false, "PARAM_BASE_ADDR"}}, + {ALU_SRC_NEW_PRIM_MASK, {false, "NEW_PRIM_MASK"}}, + {ALU_SRC_PRIM_MASK_HI, {false, "PRIM_MASK_HI"}}, + {ALU_SRC_PRIM_MASK_LO, {false, "PRIM_MASK_LO"}}, + {ALU_SRC_1_DBL_L, {false, "1.0L"}}, + {ALU_SRC_1_DBL_M, {false, "1.0H"}}, + {ALU_SRC_0_5_DBL_L, {false, "0.5L"}}, + {ALU_SRC_0_5_DBL_M, {false, "0.5H"}}, + {ALU_SRC_0, {false, "0"}}, + {ALU_SRC_1, {false, "1.0"}}, + {ALU_SRC_1_INT, {false, "1"}}, + {ALU_SRC_M_1_INT, {false, "-1"}}, + {ALU_SRC_0_5, {false, "0.5"}}, + {ALU_SRC_LITERAL, {true, "ALU_SRC_LITERAL"}}, + {ALU_SRC_PV, {true, "PV"}}, + {ALU_SRC_PS, {false, "PS"}} +}; + +const std::map<ESDOp, LDSOp> lds_ops = { + {DS_OP_ADD , {2, "DS_ADD"}}, + {DS_OP_SUB , {2, "DS_SUB"}}, + {DS_OP_RSUB , {2, "DS_RSUB"}}, + {DS_OP_INC , {2, "DS_INC"}}, + {DS_OP_DEC , {2, "DS_DEC"}}, + {DS_OP_MIN_INT , {2, "DS_MIN_INT"}}, + {DS_OP_MAX_INT , {2, "DS_MAX_INT"}}, + {DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}}, + {DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}}, + {DS_OP_AND , {2, "DS_AND"}}, + 
{DS_OP_OR , {2, "DS_OR"}}, + {DS_OP_XOR , {2, "DS_XOR"}}, + {DS_OP_MSKOR , {3, "DS_MSKOR"}}, + {DS_OP_WRITE , {2, "DS_WRITE"}}, + {DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}}, + {DS_OP_WRITE2 , {3, "DS_WRITE2"}}, + {DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}}, + {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}}, + {DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}}, + {DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}}, + {DS_OP_ADD_RET , {2, "DS_ADD_RET"}}, + {DS_OP_SUB_RET , {2, "DS_SUB_RET"}}, + {DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}}, + {DS_OP_INC_RET , {2, "DS_INC_RET"}}, + {DS_OP_DEC_RET , {2, "DS_DEC_RET"}}, + {DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}}, + {DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}}, + {DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}}, + {DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}}, + {DS_OP_AND_RET , {2, "DS_AND_RET"}}, + {DS_OP_OR_RET , {2, "DS_OR_RET"}}, + {DS_OP_XOR_RET , {2, "DS_XOR_RET"}}, + {DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}}, + {DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}}, + {DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}}, + {DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}}, + {DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}}, + {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}}, + {DS_OP_READ_RET , {1, "DS_READ_RET"}}, + {DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}}, + {DS_OP_READ2_RET , {2, "DS_READ2_RET"}}, + {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}}, + {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}}, + {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}}, + {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}}, + {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}}, + {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}} +}; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h new file mode 100644 index 000000000..4481c49db --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h @@ -0,0 +1,377 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef r600_sfn_alu_defines_h +#define r600_sfn_alu_defines_h + +#include <map> +#include <bitset> + +namespace r600 { + +/* ALU op2 instructions 17:7 top three bits always zero. 
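+ * (The OP3 opcodes near the end of the enum use a separate 5-bit field;
+ * they are stored shifted left by 6, see the "op 3 all left shift 6"
+ * comment below, so their values stay disjoint from the OP2 range.)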
*/ +enum EAluOp { + op2_add = 0, + op2_mul = 1, + op2_mul_ieee = 2, + op2_max = 3, + op2_min = 4, + op2_max_dx10 = 5, + op2_min_dx10 = 6, + op2_sete = 8, + op2_setgt = 9, + op2_setge = 10, + op2_setne = 11, + op2_sete_dx10 = 12, + op2_setgt_dx10 = 13, + op2_setge_dx10 = 14, + op2_setne_dx10 = 15, + op1_fract = 16, + op1_trunc = 17, + op1_ceil = 18, + op1_rndne = 19, + op1_floor = 20, + op2_ashr_int = 21, + op2_lshr_int = 22, + op2_lshl_int = 23, + op1_mov = 25, + op0_nop = 26, + op2_mul_64 = 27, + op1_flt64_to_flt32 = 28, + op1_flt32_to_flt64 = 29, + op2_pred_setgt_uint = 30, + op2_pred_setge_uint = 31, + op2_pred_sete = 32, + op2_pred_setgt = 33, + op2_pred_setge = 34, + op2_pred_setne = 35, + op1_pred_set_inv = 36, + op2_pred_set_pop = 37, + op0_pred_set_clr = 38, + op1_pred_set_restore = 39, + op2_pred_sete_push = 40, + op2_pred_setgt_push = 41, + op2_pred_setge_push = 42, + op2_pred_setne_push = 43, + op2_kille = 44, + op2_killgt = 45, + op2_killge = 46, + op2_killne = 47, + op2_and_int = 48, + op2_or_int = 49, + op2_xor_int = 50, + op1_not_int = 51, + op2_add_int = 52, + op2_sub_int = 53, + op2_max_int = 54, + op2_min_int = 55, + op2_max_uint = 56, + op2_min_uint = 57, + op2_sete_int = 58, + op2_setgt_int = 59, + op2_setge_int = 60, + op2_setne_int = 61, + op2_setgt_uint = 62, + op2_setge_uint = 63, + op2_killgt_uint = 64, + op2_killge_uint = 65, + op2_prede_int = 66, + op2_pred_setgt_int = 67, + op2_pred_setge_int = 68, + op2_pred_setne_int = 69, + op2_kille_int = 70, + op2_killgt_int = 71, + op2_killge_int = 72, + op2_killne_int = 73, + op2_pred_sete_push_int = 74, + op2_pred_setgt_push_int = 75, + op2_pred_setge_push_int = 76, + op2_pred_setne_push_int = 77, + op2_pred_setlt_push_int = 78, + op2_pred_setle_push_int = 79, + op1_flt_to_int = 80, + op1_bfrev_int = 81, + op2_addc_uint = 82, + op2_subb_uint = 83, + op0_group_barrier = 84, + op0_group_seq_begin = 85, + op0_group_seq_end = 86, + op2_set_mode = 87, + op1_set_cf_idx0 = 88, + op1_set_cf_idx1 = 89, + op2_set_lds_size = 90, + op1_exp_ieee = 129, + op1_log_clamped = 130, + op1_log_ieee = 131, + op1_recip_clamped = 132, + op1_recip_ff = 133, + op1_recip_ieee = 134, + op1_recipsqrt_clamped = 135, + op1_recipsqrt_ff = 136, + op1_recipsqrt_ieee1 = 137, + op1_sqrt_ieee = 138, + op1_sin = 141, + op1_cos = 142, + op2_mullo_int = 143, + op2_mulhi_int = 144, + op2_mullo_uint = 145, + op2_mulhi_uint = 146, + op1_recip_int = 147, + op1_recip_uint = 148, + op1_recip_64 = 149, + op1_recip_clamped_64 = 150, + op1_recipsqrt_64 = 151, + op1_recipsqrt_clamped_64 = 152, + op1_sqrt_64 = 153, + op1_flt_to_uint = 154, + op1_int_to_flt = 155, + op1_uint_to_flt = 156, + op2_bfm_int = 160, + op1_flt32_to_flt16 = 162, + op1_flt16_to_flt32 = 163, + op1_ubyte0_flt = 164, + op1_ubyte1_flt = 165, + op1_ubyte2_flt = 166, + op1_ubyte3_flt = 167, + op1_bcnt_int = 170, + op1_ffbh_uint = 171, + op1_ffbl_int = 172, + op1_ffbh_int = 173, + op1_flt_to_uint4 = 174, + op2_dot_ieee = 175, + op1_flt_to_int_rpi = 176, + op1_flt_to_int_floor = 177, + op2_mulhi_uint24 = 178, + op1_mbcnt_32hi_int = 179, + op1_offset_to_flt = 180, + op2_mul_uint24 = 181, + op1_bcnt_accum_prev_int = 182, + op1_mbcnt_32lo_accum_prev_int = 183, + op2_sete_64 = 184, + op2_setne_64 = 185, + op2_setgt_64 = 186, + op2_setge_64 = 187, + op2_min_64 = 188, + op2_max_64 = 189, + op2_dot4 = 190, + op2_dot4_ieee = 191, + op2_cube = 192, + op1_max4 = 193, + op1_frexp_64 = 196, + op1_ldexp_64 = 197, + op1_fract_64 = 198, + op2_pred_setgt_64 = 199, + op2_pred_sete_64 = 198, + op2_pred_setge_64 = 201, + 
OP2V_MUL_64 = 202, + op2_add_64 = 203, + op1_mova_int = 204, + op1v_flt64_to_flt32 = 205, + op1v_flt32_to_flt64 = 206, + op2_sad_accum_prev_uint = 207, + op2_dot = 208, + op2_mul_prev = 209, + op2_mul_ieee_prev = 210, + op2_add_prev = 211, + op2_muladd_prev = 212, + op2_muladd_ieee_prev = 213, + op2_interp_xy = 214, + op2_interp_zw = 215, + op2_interp_x = 216, + op2_interp_z = 217, + op0_store_flags = 218, + op1_load_store_flags = 219, + op0_lds_1a = 220, + op0_lds_1a1d = 221, + op0_lds_2a = 223, + op1_interp_load_p0 = 224, + op1_interp_load_p10 = 125, + op1_interp_load_p20 = 126, + // op 3 all left shift 6 + op3_bfe_uint = 4<< 6, + op3_bfe_int = 5<< 6, + op3_bfi_int = 6<< 6, + op3_fma = 7<< 6, + op3_cndne_64 = 9<< 6, + op3_fma_64 = 10<< 6, + op3_lerp_uint = 11<< 6, + op3_bit_align_int = 12<< 6, + op3_byte_align_int = 13<< 6, + op3_sad_accum_uint = 14<< 6, + op3_sad_accum_hi_uint = 15<< 6, + op3_muladd_uint24 = 16<< 6, + op3_lds_idx_op = 17<< 6, + op3_muladd = 20<< 6, + op3_muladd_m2 = 21<< 6, + op3_muladd_m4 = 22<< 6, + op3_muladd_d2 = 23<< 6, + op3_muladd_ieee = 24<< 6, + op3_cnde = 25<< 6, + op3_cndgt = 26<< 6, + op3_cndge = 27<< 6, + op3_cnde_int = 28<< 6, + op3_cndgt_int = 29<< 6, + op3_cndge_int = 30<< 6, + op3_mul_lit = 31<< 6 +}; + + + +using AluOpFlags=std::bitset<32>; + +struct AluOp { + static constexpr int x = 1; + static constexpr int y = 2; + static constexpr int z = 4; + static constexpr int w = 8; + static constexpr int v = 15; + static constexpr int t = 16; + static constexpr int a = 31; + + AluOp(int ns, int f, int um, const char *n): + nsrc(ns), is_float(f), unit_mask(um), name(n) + { + } + + bool can_channel(int flags) const { + return flags & unit_mask; + } + + int nsrc: 4; + int is_float:1; + int unit_mask: 5; + const char *name; +}; + +extern const std::map<EAluOp, AluOp> alu_ops; + +enum AluInlineConstants { + ALU_SRC_LDS_OQ_A = 219, + ALU_SRC_LDS_OQ_B = 220, + ALU_SRC_LDS_OQ_A_POP = 221, + ALU_SRC_LDS_OQ_B_POP = 222, + ALU_SRC_LDS_DIRECT_A = 223, + ALU_SRC_LDS_DIRECT_B = 224, + ALU_SRC_TIME_HI = 227, + ALU_SRC_TIME_LO = 228, + ALU_SRC_MASK_HI = 229, + ALU_SRC_MASK_LO = 230, + ALU_SRC_HW_WAVE_ID = 231, + ALU_SRC_SIMD_ID = 232, + ALU_SRC_SE_ID = 233, + ALU_SRC_HW_THREADGRP_ID = 234, + ALU_SRC_WAVE_ID_IN_GRP = 235, + ALU_SRC_NUM_THREADGRP_WAVES = 236, + ALU_SRC_HW_ALU_ODD = 237, + ALU_SRC_LOOP_IDX = 238, + ALU_SRC_PARAM_BASE_ADDR = 240, + ALU_SRC_NEW_PRIM_MASK = 241, + ALU_SRC_PRIM_MASK_HI = 242, + ALU_SRC_PRIM_MASK_LO = 243, + ALU_SRC_1_DBL_L = 244, + ALU_SRC_1_DBL_M = 245, + ALU_SRC_0_5_DBL_L = 246, + ALU_SRC_0_5_DBL_M = 247, + ALU_SRC_0 = 248, + ALU_SRC_1 = 249, + ALU_SRC_1_INT = 250, + ALU_SRC_M_1_INT = 251, + ALU_SRC_0_5 = 252, + ALU_SRC_LITERAL = 253, + ALU_SRC_PV = 254, + ALU_SRC_PS = 255, + ALU_SRC_PARAM_BASE = 0x1C0, + ALU_SRC_UNKNOWN +}; + +struct AluInlineConstantDescr { + bool use_chan; + const char *descr; +}; + +extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const; + +enum ESDOp { + DS_OP_ADD = 0, + DS_OP_SUB = 1, + DS_OP_RSUB = 2, + DS_OP_INC = 3, + DS_OP_DEC = 4, + DS_OP_MIN_INT = 5, + DS_OP_MAX_INT = 6, + DS_OP_MIN_UINT = 7, + DS_OP_MAX_UINT = 8, + DS_OP_AND = 9, + DS_OP_OR = 10, + DS_OP_XOR = 11, + DS_OP_MSKOR = 12, + DS_OP_WRITE = 13, + DS_OP_WRITE_REL = 14, + DS_OP_WRITE2 = 15, + DS_OP_CMP_STORE = 16, + DS_OP_CMP_STORE_SPF = 17, + DS_OP_BYTE_WRITE = 18, + DS_OP_SHORT_WRITE = 19, + DS_OP_ADD_RET = 32, + DS_OP_SUB_RET = 33, + DS_OP_RSUB_RET = 34, + DS_OP_INC_RET = 35, + DS_OP_DEC_RET = 36, + DS_OP_MIN_INT_RET = 37, + 
DS_OP_MAX_INT_RET = 38, + DS_OP_MIN_UINT_RET = 39, + DS_OP_MAX_UINT_RET = 40, + DS_OP_AND_RET = 41, + DS_OP_OR_RET = 42, + DS_OP_XOR_RET = 43, + DS_OP_MSKOR_RET = 44, + DS_OP_XCHG_RET = 45, + DS_OP_XCHG_REL_RET = 46, + DS_OP_XCHG2_RET = 47, + DS_OP_CMP_XCHG_RET = 48, + DS_OP_CMP_XCHG_SPF_RET = 49, + DS_OP_READ_RET = 50, + DS_OP_READ_REL_RET = 51, + DS_OP_READ2_RET = 52, + DS_OP_READWRITE_RET = 53, + DS_OP_BYTE_READ_RET = 54, + DS_OP_UBYTE_READ_RET = 55, + DS_OP_SHORT_READ_RET = 56, + DS_OP_USHORT_READ_RET = 57, + DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63, + DS_OP_INVALID = 64 +}; + +struct LDSOp { + int nsrc; + const char *name; +}; + +extern const std::map<ESDOp, LDSOp> lds_ops; + +} + +#endif // ALU_DEFINES_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp new file mode 100644 index 000000000..681b89d86 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp @@ -0,0 +1,139 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_callstack.h" + +namespace r600 { + +CallStack::CallStack(r600_bytecode& bc): + m_bc(bc) +{ + +} + +CallStack::~CallStack() +{ +} + +int CallStack::push(unsigned type) +{ + switch (type) { + case FC_PUSH_VPM: + ++m_bc.stack.push; + break; + case FC_PUSH_WQM: + ++m_bc.stack.push_wqm; + break; + case FC_LOOP: + ++m_bc.stack.loop; + break; + default: + assert(0); + } + + return update_max_depth(type); +} + +void CallStack::pop(unsigned type) +{ + switch(type) { + case FC_PUSH_VPM: + --m_bc.stack.push; + assert(m_bc.stack.push >= 0); + break; + case FC_PUSH_WQM: + --m_bc.stack.push_wqm; + assert(m_bc.stack.push_wqm >= 0); + break; + case FC_LOOP: + --m_bc.stack.loop; + assert(m_bc.stack.loop >= 0); + break; + default: + assert(0); + break; + } +} + +int CallStack::update_max_depth(unsigned type) +{ + + r600_stack_info& stack = m_bc.stack; + int elements; + int entries; + + int entry_size = stack.entry_size; + + elements = (stack.loop + stack.push_wqm ) * entry_size; + elements += stack.push; + + switch (m_bc.chip_class) { + case R600: + case R700: + /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on + * the stack must be reserved to hold the current active/continue + * masks */ + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 2; + } + break; + case CAYMAN: + /* r9xx: any stack operation on empty stack consumes 2 additional + * elements */ + elements += 2; + break; + case EVERGREEN: + /* r8xx+: 2 extra elements are not always required, but one extra + * element must be added for each of the following cases: + * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest + * stack usage. + * (Currently we don't use ALU_ELSE_AFTER.) + * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM + * PUSH instruction executed. + * + * NOTE: it seems we also need to reserve additional element in some + * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, + * then STACK_SIZE should be 2 instead of 1 */ + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 1; + } + break; + default: + assert(0); + break; + } + + entry_size = 4; + + entries = (elements + (entry_size - 1)) / entry_size; + + if (entries > stack.max_entries) + stack.max_entries = entries; + + return elements; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h new file mode 100644 index 000000000..e1babb7c1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h @@ -0,0 +1,47 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_CALLSTACK_HH +#define SFN_CALLSTACK_HH + +#include "gallium/drivers/r600/r600_asm.h" + +namespace r600 { + +class CallStack { +public: + CallStack(r600_bytecode& bc); + ~CallStack(); + int push(unsigned type); + void pop(unsigned type); + int update_max_depth(unsigned type); +private: + r600_bytecode& m_bc; +}; + +} + +#endif // SFN_CALLSTACK_HH diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp new file mode 100644 index 000000000..ad9a03f8f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp @@ -0,0 +1,195 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_conditionaljumptracker.h" +#include "sfn_debug.h" + +#include <stack> +#include <vector> +#include <memory> +#include <iostream> + +namespace r600 { + +using std::stack; +using std::vector; +using std::shared_ptr; + +struct StackFrame { + + StackFrame(r600_bytecode_cf *s, JumpType t): + type(t), + start(s) + {} + + virtual ~StackFrame(); + + JumpType type; + r600_bytecode_cf *start; + vector<r600_bytecode_cf *> mid; + + virtual void fixup_mid(r600_bytecode_cf *cf) = 0; + virtual void fixup_pop(r600_bytecode_cf *final) = 0; +}; + +using PStackFrame = shared_ptr<StackFrame>; + +struct IfFrame : public StackFrame { + IfFrame(r600_bytecode_cf *s); + void fixup_mid(r600_bytecode_cf *cf) override; + void fixup_pop(r600_bytecode_cf *final) override; +}; + +struct LoopFrame : public StackFrame { + LoopFrame(r600_bytecode_cf *s); + void fixup_mid(r600_bytecode_cf *cf) override; + void fixup_pop(r600_bytecode_cf *final) override; +}; + +struct ConditionalJumpTrackerImpl { + ConditionalJumpTrackerImpl(); + stack<PStackFrame> m_jump_stack; + stack<PStackFrame> m_loop_stack; + int m_current_loop_stack_pos; +}; + +ConditionalJumpTrackerImpl::ConditionalJumpTrackerImpl(): + m_current_loop_stack_pos(0) +{ + +} + +ConditionalJumpTracker::~ConditionalJumpTracker() +{ + delete impl; +} + +ConditionalJumpTracker::ConditionalJumpTracker() +{ + impl = new ConditionalJumpTrackerImpl(); +} + +void ConditionalJumpTracker::push(r600_bytecode_cf *start, JumpType type) +{ + PStackFrame f; + switch (type) { + case jt_if: + f.reset(new IfFrame(start)); + break; + case jt_loop: + f.reset(new LoopFrame(start)); + impl->m_loop_stack.push(f); + break; + } + impl->m_jump_stack.push(f); +} + +bool ConditionalJumpTracker::pop(r600_bytecode_cf *final, JumpType type) +{ + if (impl->m_jump_stack.empty()) + return false; + + auto& frame = *impl->m_jump_stack.top(); + if (frame.type != type) + return false; + + frame.fixup_pop(final); + if (frame.type == jt_loop) + impl->m_loop_stack.pop(); + impl->m_jump_stack.pop(); + return true; +} + +bool ConditionalJumpTracker::add_mid(r600_bytecode_cf *source, JumpType type) +{ + if (impl->m_jump_stack.empty()) { + sfn_log << "Jump stack empty\n"; + return false; + } + + PStackFrame pframe; + if (type == jt_loop) { + if (impl->m_loop_stack.empty()) { + sfn_log << "Loop jump stack empty\n"; + return false; + } + pframe = impl->m_loop_stack.top(); + } else { + pframe = impl->m_jump_stack.top(); + } + + pframe->mid.push_back(source); + pframe->fixup_mid(source); + return true; +} + +IfFrame::IfFrame(r600_bytecode_cf *s): + StackFrame (s, jt_if) +{ +} + +StackFrame::~StackFrame() +{ +} + +void IfFrame::fixup_mid(r600_bytecode_cf *source) +{ + /* JUMP target is ELSE */ + start->cf_addr = source->id; +} + +void IfFrame::fixup_pop(r600_bytecode_cf *final) +{ + /* JUMP or ELSE target is one past last CF instruction */ + unsigned offset = final->eg_alu_extended ? 4 : 2; + auto src = mid.empty() ? 
start : mid[0]; + src->cf_addr = final->id + offset; + src->pop_count = 1; +} + +LoopFrame::LoopFrame(r600_bytecode_cf *s): + StackFrame(s, jt_loop) +{ +} + +void LoopFrame::fixup_mid(UNUSED r600_bytecode_cf *mid) +{ +} + +void LoopFrame::fixup_pop(r600_bytecode_cf *final) +{ + /* LOOP END address is past LOOP START */ + final->cf_addr = start->id + 2; + + /* LOOP START address is past LOOP END*/ + start->cf_addr = final->id + 2; + + /* BREAK and CONTINUE point at LOOP END*/ + for (auto m : mid) + m->cf_addr = final->id; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h new file mode 100644 index 000000000..76cc02a27 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h @@ -0,0 +1,69 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_CONDITIONALJUMPTRACKER_H +#define SFN_CONDITIONALJUMPTRACKER_H + +#include "gallium/drivers/r600/r600_asm.h" + +namespace r600 { + +enum JumpType { + jt_loop, + jt_if +}; + +/** + Class to link the jump locations + +*/ + + +class ConditionalJumpTracker +{ +public: + ConditionalJumpTracker(); + ~ConditionalJumpTracker(); + + /* Mark the start of a loop or a if/else */ + + void push(r600_bytecode_cf *start, JumpType type); + + /* Mark the end of a loop or a if/else and fixup the jump sites */ + bool pop(r600_bytecode_cf *final, JumpType type); + + /* Add middle sites to the call frame i.e. continue, + * break inside loops, and else in if-then-else constructs. 
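+ * A typical if/else sequence, as a sketch (the CF node names here are
+ * hypothetical):
+ *   tracker.push(jump_cf, jt_if);     // at the JUMP instruction
+ *   tracker.add_mid(else_cf, jt_if);  // at the ELSE
+ *   tracker.pop(pop_cf, jt_if);       // at the POP, fixes the jump targets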
+ */ + bool add_mid(r600_bytecode_cf *source, JumpType type); + +private: + struct ConditionalJumpTrackerImpl * impl; +}; + +} + +#endif // SFN_CONDITIONALJUMPTRACKER_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp new file mode 100644 index 000000000..d993d42af --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp @@ -0,0 +1,139 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_debug.h" +#include "sfn_debug.h" + +namespace r600 { + +class stderr_streambuf : public std::streambuf +{ +public: + stderr_streambuf(); +protected: + int sync(); + int overflow(int c); + std::streamsize xsputn ( const char *s, std::streamsize n ); +}; + +stderr_streambuf::stderr_streambuf() +{ + +} + +int stderr_streambuf::sync() +{ + fflush(stderr); + return 0; +} + +int stderr_streambuf::overflow(int c) +{ + fputc(c, stderr); + return 0; +} + +static const struct debug_named_value sfn_debug_options[] = { + {"instr", SfnLog::instr, "Log all consumed nir instructions"}, + {"ir", SfnLog::r600ir, "Log created R600 IR"}, + {"cc", SfnLog::cc, "Log R600 IR to assembly code creation"}, + {"noerr", SfnLog::err, "Don't log shader conversion errors"}, + {"si", SfnLog::shader_info, "Log shader info (non-zero values)"}, + {"ts", SfnLog::test_shader, "Log shaders in tests"}, + {"reg", SfnLog::reg, "Log register allocation and lookup"}, + {"io", SfnLog::io, "Log shader in and output"}, + {"ass", SfnLog::assembly, "Log IR to assembly conversion"}, + {"flow", SfnLog::flow, "Log Flow instructions"}, + {"merge", SfnLog::merge, "Log register merge operations"}, + {"nomerge", SfnLog::nomerge, "Skip register merge step"}, + {"tex", SfnLog::tex, "Log texture ops"}, + {"trans", SfnLog::trans, "Log generic translation messages"}, + DEBUG_NAMED_VALUE_END +}; + +SfnLog sfn_log; + +std::streamsize stderr_streambuf::xsputn ( const char *s, std::streamsize n ) +{ + std::streamsize i = n; + while (i--) + fputc(*s++, stderr); + return n; +} + +SfnLog::SfnLog(): + m_active_log_flags(0), + m_log_mask(0), + m_output(new stderr_streambuf()) +{ + m_log_mask = debug_get_flags_option("R600_NIR_DEBUG", sfn_debug_options, 0); + m_log_mask ^= err; +} + +SfnLog& SfnLog::operator << (SfnLog::LogFlag const l) +{ + m_active_log_flags = l; + return *this; +} + 
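+/* Usage sketch (annotation): a LogFlag streamed into sfn_log selects the
+ * mask for the items that follow, e.g. as the emitters further down do:
+ *
+ *   sfn_log << SfnLog::instr << "emit '" << *ir << "'\n";
+ *   if (sfn_log.has_debug_flag(SfnLog::reg))
+ *      sfn_log << SfnLog::reg << "lookup register\n";
+ *
+ * Messages appear only if the flag was enabled at run time through the
+ * R600_NIR_DEBUG environment variable (a comma-separated list of the
+ * option names defined in sfn_debug_options above).
+ */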
+SfnLog& SfnLog::operator << (UNUSED std::ostream & (*f)(std::ostream&)) +{ + if (m_active_log_flags & m_log_mask) + m_output << f; + return *this; +} + +SfnLog& SfnLog::operator << (nir_shader& sh) +{ + if (m_active_log_flags & m_log_mask) + nir_print_shader(&sh, stderr); + return *this; +} + +SfnLog& SfnLog::operator << (nir_instr &instr) +{ + if (m_active_log_flags & m_log_mask) + nir_print_instr(&instr, stderr); + return *this; +} + +SfnTrace::SfnTrace(SfnLog::LogFlag flag, const char *msg): + m_flag(flag), + m_msg(msg) +{ + sfn_log << m_flag << std::string(" ", 2 * m_indention++) + << "BEGIN: " << m_msg << "\n"; +} + +SfnTrace::~SfnTrace() +{ + sfn_log << m_flag << std::string(" ", 2 * m_indention--) + << "END: " << m_msg << "\n"; +} + +int SfnTrace::m_indention = 0; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h new file mode 100644 index 000000000..372379c66 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h @@ -0,0 +1,121 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_STDERR_STREAMLOG_H +#define SFN_STDERR_STREAMLOG_H + + +#include <streambuf> +#include <ostream> +#include <fstream> +#include "compiler/nir/nir.h" + +namespace r600 { +/* Implement some logging for shader-from-nir + +*/ + +class SfnLog { +public: + enum LogFlag { + instr = 1 << 0, + r600ir = 1 << 1, + cc = 1 << 2, + err = 1 << 3, + shader_info = 1 << 4, + test_shader = 1 << 5, + reg = 1 << 6, + io = 1 << 7, + assembly = 1 << 8, + flow = 1 << 9, + merge = 1 << 10, + tex = 1 << 11, + trans = 1 << 12, + all = (1 << 13) - 1, + nomerge = 1 << 16, + }; + + SfnLog(); + + /** a special handling to set the output level "inline" + \param l the level of the following messages + */ + SfnLog& operator << (LogFlag const l); + + /* general output routine; output is only given, if the log flags and the + * currently active log mask overlap + \returns a reference to this object + */ + template <class T> + SfnLog& operator << (const T& text) + { + if (m_active_log_flags & m_log_mask) + m_output << text; + + return *this; + } + + /* A funny construct to enable std::endl to work on this stream + idea of Dave Brondsema: + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8567 + */ + SfnLog& operator << (std::ostream & (*f)(std::ostream&)); + + SfnLog& operator << (nir_shader &sh); + + SfnLog& operator << (nir_instr& instr); + + int has_debug_flag(uint64_t flag) { + return (m_log_mask & flag) == flag; + } + +private: + uint64_t m_active_log_flags; + uint64_t m_log_mask; + std::ostream m_output; +}; + +class SfnTrace { +public: + SfnTrace(SfnLog::LogFlag flag, const char *msg); + ~SfnTrace(); +private: + SfnLog::LogFlag m_flag; + const char *m_msg; + static int m_indention; +}; + + +#ifndef NDEBUG +#define SFN_TRACE_FUNC(LEVEL, MSG) SfnTrace __trace(LEVEL, MSG) +#else +#define SFN_TRACE_FUNC(LEVEL, MSG) +#endif + +extern SfnLog sfn_log; + +} +#endif // SFN_STDERR_STREAMBUF_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h new file mode 100644 index 000000000..31a10ae2f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h @@ -0,0 +1,318 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_defines_h +#define sfn_defines_h + +#include "../r600_isa.h" +#include "amd_family.h" +namespace r600 { + + +enum EGWSOpCode { + cf_sema_v = 0, + cf_sema_p = 1, + cf_gws_barrier = 2, + cf_gws_init = 3, +}; + +/* CF ALU instructions [29:26], highest bit always set. */ +enum ECFAluOpCode { + cf_alu_undefined = 0, + cf_alu = CF_OP_ALU, + cf_alu_push_before = CF_OP_ALU_PUSH_BEFORE, + cf_alu_pop_after = CF_OP_ALU_POP_AFTER, + cf_alu_pop2_after = CF_OP_ALU_POP2_AFTER, + cf_alu_extended = CF_OP_ALU_EXT, + cf_alu_continue = CF_OP_ALU_CONTINUE, + cf_alu_break = CF_OP_ALU_BREAK, + cf_alu_else_after = CF_OP_ALU_ELSE_AFTER, +}; + +enum ECFAluOpCodeEG { + eg_cf_alu_undefined = 0, + eg_cf_alu = 8, + eg_cf_alu_push_before = 9, + eg_cf_alu_pop_after = 10, + eg_cf_alu_pop2_after = 11, + eg_cf_alu_extended = 12, + eg_cf_alu_continue = 13, + eg_cf_alu_break = 14, + eg_cf_alu_else_after = 15, +}; + + +enum ECFOpCode { + cf_nop = CF_OP_NOP, + cf_tc = CF_OP_TEX, + cf_vc = CF_OP_VTX, + cf_gds = CF_OP_GDS, + cf_loop_start = CF_OP_LOOP_START, + cf_loop_end = CF_OP_LOOP_END, + cf_loop_start_dx10 = CF_OP_LOOP_START_DX10, + cf_loop_start_no_al = CF_OP_LOOP_START_NO_AL, + cf_loop_continue = CF_OP_LOOP_CONTINUE, + cf_loop_break = CF_OP_LOOP_BREAK, + cf_jump = CF_OP_JUMP, + cf_push = CF_OP_PUSH, + cf_else = CF_OP_ELSE, + cf_pop = CF_OP_POP, + /* 15 - 17 reserved */ + cf_call = CF_OP_CALL, + cf_call_fs = CF_OP_CALL_FS, + cf_return = CF_OP_RET, + cf_emit_vertex = CF_OP_EMIT_VERTEX, + cf_emit_cut_vertex = CF_OP_EMIT_CUT_VERTEX, + cf_cut_vertex = CF_OP_CUT_VERTEX, + cf_kill = CF_OP_KILL, + /* 25 reserved */ + cf_wait_ack = CF_OP_WAIT_ACK, + cf_tc_ack = CF_OP_TEX_ACK, + cf_vc_ack = CF_OP_VTX_ACK, + cf_jump_table = CF_OP_JUMPTABLE, + cf_global_wave_sync = CF_OP_WAVE_SYNC, + cf_halt = CF_OP_HALT, + /* gap 32-63*/ + cf_mem_stream0_buf0 = CF_OP_MEM_STREAM0_BUF0, + cf_mem_stream0_buf1 = CF_OP_MEM_STREAM0_BUF1, + cf_mem_stream0_buf2 = CF_OP_MEM_STREAM0_BUF2, + cf_mem_stream0_buf3 = CF_OP_MEM_STREAM0_BUF3, + + cf_mem_stream1_buf0 = CF_OP_MEM_STREAM1_BUF0, + cf_mem_stream1_buf1 = CF_OP_MEM_STREAM1_BUF1, + cf_mem_stream1_buf2 = CF_OP_MEM_STREAM1_BUF2, + cf_mem_stream1_buf3 = CF_OP_MEM_STREAM1_BUF3, + + cf_mem_stream2_buf0 = CF_OP_MEM_STREAM2_BUF0, + cf_mem_stream2_buf1 = CF_OP_MEM_STREAM2_BUF1, + cf_mem_stream2_buf2 = CF_OP_MEM_STREAM2_BUF2, + cf_mem_stream2_buf3 = CF_OP_MEM_STREAM2_BUF3, + + cf_mem_stream3_buf0 = CF_OP_MEM_STREAM3_BUF0, + cf_mem_stream3_buf1 = CF_OP_MEM_STREAM3_BUF1, + cf_mem_stream3_buf2 = CF_OP_MEM_STREAM3_BUF2, + cf_mem_stream3_buf3 = CF_OP_MEM_STREAM3_BUF3, + + cf_mem_write_scratch = CF_OP_MEM_SCRATCH , + /* reserved 81 */ + cf_mem_ring = CF_OP_MEM_RING, + cf_export = CF_OP_EXPORT, + cf_export_done = CF_OP_EXPORT_DONE, + cf_mem_export = CF_OP_MEM_EXPORT, + cf_mem_rat = CF_OP_MEM_RAT, + cf_mem_rat_cacheless = CF_OP_MEM_RAT_NOCACHE, + + cf_mem_ring1 = CF_OP_MEM_RING1, + cf_mem_ring2 = CF_OP_MEM_RING2, + cf_mem_ring3 = CF_OP_MEM_RING3, + cf_mem_export_combined = CF_OP_MEM_MEM_COMBINED, + cf_mem_rat_combined_cacheless = CF_OP_MEM_RAT_COMBINED_NOCACHE + +}; + +enum ECFOpCodeEG { + eg_cf_nop = 0, + eg_cf_tc = 1, + eg_cf_vc = 2, + eg_cf_gds = 3, + eg_cf_loop_start = 4, + eg_cf_loop_end = 5, + eg_cf_loop_start_dx10 = 6, + eg_cf_loop_start_no_al = 7, + eg_cf_loop_continue = 8, + eg_cf_loop_break = 9, + eg_cf_jump = 10, + eg_cf_push = 11, + eg_cf_else = 13, + eg_cf_pop = 14, + /* 15 - 17 reserved */ + eg_cf_call = 18, + eg_cf_call_fs, + eg_cf_return, + eg_cf_emit_vertex, + eg_cf_emit_cut_vertex, + 
eg_cf_cut_vertex, + eg_cf_kill, + /* 25 reserved */ + eg_cf_wait_ack = 26, + eg_cf_tc_ack, + eg_cf_vc_ack, + eg_cf_jump_table, + eg_cf_global_wave_sync, + eg_cf_halt, + /* gap 32-63*/ + eg_cf_mem_stream0_buf0 = 64, + eg_cf_mem_stream0_buf1, + eg_cf_mem_stream0_buf2, + eg_cf_mem_stream0_buf3, + + eg_cf_mem_stream1_buf0, + eg_cf_mem_stream1_buf1, + eg_cf_mem_stream1_buf2, + eg_cf_mem_stream1_buf3, + + eg_cf_mem_stream2_buf0, + eg_cf_mem_stream2_buf1, + eg_cf_mem_stream2_buf2, + eg_cf_mem_stream2_buf3, + + eg_cf_mem_stream3_buf0, + eg_cf_mem_stream3_buf1, + eg_cf_mem_stream3_buf2, + eg_cf_mem_stream3_buf3, + + eg_cf_mem_write_scratch, + /* reserved 81 */ + eg_cf_mem_ring = 82, + eg_cf_export, + eg_cf_export_done, + eg_cf_mem_export, + eg_cf_mem_rat, + eg_cf_mem_rat_cacheless, + + eg_cf_mem_ring1, + eg_cf_mem_ring2, + eg_cf_mem_ring3, + eg_cf_mem_export_combined, + eg_cf_mem_rat_combined_cacheless +}; + + +enum EVFetchInstr { + vc_fetch = FETCH_OP_VFETCH, + vc_semantic = FETCH_OP_SEMFETCH, + vc_get_buf_resinfo = FETCH_OP_GET_BUFFER_RESINFO, + vc_read_scratch = FETCH_OP_READ_SCRATCH, + vc_unknown +}; + +enum EVFetchType { + vertex_data = 0, + instance_data = 1, + no_index_offset = 2 +}; + +enum EVTXDataFormat { + fmt_invalid = 0, + fmt_8 = 1, + fmt_4_4 = 2, + fmt_3_3_2 = 3, + fmt_reserved_4 = 4, + fmt_16 = 5, + fmt_16_float = 6, + fmt_8_8 = 7, + fmt_5_6_5 = 8, + fmt_6_5_5 = 9, + fmt_1_5_5_5 = 10, + fmt_4_4_4_4 = 11, + fmt_5_5_5_1 = 12, + fmt_32 = 13, + fmt_32_float = 14, + fmt_16_16 = 15, + fmt_16_16_float = 16, + fmt_8_24 = 17, + fmt_8_24_float = 18, + fmt_24_8 = 19, + fmt_24_8_float = 20, + fmt_10_11_11 = 21, + fmt_10_11_11_float = 22, + fmt_11_11_10 = 23, + fmt_11_11_10_float = 24, + fmt_2_10_10_10 = 25, + fmt_8_8_8_8 = 26, + fmt_10_10_10_2 = 27, + fmt_x24_8_32_float = 28, + fmt_32_32 = 29, + fmt_32_32_float = 30, + fmt_16_16_16_16 = 31, + fmt_16_16_16_16_float = 32, + fmt_reserved_33 = 33, + fmt_32_32_32_32 = 34, + fmt_32_32_32_32_float = 35, + fmt_reserved_36 = 36, + fmt_1 = 37, + fmt_1_reversed = 38, + fmt_gb_gr = 39, + fmt_bg_rg = 40, + fmt_32_as_8 = 41, + fmt_32_as_8_8 = 42, + fmt_5_9_9_9_sharedexp = 43, + fmt_8_8_8 = 44, + fmt_16_16_16 = 45, + fmt_16_16_16_float = 46, + fmt_32_32_32 = 47, + fmt_32_32_32_float = 48, + fmt_bc1 = 49, + fmt_bc2 = 50, + fmt_bc3 = 51, + fmt_bc4 = 52, + fmt_bc5 = 53, + fmt_apc0 = 54, + fmt_apc1 = 55, + fmt_apc2 = 56, + fmt_apc3 = 57, + fmt_apc4 = 58, + fmt_apc5 = 59, + fmt_apc6 = 60, + fmt_apc7 = 61, + fmt_ctx1 = 62, + fmt_reserved_63 = 63 +}; + +enum EVFetchNumFormat { + vtx_nf_norm = 0, + vtx_nf_int = 1, + vtx_nf_scaled = 2 +}; + +enum EVFetchEndianSwap { + vtx_es_none = 0, + vtx_es_8in16 = 1, + vtx_es_8in32 = 2 +}; + +enum EVFetchFlagShift { + vtx_fetch_whole_quad, + vtx_use_const_field, + vtx_format_comp_signed, + vtx_srf_mode, + vtx_buf_no_stride, + vtx_alt_const, + vtx_use_tc, + vtx_vpm, + vtx_unknown +}; + +enum EBufferIndexMode { + bim_none, + bim_zero, + bim_one, + bim_invalid +}; + +} + +#endif // DEFINES_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt new file mode 100644 index 000000000..97a9c3658 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt @@ -0,0 +1,45 @@ +# R600 shader from NIR + +This code is an attempt to implement a NIR backend for r600. 
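+
+As a rough sketch of the approach (the helper below is hypothetical; it
+only illustrates the mapping style and is not the backend's API): each
+NIR ALU opcode is mapped onto an r600 ALU opcode from sfn_alu_defines.h,
+which EmitAluInstruction::do_emit in sfn_emitaluinstruction.cpp further
+down does with one large switch:
+
+    // hypothetical sketch; needs sfn_alu_defines.h and compiler/nir/nir.h
+    static r600::EAluOp map_simple_alu_op(nir_op op)
+    {
+       switch (op) {
+       case nir_op_mov:  return r600::op1_mov;
+       case nir_op_fadd: return r600::op2_add;
+       case nir_op_ffma: return r600::op3_muladd_ieee; // assumed mapping
+       default:          return r600::op0_nop;         // sketch-only fallback
+       }
+    }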
+
+## State
+
+Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
+
+Thanks to soft fp64, the OpenGL version is now 4.5.
+
+sb has been enabled for NIR to be able to run some more demanding workloads. The aim is
+still to get rid of it.
+
+
+The piglit gpu profile mostly passes like with TGSI; there are some fixes but also a few regressions.
+
+CTS gles:
+ - GLES2: passes like with TGSI
+ - GLES3: no regressions, a few fixes compared to TGSI
+ - GLES31:
+ * a few fixes with interpolation specifiers
+ * synchronization has some unstable tests; this might be because global synchronization is missing (in both)
+
+GL CTS:
+ * a few regressions and a hang with KHR-GL43.compute_shader.shared-max
+
+piglit:
+ * spilling arrays is broken on Barts (but it works on Cedar)
+ * a few tests fail because the register limit is exhausted, needlessly so: with a
+ better register allocator they would pass
+
+## Needed optimizations:
+
+ - Register allocator and scheduler (Could the sb allocator and scheduler
+ be ported?)
+
+ - peepholes:
+ - compare + set predicate
+
+ - copy propagation:
+ - Moves from inputs are usually not required; they could be forwarded
+ - texture operations often move additional parameters in extra registers
+ but they are actually needed in the same registers they come from and
+ could just be swizzled into the right place
+ (lower in NIR like it is done e.g. in ETNAVIV)
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
new file mode 100644
index 000000000..44e43c1b5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
@@ -0,0 +1,985 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_emitaluinstruction.h"
+#include "sfn_debug.h"
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+namespace r600 {
+
+using std::vector;
+
+EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
+ EmitInstruction (processor)
+{
+
+}
+
+bool EmitAluInstruction::do_emit(nir_instr* ir)
+{
+ const nir_alu_instr& instr = *nir_instr_as_alu(ir);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *ir
+ << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
+ << "' (" << __func__ << ")\n";
+
+ preload_src(instr);
+
+ switch (instr.op) {
+ case nir_op_b2b1: return emit_mov(instr);
+ case nir_op_b2b32: return emit_mov(instr);
+ case nir_op_b2f32: return emit_alu_b2f(instr);
+ case nir_op_b2i32: return emit_b2i32(instr);
+ case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+ case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+ case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+ case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+ case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+ case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+ case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+ case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+ case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
+ case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
+
+ case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
+ case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
+ case nir_op_cube_r600: return emit_cube(instr);
+ case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
+ case nir_op_f2b32: return emit_alu_f2b32(instr);
+ case nir_op_f2i32: return
emit_alu_f2i32_or_u32(instr, op1_flt_to_int); + case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint); + case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs}); + case nir_op_fadd: return emit_alu_op2(instr, op2_add); + case nir_op_fceil: return emit_alu_op1(instr, op1_ceil); + case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos); + case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1}); + case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2}); + case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2}); + + /* These are in the ALU instruction list, but they should be texture instructions */ + case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); + case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); + case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true); + case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false); + case nir_op_fddy_coarse: + case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true); + case nir_op_fdot2: return emit_dot(instr, 2); + case nir_op_fdot3: return emit_dot(instr, 3); + case nir_op_fdot4: return emit_dot(instr, 4); + case nir_op_fdph: return emit_fdph(instr); + case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10); + case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10); + case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee); + case nir_op_ffloor: return emit_alu_op1(instr, op1_floor); + case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee); + case nir_op_ffract: return emit_alu_op1(instr, op1_fract); + case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10); + case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10); + case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int); + case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped); + case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); + case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); + case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10); + case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10); + case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee); + case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg}); + case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10); + case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10); + case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee); + case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne); + case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1); + case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp}); + case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin); + case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee); + case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1); + case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc); + case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int); + case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int); + case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt); + case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int); + case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int); + case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int); + case nir_op_i32csel_ge: 
return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2}); + case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2}); + case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int); + case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int); + case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int); + case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int); + case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int); + case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); + case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); + case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int); + case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int); + case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int); + case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int); + case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int); + case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int); + case nir_op_ineg: return emit_alu_ineg(instr); + case nir_op_inot: return emit_alu_op1(instr, op1_not_int); + case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int); + case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int); + case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int); + case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int); + case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int); + case nir_op_mov:return emit_mov(instr); + case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr); + case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr); + case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse); + case nir_op_sge: return emit_alu_op2(instr, op2_setge); + case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt); + case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint); + case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint); + case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint); + case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint); + case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); + case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); + case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2}); + case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint); + case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint); + case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24); + case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint); + case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0); + case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1); + case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr); + case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr); + case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int); + case nir_op_vec2: return emit_create_vec(instr, 2); + case nir_op_vec3: return emit_create_vec(instr, 3); + case nir_op_vec4: return emit_create_vec(instr, 4); + default: + return false; + } +} + +void EmitAluInstruction::preload_src(const nir_alu_instr& instr) +{ + const nir_op_info *op_info = &nir_op_infos[instr.op]; + assert(op_info->num_inputs <= 4); + + unsigned nsrc_comp = num_src_comp(instr); + sfn_log << SfnLog::reg << "Preload:\n"; + for (unsigned i = 0; i < 
op_info->num_inputs; ++i) { + for (unsigned c = 0; c < nsrc_comp; ++c) { + m_src[i][c] = from_nir(instr.src[i], c); + sfn_log << SfnLog::reg << " " << *m_src[i][c]; + + } + sfn_log << SfnLog::reg << "\n"; + } + if (instr.op == nir_op_fdph) { + m_src[1][3] = from_nir(instr.src[1], 3); + sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n"; + } + + split_constants(instr, nsrc_comp); +} + +unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr) +{ + switch (instr.op) { + case nir_op_fdot2: + case nir_op_bany_inequal2: + case nir_op_ball_iequal2: + case nir_op_bany_fnequal2: + case nir_op_ball_fequal2: + case nir_op_b32any_inequal2: + case nir_op_b32all_iequal2: + case nir_op_b32any_fnequal2: + case nir_op_b32all_fequal2: + case nir_op_unpack_64_2x32_split_y: + return 2; + + case nir_op_fdot3: + case nir_op_bany_inequal3: + case nir_op_ball_iequal3: + case nir_op_bany_fnequal3: + case nir_op_ball_fequal3: + case nir_op_b32any_inequal3: + case nir_op_b32all_iequal3: + case nir_op_b32any_fnequal3: + case nir_op_b32all_fequal3: + case nir_op_cube_r600: + return 3; + + case nir_op_fdot4: + case nir_op_fdph: + case nir_op_bany_inequal4: + case nir_op_ball_iequal4: + case nir_op_bany_fnequal4: + case nir_op_ball_fequal4: + case nir_op_b32any_inequal4: + case nir_op_b32all_iequal4: + case nir_op_b32any_fnequal4: + case nir_op_b32all_fequal4: + return 4; + + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + return 1; + + default: + return nir_dest_num_components(instr.dest.dest); + + } +} + +bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + const uint16_t src0_chan[4] = {2, 2, 0, 1}; + const uint16_t src1_chan[4] = {1, 0, 2, 2}; + + for (int i = 0; i < 4; ++i) { + ir = new AluInstruction(op2_cube, from_nir(instr.dest, i), + from_nir(instr.src[0], src0_chan[i]), + from_nir(instr.src[0], src1_chan[i]), {alu_write}); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp) +{ + const nir_op_info *op_info = &nir_op_infos[instr.op]; + if (op_info->num_inputs < 2) + return; + + int nconst = 0; + std::array<const UniformValue *,4> c; + std::array<int,4> idx; + for (unsigned i = 0; i < op_info->num_inputs; ++i) { + PValue& src = m_src[i][0]; + assert(src); + sfn_log << SfnLog::reg << "Split test " << *src; + + if (src->type() == Value::kconst) { + c[nconst] = static_cast<const UniformValue *>(src.get()); + idx[nconst++] = i; + sfn_log << SfnLog::reg << " is constant " << i; + } + sfn_log << SfnLog::reg << "\n"; + } + + if (nconst < 2) + return; + + unsigned sel = c[0]->sel(); + unsigned kcache = c[0]->kcache_bank(); + sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ; + + for (int i = 1; i < nconst; ++i) { + sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n"; + if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) { + AluInstruction *ir = nullptr; + auto v = get_temp_vec4(); + for (unsigned k = 0; k < nsrc_comp; ++k) { + ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write}); + emit_instruction(ir); + m_src[idx[i]][k] = v[k]; + } + make_last(ir); + } + } +} + +bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr) +{ + if (instr.src[0].negate || instr.src[0].abs) { + std::cerr << "source modifiers not supported with int ops\n"; + return false; + } + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir 
= new AluInstruction(op1_not_int, from_nir(instr.dest, i),
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
+ const AluOpFlags& flags)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][i], write);
+
+ if (flags.test(alu_src0_abs) || instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+
+ if (instr.src[0].negate ^ flags.test(alu_src0_neg))
+ ir->set_flag(alu_src0_neg);
+
+ if (flags.test(alu_dst_clamp) || instr.dest.saturate)
+ ir->set_flag(alu_dst_clamp);
+
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
+{
+ /* If the op is a plain move between SSA values we can just forward
+ * the register reference to the original register */
+ if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
+ !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
+ bool result = true;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ result &= inject_register(instr.dest.dest.ssa.index, i,
+ m_src[0][i], true);
+ }
+ }
+ return result;
+ } else {
+ return emit_alu_op1(instr, op1_mov);
+ }
+}
+
+bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
+ bool absolute)
+{
+ AluInstruction *ir = nullptr;
+ std::set<int> src_idx;
+
+ if (get_chip_class() == CAYMAN) {
+ int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
+ for (int i = 0; i < last_slot; ++i) {
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][0], instr.dest.write_mask & (1 << i) ? write : empty);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+
+ if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
+
+ emit_instruction(ir);
+ }
+ } else {
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][i], last_write);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
+{
+ AluInstruction *ir = nullptr;
+ std::array<PValue, 4> v;
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ v[i] = from_nir(instr.dest, i);
+ ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
+ if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op, v[i], v[i], {alu_write});
+ emit_instruction(ir);
+ if (op == op1_flt_to_uint)
+ make_last(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
+ m_src[0][i], literal(0.0f), write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+} + +bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), + m_src[0][i], Value::one_i, write); + emit_instruction(ir); + } + make_last(ir); + + return true; +} + +bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), + m_src[0][i], write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp) +{ + emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0), + m_src[0][comp], last_write)); + return true; +} + +bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc) +{ + AluInstruction *ir = nullptr; + std::set<int> src_slot; + for(unsigned i = 0; i < nc; ++i) { + if (instr.dest.write_mask & (1 << i)){ + auto src = m_src[i][0]; + ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + + // FIXME: This is a rather crude approach to fix the problem that + // r600 can't read from four different slots of the same component + // here we check only for the register index + if (src->type() == Value::gpr) + src_slot.insert(src->sel()); + if (src_slot.size() >= 3) { + src_slot.clear(); + ir->set_flag(alu_last_instr); + } + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < n ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + m_src[0][i], m_src[1][i], + instr.dest.write_mask & (1 << i) ? write : empty); + + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + for (int i = n; i < 4 ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + Value::zero, Value::zero, + instr.dest.write_mask & (1 << i) ? write : empty); + emit_instruction(ir); + } + + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 3 ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + m_src[0][i], m_src[1][i], + instr.dest.write_mask & (1 << i) ? write : empty); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f, + m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? 
write : empty); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + emit_instruction(ir); + + ir->set_flag(alu_last_instr); + return true; + +} + +bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)) { + ir = new AluInstruction(op, from_nir(instr.dest, i), + m_src[0][i], Value::zero, + write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), + m_src[0][i], Value::one_f, write); + if (instr.src[0].negate) ir->set_flag(alu_src0_neg); + if (instr.src[0].abs) ir->set_flag(alu_src0_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) +{ + + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + EAluOp combine = all ? op2_and_int : op2_or_int; + + /* For integers we can not use the modifiers, so this needs some emulation */ + /* Should actually be lowered with NIR */ + if (instr.src[0].negate == instr.src[1].negate && + instr.src[0].abs == instr.src[1].abs) { + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } else { + std::cerr << "Negate in iequal/inequal not (yet) supported\n"; + return false; + } + + for (unsigned i = 0; i < nc/2 ; ++i) { + ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + if (nc > 2) { + ir = new AluInstruction(combine, v[0], v[0], v[2], last_write); + emit_instruction(ir); + } + + return true; +} + +bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) +{ + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); + + if (instr.src[0].abs) + ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) + ir->set_flag(alu_src0_neg); + + if (instr.src[1].abs) + ir->set_flag(alu_src1_abs); + if (instr.src[1].negate) + ir->set_flag(alu_src1_neg); + + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op1_max4, v[i], v[i], write); + if (all) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + } + + for (unsigned i = nc; i < 4 ; ++i) { + ir = new AluInstruction(op1_max4, v[i], + all ? Value::one_f : Value::zero, write); + if (all) + ir->set_flag(alu_src0_neg); + + emit_instruction(ir); + } + + ir->set_flag(alu_last_instr); + + if (all) + op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10; + else + op = (op == op2_sete) ? 
op2_setne_dx10: op2_sete_dx10; + + ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write); + if (all) + ir->set_flag(alu_src1_neg); + emit_instruction(ir); + + return true; +} + +bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all) +{ + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + for (unsigned i = 0; i < 2 ; ++i) { + ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); + if (instr.src[0].abs) + ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) + ir->set_flag(alu_src0_neg); + + if (instr.src[1].abs) + ir->set_flag(alu_src1_abs); + if (instr.src[1].negate) + ir->set_flag(alu_src1_neg); + + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int; + ir = new AluInstruction(op, v[0], v[0], v[1], last_write); + emit_instruction(ir); + + return true; +} + +bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + + if (get_chip_class() == CAYMAN) { + int lasti = util_last_bit(instr.dest.write_mask); + for (int k = 0; k < lasti ; ++k) { + if (instr.dest.write_mask & (1 << k)) { + + for (int i = 0; i < 4; i++) { + ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[0][k], (i == k) ? write : empty); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + if (i == 3) ir->set_flag(alu_last_instr); + emit_instruction(ir); + } + } + } + } else { + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + } + return true; +} + +bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts) +{ + + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + if (src0.negate || src1.negate || + src0.abs || src1.abs) { + std::cerr << "R600: don't support modifiers with integer operations"; + return false; + } + return emit_alu_op2(instr, opcode, opts); +} + +bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops) +{ + const nir_alu_src *src0 = &instr.src[0]; + const nir_alu_src *src1 = &instr.src[1]; + + int idx0 = 0; + int idx1 = 1; + if (ops & op2_opt_reverse) { + std::swap(src0, src1); + std::swap(idx0, idx1); + } + + bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), + m_src[idx0][i], m_src[idx1][i], write); + + if (src0->negate) ir->set_flag(alu_src0_neg); + if (src0->abs) ir->set_flag(alu_src0_abs); + if (src1_negate) ir->set_flag(alu_src1_neg); + if (src1->abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) 
ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
+ std::array<uint8_t, 3> reorder)
+{
+ const nir_alu_src *src[3];
+ src[0] = &instr.src[reorder[0]];
+ src[1] = &instr.src[reorder[1]];
+ src[2] = &instr.src[reorder[2]];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[reorder[0]][i],
+ m_src[reorder[1]][i],
+ m_src[reorder[2]][i],
+ write);
+
+ if (src[0]->negate) ir->set_flag(alu_src0_neg);
+ if (src[1]->negate) ir->set_flag(alu_src1_neg);
+ if (src[2]->negate) ir->set_flag(alu_src2_neg);
+
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ ir->set_flag(alu_write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ return true;
+}
+
+static const char swz[] = "xyzw01?_";
+
+void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
+ const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
+{
+
+ AluInstruction *alu = nullptr;
+ for (int i = 0; i < ncomp; ++i) {
+ alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write});
+ if (src.abs)
+ alu->set_flag(alu_src0_abs);
+ if (src.negate)
+ alu->set_flag(alu_src0_neg);
+ emit_instruction(alu);
+ }
+ make_last(alu);
+}
+
+bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
+ bool fine)
+{
+
+ GPRVector::Values v;
+ std::array<int, 4> writemask = {0,1,2,3};
+
+ int ncomp = nir_dest_num_components(instr.dest.dest);
+ GPRVector::Swizzle src_swz = {7,7,7,7};
+ for (auto i = 0; i < ncomp; ++i)
+ src_swz[i] = instr.src[0].swizzle[i];
+
+ auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
+
+ if (instr.src[0].abs || instr.src[0].negate) {
+ GPRVector tmp = get_temp_vec4();
+ split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
+ src = tmp;
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
+ v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
+ }
+
+ /* This is querying the derivatives of the output fb, so we would either need
+ * access to the neighboring pixels or to the framebuffer.
Neither is currently + * implemented */ + GPRVector dst(v); + + auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue()); + tex->set_dest_swizzle(writemask); + + if (fine) + tex->set_flag(TexInstruction::grad_fine); + + emit_instruction(tex); + + return true; +} + +bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr) +{ + auto tmp = get_temp_register(); + emit_instruction(op2_lshr_int, tmp, + {m_src[0][0], PValue(new LiteralValue(16))}, + {alu_write, alu_last_instr}); + + emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), + {tmp}, {alu_write, alu_last_instr}); + + return true; +} + +bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr) +{ + emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), + {m_src[0][0]},{alu_write, alu_last_instr}); + return true; +} + +bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr) +{ + PValue x = get_temp_register(); + PValue y = get_temp_register(); + + emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write}); + emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr}); + + emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr}); + + emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr}); + + return true; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h new file mode 100644 index 000000000..f56352f4e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h @@ -0,0 +1,115 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EMITALUINSTRUCTION_H +#define SFN_EMITALUINSTRUCTION_H + +#include "sfn_emitinstruction.h" + +#include "sfn_alu_defines.h" +#include "sfn_instruction_alu.h" +#include "sfn_instruction_tex.h" + +namespace r600 { + + +class EmitAluInstruction : public EmitInstruction +{ +public: + EmitAluInstruction(ShaderFromNirProcessor& processor); + +private: + + enum AluOp2Opts { + op2_opt_none = 0, + op2_opt_reverse = 1, + op2_opt_neg_src1 = 1 << 1 + }; + + bool do_emit(nir_instr* instr) override; + + void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp); + + bool emit_mov(const nir_alu_instr& instr); + bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0); + bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + + bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode); + + bool emit_alu_inot(const nir_alu_instr& instr); + bool emit_alu_ineg(const nir_alu_instr& instr); + bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + + bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2}); + bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false); + + bool emit_alu_b2f(const nir_alu_instr& instr); + bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op); + bool emit_dot(const nir_alu_instr& instr, int n); + bool emit_create_vec(const nir_alu_instr& instr, unsigned nc); + bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); + bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc); + + bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); + bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all); + + bool emit_fdph(const nir_alu_instr &instr); + bool emit_discard_if(const nir_intrinsic_instr *instr); + + bool emit_alu_f2b32(const nir_alu_instr& instr); + bool emit_b2i32(const nir_alu_instr& instr); + bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op); + bool emit_pack_64_2x32_split(const nir_alu_instr& instr); + bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp); + + bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine); + bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr); + bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr); + bool emit_pack_32_2x16_split(const nir_alu_instr& instr); + + bool emit_cube(const nir_alu_instr& instr); +private: + void make_last(AluInstruction *ir) const; + void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v, + GPRVector::Values& out, int ncomp); + + void preload_src(const nir_alu_instr& instr); + unsigned num_src_comp(const nir_alu_instr& instr); + + using vreg = std::array<PValue, 4>; + + std::array<PValue, 4> m_src[4]; +}; + +inline void EmitAluInstruction::make_last(AluInstruction *ir) const +{ + if (ir) + ir->set_flag(alu_last_instr); +} + +} + +#endif // SFN_EMITALUINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp new file mode 100644 index 000000000..9a75cd18b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp @@ -0,0 +1,164 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_emitinstruction.h" + +#include "sfn_shader_base.h" + +namespace r600 { + +EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor): + m_proc(processor) +{ + +} + +EmitInstruction::~EmitInstruction() +{ +} + +bool EmitInstruction::emit(nir_instr* instr) +{ + return do_emit(instr); +} + +PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled) +{ + return m_proc.from_nir(v, component, swizzled); +} + +PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +void EmitInstruction::emit_instruction(Instruction *ir) +{ + return m_proc.emit_instruction(ir); +} + +void EmitInstruction::emit_instruction(AluInstruction *ir) +{ + return m_proc.emit_instruction(ir); +} + +bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags) +{ + return m_proc.emit_instruction(opcode, dest,src0, m_flags); +} + +const nir_variable * +EmitInstruction::get_deref_location(const nir_src& v) const +{ + return m_proc.get_deref_location(v); +} + +PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) +{ + return m_proc.from_nir_with_fetch_constant(src, component, channel); +} + +GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle, bool match) +{ + return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match); +} + +PGPRValue EmitInstruction::get_temp_register(int channel) +{ + return m_proc.get_temp_register(channel); +} + +GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle) +{ + return m_proc.get_temp_vec4(swizzle); +} + +PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle) +{ + return m_proc.create_register_from_nir_src(src, swizzle); +} + +enum chip_class 
EmitInstruction::get_chip_class(void) const +{ + return m_proc.get_chip_class(); +} + +PValue EmitInstruction::literal(uint32_t value) +{ + return m_proc.literal(value); +} + +GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components) +{ + return m_proc.vec_from_nir(dst, num_components); +} + +bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle, + const PValue& reg, bool map) +{ + return m_proc.inject_register(sel, swizzle, reg, map); +} + +int EmitInstruction::remap_atomic_base(int base) +{ + return m_proc.remap_atomic_base(base); +} + +void EmitInstruction::set_has_txs_cube_array_comp() +{ + m_proc.sh_info().has_txq_cube_array_z_comp = 1; +} + +const std::set<AluModifiers> EmitInstruction::empty = {}; +const std::set<AluModifiers> EmitInstruction::write = {alu_write}; +const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr}; +const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr}; + +} + diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h new file mode 100644 index 000000000..09a6489b0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h @@ -0,0 +1,101 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef EMITINSTRUCTION_H +#define EMITINSTRUCTION_H + +#include "compiler/nir/nir.h" +#include "sfn_defines.h" +#include "sfn_value.h" +#include "sfn_instruction_alu.h" + +namespace r600 { + +class ShaderFromNirProcessor; + +class EmitInstruction +{ +public: + EmitInstruction(ShaderFromNirProcessor& processor); + virtual ~EmitInstruction(); + bool emit(nir_instr* instr); + + static const std::set<AluModifiers> empty; + static const std::set<AluModifiers> write; + static const std::set<AluModifiers> last_write; + static const std::set<AluModifiers> last; + +protected: + virtual bool do_emit(nir_instr* instr) = 0; + + // forwards from ValuePool + PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled); + PValue from_nir(const nir_src& v, unsigned component); + PValue from_nir(const nir_alu_src& v, unsigned component); + PValue from_nir(const nir_tex_src& v, unsigned component); + PValue from_nir(const nir_alu_dest& v, unsigned component); + PValue from_nir(const nir_dest& v, unsigned component); + + PValue create_register_from_nir_src(const nir_src& src, unsigned comp); + + PGPRValue get_temp_register(int channel = -1); + GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3}); + + // forwards from ShaderFromNirProcessor + void emit_instruction(Instruction *ir); + void emit_instruction(AluInstruction *ir); + bool emit_instruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags); + + PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1); + GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle, bool match = false); + + const nir_variable *get_deref_location(const nir_src& v) const; + + enum chip_class get_chip_class(void) const; + + PValue literal(uint32_t value); + + GPRVector vec_from_nir(const nir_dest& dst, int num_components); + + bool inject_register(unsigned sel, unsigned swizzle, + const PValue& reg, bool map); + + int remap_atomic_base(int base); + + void set_has_txs_cube_array_comp(); +private: + + ShaderFromNirProcessor& m_proc; +}; + +} + + + +#endif // EMITINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp new file mode 100644 index 000000000..9f0d0b605 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp @@ -0,0 +1,644 @@ +#include "sfn_emitssboinstruction.h" + +#include "sfn_instruction_fetch.h" +#include "sfn_instruction_gds.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_tex.h" +#include "../r600_pipe.h" +#include "../r600_asm.h" + +namespace r600 { + +#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) + +EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor): + EmitInstruction(processor), + m_require_rat_return_address(false), + m_ssbo_image_offset(0) +{ +} + +void EmitSSBOInstruction::set_ssbo_offset(int offset) +{ + m_ssbo_image_offset = offset; +} + + +void EmitSSBOInstruction::set_require_rat_return_address() +{ + m_require_rat_return_address = true; +} + +bool +EmitSSBOInstruction::load_rat_return_address() +{ + if (m_require_rat_return_address) { + m_rat_return_address = get_temp_vec4(); + emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write})); + emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, 
m_rat_return_address.reg_i(1), literal(-1), {alu_write})); + emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)), + literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr})); + emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1), + m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0), + {alu_write, alu_last_instr})); + m_require_rat_return_address = false; + } + return true; +} + + +bool EmitSSBOInstruction::do_emit(nir_instr* instr) +{ + const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_atomic_counter_add: + case nir_intrinsic_atomic_counter_and: + case nir_intrinsic_atomic_counter_exchange: + case nir_intrinsic_atomic_counter_max: + case nir_intrinsic_atomic_counter_min: + case nir_intrinsic_atomic_counter_or: + case nir_intrinsic_atomic_counter_xor: + case nir_intrinsic_atomic_counter_comp_swap: + return emit_atomic(intr); + case nir_intrinsic_atomic_counter_read: + case nir_intrinsic_atomic_counter_post_dec: + return emit_unary_atomic(intr); + case nir_intrinsic_atomic_counter_inc: + return emit_atomic_inc(intr); + case nir_intrinsic_atomic_counter_pre_dec: + return emit_atomic_pre_dec(intr); + case nir_intrinsic_load_ssbo: + return emit_load_ssbo(intr); + case nir_intrinsic_store_ssbo: + return emit_store_ssbo(intr); + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_exchange: + return emit_ssbo_atomic_op(intr); + case nir_intrinsic_image_store: + return emit_image_store(intr); + case nir_intrinsic_image_load: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_imax: + return emit_image_load(intr); + case nir_intrinsic_image_size: + return emit_image_size(intr); + case nir_intrinsic_get_ssbo_size: + return emit_buffer_size(intr); + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_group_memory_barrier: + return make_stores_ack_and_waitack(); + default: + return false; + } +} + +bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr) +{ + ESDOp op = get_opcode(instr->intrinsic); + + if (DS_OP_INVALID == op) + return false; + + GPRVector dest = make_dest(instr); + + int base = remap_atomic_base(nir_intrinsic_base(instr)); + + PValue uav_id = from_nir(instr->src[0], 0); + + PValue value = from_nir_with_fetch_constant(instr->src[1], 0); + + GDSInstr *ir = nullptr; + if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) { + PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0); + ir = new GDSInstr(op, dest, value, value2, uav_id, base); + } else { + ir = new GDSInstr(op, dest, value, uav_id, base); + } + + emit_instruction(ir); + return true; +} + +bool 
EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr) +{ + ESDOp op = get_opcode(instr->intrinsic); + + if (DS_OP_INVALID == op) + return false; + + GPRVector dest = make_dest(instr); + + PValue uav_id = from_nir(instr->src[0], 0); + + auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr))); + + emit_instruction(ir); + return true; +} + +ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) +{ + switch (opcode) { + case nir_intrinsic_atomic_counter_add: + return DS_OP_ADD_RET; + case nir_intrinsic_atomic_counter_and: + return DS_OP_AND_RET; + case nir_intrinsic_atomic_counter_exchange: + return DS_OP_XCHG_RET; + case nir_intrinsic_atomic_counter_inc: + return DS_OP_INC_RET; + case nir_intrinsic_atomic_counter_max: + return DS_OP_MAX_UINT_RET; + case nir_intrinsic_atomic_counter_min: + return DS_OP_MIN_UINT_RET; + case nir_intrinsic_atomic_counter_or: + return DS_OP_OR_RET; + case nir_intrinsic_atomic_counter_read: + return DS_OP_READ_RET; + case nir_intrinsic_atomic_counter_xor: + return DS_OP_XOR_RET; + case nir_intrinsic_atomic_counter_post_dec: + return DS_OP_DEC_RET; + case nir_intrinsic_atomic_counter_comp_swap: + return DS_OP_CMP_XCHG_RET; + case nir_intrinsic_atomic_counter_pre_dec: + default: + return DS_OP_INVALID; + } +} + +RatInstruction::ERatOp +EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const +{ + switch (opcode) { + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_image_atomic_add: + return RatInstruction::ADD_RTN; + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_image_atomic_and: + return RatInstruction::AND_RTN; + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: + return RatInstruction::XCHG_RTN; + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_image_atomic_or: + return RatInstruction::OR_RTN; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_image_atomic_imin: + return RatInstruction::MIN_INT_RTN; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_image_atomic_imax: + return RatInstruction::MAX_INT_RTN; + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_image_atomic_umin: + return RatInstruction::MIN_UINT_RTN; + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_image_atomic_umax: + return RatInstruction::MAX_UINT_RTN; + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_image_atomic_xor: + return RatInstruction::XOR_RTN; + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: + if (util_format_is_float(format)) + return RatInstruction::CMPXCHG_FLT_RTN; + else + return RatInstruction::CMPXCHG_INT_RTN; + case nir_intrinsic_image_load: + return RatInstruction::NOP_RTN; + default: + unreachable("Unsupported RAT instruction"); + } +} + + +bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr) +{ + GPRVector dest = make_dest(instr); + + PValue value = from_nir_with_fetch_constant(instr->src[1], 0); + + PValue uav_id = from_nir(instr->src[0], 0); + + auto ir = new GDSInstr(DS_OP_ADD_RET, dest, value, uav_id, + remap_atomic_base(nir_intrinsic_base(instr))); + + emit_instruction(ir); + return true; +} + +bool EmitSSBOInstruction::load_atomic_inc_limits() +{ + m_atomic_update = get_temp_register(); + m_atomic_update->set_keep_alive(); + emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1), + {alu_write, alu_last_instr})); + return true; +} + +bool EmitSSBOInstruction::emit_atomic_inc(const 
nir_intrinsic_instr* instr) +{ + PValue uav_id = from_nir(instr->src[0], 0); + GPRVector dest = make_dest(instr); + auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id, + remap_atomic_base(nir_intrinsic_base(instr))); + emit_instruction(ir); + return true; +} + +bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr) +{ + GPRVector dest = make_dest(instr); + + PValue uav_id = from_nir(instr->src[0], 0); + + auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id, + remap_atomic_base(nir_intrinsic_base(instr))); + emit_instruction(ir); + + emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write)); + + return true; +} + +bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr) +{ + GPRVector dest = make_dest(instr); + + /* src[1] holds the byte offset into the buffer; src[0] (the buffer index) feeds the fetch below */ + auto addr = from_nir(instr->src[1], 0); + PValue addr_temp = create_register_from_nir_src(instr->src[1], 1); + + /* convert the byte offset to a dword index; ideally this shift would be lowered in NIR */ + emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))}, + {alu_write, alu_last_instr})); + + const EVTXDataFormat formats[4] = { + fmt_32, + fmt_32_32, + fmt_32_32_32, + fmt_32_32_32_32 + }; + + const std::array<int,4> dest_swt[4] = { + {0,7,7,7}, + {0,1,7,7}, + {0,1,2,7}, + {0,1,2,3} + }; + + /* TODO fix resource index */ + auto ir = new FetchInstruction(dest, addr_temp, + R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset + , from_nir(instr->src[0], 0), + formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int); + ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]); + ir->set_flag(vtx_use_tc); + + emit_instruction(ir); + return true; +} + +bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr) +{ + + GPRVector::Swizzle swz = {7,7,7,7}; + for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) + swz[i] = i; + + auto orig_addr = from_nir(instr->src[2], 0); + + GPRVector addr_vec = get_temp_vec4({0,1,2,7}); + + auto temp2 = get_temp_vec4(); + + auto rat_id = from_nir(instr->src[1], 0); + + emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr, + PValue(new LiteralValue(2)), write)); + emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write)); + emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write)); + + + auto values = vec_from_nir_with_fetch_constant(instr->src[0], + (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true); + + auto cf_op = cf_mem_rat; + //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? 
cf_mem_rat_cacheless : cf_mem_rat; + auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, + values, addr_vec, m_ssbo_image_offset, rat_id, 1, + 1, 0, false); + emit_instruction(store); + m_store_ops.push_back(store); + + for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) { + emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write)); + emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0), + {addr_vec.reg_i(0), Value::one_i}, last_write)); + store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, + temp2, addr_vec, m_ssbo_image_offset, rat_id, 1, + 1, 0, false); + emit_instruction(store); + if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT)) + m_store_ops.push_back(store); + } + + return true; +} + +bool +EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin) +{ + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3}); + auto undef = from_nir(intrin->src[2], 0); + auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3}); + auto unknown = from_nir(intrin->src[4], 0); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && + nir_intrinsic_image_array(intrin)) { + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); + } + + auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; + auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid, + image_offset, 1, 0xf, 0, false); + + //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT)) + m_store_ops.push_back(store); + + emit_instruction(store); + return true; +} + +bool +EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin) +{ + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT); + + + auto coord_orig = from_nir(intrin->src[1], 0, 0); + auto coord = get_temp_register(0); + + emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write)); + + if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[3], 0), {alu_write})); + // TODO: cayman wants channel 2 here + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3), + from_nir(intrin->src[2], 0), {alu_last_instr, alu_write})); + } else { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[2], 0), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write)); + } + + + GPRVector out_vec({coord, coord, coord, coord}); + + auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset, + image_offset, 1, 0xf, 0, true); + emit_instruction(atomic); + emit_instruction(new WaitAck(0)); + + GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components); + auto fetch = new 
FetchInstruction(vc_fetch, + no_index_offset, + fmt_32, + vtx_nf_int, + vtx_es_none, + m_rat_return_address.reg_i(1), + dest, + 0, + false, + 0xf, + R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + image_offset, + {0,7,7,7}); + fetch->set_flag(vtx_srf_mode); + fetch->set_flag(vtx_use_tc); + emit_instruction(fetch); + return true; + +} + +bool +EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin) +{ + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)); + + GPRVector::Swizzle swz = {0,1,2,3}; + auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && + nir_intrinsic_image_array(intrin)) { + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); + } + + if (intrin->intrinsic != nir_intrinsic_image_load) { + if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[4], 0), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3), + from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); + } else { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); + } + } + auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? 
cf_mem_rat_cacheless : cf_mem_rat; + + auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid, + image_offset, 1, 0xf, 0, true); + emit_instruction(store); + return fetch_return_value(intrin); +} + +bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin) +{ + emit_instruction(new WaitAck(0)); + + pipe_format format = nir_intrinsic_format(intrin); + unsigned fmt = fmt_32; + unsigned num_format = 0; + unsigned format_comp = 0; + unsigned endian = 0; + + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian); + + GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); + + auto fetch = new FetchInstruction(vc_fetch, + no_index_offset, + (EVTXDataFormat)fmt, + (EVFetchNumFormat)num_format, + (EVFetchEndianSwap)endian, + m_rat_return_address.reg_i(1), + dest, + 0, + false, + 0x3, + R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + image_offset, {0,1,2,3}); + fetch->set_flag(vtx_srf_mode); + fetch->set_flag(vtx_use_tc); + if (format_comp) + fetch->set_flag(vtx_format_comp_signed); + + emit_instruction(fetch); + return true; +} + +bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin) +{ + GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); + GPRVector src{0,{4,4,4,4}}; + + assert(nir_src_as_uint(intrin->src[1]) == 0); + + auto const_offset = nir_src_as_const_value(intrin->src[0]); + auto dyn_offset = PValue(); + int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; + if (const_offset) + res_id += const_offset[0].u32; + else + dyn_offset = from_nir(intrin->src[0], 0); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) { + emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)), + res_id, + bim_none)); + return true; + } else { + emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src, + 0/* ?? */, + res_id, dyn_offset)); + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE && + nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) { + /* Need to load the layers from a const buffer */ + + unsigned lookup_resid = const_offset[0].u32; + emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2), + PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, + R600_BUFFER_INFO_CONST_BUFFER)), + EmitInstruction::last_write)); + } + } + return true; +} + +bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr) +{ + std::array<PValue,4> dst_elms; + + + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? 
i : 7); + } + + GPRVector dst(dst_elms); + GPRVector src(0,{4,4,4,4}); + + auto const_offset = nir_src_as_const_value(intr->src[0]); + auto dyn_offset = PValue(); + int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; + if (const_offset) + res_id += const_offset[0].u32; + else + assert(0 && "dynamic buffer offset not supported in buffer_size"); + + emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), + res_id, bim_none)); + + return true; +} + +bool EmitSSBOInstruction::make_stores_ack_and_waitack() +{ + for (auto&& store: m_store_ops) + store->set_ack(); + + if (!m_store_ops.empty()) + emit_instruction(new WaitAck(0)); + + m_store_ops.clear(); + + return true; +} + +GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir) +{ + GPRVector::Values v; + int i; + for (i = 0; i < 4; ++i) + v[i] = from_nir(ir->dest, i); + return GPRVector(v); +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h new file mode 100644 index 000000000..56e0e31f1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h @@ -0,0 +1,57 @@ +#ifndef SFN_EMITSSBOINSTRUCTION_H +#define SFN_EMITSSBOINSTRUCTION_H + +#include "sfn_emitinstruction.h" +#include "sfn_instruction_gds.h" +#include "sfn_value_gpr.h" + +namespace r600 { + +class EmitSSBOInstruction: public EmitInstruction { +public: + EmitSSBOInstruction(ShaderFromNirProcessor& processor); + + void set_ssbo_offset(int offset); + + void set_require_rat_return_address(); + bool load_rat_return_address(); + bool load_atomic_inc_limits(); + +private: + bool do_emit(nir_instr *instr); + + bool emit_atomic(const nir_intrinsic_instr* instr); + bool emit_unary_atomic(const nir_intrinsic_instr* instr); + bool emit_atomic_add(const nir_intrinsic_instr* instr); + bool emit_atomic_inc(const nir_intrinsic_instr* instr); + bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr); + + bool emit_load_ssbo(const nir_intrinsic_instr* instr); + bool emit_store_ssbo(const nir_intrinsic_instr* instr); + + bool emit_image_size(const nir_intrinsic_instr *intrin); + bool emit_image_load(const nir_intrinsic_instr *intrin); + bool emit_image_store(const nir_intrinsic_instr *intrin); + bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin); + bool emit_buffer_size(const nir_intrinsic_instr *intrin); + + bool fetch_return_value(const nir_intrinsic_instr *intrin); + + bool make_stores_ack_and_waitack(); + + ESDOp get_opcode(nir_intrinsic_op opcode); + RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const; + + GPRVector make_dest(const nir_intrinsic_instr* instr); + + PGPRValue m_atomic_update; + + bool m_require_rat_return_address; + GPRVector m_rat_return_address; + int m_ssbo_image_offset; + std::vector<RatInstruction *> m_store_ops; +}; + +} + +#endif // SFN_EMITSSBOINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp new file mode 100644 index 000000000..c31bee43d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp @@ -0,0 +1,671 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_emittexinstruction.h" +#include "sfn_shader_base.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor): + EmitInstruction (processor) +{ +} + +bool EmitTexInstruction::do_emit(nir_instr* instr) +{ + nir_tex_instr* ir = nir_instr_as_tex(instr); + + TexInputs src; + if (!get_inputs(*ir, src)) + return false; + + if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + switch (ir->op) { + case nir_texop_txf: + return emit_buf_txf(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + default: + return false; + } + } else { + switch (ir->op) { + case nir_texop_tex: + return emit_tex_tex(ir, src); + case nir_texop_txf: + return emit_tex_txf(ir, src); + case nir_texop_txb: + return emit_tex_txb(ir, src); + case nir_texop_txl: + return emit_tex_txl(ir, src); + case nir_texop_txd: + return emit_tex_txd(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + case nir_texop_lod: + return emit_tex_lod(ir, src); + case nir_texop_tg4: + return emit_tex_tg4(ir, src); + case nir_texop_txf_ms: + return emit_tex_txf_ms(ir, src); + case nir_texop_query_levels: + return emit_tex_txs(ir, src, {3,7,7,7}); + case nir_texop_texture_samples: + return emit_tex_texture_samples(ir, src, {3,7,7,7}); + default: + + return false; + } + } +} + +bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src) +{ + auto dst = make_dest(*instr); + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0, + instr->texture_index + R600_MAX_CONST_BUFFERS, + src.texture_offset, bim_none); + ir->set_flag(vtx_use_const_field); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src) +{ + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample; + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c; + } + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + 
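A side note on the destination-swizzle convention used by make_dest() here and by the dest_swt tables in emit_load_ssbo(): channels that the NIR destination does not populate are given swizzle index 7, which r600 treats as the masked (unwritten) channel, while indices 0-3 select x through w. A minimal standalone sketch of that rule, using a hypothetical helper name rather than driver code:

// Illustrative only: mirrors the channel-masking pattern of make_dest()
// and the dest_swt tables; dest_swizzle() is a hypothetical name.
#include <array>
#include <cstdio>

static std::array<int, 4> dest_swizzle(unsigned num_components)
{
   std::array<int, 4> swz;
   for (unsigned i = 0; i < 4; ++i)
      swz[i] = i < num_components ? (int)i : 7; /* 7 = masked channel */
   return swz;
}

int main()
{
   /* n = 1..4 reproduces the four dest_swt rows: {0,7,7,7} ... {0,1,2,3} */
   for (unsigned n = 1; n <= 4; ++n) {
      auto swz = dest_swizzle(n);
      std::printf("%u -> {%d,%d,%d,%d}\n", n, swz[0], swz[1], swz[2], swz[3]);
   }
   return 0;
}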
+bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_g; + auto dst = make_dest(*instr); + + GPRVector empty_dst(0,{7,7,7,7}); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_g; + } + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irgh->set_dest_swizzle({7,7,7,7}); + + TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irgv->set_dest_swizzle({7,7,7,7}); + + TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, src.coord, ir); + + set_rect_coordinate_flags(instr, ir); + set_offsets(ir, src.offset); + + emit_instruction(irgh); + emit_instruction(irgv); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto dst = make_dest(*instr); + + if (*src.coord.reg_i(3) != *src.lod) { + if (src.coord.sel() != src.lod->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr})); + else + src.coord.set_reg_i(3, src.lod); + } + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + src.coord.set_reg_i(2, src.coord.reg_i(1)); + + auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + + if (src.offset) { + assert(src.offset->is_ssa); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { + ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), + {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + if (instr->is_array) + tex_ir->set_flag(TexInstruction::z_unnormalized); + + emit_instruction(tex_ir); + return true; +} + +bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::get_tex_lod; + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irt->set_dest_swizzle({1,0,7,7}); + emit_instruction(irt); + + return true; + +} + +bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << 
*reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_l; + if (instr->is_shadow) { + if (src.coord.sel() != src.comperator->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write})); + else + src.coord.set_reg_i(2, src.comperator); + tex_op = TexInstruction::sample_c_l; + } + + if (src.coord.sel() != src.lod->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write})); + else + src.coord.set_reg_i(3, src.lod); + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::sample_lb; + + std::array<uint8_t, 4> in_swizzle = {0,1,2,3}; + + if (instr->is_shadow) { + if (src.coord.sel() != src.comperator->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write})); + else + src.coord.set_reg_i(2, src.comperator); + tex_op = TexInstruction::sample_c_lb; + } + + if (src.coord.sel() != src.bias->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write})); + else + src.coord.set_reg_i(3, src.bias); + + GPRVector tex_src(src.coord, in_swizzle); + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, tex_src, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src, + const std::array<int,4>& dest_swz) +{ + std::array<PValue,4> dst_elms; + std::array<PValue,4> src_elms; + + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? 
i : 7); + } + + GPRVector dst(dst_elms); + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), + instr->sampler_index + R600_MAX_CONST_BUFFERS, + bim_none)); + } else { + for (uint16_t i = 0; i < 4; ++i) + src_elms[i] = tex_src.lod; + GPRVector src(src_elms); + + auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + ir->set_dest_swizzle(dest_swz); + emit_instruction(ir); + + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + PValue src(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2), + sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER)); + + auto alu = new AluInstruction(op1_mov, dst[2], src, {last_write}); + emit_instruction(alu); + set_has_txs_cube_array_comp(); + } + } + + return true; + +} + +bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src, + const std::array<int, 4> &dest_swz) +{ + GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); + GPRVector help{0,{4,4,4,4}}; + + auto dyn_offset = PValue(); + int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index; + + auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help, + 0, res_id, src.sampler_offset); + ir->set_dest_swizzle(dest_swz); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + TexInstruction *set_ofs = nullptr; + + auto tex_op = TexInstruction::gather4; + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::gather4_c; + } + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + bool literal_offset = false; + if (src.offset) { + literal_offset = nir_src_as_const_value(*src.offset) != 0; + r600::sfn_log << SfnLog::tex << " really have offsets and they are " << + (literal_offset ? "literal" : "varying") << + "\n"; + + if (!literal_offset) { + GPRVector::Swizzle swizzle = {4,4,4,4}; + for (unsigned i = 0; i < instr->coord_components; ++i) + swizzle[i] = i; + + int noffsets = instr->coord_components; + if (instr->is_array) + --noffsets; + + auto ofs = vec_from_nir_with_fetch_constant(*src.offset, + ( 1 << noffsets) - 1, + swizzle); + GPRVector dummy(0, {7,7,7,7}); + tex_op = (tex_op == TexInstruction::gather4_c) ? 
+ TexInstruction::gather4_c_o : TexInstruction::gather4_o; + + set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy, + ofs, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + set_ofs->set_dest_swizzle({7,7,7,7}); + } + } + + + /* pre CAYMAN needs swizzle */ + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + irt->set_dest_swizzle({1,2,0,3}); + irt->set_gather_comp(instr->component); + + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + if (literal_offset) { + r600::sfn_log << SfnLog::tex << "emit literal offsets\n"; + set_offsets(irt, src.offset); + } + + set_rect_coordinate_flags(instr, irt); + + if (set_ofs) + emit_instruction(set_ofs); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src) +{ + assert(instr->src[0].src.is_ssa); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + PGPRValue sample_id_dest_reg = get_temp_register(); + GPRVector sample_id_dest(sample_id_dest_reg->sel(), {7,7,7,7}); + sample_id_dest.set_reg_i(sample_id_dest_reg->chan(), sample_id_dest_reg); + std::array<int,4> dest_swz = {7,7,7,7}; + dest_swz[sample_id_dest_reg->chan()] = 0; + + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), + src.ms_index, + {alu_write, alu_last_instr})); + + auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized); + tex_sample_id_ir->set_inst_mode(1); + + tex_sample_id_ir->set_dest_swizzle(dest_swz); + + emit_instruction(tex_sample_id_ir); + + if (src.ms_index->type() != Value::literal || + static_cast<const LiteralValue&>(*src.ms_index).value() != 0) { + PValue help = get_temp_register(); + + emit_instruction(new AluInstruction(op2_lshl_int, help, + src.ms_index, literal(2), + {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op2_lshr_int, sample_id_dest_reg, + {sample_id_dest_reg, help}, + {alu_write, alu_last_instr})); + } + + emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3), + {sample_id_dest_reg, PValue(new LiteralValue(15))}, + {alu_write, alu_last_instr})); + + auto dst = make_dest(*instr); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + src.coord.set_reg_i(2, src.coord.reg_i(1)); + + auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + + if (src.offset) { + assert(src.offset->is_ssa); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { + ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), + {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + 
emit_instruction(tex_ir); + return true; +} + +bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src) +{ + sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n"; + + unsigned grad_components = instr.coord_components; + if (instr.is_array && !instr.array_is_lowered_cube) + --grad_components; + + + src.offset = nullptr; + bool retval = true; + for (unsigned i = 0; i < instr.num_srcs; ++i) { + switch (instr.src[i].src_type) { + case nir_tex_src_bias: + src.bias = from_nir(instr.src[i], 0); + break; + + case nir_tex_src_coord: { + src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << instr.coord_components) - 1, + {0,1,2,3}); + } break; + case nir_tex_src_comparator: + src.comperator = from_nir(instr.src[i], 0); + break; + case nir_tex_src_ddx: { + sfn_log << SfnLog::tex << "Get DDX "; + src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << grad_components) - 1, + swizzle_from_comps(grad_components)); + sfn_log << SfnLog::tex << src.ddx << "\n"; + } break; + case nir_tex_src_ddy:{ + sfn_log << SfnLog::tex << "Get DDY "; + src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << grad_components) - 1, + swizzle_from_comps(grad_components)); + sfn_log << SfnLog::tex << src.ddy << "\n"; + } break; + case nir_tex_src_lod: + src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0); + break; + case nir_tex_src_offset: + sfn_log << SfnLog::tex << " -- Find offset\n"; + src.offset = &instr.src[i].src; + break; + case nir_tex_src_sampler_deref: + src.sampler_deref = get_deref_location(instr.src[i].src); + break; + case nir_tex_src_texture_deref: + src.texture_deref = get_deref_location(instr.src[i].src); + break; + case nir_tex_src_ms_index: + src.ms_index = from_nir(instr.src[i], 0); + break; + case nir_tex_src_texture_offset: + src.texture_offset = from_nir(instr.src[i], 0); + break; + case nir_tex_src_sampler_offset: + src.sampler_offset = from_nir(instr.src[i], 0); + break; + case nir_tex_src_plane: + case nir_tex_src_projector: + case nir_tex_src_min_lod: + case nir_tex_src_ms_mcs: + default: + sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n"; + retval = false; + } + } + return retval; +} + +GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr) +{ + int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : + instr.dest.reg.reg->num_components; + std::array<PValue,4> dst_elms; + for (uint16_t i = 0; i < 4; ++i) + dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7); + return GPRVector(dst_elms); +} + + +GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr, + const std::array<int, 4>& swizzle) +{ + int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : + instr.dest.reg.reg->num_components; + std::array<PValue,4> dst_elms; + for (uint16_t i = 0; i < 4; ++i) { + int k = swizzle[i]; + dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? 
k : 7); + } + return GPRVector(dst_elms); +} + +void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr, + TexInstruction* ir) const +{ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + ir->set_flag(TexInstruction::x_unnormalized); + ir->set_flag(TexInstruction::y_unnormalized); + } +} + +void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset) +{ + if (!offset) + return; + + assert(offset->is_ssa); + auto literal = nir_src_as_const_value(*offset); + assert(literal); + + for (int i = 0; i < offset->ssa->num_components; ++i) { + ir->set_offset(i, literal[i].i32); + } +} + +void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir) +{ + int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2; + emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx), + {alu_last_instr, alu_write})); + ir->set_flag(TexInstruction::z_unnormalized); +} + +EmitTexInstruction::SamplerId +EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref) +{ + EmitTexInstruction::SamplerId result = {sampler_id, false}; + + if (deref) { + assert(glsl_type_is_sampler(deref->type)); + result.id = deref->data.binding; + } + return result; +} + +EmitTexInstruction::TexInputs::TexInputs(): + sampler_deref(nullptr), + texture_deref(nullptr), + offset(nullptr) +{ +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h new file mode 100644 index 000000000..e11ebda1c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h @@ -0,0 +1,96 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EMITTEXINSTRUCTION_H +#define SFN_EMITTEXINSTRUCTION_H + +#include "sfn_emitinstruction.h" +#include "sfn_instruction_tex.h" + +namespace r600 { + +class EmitTexInstruction : public EmitInstruction +{ +public: + EmitTexInstruction(ShaderFromNirProcessor& processor); + +private: + struct TexInputs { + TexInputs(); + const nir_variable *sampler_deref; + const nir_variable *texture_deref; + GPRVector coord; + PValue bias; + PValue comperator; + PValue lod; + GPRVector ddx; + GPRVector ddy; + nir_src *offset; + PValue gather_comp; + PValue ms_index; + PValue sampler_offset; + PValue texture_offset; + }; + + bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src); + + bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src); + bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src, + const std::array<int, 4> &dest_swz); + bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src, + const std::array<int, 4> &dest_swz); + bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src); + bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src); + + bool get_inputs(const nir_tex_instr& instr, TexInputs &src); + + void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const; + + bool do_emit(nir_instr* instr) override; + + GPRVector make_dest(nir_tex_instr& instr); + GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle); + + void set_offsets(TexInstruction* ir, nir_src *offset); + void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir); + + struct SamplerId { + int id; + bool indirect; + }; + + SamplerId get_sampler_id(int sampler_id, const nir_variable *deref); + +}; + +} + +#endif // SFN_EMITTEXINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp new file mode 100644 index 000000000..72cf23172 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp @@ -0,0 +1,183 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_alu.h" +#include "sfn_valuepool.h" + +namespace r600 { + +const AluModifiers AluInstruction::src_abs_flags[2] = + {alu_src0_abs, alu_src1_abs}; +const AluModifiers AluInstruction::src_neg_flags[3] = + {alu_src0_neg, alu_src1_neg, alu_src2_neg}; +const AluModifiers AluInstruction::src_rel_flags[3] = + {alu_src0_rel, alu_src1_rel, alu_src2_rel}; + +AluInstruction::AluInstruction(EAluOp opcode): + Instruction (Instruction::alu), + m_opcode(opcode), + m_src(alu_ops.at(opcode).nsrc), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu) +{ + if (alu_ops.at(opcode).nsrc == 3) + m_flags.set(alu_op3); +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, + std::vector<PValue> src, + const std::set<AluModifiers>& flags): + Instruction (Instruction::alu), + m_opcode(opcode), + m_dest(dest), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu) +{ + assert(dest); + m_src.swap(src); + for (auto f : flags) + m_flags.set(f); + + if (alu_ops.at(opcode).nsrc == 3) + m_flags.set(alu_op3); + + for (auto &s: m_src) + add_remappable_src_value(&s); + + add_remappable_dst_value(&m_dest); +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, + const std::set<AluModifiers>& flags): + AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags) +{ +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, + PValue src0, PValue src1, + const std::set<AluModifiers> &m_flags): + AluInstruction(opcode, dest, {src0, src1}, m_flags) +{ +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, + PValue src1, PValue src2, + const std::set<AluModifiers> &flags): + AluInstruction(opcode, dest, {src0, src1, src2}, flags) +{ +} + +bool AluInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == alu); + const auto& oth = static_cast<const AluInstruction&>(lhs); + + if (m_opcode != oth.m_opcode) { + return false; + } + + if (*m_dest != *oth.m_dest) + return false; + + if (m_src.size() != oth.m_src.size()) + return false; + + for (unsigned i = 0; i < m_src.size(); ++i) + if (*m_src[i] != *oth.m_src[i]) { + return false; + } + return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type); +} + +void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value) +{ + for (auto c: candidates) { + if (*c == *m_dest) + m_dest = new_value; + + for (auto& s: m_src) { + if (*c == *s) + s = new_value; + } + } +} + +PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map, + ValueMap &values) +{ + auto new_index = map[reg->sel()]; + if (new_index.valid) + reg = values.get_or_inject(new_index.new_reg, reg->chan()); + map[reg->sel()].used = true; + return reg; +} + + +void AluInstruction::set_flag(AluModifiers flag) +{ + m_flags.set(flag); +} + +void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz) +{ + m_bank_swizzle = bswz; +} + +unsigned AluInstruction::n_sources() const +{ + return m_src.size(); +} + +void AluInstruction::do_print(std::ostream& os) const +{ + os << "ALU " << alu_ops.at(m_opcode).name; + if (m_flags.test(alu_dst_clamp)) + os << "_CLAMP"; + if (m_dest) + os << ' ' << *m_dest << " : " ; + + for (unsigned i = 0; i < m_src.size(); ++i) { + int pflags = 0; + if (i) + os << ' '; + 
if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg; + if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel; + if (i < 2) + if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs; + m_src[i]->print(os, Value::PrintFlags(0, pflags)); + } + os << " {"; + os << (m_flags.test(alu_write) ? 'W' : ' '); + os << (m_flags.test(alu_last_instr) ? 'L' : ' '); + os << (m_flags.test(alu_update_exec) ? 'E' : ' '); + os << (m_flags.test(alu_update_pred) ? 'P' : ' '); + os << "}"; + + os << " BS:" << m_bank_swizzle; + os << " CF:" << m_cf_type; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h new file mode 100644 index 000000000..383fa3baf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h @@ -0,0 +1,144 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_r600_instruction_alu_h +#define sfn_r600_instruction_alu_h + +#include "sfn_instruction_base.h" +#include "sfn_alu_defines.h" + +namespace r600 { + +enum AluModifiers { + alu_src0_neg, + alu_src0_abs, + alu_src0_rel, + alu_src1_neg, + alu_src1_abs, + alu_src1_rel, + alu_src2_neg, + alu_src2_rel, + alu_dst_clamp, + alu_dst_rel, + alu_last_instr, + alu_update_exec, + alu_update_pred, + alu_write, + alu_op3 +}; + +enum AluDstModifiers { + omod_off = 0, + omod_mul2 = 1, + omod_mul4 = 2, + omod_divl2 = 3 +}; + +enum AluPredSel { + pred_off = 0, + pred_zero = 2, + pred_one = 3 +}; + +enum AluBankSwizzle { + alu_vec_012 = 0, + sq_alu_scl_201 = 0, + alu_vec_021 = 1, + sq_alu_scl_122 = 1, + alu_vec_120 = 2, + sq_alu_scl_212 = 2, + alu_vec_102 = 3, + sq_alu_scl_221 = 3, + alu_vec_201 = 4, + alu_vec_210 = 5, + alu_vec_unknown = 6 +}; + +class AluInstruction : public Instruction { +public: + + static const AluModifiers src_abs_flags[2]; + static const AluModifiers src_neg_flags[3]; + static const AluModifiers src_rel_flags[3]; + + AluInstruction(EAluOp opcode); + AluInstruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, PValue src0, + const std::set<AluModifiers>& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, + PValue src0, PValue src1, + const std::set<AluModifiers>& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1, + PValue src2, + const std::set<AluModifiers>& m_flags); + + void set_flag(AluModifiers flag); + unsigned n_sources() const; + + PValue dest() {return m_dest;} + EAluOp opcode() const {return m_opcode;} + const Value *dest() const {return m_dest.get();} + Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];} + PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];} + bool is_last() const {return m_flags.test(alu_last_instr);} + bool write() const {return m_flags.test(alu_write);} + bool flag(AluModifiers f) const {return m_flags.test(f);} + void set_bank_swizzle(AluBankSwizzle swz); + int bank_swizzle() const {return m_bank_swizzle;} + ECFAluOpCode cf_type() const {return m_cf_type;} + void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; } + + void replace_values(const ValueSet& candidates, PValue new_value) override; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map, + ValueMap &values); + + + EAluOp m_opcode; + PValue m_dest; + std::vector<PValue> m_src; + AluOpFlags m_flags; + AluDstModifiers m_omod; + AluPredSel m_pred_sel; + AluBankSwizzle m_bank_swizzle; + ECFAluOpCode m_cf_type; +}; + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp new file mode 100644 index 000000000..116bfaca5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp @@ -0,0 +1,187 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include <algorithm> +#include <cassert> + +#include "sfn_instruction_base.h" +#include "sfn_liverange.h" +#include "sfn_valuepool.h" + +namespace r600 { + +ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m, + ValueMap& values): + m_map(m), + m_values(values) +{ +} + +void ValueRemapper::remap(PValue& v) +{ + if (!v) + return; + if (v->type() == Value::gpr) { + v = remap_one_registers(v); + } else if (v->type() == Value::gpr_array_value) { + GPRArrayValue& val = static_cast<GPRArrayValue&>(*v); + auto value = val.value(); + auto addr = val.indirect(); + val.reset_value(remap_one_registers(value)); + if (addr) { + if (addr->type() == Value::gpr) + val.reset_addr(remap_one_registers(addr)); + } + size_t range_start = val.sel(); + size_t range_end = range_start + val.array_size(); + while (range_start < range_end) + m_map[range_start++].used = true; + } else if (v->type() == Value::kconst) { + auto& val = static_cast<UniformValue&>(*v); + auto addr = val.addr(); + if (addr && addr->type() == Value::gpr) + val.reset_addr(remap_one_registers(addr)); + } + +} + +void ValueRemapper::remap(GPRVector& v) +{ + for (int i = 0; i < 4; ++i) { + if (v.reg_i(i)) { + auto& ns_idx = m_map[v.reg_i(i)->sel()]; + if (ns_idx.valid) + v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan())); + m_map[v.reg_i(i)->sel()].used = true; + } + } +} + +PValue ValueRemapper::remap_one_registers(PValue& reg) +{ + auto new_index = m_map[reg->sel()]; + if (new_index.valid) + reg = m_values.get_or_inject(new_index.new_reg, reg->chan()); + m_map[reg->sel()].used = true; + return reg; +} + + +Instruction::Instruction(instr_type t): + m_type(t) +{ +} + +Instruction::~Instruction() +{ +} + +void Instruction::print(std::ostream& os) const +{ + os << "OP:"; + do_print(os); +} + + +void Instruction::remap_registers(ValueRemapper& map) +{ + sfn_log << SfnLog::merge << "REMAP " << *this << "\n"; + for (auto& v: m_mappable_src_registers) + map.remap(*v); + + for (auto& v: m_mappable_src_vectors) + map.remap(*v); + + for (auto& v: m_mappable_dst_registers) + map.remap(*v); + + for (auto& v: m_mappable_dst_vectors) + map.remap(*v); + sfn_log << SfnLog::merge << "TO " << *this << "\n\n"; +} + +void Instruction::add_remappable_src_value(PValue *v) +{ + if (*v) + m_mappable_src_registers.push_back(v); +} + +void Instruction::add_remappable_src_value(GPRVector *v) +{ + m_mappable_src_vectors.push_back(v); +} + +void Instruction::add_remappable_dst_value(PValue *v) +{ + if (v) + m_mappable_dst_registers.push_back(v); +} + +void 
Instruction::add_remappable_dst_value(GPRVector *v) +{ + m_mappable_dst_vectors.push_back(v); +} + +void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value) +{ + +} + +void Instruction::evalue_liveness(LiverangeEvaluator& eval) const +{ + sfn_log << SfnLog::merge << "Scan " << *this << "\n"; + for (const auto& s: m_mappable_src_registers) + if (*s) + eval.record_read(**s); + + for (const auto& s: m_mappable_src_vectors) + eval.record_read(*s); + + for (const auto& s: m_mappable_dst_registers) + if (*s) + eval.record_write(**s); + + for (const auto& s: m_mappable_dst_vectors) + eval.record_write(*s); + + do_evalue_liveness(eval); +} + +void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const +{ + +} + +bool operator == (const Instruction& lhs, const Instruction& rhs) +{ + if (rhs.m_type != lhs.m_type) + return false; + + return lhs.is_equal_to(rhs); +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h new file mode 100644 index 000000000..0689a473a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h @@ -0,0 +1,155 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_r600_instr_h +#define sfn_r600_instr_h + +#include "sfn_instructionvisitor.h" +#include "sfn_value_gpr.h" +#include "sfn_defines.h" + +#include "gallium/drivers/r600/r600_isa.h" +#include <iostream> +#include <memory> +#include <vector> +#include <set> + +namespace r600 { + +struct rename_reg_pair { + bool valid; + bool used; + int new_reg; +}; + +class LiverangeEvaluator; +class ValueMap; + + +class ValueRemapper { +public: + ValueRemapper(std::vector<rename_reg_pair>& m, + ValueMap& values); + + void remap(PValue& v); + void remap(GPRVector& v); +private: + PValue remap_one_registers(PValue& reg); + + std::vector<rename_reg_pair>& m_map; + ValueMap& m_values; +}; + + +using OutputRegisterMap = std::map<unsigned, const GPRVector *>; + +class Instruction { +public: + enum instr_type { + alu, + exprt, + tex, + vtx, + wait_ack, + cond_if, + cond_else, + cond_endif, + lds_atomic, + lds_read, + lds_write, + loop_begin, + loop_end, + loop_break, + loop_continue, + phi, + streamout, + ring, + emit_vtx, + mem_wr_scratch, + gds, + rat, + tf_write, + block, + unknown + }; + + typedef std::shared_ptr<Instruction> Pointer; + + friend bool operator == (const Instruction& lhs, const Instruction& rhs); + + Instruction(instr_type t); + + virtual ~Instruction(); + + instr_type type() const { return m_type;} + + void print(std::ostream& os) const; + + virtual void replace_values(const ValueSet& candidates, PValue new_value); + + void evalue_liveness(LiverangeEvaluator& eval) const; + + void remap_registers(ValueRemapper& map); + + virtual bool accept(InstructionVisitor& visitor) = 0; + virtual bool accept(ConstInstructionVisitor& visitor) const = 0; + +protected: + + void add_remappable_src_value(PValue *v); + void add_remappable_src_value(GPRVector *v); + void add_remappable_dst_value(PValue *v); + void add_remappable_dst_value(GPRVector *v); + +private: + + virtual void do_evalue_liveness(LiverangeEvaluator& eval) const; + + virtual bool is_equal_to(const Instruction& lhs) const = 0; + + instr_type m_type; + + virtual void do_print(std::ostream& os) const = 0; + + std::vector<PValue*> m_mappable_src_registers; + std::vector<GPRVector*> m_mappable_src_vectors; + std::vector<PValue*> m_mappable_dst_registers; + std::vector<GPRVector*> m_mappable_dst_vectors; +}; + +using PInstruction=Instruction::Pointer; + +inline std::ostream& operator << (std::ostream& os, const Instruction& instr) +{ + instr.print(os); + return os; +} + +bool operator == (const Instruction& lhs, const Instruction& rhs); + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp new file mode 100644 index 000000000..212499faf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp @@ -0,0 +1,57 @@ +#include "sfn_instruction_block.h" + +namespace r600 { + + +InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number): + Instruction(block), + m_block_number(block_number), + m_nesting_depth(nesting_depth) +{ +} + +void InstructionBlock::emit(PInstruction instr) +{ + m_block.push_back(instr); +} + +void InstructionBlock::remap_registers(ValueRemapper& map) +{ + for(auto& i: m_block) + i->remap_registers(map); +} + +void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + for(auto& i: m_block) + i->evalue_liveness(eval); +} + +bool InstructionBlock::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == block); + auto& l = static_cast<const 
InstructionBlock&>(lhs);
+
+   if (m_block.size() != l.m_block.size())
+      return false;
+
+   if (m_block_number != l.m_block_number)
+      return false;
+
+   return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(),
+                     [](PInstruction ri, PInstruction li) {return *ri == *li;});
+}
+
+PInstruction InstructionBlock::last_instruction()
+{
+   return m_block.size() ? *m_block.rbegin() : nullptr;
+}
+
+void InstructionBlock::do_print(std::ostream& os) const
+{
+   /* indent by two spaces per nesting level */
+   std::string space(2 * m_nesting_depth, ' ');
+   for(auto& i: m_block)
+      os << space << *i << "\n";
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
new file mode 100644
index 000000000..fe40cc10c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
@@ -0,0 +1,82 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + + +#ifndef sfn_instruction_block_h +#define sfn_instruction_block_h + +#include "sfn_instruction_base.h" + +namespace r600 { + +class InstructionBlock : public Instruction +{ +public: + InstructionBlock(unsigned nesting_depth, unsigned block_number); + + void emit(PInstruction instr); + + + std::vector<PInstruction>::const_iterator begin() const { + return m_block.begin(); + } + std::vector<PInstruction>::const_iterator end() const { + return m_block.end(); + } + + void remap_registers(ValueRemapper& map); + + size_t size() const { + return m_block.size(); + } + + const PInstruction& operator [] (int i) const { + return m_block[i]; + } + + unsigned number() const { + return m_block_number; + } + + PInstruction last_instruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + std::vector<PInstruction> m_block; + + unsigned m_block_number; + unsigned m_nesting_depth; +}; + +} + +#endif // INSTRUCTIONBLOCK_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp new file mode 100644 index 000000000..455d6d630 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp @@ -0,0 +1,195 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_instruction_cf.h" +#include "sfn_liverange.h" + +namespace r600 { + +CFInstruction::CFInstruction(instr_type type):Instruction(type) +{ + +} + +IfElseInstruction::IfElseInstruction(instr_type type): + CFInstruction (type) +{ + +} + +IfInstruction::IfInstruction(AluInstruction *pred): + IfElseInstruction(cond_if), + m_pred(pred) +{ + PValue *v = m_pred->psrc(0); + add_remappable_src_value(v); + pred->set_cf_type(cf_alu_push_before); +} + +void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_if(); +} + +bool IfInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == cond_if); + const IfInstruction& l = static_cast<const IfInstruction&>(lhs); + return *l.m_pred == *m_pred; +} + +void IfInstruction::do_print(std::ostream& os) const +{ + os << "PRED = " << *m_pred << "\n"; + os << "IF (PRED)"; +} + +ElseInstruction::ElseInstruction(IfInstruction *jump_src): + IfElseInstruction(cond_else), + m_jump_src(jump_src) +{ +} + +void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_else(); +} + + +bool ElseInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != cond_else) + return false; + auto& l = static_cast<const ElseInstruction&>(lhs); + return (*m_jump_src == *l.m_jump_src); +} + +void ElseInstruction::do_print(std::ostream& os) const +{ + os << "ELSE"; +} + +IfElseEndInstruction::IfElseEndInstruction(): + IfElseInstruction(cond_endif) +{ +} + +void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_endif(); +} + +bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != cond_endif) + return false; + return true; +} + +void IfElseEndInstruction::do_print(std::ostream& os) const +{ + os << "ENDIF"; +} + +LoopBeginInstruction::LoopBeginInstruction(): + CFInstruction(loop_begin) +{ +} + +void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_begin(); +} + +bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == loop_begin); + return true; +} + +void LoopBeginInstruction::do_print(std::ostream& os) const +{ + os << "BGNLOOP"; +} + +LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start): + CFInstruction (loop_end), + m_start(start) +{ +} + +void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_end(); +} + +bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == loop_end); + const auto& other = static_cast<const LoopEndInstruction&>(lhs); + return *m_start == *other.m_start; +} + +void LoopEndInstruction::do_print(std::ostream& os) const +{ + os << "ENDLOOP"; +} + +LoopBreakInstruction::LoopBreakInstruction(): + CFInstruction (loop_break) +{ +} + +void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_break(); +} + +bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return true; +} + +void LoopBreakInstruction::do_print(std::ostream& os) const +{ + os << "BREAK"; +} + +LoopContInstruction::LoopContInstruction(): + CFInstruction (loop_continue) +{ +} + +bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return true; +} +void LoopContInstruction::do_print(std::ostream& os) const +{ + os << "CONTINUE"; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h 
b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h new file mode 100644 index 000000000..a13794803 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h @@ -0,0 +1,142 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_IFELSEINSTRUCTION_H +#define SFN_IFELSEINSTRUCTION_H + +#include "sfn_instruction_alu.h" + +namespace r600 { + +class CFInstruction : public Instruction { +protected: + CFInstruction(instr_type type); +}; + +class IfElseInstruction : public CFInstruction { +public: + IfElseInstruction(instr_type type); + +}; + +class IfInstruction : public IfElseInstruction { +public: + IfInstruction(AluInstruction *pred); + const AluInstruction& pred() const {return *m_pred;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + std::shared_ptr<AluInstruction> m_pred; +}; + +class ElseInstruction : public IfElseInstruction { +public: + ElseInstruction(IfInstruction *jump_src); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + IfElseInstruction *m_jump_src; +}; + +class IfElseEndInstruction : public IfElseInstruction { +public: + IfElseEndInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopBeginInstruction: public CFInstruction { +public: + LoopBeginInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool 
accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopEndInstruction: public CFInstruction { +public: + LoopEndInstruction(LoopBeginInstruction *start); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + LoopBeginInstruction *m_start; +}; + +class LoopBreakInstruction: public CFInstruction { +public: + LoopBreakInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopContInstruction: public CFInstruction { +public: + LoopContInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +} + +#endif // SFN_IFELSEINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp new file mode 100644 index 000000000..7d1d948a1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp @@ -0,0 +1,341 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#include "sfn_instruction_export.h"
+#include "sfn_liverange.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
+   Instruction(t),
+   m_value(value)
+{
+   add_remappable_src_value(&m_value);
+}
+
+void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+   // I wonder whether we can actually end up here ...
+   for (auto c: candidates) {
+      if (*c == *m_value.reg_i(c->chan()))
+         m_value.set_reg_i(c->chan(), new_value);
+   }
+
+   replace_values_child(candidates, new_value);
+}
+
+void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
+                                               UNUSED PValue new_value)
+{
+}
+
+void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
+                                                UNUSED ValueMap& values)
+{
+}
+
+ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
+   WriteoutInstruction(Instruction::exprt, value),
+   m_type(type),
+   m_loc(loc),
+   m_is_last(false)
+{
+}
+
+
+bool ExportInstruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == exprt);
+   const auto& oth = static_cast<const ExportInstruction&>(lhs);
+
+   return (gpr() == oth.gpr()) &&
+          (m_type == oth.m_type) &&
+          (m_loc == oth.m_loc) &&
+          (m_is_last == oth.m_is_last);
+}
+
+void ExportInstruction::do_print(std::ostream& os) const
+{
+   os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
+   switch (m_type) {
+   case et_pixel: os << "PIXEL "; break;
+   case et_pos: os << "POS "; break;
+   case et_param: os << "PARAM "; break;
+   }
+   os << m_loc << " " << gpr();
+}
+
+void ExportInstruction::update_output_map(OutputRegisterMap& map) const
+{
+   map[m_loc] = gpr_ptr();
+}
+
+void ExportInstruction::set_last()
+{
+   m_is_last = true;
+}
+
+WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
+                                                 int align, int align_offset, int writemask):
+   WriteoutInstruction (Instruction::mem_wr_scratch, value),
+   m_loc(loc),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(0)
+{
+}
+
+WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
+                                                 int align, int align_offset, int writemask, int array_size):
+   WriteoutInstruction (Instruction::mem_wr_scratch, value),
+   m_loc(0),
+   m_address(address),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(array_size - 1)
+{
+   add_remappable_src_value(&m_address);
+}
+
+bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
+{
+   if (lhs.type() != Instruction::mem_wr_scratch)
+      return false;
+   const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
+
+   if (m_address) {
+      if (!other.m_address)
+         return false;
+      if (*m_address != *other.m_address)
+         return false;
+   } else {
+      if (other.m_address)
+         return false;
+   }
+
+   return gpr() == other.gpr() &&
+         m_loc == other.m_loc &&
+         m_align == other.m_align &&
+         m_align_offset == other.m_align_offset &&
+         m_writemask == other.m_writemask;
+}
+
+static char *writemask_to_swizzle(int writemask, char *buf)
+{
+   const char *swz = "xyzw";
+   for (int i = 0; i < 4; ++i) {
+      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
+   }
+   /* terminate the caller-provided 5-byte buffer so it can be
+    * streamed as a C string */
+   buf[4] = 0;
+   return buf;
+}
+
+void WriteScratchInstruction::do_print(std::ostream& os) const
+{
+   char buf[5];
+
+   os << "MEM_SCRATCH_WRITE ";
+   if (m_address)
+      os << "@" << *m_address << "+";
+
+   os << m_loc << "."
<< writemask_to_swizzle(m_writemask, buf)
+      << " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
+{
+   if (!m_address)
+      return;
+
+   for (auto c: candidates) {
+      if (*c == *m_address)
+         m_address = new_value;
+   }
+}
+
+void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
+                                                    ValueMap& values)
+{
+   if (!m_address)
+      return;
+   sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n";
+   assert(m_address->type() == Value::gpr);
+   auto new_index = map[m_address->sel()];
+   if (new_index.valid)
+      m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
+   map[m_address->sel()].used = true;
+}
+
+StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
+                                         int array_base, int comp_mask, int out_buffer,
+                                         int stream):
+   WriteoutInstruction(Instruction::streamout, value),
+   m_element_size(num_components == 3 ? 3 : num_components - 1),
+   m_burst_count(1),
+   m_array_base(array_base),
+   m_array_size(0xfff),
+   m_writemask(comp_mask),
+   m_output_buffer(out_buffer),
+   m_stream(stream)
+{
+}
+
+unsigned StreamOutIntruction::op() const
+{
+   int op = 0;
+   switch (m_output_buffer) {
+   case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
+   case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
+   case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
+   case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
+   }
+   return 4 * m_stream + op;
+}
+
+bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == streamout);
+   const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
+
+   return gpr() == oth.gpr() &&
+      m_element_size == oth.m_element_size &&
+      m_burst_count == oth.m_burst_count &&
+      m_array_base == oth.m_array_base &&
+      m_array_size == oth.m_array_size &&
+      m_writemask == oth.m_writemask &&
+      m_output_buffer == oth.m_output_buffer &&
+      m_stream == oth.m_stream;
+}
+
+void StreamOutIntruction::do_print(std::ostream& os) const
+{
+   os << "WRITE STREAM(" << m_stream << ") " << gpr()
+      << " ES:" << m_element_size
+      << " BC:" << m_burst_count
+      << " BUF:" << m_output_buffer
+      << " ARRAY:" << m_array_base;
+   if (m_array_size != 0xfff)
+      os << "+" << m_array_size;
+}
+
+MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
+                                           const GPRVector& value,
+                                           unsigned base_addr, unsigned ncomp,
+                                           PValue index):
+   WriteoutInstruction(Instruction::ring, value),
+   m_ring_op(ring),
+   m_type(type),
+   m_base_address(base_addr),
+   m_num_comp(ncomp),
+   m_index(index)
+{
+   add_remappable_src_value(&m_index);
+
+   assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1 ||
+          m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
+   assert(m_num_comp <= 4);
+}
+
+unsigned MemRingOutIntruction::ncomp() const
+{
+   switch (m_num_comp) {
+   case 1: return 0;
+   case 2: return 1;
+   case 3:
+   case 4: return 3;
+   default:
+      assert(0);
+   }
+   return 3;
+}
+
+bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == ring);
+   const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
+
+   bool equal = gpr() == oth.gpr() &&
+                m_ring_op == oth.m_ring_op &&
+                m_type == oth.m_type &&
+                m_num_comp == oth.m_num_comp &&
+                m_base_address == oth.m_base_address;
+
+   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
+      equal &= (*m_index == *oth.m_index);
+   return equal;
+}
+
+static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK",
"WRITE_IDX_ACK" }; +void MemRingOutIntruction::do_print(std::ostream& os) const +{ + os << "MEM_RING " << m_ring_op; + os << " " << write_type_str[m_type] << " " << m_base_address; + os << " " << gpr(); + if (m_type == mem_write_ind || m_type == mem_write_ind_ack) + os << " @" << *m_index; + os << " ES:" << m_num_comp; +} + + +void MemRingOutIntruction::replace_values_child(const ValueSet& candidates, + PValue new_value) +{ + if (!m_index) + return; + + for (auto c: candidates) { + if (*c == *m_index) + m_index = new_value; + } +} + +void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values) +{ + if (!m_index) + return; + + assert(m_index->type() == Value::gpr); + auto new_index = map[m_index->sel()]; + if (new_index.valid) + m_index = values.get_or_inject(new_index.new_reg, m_index->chan()); + map[m_index->sel()].used = true; +} + +void MemRingOutIntruction::patch_ring(int stream, PValue index) +{ + const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3}; + + assert(stream < 4); + m_ring_op = ring_op[stream]; + m_index = index; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h new file mode 100644 index 000000000..6d014082d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h @@ -0,0 +1,185 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EXPORTINSTRUCTION_H +#define SFN_EXPORTINSTRUCTION_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class WriteoutInstruction: public Instruction { +public: + void replace_values(const ValueSet& candidates, PValue new_value) override; + const GPRVector& gpr() const {return m_value;} + const GPRVector *gpr_ptr() const {return &m_value;} +protected: + WriteoutInstruction(instr_type t, const GPRVector& value); +private: + virtual void replace_values_child(const ValueSet& candidates, PValue new_value); + virtual void remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values); + + GPRVector m_value; +}; + +class ExportInstruction : public WriteoutInstruction { +public: + enum ExportType { + et_pixel, + et_pos, + et_param + }; + + ExportInstruction(unsigned loc, const GPRVector& value, ExportType type); + void set_last(); + + ExportType export_type() const {return m_type;} + + unsigned location() const {return m_loc;} + bool is_last_export() const {return m_is_last;} + + void update_output_map(OutputRegisterMap& map) const; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + ExportType m_type; + unsigned m_loc; + bool m_is_last; +}; + +class WriteScratchInstruction : public WriteoutInstruction { +public: + + WriteScratchInstruction(unsigned loc, const GPRVector& value, int align, + int align_offset, int writemask); + WriteScratchInstruction(const PValue& address, const GPRVector& value, + int align, int align_offset, int writemask, int array_size); + unsigned location() const {return m_loc;} + + int write_mask() const { return m_writemask;} + int address() const { assert(m_address); return m_address->sel();} + bool indirect() const { return !!m_address;} + int array_size() const { return m_array_size;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + void replace_values_child(const ValueSet& candidates, PValue new_value) override; + void remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values)override; + + unsigned m_loc; + PValue m_address; + unsigned m_align; + unsigned m_align_offset; + unsigned m_writemask; + int m_array_size; +}; + + +class StreamOutIntruction: public WriteoutInstruction { +public: + StreamOutIntruction(const GPRVector& value, int num_components, + int array_base, int comp_mask, int out_buffer, + int stream); + int element_size() const { return m_element_size;} + int burst_count() const { return m_burst_count;} + int array_base() const { return m_array_base;} + int array_size() const { return m_array_size;} + int comp_mask() const { return m_writemask;} + unsigned op() const; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + int m_element_size; + int m_burst_count; + int m_array_base; + int m_array_size; + int m_writemask; + int m_output_buffer; + int 
m_stream; +}; + +enum EMemWriteType { + mem_write = 0, + mem_write_ind = 1, + mem_write_ack = 2, + mem_write_ind_ack = 3, +}; + +class MemRingOutIntruction: public WriteoutInstruction { +public: + + MemRingOutIntruction(ECFOpCode ring, EMemWriteType type, + const GPRVector& value, unsigned base_addr, + unsigned ncomp, PValue m_index); + + unsigned op() const{return m_ring_op;} + unsigned ncomp() const; + unsigned addr() const {return m_base_address;} + EMemWriteType type() const {return m_type;} + unsigned index_reg() const {return m_index->sel();} + unsigned array_base() const {return m_base_address; } + void replace_values_child(const ValueSet& candidates, PValue new_value) override; + void remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values) override; + void patch_ring(int stream, PValue index); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + ECFOpCode m_ring_op; + EMemWriteType m_type; + unsigned m_base_address; + unsigned m_num_comp; + PValue m_index; + +}; + +} + + +#endif // SFN_EXPORTINSTRUCTION_H
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp new file mode 100644 index 000000000..ec1a48887 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp @@ -0,0 +1,480 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_fetch.h" + +#include "gallium/drivers/r600/r600_pipe.h" + +namespace r600 { + +/* refactor this to add status create methods for specific tasks */ +FetchInstruction::FetchInstruction(EVFetchInstr op, + EVFetchType type, + GPRVector dst, + PValue src, int offset, + int buffer_id, PValue buffer_offset, + EBufferIndexMode cp_rel, + bool use_const_field): + Instruction(vtx), + m_vc_opcode(op), + m_fetch_type(type), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(offset), + m_is_mega_fetch(1), + m_mega_fetch_count(16), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(cp_rel), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(0), + m_buffer_offset(buffer_offset), + m_dest_swizzle({0,1,2,3}) +{ + if (use_const_field) { + m_flags.set(vtx_use_const_field); + m_data_format = fmt_invalid; + m_num_format = vtx_nf_norm; + } else { + m_flags.set(vtx_format_comp_signed); + m_data_format = fmt_32_32_32_32_float; + m_num_format = vtx_nf_scaled; + } + + add_remappable_src_value(&m_src); + add_remappable_src_value(&m_buffer_offset); + + add_remappable_dst_value(&m_dst); +} + +/* Resource query */ +FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + const PValue src, + const GPRVector dst, + uint32_t offset, + bool is_mega_fetch, + uint32_t mega_fetch_count, + uint32_t buffer_id, + uint32_t semantic_id, + + EBufferIndexMode buffer_index_mode, + bool uncached, + bool indexed, + int array_base, + int array_size, + int elm_size, + PValue buffer_offset, + const std::array<int, 4>& dest_swizzle): + Instruction(vtx), + m_vc_opcode(vc_opcode), + m_fetch_type(fetch_type), + m_data_format(data_format), + m_num_format(num_format), + m_endian_swap(endian_swap), + m_src(src), + m_dst(dst), + m_offset(offset), + 
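/* mega-fetch parameters are forwarded unchanged; their
+    * encoding is defined by the r600 ISA */
+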
m_is_mega_fetch(is_mega_fetch), + m_mega_fetch_count(mega_fetch_count), + m_buffer_id(buffer_id), + m_semantic_id(semantic_id), + m_buffer_index_mode(buffer_index_mode), + m_uncached(uncached), + m_indexed(indexed), + m_array_base(array_base), + m_array_size(array_size), + m_elm_size(elm_size), + m_buffer_offset(buffer_offset), + m_dest_swizzle(dest_swizzle) +{ + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + +FetchInstruction::FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, PValue buffer_offset, + EVTXDataFormat format, + EVFetchNumFormat num_format): + Instruction(vtx), + m_vc_opcode(vc_fetch), + m_fetch_type(no_index_offset), + m_data_format(format), + m_num_format(num_format), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(0), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(bim_none), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(1), + m_buffer_offset(buffer_offset), + m_dest_swizzle({0,1,2,3}) +{ + m_flags.set(vtx_format_comp_signed); + + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + + +/* Resource query */ +FetchInstruction::FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + EBufferIndexMode cp_rel): + Instruction(vtx), + m_vc_opcode(vc_get_buf_resinfo), + m_fetch_type(no_index_offset), + m_data_format(fmt_32_32_32_32), + m_num_format(vtx_nf_norm), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(16), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(cp_rel), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(0), + m_dest_swizzle({0,1,2,3}) +{ + m_flags.set(vtx_format_comp_signed); + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + +FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size): + Instruction(vtx), + m_vc_opcode(vc_read_scratch), + m_fetch_type(vertex_data), + m_data_format(fmt_32_32_32_32), + m_num_format(vtx_nf_int), + m_endian_swap(vtx_es_none), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(16), + m_buffer_id(0), + m_semantic_id(0), + m_buffer_index_mode(bim_none), + m_flags(0), + m_uncached(true), + m_array_base(0), + m_array_size(0), + m_elm_size(3), + m_dest_swizzle({0,1,2,3}) +{ + if (src->type() == Value::literal) { + const auto& lv = static_cast<const LiteralValue&>(*src); + m_array_base = lv.value(); + m_indexed = false; + m_src.reset(new GPRValue(0,0)); + m_array_size = 0; + } else { + m_array_base = 0; + m_src = src; + m_indexed = true; + m_array_size = scratch_size - 1; + } + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + +void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value) +{ + if (!m_src) + return; + for (auto c: candidates) { + for (int i = 0; i < 4; ++i) { + if (*c == *m_dst.reg_i(i)) + m_dst.set_reg_i(i, new_value); + } + if (*m_src == *c) + m_src = new_value; + } +} + + +bool FetchInstruction::is_equal_to(const Instruction& lhs) const +{ + auto& l = static_cast<const FetchInstruction&>(lhs); + if (m_src) { + if (!l.m_src) + return false; + if (*m_src != *l.m_src) + return 
false; + } else { + if (l.m_src) + return false; + } + + return m_vc_opcode == l.m_vc_opcode && + m_fetch_type == l.m_fetch_type && + m_data_format == l.m_data_format && + m_num_format == l.m_num_format && + m_endian_swap == l.m_endian_swap && + m_dst == l.m_dst && + m_offset == l.m_offset && + m_buffer_id == l.m_buffer_id && + m_semantic_id == l.m_semantic_id && + m_buffer_index_mode == l.m_buffer_index_mode && + m_flags == l.m_flags && + m_indexed == l.m_indexed && + m_uncached == l.m_uncached; +} + +void FetchInstruction::set_format(EVTXDataFormat fmt) +{ + m_data_format = fmt; +} + + +void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz) +{ + m_dest_swizzle = swz; +} + +void FetchInstruction::prelude_append(Instruction *instr) +{ + assert(instr); + m_prelude.push_back(PInstruction(instr)); +} + +const std::vector<PInstruction>& FetchInstruction::prelude() const +{ + return m_prelude; +} + +LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size): + FetchInstruction(dst, src, scratch_size) +{ +} + +FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src): + FetchInstruction(vc_fetch, + no_index_offset, + fmt_32, + vtx_nf_int, + vtx_es_none, + src, + dst, + 0, + false, + 0xf, + R600_IMAGE_IMMED_RESOURCE_OFFSET, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + PValue(), + {0,7,7,7}) +{ + set_flag(vtx_srf_mode); + set_flag(vtx_vpm); +} + +FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset): + FetchInstruction(vc_fetch, + no_index_offset, + fmt_32_32_32_32, + vtx_nf_scaled, + vtx_es_none, + src, + dst, + offset, + false, + 16, + R600_LDS_INFO_CONST_BUFFER, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + PValue(), + {0,1,2,3}) +{ + set_flag(vtx_srf_mode); + set_flag(vtx_format_comp_signed); +} + + +static const char *fmt_descr[64] = { + "INVALID", + "8", + "4_4", + "3_3_2", + "RESERVED_4", + "16", + "16F", + "8_8", + "5_6_5", + "6_5_5", + "1_5_5_5", + "4_4_4_4", + "5_5_5_1", + "32", + "32F", + "16_16", + "16_16F", + "8_24", + "8_24F", + "24_8", + "24_8F", + "10_11_11", + "10_11_11F", + "11_11_10", + "11_11_10F", + "2_10_10_10", + "8_8_8_8", + "10_10_10_2", + "X24_8_32F", + "32_32", + "32_32F", + "16_16_16_16", + "16_16_16_16F", + "RESERVED_33", + "32_32_32_32", + "32_32_32_32F", + "RESERVED_36", + "1", + "1_REVERSED", + "GB_GR", + "BG_RG", + "32_AS_8", + "32_AS_8_8", + "5_9_9_9_SHAREDEXP", + "8_8_8", + "16_16_16", + "16_16_16F", + "32_32_32", + "32_32_32F", + "BC1", + "BC2", + "BC3", + "BC4", + "BC5", + "APC0", + "APC1", + "APC2", + "APC3", + "APC4", + "APC5", + "APC6", + "APC7", + "CTX1", + "RESERVED_63" +}; + + +void FetchInstruction::do_print(std::ostream& os) const +{ + static const std::string num_format_char[] = {"norm", "int", "scaled"}; + static const std::string endian_swap_code[] = { + "noswap", "8in16", "8in32" + }; + static const char buffer_index_mode_char[] = "_01E"; + static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero", + "nostride", "AC", "TC", "VPM"}; + switch (m_vc_opcode) { + case vc_fetch: + os << "Fetch " << m_dst; + break; + case vc_semantic: + os << "Fetch Semantic ID:" << m_semantic_id; + break; + case vc_get_buf_resinfo: + os << "Fetch BufResinfo:" << m_dst; + break; + case vc_read_scratch: + os << "MEM_READ_SCRATCH:" << m_dst; + break; + default: + os << "Fetch ERROR"; + return; + } + + os << ", " << *m_src; + + if (m_offset) + os << "+" << m_offset; + + os << " BUFID:" << m_buffer_id + << " FMT:(" << fmt_descr[m_data_format] + << " " << num_format_char[m_num_format] + 
<< " " << endian_swap_code[m_endian_swap] + << ")"; + if (m_buffer_index_mode > 0) + os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode]; + + + if (m_is_mega_fetch) + os << " MFC:" << m_mega_fetch_count; + else + os << " mfc*:" << m_mega_fetch_count; + + if (m_flags.any()) { + os << " Flags:"; + for( int i = 0; i < vtx_unknown; ++i) { + if (m_flags.test(i)) + os << ' ' << flag_string[i]; + } + } +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h new file mode 100644 index 000000000..71a3f69f3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h @@ -0,0 +1,187 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_INSTRUCTION_FETCH_H +#define SFN_INSTRUCTION_FETCH_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class FetchInstruction : public Instruction { +public: + + FetchInstruction(EVFetchInstr vc_opcode, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + const PValue src, + const GPRVector dst, + uint32_t offset, + bool is_mega_fetch, + uint32_t mega_fetch_count, + uint32_t buffer_id, + uint32_t semantic_id, + + EBufferIndexMode buffer_index_mode, + bool uncached, + bool indexed, + int array_base, + int array_size, + int elm_size, + PValue buffer_offset, + const std::array<int, 4>& dest_swizzle); + + FetchInstruction(EVFetchInstr op, + EVFetchType type, + GPRVector dst, + PValue src, int offset, + int buffer_id, PValue buffer_offset, + EBufferIndexMode cp_rel, + bool use_const_field = false); + + FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + PValue buffer_offset, + EVTXDataFormat format, + EVFetchNumFormat num_format); + + FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + EBufferIndexMode cp_rel); + + FetchInstruction(GPRVector dst, PValue src, int scratch_size); + + void replace_values(const ValueSet& candidates, PValue new_value) override; + EVFetchInstr vc_opcode() const { return m_vc_opcode;} + EVFetchType fetch_type() const { return m_fetch_type;} + + EVTXDataFormat data_format() const { return m_data_format;} + EVFetchNumFormat num_format() const { return m_num_format;} + EVFetchEndianSwap endian_swap() const { return m_endian_swap;} + + const Value& src() const { return *m_src;} + const GPRVector& dst() const { return m_dst;} + uint32_t offset() const { return m_offset;} + + bool is_mega_fetchconst() { return m_is_mega_fetch;} + uint32_t mega_fetch_count() const { return m_mega_fetch_count;} + + uint32_t buffer_id() const { return m_buffer_id;} + uint32_t semantic_id() const { return m_semantic_id;} + EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;} + + bool is_signed() const { return m_flags.test(vtx_format_comp_signed);} + bool use_const_fields() const { return m_flags.test(vtx_use_const_field);} + + bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);} + + void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);} + + bool uncached() const {return m_uncached; } + bool indexed() const {return m_indexed; } + int array_base()const {return m_array_base; } + int array_size() const {return m_array_size; } + int elm_size() const {return m_elm_size; } + + void set_buffer_offset(PValue buffer_offset) { + m_buffer_offset = buffer_offset; + add_remappable_src_value(&m_buffer_offset); + } + PValue buffer_offset() const { return m_buffer_offset; } + + void set_dest_swizzle(const std::array<int,4>& swz); + void set_format(EVTXDataFormat fmt); + + int swz(int idx) const { return m_dest_swizzle[idx];} + + bool use_tc() const {return m_flags.test(vtx_use_tc);} + + bool use_vpm() const {return m_flags.test(vtx_vpm);} + + void prelude_append(Instruction *instr); + + const std::vector<PInstruction>& prelude() const; + + bool has_prelude() const {return !m_prelude.empty();} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + EVFetchInstr m_vc_opcode; + EVFetchType m_fetch_type; + + 
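/* layout of the fetched result: hardware data format, numeric
+    * conversion and endian swap */
+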
EVTXDataFormat m_data_format; + EVFetchNumFormat m_num_format; + EVFetchEndianSwap m_endian_swap; + + PValue m_src; + GPRVector m_dst; + uint32_t m_offset; + + bool m_is_mega_fetch; + uint32_t m_mega_fetch_count; + + uint32_t m_buffer_id; + uint32_t m_semantic_id; + + EBufferIndexMode m_buffer_index_mode; + std::bitset<16> m_flags; + bool m_uncached; + bool m_indexed; + int m_array_base; + int m_array_size; + int m_elm_size; + PValue m_buffer_offset; + std::array<int, 4> m_dest_swizzle; + std::vector<PInstruction> m_prelude; +}; + +class LoadFromScratch: public FetchInstruction { +public: + LoadFromScratch(GPRVector dst, PValue src, int scratch_size); +}; + +class FetchGDSOpResult : public FetchInstruction { +public: + FetchGDSOpResult(const GPRVector dst, const PValue src); +}; + +class FetchTCSIOParam : public FetchInstruction { +public: + FetchTCSIOParam(GPRVector dst, PValue src, int offset); +}; + +} + +#endif // SFN_INSTRUCTION_FETCH_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp new file mode 100644 index 000000000..095cd40d6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp @@ -0,0 +1,180 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_instruction_gds.h" +#include "sfn_liverange.h" + +namespace r600 { + +GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, + const PValue& value2, const PValue& uav_id, int uav_base): + Instruction(gds), + m_op(op), + m_src(value), + m_src2(value2), + m_dest(dest), + m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}), + m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}), + m_buffer_index_mode(bim_none), + m_uav_id(uav_id), + m_uav_base(uav_base), + m_flags(0) +{ + add_remappable_src_value(&m_src); + add_remappable_src_value(&m_src2); + add_remappable_src_value(&m_uav_id); + add_remappable_dst_value(&m_dest); + m_dest_swizzle[0] = m_dest.chan_i(0); +} + +GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, + const PValue& uav_id, int uav_base): + GDSInstr(op, dest, value, PValue(), uav_id, uav_base) +{ + assert(value); + m_src_swizzle[1] = value->chan(); + m_src_swizzle[2] = PIPE_SWIZZLE_0; +} + +GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, + const PValue& uav_id, int uav_base): + GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base) +{ + m_src_swizzle[1] = PIPE_SWIZZLE_0; +} + +bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const +{ + return false; +} + +void GDSInstr::do_print(std::ostream& os) const +{ + const char *swz = "xyzw01?_"; + os << lds_ops.at(m_op).name << " R" << m_dest.sel() << "."; + for (int i = 0; i < 4; ++i) { + os << swz[m_dest_swizzle[i]]; + } + if (m_src) + os << " " << *m_src; + + os << " UAV:" << *m_uav_id; +} + +RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op, + const GPRVector& data, const GPRVector& index, + int rat_id, const PValue& rat_id_offset, + int burst_count, int comp_mask, int element_size, bool ack): + Instruction(rat), + m_cf_opcode(cf_opcode), + m_rat_op(rat_op), + m_data(data), + m_index(index), + m_rat_id(rat_id), + m_rat_id_offset(rat_id_offset), + m_burst_count(burst_count), + m_comp_mask(comp_mask), + m_element_size(element_size), + m_need_ack(ack) +{ + add_remappable_src_value(&m_data); + add_remappable_src_value(&m_rat_id_offset); + add_remappable_src_value(&m_index); +} + +bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return false; +} + +void RatInstruction::do_print(std::ostream& os) const +{ + os << "MEM_RAT RAT(" << m_rat_id; + if (m_rat_id_offset) + os << "+" << *m_rat_id_offset; + os << ") @" << m_index; + os << " OP:" << m_rat_op << " " << m_data; + os << " BC:" << m_burst_count + << " MASK:" << m_comp_mask + << " ES:" << m_element_size; + if (m_need_ack) + os << " ACK"; +} + +RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode) +{ + switch (opcode) { + case nir_intrinsic_ssbo_atomic_add: + return ADD_RTN; + case nir_intrinsic_ssbo_atomic_and: + return AND_RTN; + case nir_intrinsic_ssbo_atomic_exchange: + return XCHG_RTN; + case nir_intrinsic_ssbo_atomic_umax: + return MAX_UINT_RTN; + case nir_intrinsic_ssbo_atomic_umin: + return MIN_UINT_RTN; + case nir_intrinsic_ssbo_atomic_imax: + return MAX_INT_RTN; + case nir_intrinsic_ssbo_atomic_imin: + return MIN_INT_RTN; + case nir_intrinsic_ssbo_atomic_xor: + return XOR_RTN; + default: + return UNSUPPORTED; + } +} + +GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value): + Instruction(tf_write), + m_value(value) +{ + add_remappable_src_value(&m_value); +} + +void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value) +{ + for (auto& c: candidates) { + for (int i = 0; i < 4; ++i) { + if (*c == *m_value[i]) + m_value[i] = 
new_value; + } + } +} + + +bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const +{ + auto& other = static_cast<const GDSStoreTessFactor&>(lhs); + return m_value == other.m_value; +} + +void GDSStoreTessFactor::do_print(std::ostream& os) const +{ + os << "TF_WRITE " << m_value; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h new file mode 100644 index 000000000..6f8e0f200 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h @@ -0,0 +1,225 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef SFN_GDSINSTR_H
+#define SFN_GDSINSTR_H
+
+#include "sfn_instruction_base.h"
+
+#include <bitset>
+
+namespace r600 {
+
+class GDSInstr : public Instruction
+{
+public:
+   GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+            const PValue &uav_id, int uav_base);
+   GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+            const PValue& value2, const PValue &uav_id, int uav_base);
+   GDSInstr(ESDOp op, const GPRVector& dest, const PValue &uav_id, int uav_base);
+
+   ESDOp op() const {return m_op;}
+
+   int src_sel() const {
+      if (!m_src)
+         return 0;
+
+      assert(m_src->type() == Value::gpr);
+      return m_src->sel();
+   }
+
+   int src2_chan() const {
+      if (!m_src2)
+         return 0;
+
+      assert(m_src2->type() == Value::gpr);
+      return m_src2->chan();
+   }
+
+   int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
+
+   int dest_sel() const {
+      return m_dest.sel();
+   }
+
+   int dest_swizzle(int i) const {
+      if (i < 4)
+         return m_dest_swizzle[i];
+      return 7;
+   }
+
+   void set_dest_swizzle(const std::array<int,4>& swz) {
+      m_dest_swizzle = swz;
+   }
+
+   PValue uav_id() const {return m_uav_id;}
+   int uav_base() const {return m_uav_base;}
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+
+   bool is_equal_to(const Instruction& lhs) const override;
+   void do_print(std::ostream& os) const override;
+
+   ESDOp m_op;
+
+   PValue m_src;
+   PValue m_src2;
+   GPRVector m_dest;
+   std::array <int, 4> m_dest_swizzle;
+   std::array <int, 3> m_src_swizzle;
+
+   EBufferIndexMode m_buffer_index_mode;
+   PValue m_uav_id;
+   int m_uav_base;
+   std::bitset<8> m_flags;
+
+};
+
+class RatInstruction : public Instruction {
+
+public:
+   enum ERatOp {
+      NOP,
+      STORE_TYPED,
+      STORE_RAW,
+      STORE_RAW_FDENORM,
+      CMPXCHG_INT,
+      CMPXCHG_FLT,
+      CMPXCHG_FDENORM,
+      ADD,
+      SUB,
+      RSUB,
+      MIN_INT,
+      MIN_UINT,
+      MAX_INT,
+      MAX_UINT,
+      AND,
+      OR,
+      XOR,
+      MSKOR,
+      INC_UINT,
+      DEC_UINT,
+      NOP_RTN = 32,
+      XCHG_RTN = 34,
+      XCHG_FDENORM_RTN,
+      CMPXCHG_INT_RTN,
+      CMPXCHG_FLT_RTN,
+      CMPXCHG_FDENORM_RTN,
+      ADD_RTN,
+      SUB_RTN,
+      RSUB_RTN,
+      MIN_INT_RTN,
+      MIN_UINT_RTN,
+      MAX_INT_RTN,
+      MAX_UINT_RTN,
+      AND_RTN,
+      OR_RTN,
+      XOR_RTN,
+      MSKOR_RTN,
+      UINT_RTN,
+      UNSUPPORTED
+   };
+
+   RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
+                  const GPRVector& data, const GPRVector& index,
+                  int rat_id, const PValue& rat_id_offset,
+                  int burst_count, int comp_mask, int element_size,
+                  bool ack);
+
+   PValue rat_id_offset() const { return m_rat_id_offset;}
+   int rat_id() const { return m_rat_id;}
+
+   ERatOp rat_op() const {return m_rat_op;}
+
+   int data_gpr() const {return m_data.sel();}
+   int index_gpr() const {return m_index.sel();}
+   int elm_size() const {return m_element_size;}
+
+   int comp_mask() const {return m_comp_mask;}
+
+   bool need_ack() const {return m_need_ack;}
+   int burst_count() const {return m_burst_count;}
+
+   static ERatOp opcode(nir_intrinsic_op opcode);
+
+   int data_swz(int chan) const {return m_data.chan_i(chan);}
+
+   ECFOpCode cf_opcode() const { return m_cf_opcode;}
+
+   void set_ack() {m_need_ack = true; }
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+
+private:
+
+   bool is_equal_to(const Instruction& lhs) const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_cf_opcode;
+   ERatOp m_rat_op;
+
+   GPRVector m_data;
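+   /* index operand: GPR vector holding the address/element index the
+    * RAT operation targets */
+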
+   GPRVector m_index;
+
+   int m_rat_id;
+   PValue m_rat_id_offset;
+   int m_burst_count;
+   int m_comp_mask;
+   int m_element_size;
+
+   std::bitset<8> m_flags;
+
+   bool m_need_ack;
+
+};
+
+class GDSStoreTessFactor : public Instruction {
+public:
+   GDSStoreTessFactor(GPRVector& value);
+   int sel() const {return m_value.sel();}
+   int chan(int i) const {return m_value.chan_i(i);}
+
+   void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+   bool is_equal_to(const Instruction& lhs) const override;
+   void do_print(std::ostream& os) const override;
+
+   GPRVector m_value;
+};
+
+}
+
+#endif // SFN_GDSINSTR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
new file mode 100644
index 000000000..b77461abc
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
@@ -0,0 +1,151 @@
+#include "sfn_instruction_lds.h"
+
+namespace r600 {
+
+void LDSReadInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS Read [";
+   for (auto& v : m_dest_value)
+      os << *v << " ";
+   os << "], ";
+   for (auto& a : m_address)
+      os << *a << " ";
+}
+
+LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
+   Instruction(lds_read),
+   m_address(address),
+   m_dest_value(value)
+{
+   assert(address.size() == value.size());
+
+   for (unsigned i = 0; i < address.size(); ++i) {
+      add_remappable_src_value(&m_address[i]);
+      add_remappable_dst_value(&m_dest_value[i]);
+   }
+}
+
+void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+   for (auto& c : candidates) {
+      for (auto& d: m_dest_value) {
+         if (*c == *d)
+            d = new_value;
+      }
+
+      for (auto& a: m_address) {
+         if (*c == *a)
+            a = new_value;
+      }
+   }
+}
+
+bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
+{
+   auto& other = static_cast<const LDSReadInstruction&>(lhs);
+   return m_address == other.m_address &&
+          m_dest_value == other.m_dest_value;
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
+   Instruction(lds_atomic),
+   m_address(address),
+   m_dest_value(dest),
+   m_src0_value(src0),
+   m_src1_value(src1),
+   m_opcode(op)
+{
+   add_remappable_src_value(&m_src0_value);
+   add_remappable_src_value(&m_src1_value);
+   add_remappable_src_value(&m_address);
+   add_remappable_dst_value(&m_dest_value);
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
+   LDSAtomicInstruction(dest, src0, PValue(), address, op)
+{
+}
+
+void LDSAtomicInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS " << m_opcode << " " << *m_dest_value << " ";
+   os << "[" << *m_address << "] " << *m_src0_value;
+   if (m_src1_value)
+      os << ", " << *m_src1_value;
+}
+
+bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
+{
+   auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
+
+   return m_opcode == other.m_opcode &&
+          *m_dest_value == *other.m_dest_value &&
+          *m_src0_value == *other.m_src0_value &&
+          *m_address == *other.m_address &&
+          ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
+           (!m_src1_value && !other.m_src1_value));
+}
+
+LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
+   LDSWriteInstruction(address, idx_offset, value0, PValue())
+{
+}
+
+LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1):
+   Instruction(lds_write),
+   m_address(address),
+   m_value0(value0),
+   m_value1(value1),
+   m_idx_offset(idx_offset)
+{
+   add_remappable_src_value(&m_address);
+   add_remappable_src_value(&m_value0);
+   if (m_value1)
+      add_remappable_src_value(&m_value1);
+}
+
+void LDSWriteInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS Write" << num_components()
+      << " " << address() << ", " << value0();
+   if (num_components() > 1)
+      os << ", " << value1();
+}
+
+void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+   for (auto c: candidates) {
+      if (*c == *m_address)
+         m_address = new_value;
+
+      if (*c == *m_value0)
+         m_value0 = new_value;
+
+      if (*c == *m_value1)
+         m_value1 = new_value;
+   }
+}
+
+bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
+{
+   auto& other = static_cast<const LDSWriteInstruction&>(lhs);
+
+   if (m_value1) {
+      if (!other.m_value1)
+         return false;
+      if (*m_value1 != *other.m_value1)
+         return false;
+   } else {
+      if (other.m_value1)
+         return false;
+   }
+
+   return (*m_value0 == *other.m_value0 &&
+           *m_address == *other.m_address);
+}
+
+} // namespace r600
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
new file mode 100644
index 000000000..96439a7c3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
@@ -0,0 +1,82 @@
+#ifndef LDSINSTRUCTION_H
+#define LDSINSTRUCTION_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class LDSReadInstruction : public Instruction {
+public:
+   LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value);
+   void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+   unsigned num_values() const { return m_dest_value.size();}
+   const Value& address(unsigned i) const { return *m_address[i];}
+   const Value& dest(unsigned i) const { return *m_dest_value[i];}
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+   void do_print(std::ostream& os) const override;
+   bool is_equal_to(const Instruction& lhs) const override;
+
+   std::vector<PValue> m_address;
+   std::vector<PValue> m_dest_value;
+};
+
+class LDSAtomicInstruction : public Instruction {
+public:
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
+
+   const Value& address() const { return *m_address;}
+   const Value& dest() const { return *m_dest_value;}
+   const Value& src0() const { return *m_src0_value;}
+   const PValue& src1() const { return m_src1_value;}
+   unsigned op() const {return m_opcode;}
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+   void do_print(std::ostream& os) const override;
+   bool is_equal_to(const Instruction& lhs) const override;
+
+   PValue m_address;
+   PValue m_dest_value;
+   PValue m_src0_value;
+   PValue m_src1_value;
+   unsigned m_opcode;
+};
+
+class LDSWriteInstruction : public Instruction {
+public:
+   LDSWriteInstruction(PValue address, unsigned 
idx_offset, PValue value0); + LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1); + + const Value& address() const {return *m_address;}; + const Value& value0() const { return *m_value0;} + const Value& value1() const { return *m_value1;} + unsigned num_components() const { return m_value1 ? 2 : 1;} + unsigned idx_offset() const {return m_idx_offset;}; + + void replace_values(const ValueSet& candidates, PValue new_value) override; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Instruction& lhs) const override; + + PValue m_address; + PValue m_value0; + PValue m_value1; + unsigned m_idx_offset; + +}; + +} + +#endif // LDSINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp new file mode 100644 index 000000000..1c1a98c40 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp @@ -0,0 +1,68 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_misc.h" + +namespace r600 { +EmitVertex::EmitVertex(int stream, bool cut): + Instruction (emit_vtx), + m_stream(stream), + m_cut(cut) +{ + +} + +bool EmitVertex::is_equal_to(const Instruction& lhs) const +{ + auto& oth = static_cast<const EmitVertex&>(lhs); + return oth.m_stream == m_stream && + oth.m_cut == m_cut; +} + +void EmitVertex::do_print(std::ostream& os) const +{ + os << (m_cut ? 
"EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream; +} + +WaitAck::WaitAck(int nack): + Instruction (wait_ack), + m_nack(nack) +{ + +} + +bool WaitAck::is_equal_to(const Instruction& lhs) const +{ + const auto& l = static_cast<const WaitAck&>(lhs); + return m_nack == l.m_nack; +} + +void WaitAck::do_print(std::ostream& os) const +{ + os << "WAIT_ACK @" << m_nack; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h new file mode 100644 index 000000000..d322b4aa8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h @@ -0,0 +1,69 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_INSTRUCTION_MISC_H +#define SFN_INSTRUCTION_MISC_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class EmitVertex : public Instruction { +public: + EmitVertex(int stream, bool cut); + ECFOpCode op() const {return m_cut ? 
cf_cut_vertex: cf_emit_vertex;} + int stream() const { return m_stream;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + int m_stream; + bool m_cut; +}; + +class WaitAck : public Instruction { +public: + WaitAck(int nack); + ECFOpCode op() const {return cf_wait_ack;} + int n_ack() const {return m_nack;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + int m_nack; +}; + +} + +#endif // SFN_INSTRUCTION_MISC_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp new file mode 100644 index 000000000..8fc5469f3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp @@ -0,0 +1,414 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_tex.h" +#include "nir_builder.h" +#include "nir_builtin_builder.h" + +namespace r600 { + +TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src, + unsigned sid, unsigned rid, PValue sampler_offset): + Instruction(tex), + m_opcode(op), + m_dst(dest), + m_src(src), + m_sampler_id(sid), + m_resource_id(rid), + m_flags(0), + m_inst_mode(0), + m_dest_swizzle{0,1,2,3}, + m_sampler_offset(sampler_offset) + +{ + memset(m_offset, 0, sizeof (m_offset)); + + add_remappable_src_value(&m_src); + add_remappable_src_value(&m_sampler_offset); + add_remappable_dst_value(&m_dst); +} + +void TexInstruction::set_gather_comp(int cmp) +{ + m_inst_mode = cmp; +} + +void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value) +{ + // I wonder whether we can actually end up here ... 
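+   // (The substitution below works per channel: each candidate is compared
+   // against the value occupying its channel slot in the source and
+   // destination vectors, i.e. a candidate with chan() == 1 is tested
+   // against reg_i(1) of m_src and m_dst.)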
+ for (auto c: candidates) { + if (*c == *m_src.reg_i(c->chan())) + m_src.set_reg_i(c->chan(), new_value); + if (*c == *m_dst.reg_i(c->chan())) + m_dst.set_reg_i(c->chan(), new_value); + } +} + +void TexInstruction::set_offset(unsigned index, int32_t val) +{ + assert(index < 3); + m_offset[index] = val; +} + +int TexInstruction::get_offset(unsigned index) const +{ + assert(index < 3); + return (m_offset[index] << 1 & 0x1f); +} + +bool TexInstruction::is_equal_to(const Instruction& rhs) const +{ + assert(rhs.type() == tex); + const auto& r = static_cast<const TexInstruction&>(rhs); + return (m_opcode == r.m_opcode && + m_dst == r.m_dst && + m_src == r.m_src && + m_sampler_id == r.m_sampler_id && + m_resource_id == r.m_resource_id); +} + +void TexInstruction::do_print(std::ostream& os) const +{ + const char *map_swz = "xyzw01?_"; + os << opname(m_opcode) << " R" << m_dst.sel() << "."; + for (int i = 0; i < 4; ++i) + os << map_swz[m_dest_swizzle[i]]; + + os << " " << m_src + << " RESID:" << m_resource_id << " SAMPLER:" + << m_sampler_id; +} + +const char *TexInstruction::opname(Opcode op) +{ + switch (op) { + case ld: return "LD"; + case get_resinfo: return "GET_TEXTURE_RESINFO"; + case get_nsampled: return "GET_NUMBER_OF_SAMPLES"; + case get_tex_lod: return "GET_LOD"; + case get_gradient_h: return "GET_GRADIENTS_H"; + case get_gradient_v: return "GET_GRADIENTS_V"; + case set_offsets: return "SET_TEXTURE_OFFSETS"; + case keep_gradients: return "KEEP_GRADIENTS"; + case set_gradient_h: return "SET_GRADIENTS_H"; + case set_gradient_v: return "SET_GRADIENTS_V"; + case sample: return "SAMPLE"; + case sample_l: return "SAMPLE_L"; + case sample_lb: return "SAMPLE_LB"; + case sample_lz: return "SAMPLE_LZ"; + case sample_g: return "SAMPLE_G"; + case sample_g_lb: return "SAMPLE_G_L"; + case gather4: return "GATHER4"; + case gather4_o: return "GATHER4_O"; + case sample_c: return "SAMPLE_C"; + case sample_c_l: return "SAMPLE_C_L"; + case sample_c_lb: return "SAMPLE_C_LB"; + case sample_c_lz: return "SAMPLE_C_LZ"; + case sample_c_g: return "SAMPLE_C_G"; + case sample_c_g_lb: return "SAMPLE_C_G_L"; + case gather4_c: return "GATHER4_C"; + case gather4_c_o: return "OP_GATHER4_C_O"; + } + return "ERROR"; +} + + + +static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_before_instr(&tex->instr); + + nir_ssa_def * size = nir_i2f32(b, nir_get_texture_size(b, tex)); + nir_ssa_def *scale = nir_frcp(b, size); + + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *corr = nullptr; + if (unlikely(tex->array_is_lowered_cube)) { + auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3), + nir_fmul(b, nir_imm_float(b, -0.5f), scale)); + corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1), + nir_channel( + b, tex->src[coord_index].src.ssa, 2)); + } else { + corr = nir_fadd(b, + nir_fmul(b, nir_imm_float(b, -0.5f), scale), + tex->src[coord_index].src.ssa); + } + + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, + nir_src_for_ssa(corr)); + return true; +} + +static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_before_instr(&tex->instr); + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *corr = nullptr; + if (unlikely(tex->array_is_lowered_cube)) { + auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3), + nir_imm_float(b, -0.5f)); + corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 
1), + nir_channel(b, tex->src[coord_index].src.ssa, 2)); + } else { + corr = nir_fadd(b, tex->src[coord_index].src.ssa, + nir_imm_float(b, -0.5f)); + } + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, + nir_src_for_ssa(corr)); + return true; +} + +static bool +r600_nir_lower_int_tg4_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->op == nir_texop_tg4 && + tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) { + if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) { + if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT) + lower_coord_shift_normalized(&b, tex); + else + lower_coord_shift_unnormalized(&b, tex); + progress = true; + } + } + } + } + } + return progress; +} + +/* + * This lowering pass works around a bug in r600 when doing TG4 from + * integral valued samplers. + + * Gather4 should follow the same rules as bilinear filtering, but the hardware + * incorrectly forces nearest filtering if the texture format is integer. + * The only effect it has on Gather4, which always returns 4 texels for + * bilinear filtering, is that the final coordinates are off by 0.5 of + * the texel size. +*/ + +bool r600_nir_lower_int_tg4(nir_shader *shader) +{ + bool progress = false; + bool need_lowering = false; + + nir_foreach_uniform_variable(var, shader) { + if (var->type->is_sampler()) { + if (glsl_base_type_is_integer(var->type->sampled_type)) { + need_lowering = true; + } + } + } + + if (need_lowering) { + nir_foreach_function(function, shader) { + if (function->impl && r600_nir_lower_int_tg4_impl(function->impl)) + progress = true; + } + } + + return progress; +} + +static +bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); + + b->cursor = nir_before_instr(&tex->instr); + + int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); + int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); + int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); + assert (lod_idx >= 0 || bias_idx >= 0); + + nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); + nir_ssa_def *lod = (lod_idx >= 0) ? + nir_ssa_for_src(b, tex->src[lod_idx].src, 1) : + nir_get_texture_lod(b, tex); + + if (bias_idx >= 0) + lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); + + if (min_lod_idx >= 0) + lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); + + /* max lod? 
*/ + + nir_ssa_def *lambda_exp = nir_fexp2(b, lod); + nir_ssa_def *scale = NULL; + + if (tex->is_array) { + int cmp_mask = (1 << (size->num_components - 1)) - 1; + scale = nir_frcp(b, nir_channels(b, size, + (nir_component_mask_t)cmp_mask)); + } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0}; + scale = nir_frcp(b, nir_channels(b, size, 1)); + scale = nir_swizzle(b, scale, swizzle, 3); + } + + nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale); + + if (lod_idx >= 0) + nir_tex_instr_remove_src(tex, lod_idx); + if (bias_idx >= 0) + nir_tex_instr_remove_src(tex, bias_idx); + if (min_lod_idx >= 0) + nir_tex_instr_remove_src(tex, min_lod_idx); + nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad)); + nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad)); + + tex->op = nir_texop_txd; + return true; +} + + +static bool +r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + if (tex->is_shadow && + (tex->op == nir_texop_txl || tex->op == nir_texop_txb) && + (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)) + progress |= lower_txl_txf_array_or_cube(&b, tex); + } + } + } + return progress; +} + +bool +r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader) +{ + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl)) + progress = true; + } + return progress; +} + +static bool +r600_nir_lower_cube_to_2darray_filer(const nir_instr *instr, const void *_options) +{ + if (instr->type != nir_instr_type_tex) + return false; + + auto tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + return false; + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txf: + case nir_texop_txl: + case nir_texop_lod: + case nir_texop_tg4: + case nir_texop_txd: + return true; + default: + return false; + } +} + +static nir_ssa_def * +r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + b->cursor = nir_before_instr(instr); + + auto tex = nir_instr_as_tex(instr); + int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); + assert(coord_idx >= 0); + + auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7)); + auto xy = nir_fmad(b, + nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)), + nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))), + nir_imm_float(b, 1.5)); + + nir_ssa_def *z = nir_channel(b, cubed, 3); + if (tex->is_array) { + auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3)); + z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0), + z); + } + + if (tex->op == nir_texop_txd) { + int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx); + auto zero_dot_5 = nir_imm_float(b, 0.5); + nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src, + nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3), zero_dot_5))); + + int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy); + nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src, + nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3), zero_dot_5))); + } + + auto new_coord = 
nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z); + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src, + nir_src_for_ssa(new_coord)); + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + tex->is_array = true; + tex->array_is_lowered_cube = true; + + tex->coord_components = 3; + + return NIR_LOWER_INSTR_PROGRESS; +} + +bool +r600_nir_lower_cube_to_2darray(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_nir_lower_cube_to_2darray_filer, + r600_nir_lower_cube_to_2darray_impl, nullptr); +} + + + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h new file mode 100644 index 000000000..2fe7cbad7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h @@ -0,0 +1,143 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef INSTRUCTION_TEX_H +#define INSTRUCTION_TEX_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class TexInstruction : public Instruction { +public: + enum Opcode { + ld = FETCH_OP_LD, + get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO, + get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES, + get_tex_lod = FETCH_OP_GET_LOD, + get_gradient_h = FETCH_OP_GET_GRADIENTS_H, + get_gradient_v = FETCH_OP_GET_GRADIENTS_V, + set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS, + keep_gradients = FETCH_OP_KEEP_GRADIENTS, + set_gradient_h = FETCH_OP_SET_GRADIENTS_H, + set_gradient_v = FETCH_OP_SET_GRADIENTS_V, + sample = FETCH_OP_SAMPLE, + sample_l = FETCH_OP_SAMPLE_L, + sample_lb = FETCH_OP_SAMPLE_LB, + sample_lz = FETCH_OP_SAMPLE_LZ, + sample_g = FETCH_OP_SAMPLE_G, + sample_g_lb = FETCH_OP_SAMPLE_G_L, + gather4 = FETCH_OP_GATHER4, + gather4_o = FETCH_OP_GATHER4_O, + + sample_c = FETCH_OP_SAMPLE_C, + sample_c_l = FETCH_OP_SAMPLE_C_L, + sample_c_lb = FETCH_OP_SAMPLE_C_LB, + sample_c_lz = FETCH_OP_SAMPLE_C_LZ, + sample_c_g = FETCH_OP_SAMPLE_C_G, + sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L, + gather4_c = FETCH_OP_GATHER4_C, + gather4_c_o = FETCH_OP_GATHER4_C_O, + + }; + + enum Flags { + x_unnormalized, + y_unnormalized, + z_unnormalized, + w_unnormalized, + grad_fine + }; + + TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid, + unsigned rid, PValue sampler_offset); + + const GPRVector& src() const {return m_src;} + const GPRVector& dst() const {return m_dst;} + unsigned opcode() const {return m_opcode;} + unsigned sampler_id() const {return m_sampler_id;} + unsigned resource_id() const {return m_resource_id;} + + void replace_values(const ValueSet& candidates, PValue new_value) override; + + void set_offset(unsigned index, int32_t val); + int get_offset(unsigned index) const; + + void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;} + + int inst_mode() const { return m_inst_mode;} + + void set_flag(Flags flag) { + m_flags.set(flag); + } + + PValue sampler_offset() const { + return m_sampler_offset; + } + + bool has_flag(Flags flag) const { + return m_flags.test(flag); + } + + int dest_swizzle(int i) const { + assert(i < 4); + return m_dest_swizzle[i]; + } + + void set_dest_swizzle(const std::array<int,4>& swz) { + m_dest_swizzle = swz; + } + + void set_gather_comp(int cmp); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + static const char *opname(Opcode code); + + Opcode m_opcode; + GPRVector m_dst; + GPRVector m_src; + unsigned m_sampler_id; + unsigned m_resource_id; + std::bitset<8> m_flags; + int m_offset[3]; + int m_inst_mode; + std::array<int,4> m_dest_swizzle; + PValue m_sampler_offset; +}; + +bool r600_nir_lower_int_tg4(nir_shader *nir); +bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader); +bool r600_nir_lower_cube_to_2darray(nir_shader *shader); + +} + +#endif // INSTRUCTION_TEX_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp new file mode 100644 index 000000000..e47a46b88 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp @@ -0,0 +1,1450 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_ir_to_assembly.h" +#include "sfn_conditionaljumptracker.h" +#include "sfn_callstack.h" +#include "sfn_instruction_gds.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_fetch.h" +#include "sfn_instruction_lds.h" + +#include "../r600_shader.h" +#include "../r600_sq.h" + +namespace r600 { + +using std::vector; + + + +struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor { + + AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key); + + + bool emit(const Instruction::Pointer i); + void reset_addr_register() {m_last_addr.reset();} + +public: + bool visit(const AluInstruction& i) override; + bool visit(const ExportInstruction& i) override; + bool visit(const TexInstruction& i) override; + bool visit(const FetchInstruction& i) override; + bool visit(const IfInstruction& i) override; + bool visit(const ElseInstruction& i) override; + bool visit(const IfElseEndInstruction& i) override; + bool visit(const LoopBeginInstruction& i) override; + bool visit(const LoopEndInstruction& i) override; + bool visit(const LoopBreakInstruction& i) override; + bool visit(const LoopContInstruction& i) override; + bool visit(const StreamOutIntruction& i) override; + bool visit(const MemRingOutIntruction& i) override; + bool visit(const EmitVertex& i) override; + bool visit(const WaitAck& i) override; + bool visit(const WriteScratchInstruction& i) override; + bool visit(const GDSInstr& i) override; + bool visit(const RatInstruction& i) override; + bool visit(const LDSWriteInstruction& i) override; + bool visit(const LDSReadInstruction& i) override; + bool visit(const LDSAtomicInstruction& i) override; + bool visit(const GDSStoreTessFactor& i) override; + bool visit(const InstructionBlock& i) override; + + bool emit_load_addr(PValue addr); + bool emit_fs_pixel_export(const ExportInstruction & exi); + bool emit_vs_pos_export(const ExportInstruction & exi); + bool emit_vs_param_export(const ExportInstruction & exi); + bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src); + bool copy_src(r600_bytecode_alu_src& src, const Value& s); + + EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx); + + ConditionalJumpTracker m_jump_tracker; + CallStack m_callstack; + +public: + r600_bytecode *m_bc; + r600_shader *m_shader; + r600_shader_key *m_key; + r600_bytecode_output m_output; + unsigned m_max_color_exports; + bool has_pos_output; + bool 
has_param_output; + PValue m_last_addr; + int m_loop_nesting; + int m_nliterals_in_group; + std::set<int> vtx_fetch_results; + bool m_last_op_was_barrier; +}; + + +AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh, + r600_shader_key *key) +{ + impl = new AssemblyFromShaderLegacyImpl(sh, key); +} + +AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy() +{ + delete impl; +} + +bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir) +{ + if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX && + impl->m_shader->ninput > 0) + r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS); + + + std::vector<Instruction::Pointer> exports; + + for (const auto& block : ir) { + if (!impl->visit(block)) + return false; + } /* + for (const auto& i : exports) { + if (!impl->emit_export(static_cast<const ExportInstruction&>(*i))) + return false; + }*/ + + + const struct cf_op_info *last = nullptr; + if (impl->m_bc->cf_last) + last = r600_isa_cf(impl->m_bc->cf_last->op); + + /* alu clause instructions don't have EOP bit, so add NOP */ + if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END + || impl->m_bc->cf_last->op == CF_OP_POP) + r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP); + + /* A fetch shader only can't be EOP (results in hang), but we can replace it + * by a NOP */ + else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS) + impl->m_bc->cf_last->op = CF_OP_NOP; + + if (impl->m_shader->bc.chip_class != CAYMAN) + impl->m_bc->cf_last->end_of_program = 1; + else + cm_bytecode_add_cf_end(impl->m_bc); + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block) +{ + for (const auto& i : block) { + + if (i->type() != Instruction::vtx) + vtx_fetch_results.clear(); + + m_last_op_was_barrier &= i->type() == Instruction::alu; + + sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n"; + + if (!i->accept(*this)) + return false; + + if (i->type() != Instruction::alu) + reset_addr_register(); + } + + return true; +} + +AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh, + r600_shader_key *key): + m_callstack(sh->bc), + m_bc(&sh->bc), + m_shader(sh), + m_key(key), + has_pos_output(false), + has_param_output(false), + m_loop_nesting(0), + m_nliterals_in_group(0), + m_last_op_was_barrier(false) +{ + m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1); + +} + +extern const std::map<EAluOp, int> opcode_map; + +bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr) +{ + m_bc->ar_reg = addr->sel(); + m_bc->ar_chan = addr->chan(); + m_bc->ar_loaded = 0; + m_last_addr = addr; + + sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n"; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai) +{ + + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + PValue addr_in_use; + + if (opcode_map.find(ai.opcode()) == opcode_map.end()) { + std::cerr << "Opcode not handled for " << ai <<"\n"; + return false; + } + + if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier) + return true; + + m_last_op_was_barrier = ai.opcode() == op0_group_barrier; + + unsigned old_nliterals_in_group = m_nliterals_in_group; + for (unsigned i = 0; i < ai.n_sources(); ++i) { + auto& s = ai.src(i); + if (s.type() == Value::literal) + ++m_nliterals_in_group; + } + + /* This instruction group would exceed the limit of literals, so + * force a new instruction group by adding a NOP as last + * instruction. 
This will no longer be needed with a real
+    * scheduler */
+   if (m_nliterals_in_group > 4) {
+      sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n";
+      alu.op = ALU_OP0_NOP;
+      alu.last = 1;
+      alu.dst.chan = 3;
+      int retval = r600_bytecode_add_alu(m_bc, &alu);
+      if (retval)
+         return false;
+      memset(&alu, 0, sizeof(alu));
+      m_nliterals_in_group -= old_nliterals_in_group;
+   }
+
+   alu.op = opcode_map.at(ai.opcode());
+
+   /* Missing test whether ai actually has a dest */
+   auto dst = ai.dest();
+
+   if (dst) {
+      if (!copy_dst(alu.dst, *dst))
+         return false;
+
+      alu.dst.write = ai.flag(alu_write);
+      alu.dst.clamp = ai.flag(alu_dst_clamp);
+
+      if (dst->type() == Value::gpr_array_value) {
+         auto& v = static_cast<const GPRArrayValue&>(*dst);
+         PValue addr = v.indirect();
+         if (addr) {
+            if (!m_last_addr || *addr != *m_last_addr) {
+               emit_load_addr(addr);
+               addr_in_use = addr;
+            }
+            alu.dst.rel = addr ? 1 : 0;
+         }
+      }
+   }
+
+   alu.is_op3 = ai.n_sources() == 3;
+
+   for (unsigned i = 0; i < ai.n_sources(); ++i) {
+      auto& s = ai.src(i);
+
+      if (!copy_src(alu.src[i], s))
+         return false;
+      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
+
+      if (s.type() == Value::gpr_array_value) {
+         auto& v = static_cast<const GPRArrayValue&>(s);
+         PValue addr = v.indirect();
+         if (addr) {
+            assert(!addr_in_use || (*addr_in_use == *addr));
+            if (!m_last_addr || *addr != *m_last_addr) {
+               emit_load_addr(addr);
+               addr_in_use = addr;
+            }
+            alu.src[i].rel = addr ? 1 : 0;
+         }
+      }
+      if (!alu.is_op3)
+         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
+   }
+
+   if (ai.bank_swizzle() != alu_vec_unknown)
+      alu.bank_swizzle_force = ai.bank_swizzle();
+
+   alu.last = ai.flag(alu_last_instr);
+   alu.update_pred = ai.flag(alu_update_pred);
+   alu.execute_mask = ai.flag(alu_update_exec);
+
+   /* If the destination register is equal to the last loaded address register
+    * then clear the latter one, because the values will no longer be identical */
+   if (m_last_addr)
+      sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";
+
+   if (dst)
+      sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
+
+   if (dst && m_last_addr)
+      if (*dst == *m_last_addr) {
+         sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << ")\n";
+         m_last_addr.reset();
+      }
+
+   auto cf_op = ai.cf_type();
+
+   unsigned type = 0;
+   switch (cf_op) {
+   case cf_alu: type = CF_OP_ALU; break;
+   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
+   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
+   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
+   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
+   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
+   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
+   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
+   default:
+      assert(0 && "cf_alu_undefined should have been replaced");
+   }
+
+   if (alu.last)
+      m_nliterals_in_group = 0;
+
+   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
+
+   if (ai.opcode() == op1_mova_int)
+      m_bc->ar_loaded = 0;
+
+   if (ai.opcode() == op1_set_cf_idx0)
+      m_bc->index_loaded[0] = 1;
+
+   if (ai.opcode() == op1_set_cf_idx1)
+      m_bc->index_loaded[1] = 1;
+
+   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
+                          ai.opcode() == op2_killne_int ||
+                          ai.opcode() == op1_set_cf_idx0 ||
+                          ai.opcode() == op1_set_cf_idx1);
+   return retval;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
+{
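+   /* (Position exports go to the dedicated SX position slots: array_base
+    * 60 + location selects POS0..POS3 on Evergreen, and the final export
+    * of the program is tagged CF_OP_EXPORT_DONE below.) */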
r600_bytecode_output output; + memset(&output, 0, sizeof(output)); + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = gpr.chan_i(3); + output.burst_count = 1; + output.array_base = 60 + exi.location(); + output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi) +{ + r600_bytecode_output output; + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + + memset(&output, 0, sizeof(output)); + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = gpr.chan_i(3); + output.burst_count = 1; + output.array_base = exi.location(); + output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi) +{ + if (exi.location() >= m_max_color_exports && exi.location() < 60) { + R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n", + exi.location(), m_max_color_exports); + return true; + } + + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + + r600_bytecode_output output; + memset(&output, 0, sizeof(output)); + + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ; + output.burst_count = 1; + output.array_base = exi.location(); + output.op = exi.is_last_export() ? 
CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi) +{ + switch (exi.export_type()) { + case ExportInstruction::et_pixel: + return emit_fs_pixel_export(exi); + case ExportInstruction::et_pos: + return emit_vs_pos_export(exi); + case ExportInstruction::et_param: + return emit_vs_param_export(exi); + default: + R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type()); + return false; + } +} + +bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr) +{ + int elems = m_callstack.push(FC_PUSH_VPM); + bool needs_workaround = false; + + if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1) + needs_workaround = true; + + if (m_bc->family != CHIP_HEMLOCK && + m_bc->family != CHIP_CYPRESS && + m_bc->family != CHIP_JUNIPER) { + unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size; + unsigned dmod2 = (elems) % m_bc->stack.entry_size; + + if (elems && (!dmod1 || !dmod2)) + needs_workaround = true; + } + + auto& pred = if_instr.pred(); + + if (needs_workaround) { + r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + auto new_pred = pred; + new_pred.set_cf_type(cf_alu); + visit(new_pred); + } else + visit(pred); + + r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); + + m_jump_tracker.push(m_bc->cf_last, jt_if); + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE); + m_bc->cf_last->pop_count = 1; + return m_jump_tracker.add_mid(m_bc->cf_last, jt_if); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr) +{ + m_callstack.pop(FC_PUSH_VPM); + + unsigned force_pop = m_bc->force_add_cf; + if (!force_pop) { + int alu_pop = 3; + if (m_bc->cf_last) { + if (m_bc->cf_last->op == CF_OP_ALU) + alu_pop = 0; + else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER) + alu_pop = 1; + } + alu_pop += 1; + if (alu_pop == 1) { + m_bc->cf_last->op = CF_OP_ALU_POP_AFTER; + m_bc->force_add_cf = 1; + } else if (alu_pop == 2) { + m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER; + m_bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { + r600_bytecode_add_cfinst(m_bc, CF_OP_POP); + m_bc->cf_last->pop_count = 1; + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + } + + return m_jump_tracker.pop(m_bc->cf_last, jt_if); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10); + m_jump_tracker.push(m_bc->cf_last, jt_loop); + m_callstack.push(FC_LOOP); + ++m_loop_nesting; + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END); + m_callstack.pop(FC_LOOP); + assert(m_loop_nesting); + --m_loop_nesting; + return m_jump_tracker.pop(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK); + return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE); + return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +bool 
AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + + output.gpr = so_instr.gpr().sel(); + output.elem_size = so_instr.element_size(); + output.array_base = so_instr.array_base(); + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; + output.burst_count = so_instr.burst_count(); + output.array_size = so_instr.array_size(); + output.comp_mask = so_instr.comp_mask(); + output.op = so_instr.op(); + + assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("shader_from_nir: Error creating stream output instruction\n"); + return false; + } + return true; +} + + +bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + + output.gpr = instr.gpr().sel(); + output.type = instr.type(); + output.elem_size = 3; + output.comp_mask = 0xf; + output.burst_count = 1; + output.op = instr.op(); + if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) { + output.index_gpr = instr.index_reg(); + output.array_size = 0xfff; + } + output.array_base = instr.array_base(); + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("shader_from_nir: Error creating mem ring write instruction\n"); + return false; + } + return true; +} + + +bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr) +{ + auto addr = tex_instr.sampler_offset(); + if (addr && (!m_bc->index_loaded[1] || m_loop_nesting + || m_bc->index_reg[1] != addr->sel() + || m_bc->index_reg_chan[1] != addr->chan())) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + alu.op = opcode_map.at(op1_mova_int); + alu.dst.chan = 0; + alu.src[0].sel = addr->sel(); + alu.src[0].chan = addr->chan(); + alu.last = 1; + int r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->ar_loaded = 0; + + alu.op = opcode_map.at(op1_set_cf_idx1); + alu.dst.chan = 0; + alu.src[0].sel = 0; + alu.src[0].chan = 0; + alu.last = 1; + + r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->index_reg[1] = addr->sel(); + m_bc->index_reg_chan[1] = addr->chan(); + m_bc->index_loaded[1] = true; + } + + r600_bytecode_tex tex; + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = tex_instr.opcode(); + tex.sampler_id = tex_instr.sampler_id(); + tex.sampler_index_mode = 0; + tex.resource_id = tex_instr.resource_id();; + tex.resource_index_mode = 0; + tex.src_gpr = tex_instr.src().sel(); + tex.dst_gpr = tex_instr.dst().sel(); + tex.dst_sel_x = tex_instr.dest_swizzle(0); + tex.dst_sel_y = tex_instr.dest_swizzle(1); + tex.dst_sel_z = tex_instr.dest_swizzle(2); + tex.dst_sel_w = tex_instr.dest_swizzle(3); + tex.src_sel_x = tex_instr.src().chan_i(0); + tex.src_sel_y = tex_instr.src().chan_i(1); + tex.src_sel_z = tex_instr.src().chan_i(2); + tex.src_sel_w = tex_instr.src().chan_i(3); + tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized); + tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized); + tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized); + tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized); + tex.offset_x = tex_instr.get_offset(0); + tex.offset_y = tex_instr.get_offset(1); + tex.offset_z = tex_instr.get_offset(2); + tex.resource_index_mode = (!!addr) ? 
2 : 0; + tex.sampler_index_mode = tex.resource_index_mode; + + if (tex_instr.opcode() == TexInstruction::get_gradient_h || + tex_instr.opcode() == TexInstruction::get_gradient_v) + tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0; + else + tex.inst_mod = tex_instr.inst_mode(); + if (r600_bytecode_add_tex(m_bc, &tex)) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr) +{ + int buffer_offset = 0; + auto addr = fetch_instr.buffer_offset(); + auto index_mode = fetch_instr.buffer_index_mode(); + + if (addr) { + if (addr->type() == Value::literal) { + const auto& boffs = static_cast<const LiteralValue&>(*addr); + buffer_offset = boffs.value(); + } else { + index_mode = emit_index_reg(*addr, 0); + } + } + + if (fetch_instr.has_prelude()) { + for(auto &i : fetch_instr.prelude()) { + if (!i->accept(*this)) + return false; + } + } + + if (vtx_fetch_results.find(fetch_instr.src().sel()) != + vtx_fetch_results.end()) { + m_bc->force_add_cf = 1; + vtx_fetch_results.clear(); + } + vtx_fetch_results.insert(fetch_instr.dst().sel()); + + struct r600_bytecode_vtx vtx; + memset(&vtx, 0, sizeof(vtx)); + vtx.op = fetch_instr.vc_opcode(); + vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset; + vtx.fetch_type = fetch_instr.fetch_type(); + vtx.src_gpr = fetch_instr.src().sel(); + vtx.src_sel_x = fetch_instr.src().chan(); + vtx.mega_fetch_count = fetch_instr.mega_fetch_count(); + vtx.dst_gpr = fetch_instr.dst().sel(); + vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */ + vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */ + vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */ + vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */ + vtx.use_const_fields = fetch_instr.use_const_fields(); + vtx.data_format = fetch_instr.data_format(); + vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */ + vtx.endian = fetch_instr.endian_swap(); + vtx.buffer_index_mode = index_mode; + vtx.offset = fetch_instr.offset(); + vtx.indexed = fetch_instr.indexed(); + vtx.uncached = fetch_instr.uncached(); + vtx.elem_size = fetch_instr.elm_size(); + vtx.array_base = fetch_instr.array_base(); + vtx.array_size = fetch_instr.array_size(); + vtx.srf_mode_all = fetch_instr.srf_mode_no_zero(); + + if (fetch_instr.use_tc()) { + if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + + } else { + if ((r600_bytecode_add_vtx(m_bc, &vtx))) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + } + + m_bc->cf_last->vpm = fetch_instr.use_vpm(); + m_bc->cf_last->barrier = 1; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr) +{ + int r = r600_bytecode_add_cfinst(m_bc, instr.op()); + if (!r) + m_bc->cf_last->count = instr.stream(); + assert(m_bc->cf_last->count < 4); + + return r == 0; +} + +bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr) +{ + int r = r600_bytecode_add_cfinst(m_bc, instr.op()); + if (!r) + m_bc->cf_last->cf_addr = instr.n_ack(); + + return r == 0; +} + +bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr) +{ + struct r600_bytecode_output cf; + + memset(&cf, 0, sizeof(struct r600_bytecode_output)); + + cf.op = CF_OP_MEM_SCRATCH; + cf.elem_size = 3; + cf.gpr = instr.gpr().sel(); + cf.mark = 1; + 
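+   /* (The scratch payload leaves the GPR unswizzled, x,y,z,w in order;
+    * the component mask set below decides which of them are stored.) */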
cf.comp_mask = instr.write_mask(); + cf.swizzle_x = 0; + cf.swizzle_y = 1; + cf.swizzle_z = 2; + cf.swizzle_w = 3; + cf.burst_count = 1; + + if (instr.indirect()) { + cf.type = 3; + cf.index_gpr = instr.address(); + + /* The docu seems to be wrong here: In indirect addressing the + * address_base seems to be the array_size */ + cf.array_size = instr.array_size(); + } else { + cf.type = 2; + cf.array_base = instr.location(); + } + /* This should be 0, but the address calculation is apparently wrong */ + + + if (r600_bytecode_add_output(m_bc, &cf)){ + R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n"); + return false; + } + + return true; +} + +extern const std::map<ESDOp, int> ds_opcode_map; + +bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr) +{ + struct r600_bytecode_gds gds; + + int uav_idx = -1; + auto addr = instr.uav_id(); + if (addr->type() != Value::literal) { + emit_index_reg(*addr, 1); + } else { + const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr); + uav_idx = addr_reg.value(); + } + + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + + gds.op = ds_opcode_map.at(instr.op()); + gds.dst_gpr = instr.dest_sel(); + gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base(); + gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one; + gds.src_gpr = instr.src_sel(); + + gds.src_sel_x = instr.src_swizzle(0); + gds.src_sel_y = instr.src_swizzle(1); + gds.src_sel_z = instr.src_swizzle(2); + + gds.dst_sel_x = instr.dest_swizzle(0); + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + gds.src_gpr2 = 0; + gds.alloc_consume = 1; // Not Cayman + + int r = r600_bytecode_add_gds(m_bc, &gds); + if (r) + return false; + m_bc->cf_last->vpm = 1; + m_bc->cf_last->barrier = 1; + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr) +{ + struct r600_bytecode_gds gds; + + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + gds.src_gpr = instr.sel(); + gds.src_sel_x = instr.chan(0); + gds.src_sel_y = instr.chan(1); + gds.src_sel_z = 4; + gds.dst_sel_x = 7; + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + gds.op = FETCH_OP_TF_WRITE; + + if (r600_bytecode_add_gds(m_bc, &gds) != 0) + return false; + + if (instr.chan(2) != 7) { + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + gds.src_gpr = instr.sel(); + gds.src_sel_x = instr.chan(2); + gds.src_sel_y = instr.chan(3); + gds.src_sel_z = 4; + gds.dst_sel_x = 7; + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + gds.op = FETCH_OP_TF_WRITE; + + if (r600_bytecode_add_gds(m_bc, &gds)) + return false; + } + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr) +{ + r600_bytecode_alu alu; + memset(&alu, 0, sizeof(r600_bytecode_alu)); + + alu.last = true; + alu.is_lds_idx_op = true; + copy_src(alu.src[0], instr.address()); + copy_src(alu.src[1], instr.value0()); + + if (instr.num_components() == 1) { + alu.op = LDS_OP2_LDS_WRITE; + } else { + alu.op = LDS_OP3_LDS_WRITE_REL; + alu.lds_idx = 1; + copy_src(alu.src[2], instr.value1()); + } + + return r600_bytecode_add_alu(m_bc, &alu) == 0; +} + +bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr) +{ + int r; + unsigned nread = 0; + unsigned nfetch = 0; + unsigned n_values = instr.num_values(); + + r600_bytecode_alu alu_fetch; + r600_bytecode_alu alu_read; + + /* We must add a new ALU clause if the fetch and read op would be split otherwise + * r600_asm limits at 120 slots = 240 dwords */ + if (m_bc->cf_last->ndw 
> 240 - 4 * n_values) + m_bc->force_add_cf = 1; + + while (nread < n_values) { + if (nfetch < n_values) { + memset(&alu_fetch, 0, sizeof(r600_bytecode_alu)); + alu_fetch.is_lds_idx_op = true; + alu_fetch.op = LDS_OP1_LDS_READ_RET; + + copy_src(alu_fetch.src[0], instr.address(nfetch)); + alu_fetch.src[1].sel = V_SQ_ALU_SRC_0; + alu_fetch.src[2].sel = V_SQ_ALU_SRC_0; + alu_fetch.last = 1; + r = r600_bytecode_add_alu(m_bc, &alu_fetch); + m_bc->cf_last->nlds_read++; + if (r) + return false; + } + + if (nfetch >= n_values) { + memset(&alu_read, 0, sizeof(r600_bytecode_alu)); + copy_dst(alu_read.dst, instr.dest(nread)); + alu_read.op = ALU_OP1_MOV; + alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; + alu_read.last = 1; + alu_read.dst.write = 1; + r = r600_bytecode_add_alu(m_bc, &alu_read); + m_bc->cf_last->nqueue_read++; + if (r) + return false; + ++nread; + } + ++nfetch; + } + assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read); + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr) +{ + if (m_bc->cf_last->ndw > 240 - 4) + m_bc->force_add_cf = 1; + + r600_bytecode_alu alu_fetch; + r600_bytecode_alu alu_read; + + memset(&alu_fetch, 0, sizeof(r600_bytecode_alu)); + alu_fetch.is_lds_idx_op = true; + alu_fetch.op = instr.op(); + + copy_src(alu_fetch.src[0], instr.address()); + copy_src(alu_fetch.src[1], instr.src0()); + + if (instr.src1()) + copy_src(alu_fetch.src[2], *instr.src1()); + alu_fetch.last = 1; + int r = r600_bytecode_add_alu(m_bc, &alu_fetch); + if (r) + return false; + + memset(&alu_read, 0, sizeof(r600_bytecode_alu)); + copy_dst(alu_read.dst, instr.dest()); + alu_read.op = ALU_OP1_MOV; + alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; + alu_read.last = 1; + alu_read.dst.write = 1; + r = r600_bytecode_add_alu(m_bc, &alu_read); + if (r) + return false; + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr) +{ + struct r600_bytecode_gds gds; + + int rat_idx = instr.rat_id(); + EBufferIndexMode rat_index_mode = bim_none; + auto addr = instr.rat_id_offset(); + + if (addr) { + if (addr->type() != Value::literal) { + rat_index_mode = emit_index_reg(*addr, 1); + } else { + const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr); + rat_idx += addr_reg.value(); + } + } + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + + r600_bytecode_add_cfinst(m_bc, instr.cf_opcode()); + auto cf = m_bc->cf_last; + cf->rat.id = rat_idx + m_shader->rat_base; + cf->rat.inst = instr.rat_op(); + cf->rat.index_mode = rat_index_mode; + cf->output.type = instr.need_ack() ? 3 : 1; + cf->output.gpr = instr.data_gpr(); + cf->output.index_gpr = instr.index_gpr(); + cf->output.comp_mask = instr.comp_mask(); + cf->output.burst_count = instr.burst_count(); + assert(instr.data_swz(0) == PIPE_SWIZZLE_X); + if (cf->rat.inst != RatInstruction::STORE_TYPED) { + assert(instr.data_swz(1) == PIPE_SWIZZLE_Y || + instr.data_swz(1) == PIPE_SWIZZLE_MAX) ; + assert(instr.data_swz(2) == PIPE_SWIZZLE_Z || + instr.data_swz(2) == PIPE_SWIZZLE_MAX) ; + } + + cf->vpm = 1; + cf->barrier = 1; + cf->mark = instr.need_ack(); + cf->output.elem_size = instr.elm_size(); + return true; +} + +EBufferIndexMode +AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx) +{ + assert(idx < 2); + + EAluOp idxop = idx ? 
op1_set_cf_idx1 : op1_set_cf_idx0;
+
+   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
+       m_bc->index_reg[idx] != addr.sel() ||
+       m_bc->index_reg_chan[idx] != addr.chan()) {
+      struct r600_bytecode_alu alu;
+
+      // Make sure MOVA is not the last instruction in the clause
+      if ((m_bc->cf_last->ndw>>1) >= 110)
+         m_bc->force_add_cf = 1;
+
+      memset(&alu, 0, sizeof(alu));
+      alu.op = opcode_map.at(op1_mova_int);
+      alu.dst.chan = 0;
+      alu.src[0].sel = addr.sel();
+      alu.src[0].chan = addr.chan();
+      alu.last = 1;
+      sfn_log << SfnLog::assembly << " mova_int, ";
+      int r = r600_bytecode_add_alu(m_bc, &alu);
+      if (r)
+         return bim_invalid;
+
+      m_bc->ar_loaded = 0;
+
+      alu.op = opcode_map.at(idxop);
+      alu.dst.chan = 0;
+      alu.src[0].sel = 0;
+      alu.src[0].chan = 0;
+      alu.last = 1;
+      sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
+      r = r600_bytecode_add_alu(m_bc, &alu);
+      if (r)
+         return bim_invalid;
+
+      m_bc->index_reg[idx] = addr.sel();
+      m_bc->index_reg_chan[idx] = addr.chan();
+      m_bc->index_loaded[idx] = true;
+      sfn_log << SfnLog::assembly << "\n";
+   }
+   return idx == 0 ? bim_zero : bim_one;
+}
+
+bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
+                                            const Value& d)
+{
+   assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
+
+   if (d.sel() > 124) {
+      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, but trying to use %d\n", d.sel());
+      return false;
+   }
+
+   dst.sel = d.sel();
+   dst.chan = d.chan();
+
+   if (m_bc->index_reg[1] == dst.sel &&
+       m_bc->index_reg_chan[1] == dst.chan)
+      m_bc->index_loaded[1] = false;
+
+   if (m_bc->index_reg[0] == dst.sel &&
+       m_bc->index_reg_chan[0] == dst.chan)
+      m_bc->index_loaded[0] = false;
+
+   return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
+{
+   if (s.type() == Value::gpr && s.sel() > 124) {
+      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, but trying to use %d\n", s.sel());
+      return false;
+   }
+
+   if (s.type() == Value::lds_direct) {
+      R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
+      return false;
+   }
+
+   if (s.type() == Value::kconst && s.sel() < 512) {
+      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d\n", s.sel());
+      return false;
+   }
+
+   if (s.type() == Value::literal) {
+      auto& v = static_cast<const LiteralValue&>(s);
+      if (v.value() == 0) {
+         src.sel = ALU_SRC_0;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value() == 1) {
+         src.sel = ALU_SRC_1_INT;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value_float() == 1.0f) {
+         src.sel = ALU_SRC_1;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value_float() == 0.5f) {
+         src.sel = ALU_SRC_0_5;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value() == 0xffffffff) {
+         src.sel = ALU_SRC_M_1_INT;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      src.value = v.value();
+   }
+
+   src.sel = s.sel();
+   src.chan = s.chan();
+   if (s.type() == Value::kconst) {
+      const UniformValue& cv = static_cast<const UniformValue&>(s);
+      src.kc_bank = cv.kcache_bank();
+      auto addr = cv.addr();
+      if (addr) {
+         src.kc_rel = 1;
+         emit_index_reg(*addr, 0);
+         auto type = m_bc->cf_last->op;
+         if (r600_bytecode_add_cf(m_bc)) {
+            return false;
+         }
+         m_bc->cf_last->op = type;
+      }
+   }
+
+   return true;
+}
+
+const std::map<EAluOp, int> opcode_map = {
+
+      {op2_add, ALU_OP2_ADD},
+      {op2_mul, ALU_OP2_MUL},
+      {op2_mul_ieee, ALU_OP2_MUL_IEEE},
+      {op2_max, ALU_OP2_MAX},
+      {op2_min, ALU_OP2_MIN},
+      {op2_max_dx10,
ALU_OP2_MAX_DX10}, + {op2_min_dx10, ALU_OP2_MIN_DX10}, + {op2_sete, ALU_OP2_SETE}, + {op2_setgt, ALU_OP2_SETGT}, + {op2_setge, ALU_OP2_SETGE}, + {op2_setne, ALU_OP2_SETNE}, + {op2_sete_dx10, ALU_OP2_SETE_DX10}, + {op2_setgt_dx10, ALU_OP2_SETGT_DX10}, + {op2_setge_dx10, ALU_OP2_SETGE_DX10}, + {op2_setne_dx10, ALU_OP2_SETNE_DX10}, + {op1_fract, ALU_OP1_FRACT}, + {op1_trunc, ALU_OP1_TRUNC}, + {op1_ceil, ALU_OP1_CEIL}, + {op1_rndne, ALU_OP1_RNDNE}, + {op1_floor, ALU_OP1_FLOOR}, + {op2_ashr_int, ALU_OP2_ASHR_INT}, + {op2_lshr_int, ALU_OP2_LSHR_INT}, + {op2_lshl_int, ALU_OP2_LSHL_INT}, + {op1_mov, ALU_OP1_MOV}, + {op0_nop, ALU_OP0_NOP}, + {op2_mul_64, ALU_OP2_MUL_64}, + {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, + {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, + {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT}, + {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT}, + {op2_pred_sete, ALU_OP2_PRED_SETE}, + {op2_pred_setgt, ALU_OP2_PRED_SETGT}, + {op2_pred_setge, ALU_OP2_PRED_SETGE}, + {op2_pred_setne, ALU_OP2_PRED_SETNE}, + //{op2_pred_set_inv, ALU_OP2_PRED_SET}, + //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL}, + //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE}, + {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH}, + {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH}, + {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH}, + {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH}, + {op2_kille, ALU_OP2_KILLE}, + {op2_killgt, ALU_OP2_KILLGT}, + {op2_killge, ALU_OP2_KILLGE}, + {op2_killne, ALU_OP2_KILLNE}, + {op2_and_int, ALU_OP2_AND_INT}, + {op2_or_int, ALU_OP2_OR_INT}, + {op2_xor_int, ALU_OP2_XOR_INT}, + {op1_not_int, ALU_OP1_NOT_INT}, + {op2_add_int, ALU_OP2_ADD_INT}, + {op2_sub_int, ALU_OP2_SUB_INT}, + {op2_max_int, ALU_OP2_MAX_INT}, + {op2_min_int, ALU_OP2_MIN_INT}, + {op2_max_uint, ALU_OP2_MAX_UINT}, + {op2_min_uint, ALU_OP2_MIN_UINT}, + {op2_sete_int, ALU_OP2_SETE_INT}, + {op2_setgt_int, ALU_OP2_SETGT_INT}, + {op2_setge_int, ALU_OP2_SETGE_INT}, + {op2_setne_int, ALU_OP2_SETNE_INT}, + {op2_setgt_uint, ALU_OP2_SETGT_UINT}, + {op2_setge_uint, ALU_OP2_SETGE_UINT}, + {op2_killgt_uint, ALU_OP2_KILLGT_UINT}, + {op2_killge_uint, ALU_OP2_KILLGE_UINT}, + //p2_prede_int, ALU_OP2_PREDE_INT}, + {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT}, + {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT}, + {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT}, + {op2_kille_int, ALU_OP2_KILLE_INT}, + {op2_killgt_int, ALU_OP2_KILLGT_INT}, + {op2_killge_int, ALU_OP2_KILLGE_INT}, + {op2_killne_int, ALU_OP2_KILLNE_INT}, + {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT}, + {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT}, + {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT}, + {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT}, + {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT}, + {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT}, + {op1_flt_to_int, ALU_OP1_FLT_TO_INT}, + {op1_bfrev_int, ALU_OP1_BFREV_INT}, + {op2_addc_uint, ALU_OP2_ADDC_UINT}, + {op2_subb_uint, ALU_OP2_SUBB_UINT}, + {op0_group_barrier, ALU_OP0_GROUP_BARRIER}, + {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN}, + {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END}, + {op2_set_mode, ALU_OP2_SET_MODE}, + {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0}, + {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1}, + {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE}, + {op1_exp_ieee, ALU_OP1_EXP_IEEE}, + {op1_log_clamped, ALU_OP1_LOG_CLAMPED}, + {op1_log_ieee, ALU_OP1_LOG_IEEE}, + {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED}, + {op1_recip_ff, ALU_OP1_RECIP_FF}, + {op1_recip_ieee, ALU_OP1_RECIP_IEEE}, + 
{op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED}, + {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF}, + {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE}, + {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE}, + {op1_sin, ALU_OP1_SIN}, + {op1_cos, ALU_OP1_COS}, + {op2_mullo_int, ALU_OP2_MULLO_INT}, + {op2_mulhi_int, ALU_OP2_MULHI_INT}, + {op2_mullo_uint, ALU_OP2_MULLO_UINT}, + {op2_mulhi_uint, ALU_OP2_MULHI_UINT}, + {op1_recip_int, ALU_OP1_RECIP_INT}, + {op1_recip_uint, ALU_OP1_RECIP_UINT}, + {op1_recip_64, ALU_OP2_RECIP_64}, + {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64}, + {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64}, + {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64}, + {op1_sqrt_64, ALU_OP2_SQRT_64}, + {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT}, + {op1_int_to_flt, ALU_OP1_INT_TO_FLT}, + {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT}, + {op2_bfm_int, ALU_OP2_BFM_INT}, + {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16}, + {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32}, + {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT}, + {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT}, + {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT}, + {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT}, + {op1_bcnt_int, ALU_OP1_BCNT_INT}, + {op1_ffbh_uint, ALU_OP1_FFBH_UINT}, + {op1_ffbl_int, ALU_OP1_FFBL_INT}, + {op1_ffbh_int, ALU_OP1_FFBH_INT}, + {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4}, + {op2_dot_ieee, ALU_OP2_DOT_IEEE}, + {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI}, + {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR}, + {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24}, + {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT}, + {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT}, + {op2_mul_uint24, ALU_OP2_MUL_UINT24}, + {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT}, + {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT}, + {op2_sete_64, ALU_OP2_SETE_64}, + {op2_setne_64, ALU_OP2_SETNE_64}, + {op2_setgt_64, ALU_OP2_SETGT_64}, + {op2_setge_64, ALU_OP2_SETGE_64}, + {op2_min_64, ALU_OP2_MIN_64}, + {op2_max_64, ALU_OP2_MAX_64}, + {op2_dot4, ALU_OP2_DOT4}, + {op2_dot4_ieee, ALU_OP2_DOT4_IEEE}, + {op2_cube, ALU_OP2_CUBE}, + {op1_max4, ALU_OP1_MAX4}, + {op1_frexp_64, ALU_OP1_FREXP_64}, + {op1_ldexp_64, ALU_OP2_LDEXP_64}, + {op1_fract_64, ALU_OP1_FRACT_64}, + {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64}, + {op2_pred_sete_64, ALU_OP2_PRED_SETE_64}, + {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64}, + {op2_add_64, ALU_OP2_ADD_64}, + {op1_mova_int, ALU_OP1_MOVA_INT}, + {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, + {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, + {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT}, + {op2_dot, ALU_OP2_DOT}, + //p2_mul_prev, ALU_OP2_MUL_PREV}, + //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV}, + //p2_add_prev, ALU_OP2_ADD_PREV}, + {op2_muladd_prev, ALU_OP2_MULADD_PREV}, + {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV}, + {op2_interp_xy, ALU_OP2_INTERP_XY}, + {op2_interp_zw, ALU_OP2_INTERP_ZW}, + {op2_interp_x, ALU_OP2_INTERP_X}, + {op2_interp_z, ALU_OP2_INTERP_Z}, + {op0_store_flags, ALU_OP1_STORE_FLAGS}, + {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS}, + {op0_lds_1a, ALU_OP2_LDS_1A}, + {op0_lds_1a1d, ALU_OP2_LDS_1A1D}, + {op0_lds_2a, ALU_OP2_LDS_2A}, + {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0}, + {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10}, + {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20}, + // {op 3 all left shift 6 + {op3_bfe_uint, ALU_OP3_BFE_UINT}, + {op3_bfe_int, ALU_OP3_BFE_INT}, + {op3_bfi_int, ALU_OP3_BFI_INT}, + {op3_fma, ALU_OP3_FMA}, + {op3_cndne_64, ALU_OP3_CNDNE_64}, + {op3_fma_64, ALU_OP3_FMA_64}, + {op3_lerp_uint, ALU_OP3_LERP_UINT}, + {op3_bit_align_int, 
ALU_OP3_BIT_ALIGN_INT}, + {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT}, + {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT}, + {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT}, + {op3_muladd_uint24, ALU_OP3_MULADD_UINT24}, + {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP}, + {op3_muladd, ALU_OP3_MULADD}, + {op3_muladd_m2, ALU_OP3_MULADD_M2}, + {op3_muladd_m4, ALU_OP3_MULADD_M4}, + {op3_muladd_d2, ALU_OP3_MULADD_D2}, + {op3_muladd_ieee, ALU_OP3_MULADD_IEEE}, + {op3_cnde, ALU_OP3_CNDE}, + {op3_cndgt, ALU_OP3_CNDGT}, + {op3_cndge, ALU_OP3_CNDGE}, + {op3_cnde_int, ALU_OP3_CNDE_INT}, + {op3_cndgt_int, ALU_OP3_CNDGT_INT}, + {op3_cndge_int, ALU_OP3_CNDGE_INT}, + {op3_mul_lit, ALU_OP3_MUL_LIT}, +}; + +const std::map<ESDOp, int> ds_opcode_map = { + {DS_OP_ADD, FETCH_OP_GDS_ADD}, + {DS_OP_SUB, FETCH_OP_GDS_SUB}, + {DS_OP_RSUB, FETCH_OP_GDS_RSUB}, + {DS_OP_INC, FETCH_OP_GDS_INC}, + {DS_OP_DEC, FETCH_OP_GDS_DEC}, + {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT}, + {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT}, + {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT}, + {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT}, + {DS_OP_AND, FETCH_OP_GDS_AND}, + {DS_OP_OR, FETCH_OP_GDS_OR}, + {DS_OP_XOR, FETCH_OP_GDS_XOR}, + {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR}, + {DS_OP_WRITE, FETCH_OP_GDS_WRITE}, + {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL}, + {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2}, + {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE}, + {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF}, + {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE}, + {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE}, + {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET}, + {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET}, + {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET}, + {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET}, + {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET}, + {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET}, + {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET}, + {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET}, + {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET}, + {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET}, + {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET}, + {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET}, + {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET}, + {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET}, + {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET}, + {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET}, + {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET}, + {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET}, + {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET}, + {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET}, + {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET}, + {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET}, + {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET}, + {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET}, + {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET}, + {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET}, + {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC}, + {DS_OP_INVALID, 0}, +}; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h new file mode 100644 index 000000000..0c82032e6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h @@ -0,0 +1,45 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, 
distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "sfn_nir.h" + +struct r600_shader; +union r600_shader_key; + +namespace r600 { + +class AssemblyFromShaderLegacy : public AssemblyFromShader { +public: + AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key); + ~AssemblyFromShaderLegacy() override; +private: + bool do_lower(const std::vector<InstructionBlock> &ir) override ; + + struct AssemblyFromShaderLegacyImpl *impl; +}; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp new file mode 100644 index 000000000..28eef0593 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2017-2019 Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_liverange.h" +#include "sfn_debug.h" +#include "sfn_value.h" +#include "sfn_value_gpr.h" + +#include "program/prog_instruction.h" +#include "util/bitscan.h" +#include "util/u_math.h" + +#include <limits> +#include <cstdlib> +#include <iomanip> + +/* std::sort is significantly faster than qsort */ +#include <algorithm> + +/* If <windows.h> is included this is defined and clashes with + * std::numeric_limits<>::max() + */ +#ifdef max +#undef max +#endif + + +namespace r600 { + +using std::numeric_limits; +using std::unique_ptr; +using std::setw; + +prog_scope_storage::prog_scope_storage(int n): + current_slot(0), + storage(n) +{ +} + +prog_scope_storage::~prog_scope_storage() +{ +} + +prog_scope* +prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id, + int lvl, int s_begin) +{ + storage[current_slot] = prog_scope(p, type, id, lvl, s_begin); + return &storage[current_slot++]; +} + +prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id, + int depth, int scope_begin): + scope_type(type), + scope_id(id), + scope_nesting_depth(depth), + scope_begin(scope_begin), + scope_end(-1), + break_loop_line(numeric_limits<int>::max()), + parent_scope(parent) +{ +} + +prog_scope::prog_scope(): + prog_scope(nullptr, undefined_scope, -1, -1, -1) +{ +} + +prog_scope_type prog_scope::type() const +{ + return scope_type; +} + +prog_scope *prog_scope::parent() const +{ + return parent_scope; +} + +int prog_scope::nesting_depth() const +{ + return scope_nesting_depth; +} + +bool prog_scope::is_loop() const +{ + return (scope_type == loop_body); +} + +bool prog_scope::is_in_loop() const +{ + if (scope_type == loop_body) + return true; + + if (parent_scope) + return parent_scope->is_in_loop(); + + return false; +} + +const prog_scope *prog_scope::innermost_loop() const +{ + if (scope_type == loop_body) + return this; + + if (parent_scope) + return parent_scope->innermost_loop(); + + return nullptr; +} + +const prog_scope *prog_scope::outermost_loop() const +{ + const prog_scope *loop = nullptr; + const prog_scope *p = this; + + do { + if (p->type() == loop_body) + loop = p; + p = p->parent(); + } while (p); + + return loop; +} + +bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const +{ + const prog_scope *my_parent = in_parent_ifelse_scope(); + while (my_parent) { + /* is a direct child? */ + if (my_parent == scope) + return false; + /* is a child of the conditions sibling? 
*/ + if (my_parent->id() == scope->id()) + return true; + my_parent = my_parent->in_parent_ifelse_scope(); + } + return false; +} + +bool prog_scope::is_child_of(const prog_scope *scope) const +{ + const prog_scope *my_parent = parent(); + while (my_parent) { + if (my_parent == scope) + return true; + my_parent = my_parent->parent(); + } + return false; +} + +const prog_scope *prog_scope::enclosing_conditional() const +{ + if (is_conditional()) + return this; + + if (parent_scope) + return parent_scope->enclosing_conditional(); + + return nullptr; +} + +bool prog_scope::contains_range_of(const prog_scope& other) const +{ + return (begin() <= other.begin()) && (end() >= other.end()); +} + +bool prog_scope::is_conditional() const +{ + return scope_type == if_branch || + scope_type == else_branch || + scope_type == switch_case_branch || + scope_type == switch_default_branch; +} + +const prog_scope *prog_scope::in_else_scope() const +{ + if (scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_else_scope(); + + return nullptr; +} + +const prog_scope *prog_scope::in_parent_ifelse_scope() const +{ + if (parent_scope) + return parent_scope->in_ifelse_scope(); + else + return nullptr; +} + +const prog_scope *prog_scope::in_ifelse_scope() const +{ + if (scope_type == if_branch || + scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_ifelse_scope(); + + return nullptr; +} + +bool prog_scope::is_switchcase_scope_in_loop() const +{ + return (scope_type == switch_case_branch || + scope_type == switch_default_branch) && + is_in_loop(); +} + +bool prog_scope::break_is_for_switchcase() const +{ + if (scope_type == loop_body) + return false; + + if (scope_type == switch_case_branch || + scope_type == switch_default_branch || + scope_type == switch_body) + return true; + + if (parent_scope) + return parent_scope->break_is_for_switchcase(); + + return false; +} + +int prog_scope::id() const +{ + return scope_id; +} + +int prog_scope::begin() const +{ + return scope_begin; +} + +int prog_scope::end() const +{ + return scope_end; +} + +void prog_scope::set_end(int end) +{ + if (scope_end == -1) + scope_end = end; +} + +void prog_scope::set_loop_break_line(int line) +{ + if (scope_type == loop_body) { + break_loop_line = MIN2(break_loop_line, line); + } else { + if (parent_scope) + parent()->set_loop_break_line(line); + } +} + +int prog_scope::loop_break_line() const +{ + return break_loop_line; +} + +temp_access::temp_access(): + access_mask(0), + needs_component_tracking(false), + is_array_element(false) +{ +} + +void temp_access::update_access_mask(int mask) +{ + if (access_mask && access_mask != mask) + needs_component_tracking = true; + access_mask |= mask; +} + +void temp_access::record_write(int line, prog_scope *scope, int writemask, bool is_array_elm) +{ + + + update_access_mask(writemask); + is_array_element |= is_array_elm; + + if (writemask & WRITEMASK_X) + comp[0].record_write(line, scope); + if (writemask & WRITEMASK_Y) + comp[1].record_write(line, scope); + if (writemask & WRITEMASK_Z) + comp[2].record_write(line, scope); + if (writemask & WRITEMASK_W) + comp[3].record_write(line, scope); +} + +void temp_access::record_read(int line, prog_scope *scope, int readmask, bool is_array_elm) +{ + update_access_mask(readmask); + is_array_element |= is_array_elm; + + if (readmask & WRITEMASK_X) + comp[0].record_read(line, scope); + if (readmask & WRITEMASK_Y) + comp[1].record_read(line, scope); + if (readmask & WRITEMASK_Z) + 
comp[2].record_read(line, scope);
+   if (readmask & WRITEMASK_W)
+      comp[3].record_read(line, scope);
+}
+
+inline static register_live_range make_live_range(int b, int e)
+{
+   register_live_range lt;
+   lt.begin = b;
+   lt.end = e;
+   lt.is_array_elm = false;
+   return lt;
+}
+
+register_live_range temp_access::get_required_live_range()
+{
+   register_live_range result = make_live_range(-1, -1);
+
+   unsigned mask = access_mask;
+   while (mask) {
+      unsigned chan = u_bit_scan(&mask);
+      register_live_range lt = comp[chan].get_required_live_range();
+
+      if (lt.begin >= 0) {
+         if ((result.begin < 0) || (result.begin > lt.begin))
+            result.begin = lt.begin;
+      }
+
+      if (lt.end > result.end)
+         result.end = lt.end;
+
+      if (!needs_component_tracking)
+         break;
+   }
+   result.is_array_elm = is_array_element;
+
+   return result;
+}
+
+const int
+temp_comp_access::conditionality_untouched = std::numeric_limits<int>::max();
+
+const int
+temp_comp_access::write_is_unconditional = std::numeric_limits<int>::max() - 1;
+
+temp_comp_access::temp_comp_access():
+   last_read_scope(nullptr),
+   first_read_scope(nullptr),
+   first_write_scope(nullptr),
+   first_write(-1),
+   last_read(-1),
+   last_write(-1),
+   first_read(numeric_limits<int>::max()),
+   conditionality_in_loop_id(conditionality_untouched),
+   if_scope_write_flags(0),
+   next_ifelse_nesting_depth(0),
+   current_unpaired_if_write_scope(nullptr),
+   was_written_in_current_else_scope(false)
+{
+}
+
+void temp_comp_access::record_read(int line, prog_scope *scope)
+{
+   last_read_scope = scope;
+   if (last_read < line)
+      last_read = line;
+
+   if (first_read > line) {
+      first_read = line;
+      first_read_scope = scope;
+   }
+
+   /* If the conditionality of the first write is already resolved then
+    * no further checks are required.
+    */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* Check whether we are in a condition within a loop */
+   const prog_scope *ifelse_scope = scope->in_ifelse_scope();
+   const prog_scope *enclosing_loop;
+   if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
+
+      /* If we have neither written to this register yet, nor are writes
+       * resolved as unconditional in the enclosing loop, then check whether
+       * we read before write in an IF/ELSE branch.
+       */
+      if ((conditionality_in_loop_id != write_is_conditional) &&
+          (conditionality_in_loop_id != enclosing_loop->id())) {
+
+         if (current_unpaired_if_write_scope) {
+
+            /* Has it been written in this or a parent scope? Then the
+             * temporary is unconditionally set at this point.
+             */
+            if (scope->is_child_of(current_unpaired_if_write_scope))
+               return;
+
+            /* Has it been written in the same scope before it was read? */
+            if (ifelse_scope->type() == if_branch) {
+               if (current_unpaired_if_write_scope->id() == scope->id())
+                  return;
+            } else {
+               if (was_written_in_current_else_scope)
+                  return;
+            }
+         }
+
+         /* The temporary was read (conditionally) before it is written, hence
+          * it should survive a loop. This can be signaled as if it were
+          * conditionally written.
+          */
+         conditionality_in_loop_id = write_is_conditional;
+      }
+   }
+}
+
+void temp_comp_access::record_write(int line, prog_scope *scope)
+{
+   last_write = line;
+
+   if (first_write < 0) {
+      first_write = line;
+      first_write_scope = scope;
+
+      /* If the first write we encounter is not in a conditional branch, or
+       * the conditional write is not within a loop, then this is to be
+       * considered an unconditional dominant write.
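+       *
+       * A hypothetical illustration (not from the sources):
+       *    t = a;           // first write, not in any branch
+       *    if (b) t = c;    // later conditional write
+       * Here the first write dominates, so the value does not need to be
+       * kept alive across a whole loop iteration.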
+       */
+      const prog_scope *conditional = scope->enclosing_conditional();
+      if (!conditional || !conditional->innermost_loop()) {
+         conditionality_in_loop_id = write_is_unconditional;
+      }
+   }
+
+   /* The conditionality of the first write is already resolved. */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* If the nesting depth is larger than the supported level,
+    * then we assume conditional writes.
+    */
+   if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
+      conditionality_in_loop_id = write_is_conditional;
+      return;
+   }
+
+   /* If we are in an IF/ELSE scope within a loop and the loop has not
+    * been resolved already, then record this write.
+    */
+   const prog_scope *ifelse_scope = scope->in_ifelse_scope();
+   if (ifelse_scope && ifelse_scope->innermost_loop() &&
+       ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id)
+      record_ifelse_write(*ifelse_scope);
+}
+
+void temp_comp_access::record_ifelse_write(const prog_scope& scope)
+{
+   if (scope.type() == if_branch) {
+      /* The first write in an IF branch within a loop implies unresolved
+       * conditionality (if it was untouched or unconditional before).
+       */
+      conditionality_in_loop_id = conditionality_unresolved;
+      was_written_in_current_else_scope = false;
+      record_if_write(scope);
+   } else {
+      was_written_in_current_else_scope = true;
+      record_else_write(scope);
+   }
+}
+
+void temp_comp_access::record_if_write(const prog_scope& scope)
+{
+   /* Don't record the write in this IF scope if it ...
+    * - is not the first write in this IF scope,
+    * - has already been written in a parent IF scope.
+    * In both cases this write is a secondary write that doesn't contribute
+    * to resolve conditionality.
+    *
+    * Record the write if it
+    * - is the first one (obviously),
+    * - happens in an IF branch that is a child of the ELSE branch of the
+    *   last active IF/ELSE pair. In this case recording this write is used
+    *   to establish whether the write is (un-)conditional in the scope
+    *   enclosing this outer IF/ELSE pair.
+    */
+   if (!current_unpaired_if_write_scope ||
+       (current_unpaired_if_write_scope->id() != scope.id() &&
+        scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) {
+      if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
+      current_unpaired_if_write_scope = &scope;
+      next_ifelse_nesting_depth++;
+   }
+}
+
+void temp_comp_access::record_else_write(const prog_scope& scope)
+{
+   int mask = 1 << (next_ifelse_nesting_depth - 1);
+
+   /* If the temporary was written in an IF branch on the same scope level
+    * and this branch is the sibling of this ELSE branch, then we have a
+    * pair of writes that makes write access to this temporary unconditional
+    * in the enclosing scope.
+    */
+   if ((if_scope_write_flags & mask) &&
+       (scope.id() == current_unpaired_if_write_scope->id())) {
+      --next_ifelse_nesting_depth;
+      if_scope_write_flags &= ~mask;
+
+      /* The following code deals with propagating unconditionality from
+       * inner levels of nested IF/ELSE to the outer levels like in
+       *
+       * 1: var t;
+       * 2: if (a) {      <- start scope A
+       * 3:    if (b)
+       * 4:       t = ...
+       * 5:    else
+       * 6:       t = ...
+       * 7: } else {      <- start scope B
+       * 8:    if (c)
+       * 9:       t = ...
+       * A:    else       <- start scope C
+       * B:       t = ...
+ * C: } + * + */ + + const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope(); + + if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) { + /* We are at the end of scope C and already recorded a write + * within an IF scope (A), the sibling of the parent ELSE scope B, + * and it is not yet resolved. Mark that as the last relevant + * IF scope. Below the write will be resolved for the A/B + * scope pair. + */ + current_unpaired_if_write_scope = parent_ifelse; + } else { + current_unpaired_if_write_scope = nullptr; + } + /* Promote the first write scope to the enclosing scope because + * the current IF/ELSE pair is now irrelevant for the analysis. + * This is also required to evaluate the minimum life time for t in + * { + * var t; + * if (a) + * t = ... + * else + * t = ... + * x = t; + * ... + * } + */ + first_write_scope = scope.parent(); + + /* If some parent is IF/ELSE and in a loop then propagate the + * write to that scope. Otherwise the write is unconditional + * because it happens in both corresponding IF/ELSE branches + * in this loop, and hence, record the loop id to signal the + * resolution. + */ + if (parent_ifelse && parent_ifelse->is_in_loop()) { + record_ifelse_write(*parent_ifelse); + } else { + conditionality_in_loop_id = scope.innermost_loop()->id(); + } + } else { + /* The temporary was not written in the IF branch corresponding + * to this ELSE branch, hence the write is conditional. + */ + conditionality_in_loop_id = write_is_conditional; + } +} + +bool temp_comp_access::conditional_ifelse_write_in_loop() const +{ + return conditionality_in_loop_id <= conditionality_unresolved; +} + +void temp_comp_access::propagate_live_range_to_dominant_write_scope() +{ + first_write = first_write_scope->begin(); + int lr = first_write_scope->end(); + + if (last_read < lr) + last_read = lr; +} + +register_live_range temp_comp_access::get_required_live_range() +{ + bool keep_for_full_loop = false; + + /* This register component is not used at all, or only read, + * mark it as unused and ignore it when renaming. + * glsl_to_tgsi_visitor::renumber_registers will take care of + * eliminating registers that are not written to. + */ + if (last_write < 0) + return make_live_range(-1, -1); + + assert(first_write_scope); + + /* Only written to, just make sure the register component is not + * reused in the range it is used to write to + */ + if (!last_read_scope) + return make_live_range(first_write, last_write + 1); + + const prog_scope *enclosing_scope_first_read = first_read_scope; + const prog_scope *enclosing_scope_first_write = first_write_scope; + + /* We read before writing in a loop + * hence the value must survive the loops + */ + if ((first_read <= first_write) && + first_read_scope->is_in_loop()) { + keep_for_full_loop = true; + enclosing_scope_first_read = first_read_scope->outermost_loop(); + } + + /* A conditional write within a (nested) loop must survive the outermost + * loop if the last read was not within the same scope. + */ + const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional(); + if (conditional && !conditional->contains_range_of(*last_read_scope) && + (conditional->is_switchcase_scope_in_loop() || + conditional_ifelse_write_in_loop())) { + keep_for_full_loop = true; + enclosing_scope_first_write = conditional->outermost_loop(); + } + + /* Evaluate the scope that is shared by all: required first write scope, + * required first read before write scope, and last read scope. 
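+    * (For example, if the value is written inside a loop but read after
+    * it, the shared scope must enclose both the loop and the read; the
+    * loop below walks up the parent scopes until this holds.)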
+ */ + const prog_scope *enclosing_scope = enclosing_scope_first_read; + if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) + enclosing_scope = enclosing_scope_first_write; + + if (last_read_scope->contains_range_of(*enclosing_scope)) + enclosing_scope = last_read_scope; + + while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || + !enclosing_scope->contains_range_of(*last_read_scope)) { + enclosing_scope = enclosing_scope->parent(); + assert(enclosing_scope); + } + + /* Propagate the last read scope to the target scope */ + while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { + /* If the read is in a loop and we have to move up the scope we need to + * extend the live range to the end of this current loop because at this + * point we don't know whether the component was written before + * un-conditionally in the same loop. + */ + if (last_read_scope->is_loop()) + last_read = last_read_scope->end(); + + last_read_scope = last_read_scope->parent(); + } + + /* If the variable has to be kept for the whole loop, and we + * are currently in a loop, then propagate the live range. + */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + + /* Propagate the first_dominant_write scope to the target scope */ + while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { + /* Propagate live_range if there was a break in a loop and the write was + * after the break inside that loop. Note, that this is only needed if + * we move up in the scopes. + */ + if (first_write_scope->loop_break_line() < first_write) { + keep_for_full_loop = true; + propagate_live_range_to_dominant_write_scope(); + } + + first_write_scope = first_write_scope->parent(); + + /* Propagate live_range if we are now in a loop */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + } + + /* The last write past the last read is dead code, but we have to + * ensure that the component is not reused too early, hence extend the + * live_range past the last write. + */ + if (last_write >= last_read) + last_read = last_write + 1; + + /* Here we are at the same scope, all is resolved */ + return make_live_range(first_write, last_read); +} + +/* Helper class for sorting and searching the registers based + * on live ranges. 
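+ * Records compare by the first line of the live range, so a sorted
+ * vector of them can be searched directly with std::upper_bound (see
+ * get_temp_registers_remapping below).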
*/ +class register_merge_record { +public: + int begin; + int end; + int reg; + bool erase; + bool is_array_elm; + + bool operator < (const register_merge_record& rhs) const { + return begin < rhs.begin; + } +}; + +LiverangeEvaluator::LiverangeEvaluator(): + line(0), + loop_id(1), + if_id(1), + switch_id(0), + is_at_end(false), + n_scopes(1), + cur_scope(nullptr) +{ +} + +void LiverangeEvaluator::run(const Shader& shader, + std::vector<register_live_range>& register_live_ranges) +{ + temp_acc.resize(register_live_ranges.size()); + fill(temp_acc.begin(), temp_acc.end(), temp_access()); + + sfn_log << SfnLog::merge << "have " << temp_acc.size() << " temps\n"; + + for (const auto& block: shader.m_ir) { + for (const auto& ir: block) { + switch (ir->type()) { + case Instruction::cond_if: + case Instruction::cond_else: + case Instruction::loop_begin: + ++n_scopes; + default: + ; + } + } + } + + scopes.reset(new prog_scope_storage(n_scopes)); + + cur_scope = scopes->create(nullptr, outer_scope, 0, 0, line); + + line = 0; + + for (auto& v: shader.m_temp) { + if (v.second->type() == Value::gpr) { + sfn_log << SfnLog::merge << "Record " << *v.second << "\n"; + const auto& g = static_cast<const GPRValue&>(*v.second); + if (g.is_input()) { + sfn_log << SfnLog::merge << "Record INPUT write for " + << g << " in " << temp_acc.size() << " temps\n"; + temp_acc[g.sel()].record_write(line, cur_scope, 1 << g.chan(), false); + temp_acc[g.sel()].record_read(line, cur_scope, 1 << g.chan(), false); + } + if (g.keep_alive()) { + sfn_log << SfnLog::merge << "Record KEEP ALIVE for " + << g << " in " << temp_acc.size() << " temps\n"; + temp_acc[g.sel()].record_read(0x7fffff, cur_scope, 1 << g.chan(), false); + } + } + } + + for (const auto& block: shader.m_ir) + for (const auto& ir: block) { + ir->evalue_liveness(*this); + if (ir->type() != Instruction::alu || + static_cast<const AluInstruction&>(*ir).flag(alu_last_instr)) + ++line; + } + + assert(cur_scope->type() == outer_scope); + cur_scope->set_end(line); + is_at_end = true; + + get_required_live_ranges(register_live_ranges); +} + + +void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm) +{ + sfn_log << SfnLog::merge << "Record read l:" << line << " reg:" << src << "\n"; + if (src.type() == Value::gpr) { + const GPRValue& v = static_cast<const GPRValue&>(src); + if (v.chan() < 4) + temp_acc[v.sel()].record_read(v.keep_alive() ? 
0x7fffff: line, cur_scope, 1 << v.chan(), is_array_elm);
+      return;
+   } else if (src.type() == Value::gpr_array_value) {
+      const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
+      v.record_read(*this);
+   } else if (src.type() == Value::kconst) {
+      const UniformValue& v = static_cast<const UniformValue&>(src);
+      if (v.addr())
+         record_read(*v.addr(), is_array_elm);
+   }
+}
+
+void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm)
+{
+   sfn_log << SfnLog::merge << "Record write for "
+           << src << " in " << temp_acc.size() << " temps\n";
+
+   if (src.type() == Value::gpr) {
+      const GPRValue& v = static_cast<const GPRValue&>(src);
+      assert(v.sel() < temp_acc.size());
+      if (v.chan() < 4)
+         temp_acc[v.sel()].record_write(line, cur_scope, 1 << v.chan(), is_array_elm);
+      return;
+   } else if (src.type() == Value::gpr_array_value) {
+      const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
+      v.record_write(*this);
+   } else if (src.type() == Value::kconst) {
+      const UniformValue& v = static_cast<const UniformValue&>(src);
+      if (v.addr())
+         record_write(*v.addr(), is_array_elm);
+   }
+}
+
+void LiverangeEvaluator::record_read(const GPRVector& src)
+{
+   for (int i = 0; i < 4; ++i)
+      if (src.reg_i(i))
+         record_read(*src.reg_i(i));
+}
+
+void LiverangeEvaluator::record_write(const GPRVector& dst)
+{
+   for (int i = 0; i < 4; ++i)
+      if (dst.reg_i(i))
+         record_write(*dst.reg_i(i));
+}
+
+void LiverangeEvaluator::get_required_live_ranges(std::vector<register_live_range>& register_live_ranges)
+{
+   sfn_log << SfnLog::merge << "== register live ranges ==========\n";
+   for (unsigned i = 0; i < register_live_ranges.size(); ++i) {
+      sfn_log << SfnLog::merge << setw(4) << i;
+      register_live_ranges[i] = temp_acc[i].get_required_live_range();
+      sfn_log << SfnLog::merge << ": [" << register_live_ranges[i].begin << ", "
+              << register_live_ranges[i].end << "]\n";
+   }
+   sfn_log << SfnLog::merge << "==================================\n\n";
+}
+
+void LiverangeEvaluator::scope_if()
+{
+   cur_scope = scopes->create(cur_scope, if_branch, if_id++,
+                              cur_scope->nesting_depth() + 1, line + 1);
+}
+
+void LiverangeEvaluator::scope_else()
+{
+   assert(cur_scope->type() == if_branch);
+   cur_scope->set_end(line - 1);
+   cur_scope = scopes->create(cur_scope->parent(), else_branch,
+                              cur_scope->id(), cur_scope->nesting_depth(),
+                              line + 1);
+}
+
+void LiverangeEvaluator::scope_endif()
+{
+   cur_scope->set_end(line - 1);
+   cur_scope = cur_scope->parent();
+   assert(cur_scope);
+}
+
+void LiverangeEvaluator::scope_loop_begin()
+{
+   cur_scope = scopes->create(cur_scope, loop_body, loop_id++,
+                              cur_scope->nesting_depth() + 1, line);
+}
+
+void LiverangeEvaluator::scope_loop_end()
+{
+   assert(cur_scope->type() == loop_body);
+   cur_scope->set_end(line);
+   cur_scope = cur_scope->parent();
+   assert(cur_scope);
+}
+
+void LiverangeEvaluator::scope_loop_break()
+{
+   cur_scope->set_loop_break_line(line);
+}
+
+/* This function evaluates the register merges by using a binary
+ * search to find suitable merge candidates.
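+ *
+ * A hypothetical example: given live ranges R1:[0,10], R3:[5,15] and
+ * R2:[12,20], R2 is the first record whose begin lies past R1's end,
+ * so R2 is renamed to R1 and R1's range grows to [0,20], while R3
+ * overlaps R1 and keeps its own register.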
*/ + +std::vector<rename_reg_pair> +get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges) +{ + + std::vector<rename_reg_pair> result(live_ranges.size(), rename_reg_pair{false, false, 0}); + std::vector<register_merge_record> reg_access; + + for (unsigned i = 0; i < live_ranges.size(); ++i) { + if (live_ranges[i].begin >= 0) { + register_merge_record r; + r.begin = live_ranges[i].begin; + r.end = live_ranges[i].end; + r.is_array_elm = live_ranges[i].is_array_elm; + r.reg = i; + r.erase = false; + reg_access.push_back(r); + } + } + + std::sort(reg_access.begin(), reg_access.end()); + + for (auto& r : reg_access) + sfn_log << SfnLog::merge << "Use Range " <<r.reg << " [" + << r.begin << ", " << r.end << "]\n"; + + auto trgt = reg_access.begin(); + auto reg_access_end = reg_access.end(); + auto first_erase = reg_access_end; + auto search_start = trgt + 1; + + while (trgt != reg_access_end) { + /* Find the next register that has a live-range starting past the + * search start and that is not an array element. Array elements can't + * be moved (Moving the whole array could be an option to be implemented later)*/ + + sfn_log << SfnLog::merge << "Next target is " + << trgt->reg << "[" << trgt->begin << ", " << trgt->end << "]\n"; + + + auto src = upper_bound(search_start, reg_access_end, trgt->end, + [](int bound, const register_merge_record& m){ + return bound < m.begin && !m.is_array_elm;} + ); + + if (src != reg_access_end) { + result[src->reg].new_reg = trgt->reg; + result[src->reg].valid = true; + + sfn_log << SfnLog::merge << "Map " + << src->reg << "[" << src->begin << ", " << src->end << "] to " + << trgt->reg << "[" << trgt->begin << ", " << trgt->end << ":"; + trgt->end = src->end; + sfn_log << SfnLog::merge << trgt->end << "]\n"; + + /* Since we only search forward, don't remove the renamed + * register just now, only mark it. */ + src->erase = true; + + if (first_erase == reg_access_end) + first_erase = src; + + search_start = src + 1; + } else { + /* Moving to the next target register it is time to remove + * the already merged registers from the search range */ + if (first_erase != reg_access_end) { + auto outp = first_erase; + auto inp = first_erase + 1; + + while (inp != reg_access_end) { + if (!inp->erase) + *outp++ = *inp; + ++inp; + } + + reg_access_end = outp; + first_erase = reg_access_end; + } + ++trgt; + search_start = trgt + 1; + } + } + return result; +} + +} // end ns r600 diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h new file mode 100644 index 000000000..8b9ed2ef2 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h @@ -0,0 +1,314 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_LIVERANGE_H +#define SFN_LIVERANGE_H + +#include <cstdint> +#include <ostream> +#include <vector> +#include <limits> + +#include "sfn_instruction_base.h" +#include "sfn_nir.h" + +namespace r600 { + +/** Storage to record the required live range of a temporary register + * begin == end == -1 indicates that the register can be reused without + * limitations. Otherwise, "begin" indicates the first instruction in which + * a write operation may target this temporary, and end indicates the + * last instruction in which a value can be read from this temporary. + * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin. + */ +struct register_live_range { + int begin; + int end; + bool is_array_elm; +}; + +enum prog_scope_type { + outer_scope, /* Outer program scope */ + loop_body, /* Inside a loop */ + if_branch, /* Inside if branch */ + else_branch, /* Inside else branch */ + switch_body, /* Inside switch statement */ + switch_case_branch, /* Inside switch case statement */ + switch_default_branch, /* Inside switch default statement */ + undefined_scope +}; + +class prog_scope { +public: + prog_scope(); + prog_scope(prog_scope *parent, prog_scope_type type, int id, + int depth, int begin); + + prog_scope_type type() const; + prog_scope *parent() const; + int nesting_depth() const; + int id() const; + int end() const; + int begin() const; + int loop_break_line() const; + + const prog_scope *in_else_scope() const; + const prog_scope *in_ifelse_scope() const; + const prog_scope *in_parent_ifelse_scope() const; + const prog_scope *innermost_loop() const; + const prog_scope *outermost_loop() const; + const prog_scope *enclosing_conditional() const; + + bool is_loop() const; + bool is_in_loop() const; + bool is_switchcase_scope_in_loop() const; + bool is_conditional() const; + bool is_child_of(const prog_scope *scope) const; + bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const; + + bool break_is_for_switchcase() const; + bool contains_range_of(const prog_scope& other) const; + + void set_end(int end); + void set_loop_break_line(int line); + +private: + prog_scope_type scope_type; + int scope_id; + int scope_nesting_depth; + int scope_begin; + int scope_end; + int break_loop_line; + prog_scope *parent_scope; +}; + +/* Some storage class to encapsulate the prog_scope (de-)allocations */ +class prog_scope_storage { +public: + prog_scope_storage(int n); + ~prog_scope_storage(); + prog_scope * create(prog_scope *p, prog_scope_type type, int id, + int lvl, int s_begin); +private: + int current_slot; + std::vector<prog_scope> storage; +}; + +/* Class to track the access to a component of a temporary register. 
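+ * One instance tracks a single channel; temp_access below aggregates
+ * four of these, one per component x, y, z, and w.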
 */
+
+class temp_comp_access {
+public:
+   temp_comp_access();
+
+   void record_read(int line, prog_scope *scope);
+   void record_write(int line, prog_scope *scope);
+   register_live_range get_required_live_range();
+private:
+   void propagate_live_range_to_dominant_write_scope();
+   bool conditional_ifelse_write_in_loop() const;
+
+   void record_ifelse_write(const prog_scope& scope);
+   void record_if_write(const prog_scope& scope);
+   void record_else_write(const prog_scope& scope);
+
+   prog_scope *last_read_scope;
+   prog_scope *first_read_scope;
+   prog_scope *first_write_scope;
+
+   int first_write;
+   int last_read;
+   int last_write;
+   int first_read;
+
+   /* This member variable tracks the current resolution of conditional writing
+    * to this temporary in IF/ELSE clauses.
+    *
+    * The initial value "conditionality_untouched" indicates that this
+    * temporary has not yet been written to within an if clause.
+    *
+    * A positive (other than "conditionality_untouched") number refers to the
+    * last loop id for which the write was resolved as unconditional. With each
+    * new loop this value will be overwritten by "conditionality_unresolved"
+    * on entering the first IF clause writing this temporary.
+    *
+    * The value "conditionality_unresolved" indicates that no resolution has
+    * been achieved so far. If the variable is set to this value at the end of
+    * the processing of the whole shader it also indicates a conditional write.
+    *
+    * The value "write_is_conditional" marks that the variable is written
+    * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
+    * least one loop.
+    */
+   int conditionality_in_loop_id;
+
+   /* Helper constants to make the tracking code more readable. */
+   static const int write_is_conditional = -1;
+   static const int conditionality_unresolved = 0;
+   static const int conditionality_untouched;
+   static const int write_is_unconditional;
+
+   /* A bit field tracking the nesting levels of if-else clauses where the
+    * temporary has (so far) been written to in the if branch, but not in the
+    * else branch.
+    */
+   unsigned int if_scope_write_flags;
+
+   int next_ifelse_nesting_depth;
+   static const int supported_ifelse_nesting_depth = 32;
+
+   /* Tracks the last if scope in which the temporary was written to
+    * without a write in the corresponding else branch. Is also used
+    * to track read-before-write in the corresponding scope.
+    */
+   const prog_scope *current_unpaired_if_write_scope;
+
+   /* Flag to resolve read-before-write in the else scope. */
+   bool was_written_in_current_else_scope;
+};
+
+/* Class to track the access to all components of a temporary register. */
+class temp_access {
+public:
+   temp_access();
+   void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
+   void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
+   register_live_range get_required_live_range();
+private:
+   void update_access_mask(int mask);
+
+   temp_comp_access comp[4];
+   int access_mask;
+   bool needs_component_tracking;
+   bool is_array_element;
+};
+
+/* Helper class to merge the live ranges of arrays.
+ *
+ * For arrays the array length, live range, and component access need to
+ * be kept, because when live ranges are merged or arrays are interleaved
+ * one can only merge or interleave an array into another with equal or more
+ * elements. For interleaving it is also required that the sum of used
+ * swizzles is at most four.
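+ *
+ * For instance, an array that only accesses .xy can be interleaved with
+ * one that only accesses .z or .zw, since together they still fit into
+ * the four available components.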
+ */ + +class array_live_range { +public: + array_live_range(); + array_live_range(unsigned aid, unsigned alength); + array_live_range(unsigned aid, unsigned alength, int first_access, + int last_access, int mask); + + void set_live_range(int first_access, int last_access); + void set_begin(int _begin){first_access = _begin;} + void set_end(int _end){last_access = _end;} + void set_access_mask(int s); + + static void merge(array_live_range *a, array_live_range *b); + static void interleave(array_live_range *a, array_live_range *b); + + int array_id() const {return id;} + int target_array_id() const {return target_array ? target_array->id : 0;} + const array_live_range *final_target() const {return target_array ? + target_array->final_target() : this;} + unsigned array_length() const { return length;} + int begin() const { return first_access;} + int end() const { return last_access;} + int access_mask() const { return component_access_mask;} + int used_components() const {return used_component_count;} + + bool time_doesnt_overlap(const array_live_range& other) const; + + void print(std::ostream& os) const; + + bool is_mapped() const { return target_array != nullptr;} + + int8_t remap_one_swizzle(int8_t idx) const; + +private: + void init_swizzles(); + void set_target(array_live_range *target); + void merge_live_range_from(array_live_range *other); + void interleave_into(array_live_range *other); + + unsigned id; + unsigned length; + int first_access; + int last_access; + uint8_t component_access_mask; + uint8_t used_component_count; + array_live_range *target_array; + int8_t swizzle_map[4]; +}; + + + +class LiverangeEvaluator { +public: + LiverangeEvaluator(); + + void run(const Shader& shader, + std::vector<register_live_range> ®ister_live_ranges); + + void scope_if(); + void scope_else(); + void scope_endif(); + void scope_loop_begin(); + void scope_loop_end(); + void scope_loop_break(); + + void record_read(const Value& src, bool is_array_elm = false); + void record_write(const Value& dst, bool is_array_elm = false); + + void record_read(const GPRVector& src); + void record_write(const GPRVector& dst); + +private: + + prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id, + int lvl, int s_begin); + + + void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges); + + int line; + int loop_id; + int if_id; + int switch_id; + bool is_at_end; + int n_scopes; + std::unique_ptr<prog_scope_storage> scopes; + prog_scope *cur_scope; + + std::vector<temp_access> temp_acc; + +}; + +std::vector<rename_reg_pair> +get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges); + +} // end namespace r600 + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp new file mode 100644 index 000000000..b421f838c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -0,0 +1,1076 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + 
* The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_nir.h" +#include "nir_builder.h" + +#include "../r600_pipe.h" +#include "../r600_shader.h" + +#include "sfn_instruction_tex.h" + +#include "sfn_shader_vertex.h" +#include "sfn_shader_fragment.h" +#include "sfn_shader_geometry.h" +#include "sfn_shader_compute.h" +#include "sfn_shader_tcs.h" +#include "sfn_shader_tess_eval.h" +#include "sfn_nir_lower_fs_out_to_vector.h" +#include "sfn_ir_to_assembly.h" + +#include <vector> + +namespace r600 { + +using std::vector; + + +NirLowerInstruction::NirLowerInstruction(): + b(nullptr) +{ + +} + +bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data) +{ + auto me = reinterpret_cast<const NirLowerInstruction*>(data); + return me->filter(instr); +} + +nir_ssa_def *NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data) +{ + auto me = reinterpret_cast<NirLowerInstruction*>(data); + me->set_builder(b); + return me->lower(instr); +} + +bool NirLowerInstruction::run(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + filter_instr, + lower_instr, + (void *)this); +} + + +ShaderFromNir::ShaderFromNir():sh(nullptr), + chip_class(CLASS_UNKNOWN), + m_current_if_id(0), + m_current_loop_id(0), + scratch_size(0) +{ +} + +bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader, + r600_pipe_shader_selector *sel, r600_shader_key& key, + struct r600_shader* gs_shader, enum chip_class _chip_class) +{ + sh = shader; + chip_class = _chip_class; + assert(sh); + + switch (shader->info.stage) { + case MESA_SHADER_VERTEX: + impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class)); + break; + case MESA_SHADER_TESS_CTRL: + sfn_log << SfnLog::trans << "Start TCS\n"; + impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class)); + break; + case MESA_SHADER_TESS_EVAL: + sfn_log << SfnLog::trans << "Start TESS_EVAL\n"; + impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class)); + break; + case MESA_SHADER_GEOMETRY: + sfn_log << SfnLog::trans << "Start GS\n"; + impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class)); + break; + case MESA_SHADER_FRAGMENT: + sfn_log << SfnLog::trans << "Start FS\n"; + impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class)); + break; + case MESA_SHADER_COMPUTE: + sfn_log << SfnLog::trans << "Start CS\n"; + impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class)); + break; + default: + return false; + } + + sfn_log << SfnLog::trans << "Process declarations\n"; + if (!process_declaration()) + return false; + + // at this point all functions should be inlined + const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions)); + + sfn_log << SfnLog::trans << "Scan shader\n"; + + if (sfn_log.has_debug_flag(SfnLog::instr)) 
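+      /* dump the incoming NIR when the "instr" debug flag is set */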
+ nir_print_shader(const_cast<nir_shader *>(shader), stderr); + + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (!impl->scan_instruction(instr)) { + fprintf(stderr, "Unhandled sysvalue access "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + return false; + } + } + } + + sfn_log << SfnLog::trans << "Reserve registers\n"; + if (!impl->allocate_reserved_registers()) { + return false; + } + + ValuePool::array_list arrays; + sfn_log << SfnLog::trans << "Allocate local registers\n"; + foreach_list_typed(nir_register, reg, node, &func->impl->registers) { + impl->allocate_local_register(*reg, arrays); + } + + sfn_log << SfnLog::trans << "Emit shader start\n"; + impl->allocate_arrays(arrays); + + impl->emit_shader_start(); + + sfn_log << SfnLog::trans << "Process shader \n"; + foreach_list_typed(nir_cf_node, node, node, &func->impl->body) { + if (!process_cf_node(node)) + return false; + } + + // Add optimizations here + sfn_log << SfnLog::trans << "Finalize\n"; + impl->finalize(); + + impl->get_array_info(pipe_shader->shader); + + if (!sfn_log.has_debug_flag(SfnLog::nomerge)) { + sfn_log << SfnLog::trans << "Merge registers\n"; + impl->remap_registers(); + } + + sfn_log << SfnLog::trans << "Finished translating to R600 IR\n"; + return true; +} + +Shader ShaderFromNir::shader() const +{ + return Shader{impl->m_output, impl->get_temp_registers()}; +} + + +bool ShaderFromNir::process_cf_node(nir_cf_node *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "CF"); + switch (node->type) { + case nir_cf_node_block: + return process_block(nir_cf_node_as_block(node)); + case nir_cf_node_if: + return process_if(nir_cf_node_as_if(node)); + case nir_cf_node_loop: + return process_loop(nir_cf_node_as_loop(node)); + default: + return false; + } +} + +bool ShaderFromNir::process_if(nir_if *if_stmt) +{ + SFN_TRACE_FUNC(SfnLog::flow, "IF"); + + if (!impl->emit_if_start(m_current_if_id, if_stmt)) + return false; + + int if_id = m_current_if_id++; + m_if_stack.push(if_id); + + foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) + if (!process_cf_node(n)) return false; + + if (!if_stmt->then_list.is_empty()) { + if (!impl->emit_else_start(if_id)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list) + if (!process_cf_node(n)) return false; + } + + if (!impl->emit_ifelse_end(if_id)) + return false; + + m_if_stack.pop(); + return true; +} + +bool ShaderFromNir::process_loop(nir_loop *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "LOOP"); + int loop_id = m_current_loop_id++; + + if (!impl->emit_loop_start(loop_id)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &node->body) + if (!process_cf_node(n)) return false; + + if (!impl->emit_loop_end(loop_id)) + return false; + + return true; +} + +bool ShaderFromNir::process_block(nir_block *block) +{ + SFN_TRACE_FUNC(SfnLog::flow, "BLOCK"); + nir_foreach_instr(instr, block) { + int r = emit_instruction(instr); + if (!r) { + sfn_log << SfnLog::err << "R600: Unsupported instruction: " + << *instr << "\n"; + return false; + } + } + return true; +} + + +ShaderFromNir::~ShaderFromNir() +{ +} + +pipe_shader_type ShaderFromNir::processor_type() const +{ + return impl->m_processor_type; +} + + +bool ShaderFromNir::emit_instruction(nir_instr *instr) +{ + assert(impl); + + sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n"; + + switch (instr->type) { + case nir_instr_type_alu: + return impl->emit_alu_instruction(instr); + case nir_instr_type_deref: + return 
impl->emit_deref_instruction(nir_instr_as_deref(instr)); + case nir_instr_type_intrinsic: + return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr)); + case nir_instr_type_load_const: /* const values are loaded when needed */ + return true; + case nir_instr_type_tex: + return impl->emit_tex_instruction(instr); + case nir_instr_type_jump: + return impl->emit_jump_instruction(nir_instr_as_jump(instr)); + default: + fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type); + nir_print_instr(instr, stderr); + fprintf(stderr, "'\n"); + return false; + case nir_instr_type_ssa_undef: + return impl->create_undef(nir_instr_as_ssa_undef(instr)); + return true; + } +} + +bool ShaderFromNir::process_declaration() +{ + + if (!impl->scan_inputs_read(sh)) + return false; + + // scan declarations + nir_foreach_variable_with_modes(variable, sh, nir_var_uniform | + nir_var_mem_ubo | + nir_var_mem_ssbo) { + if (!impl->process_uniforms(variable)) { + fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name); + return false; + } + } + + return true; +} + +const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const +{ + assert(impl); + return impl->m_output; +} + + +AssemblyFromShader::~AssemblyFromShader() +{ +} + +bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir) +{ + return do_lower(ir); +} + +static nir_ssa_def * +r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_unpack_half_2x16: { + nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0); + return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed), + nir_unpack_half_2x16_split_y(b, packed)); + + } + case nir_op_pack_half_2x16: { + nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0); + return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0), + nir_channel(b, src_vec2, 1)); + } + default: + return nullptr; + } +} + +bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options) +{ + return instr->type == nir_instr_type_alu; +} + +bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_nir_lower_pack_unpack_2x16_filter, + r600_nir_lower_pack_unpack_2x16_impl, + nullptr); +}; + +static void +r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr) +{ + b->cursor = nir_before_instr(&instr->instr); + + int address_index = 0; + int align; + + if (instr->intrinsic == nir_intrinsic_store_scratch) { + align = instr->src[0].ssa->num_components; + address_index = 1; + } else{ + align = instr->dest.ssa.num_components; + } + + nir_ssa_def *address = instr->src[address_index].ssa; + nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align)); + + nir_instr_rewrite_src(&instr->instr, &instr->src[address_index], + nir_src_for_ssa(new_address)); +} + +bool r600_lower_scratch_addresses(nir_shader *shader) +{ + bool progress = false; + nir_foreach_function(function, shader) { + nir_builder build; + nir_builder_init(&build, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + if (op->intrinsic != nir_intrinsic_load_scratch && + op->intrinsic != nir_intrinsic_store_scratch) + continue; + r600_nir_lower_scratch_address_impl(&build, op); + progress = true; 
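+            /* the lowering above rewrote the address source in place:
+             * the intrinsic now reads the original address right-shifted
+             * by 4 * align */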
+ } + } + } + return progress; +} + +static void +insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var) +{ + nir_foreach_variable_in_list(var, var_list) { + if (var->data.binding > new_var->data.binding || + (var->data.binding == new_var->data.binding && + var->data.offset > new_var->data.offset)) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + exec_list_push_tail(var_list, &new_var->node); +} + +void sort_uniforms(nir_shader *shader) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + + nir_foreach_uniform_variable_safe(var, shader) { + exec_node_remove(&var->node); + insert_uniform_sorted(&new_list, var); + } + exec_list_append(&shader->variables, &new_list); +} + +static void +insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var) +{ + + nir_foreach_variable_in_list(var, var_list) { + if (var->data.location > new_var->data.location || + (var->data.location == new_var->data.location && + var->data.index > new_var->data.index)) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + + exec_list_push_tail(var_list, &new_var->node); +} + +void sort_fsoutput(nir_shader *shader) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + + nir_foreach_shader_out_variable_safe(var, shader) { + exec_node_remove(&var->node); + insert_fsoutput_sorted(&new_list, var); + } + + unsigned driver_location = 0; + nir_foreach_variable_in_list(var, &new_list) + var->data.driver_location = driver_location++; + + exec_list_append(&shader->variables, &new_list); +} + +} + +static nir_intrinsic_op +r600_map_atomic(nir_intrinsic_op op) +{ + switch (op) { + case nir_intrinsic_atomic_counter_read_deref: + return nir_intrinsic_atomic_counter_read; + case nir_intrinsic_atomic_counter_inc_deref: + return nir_intrinsic_atomic_counter_inc; + case nir_intrinsic_atomic_counter_pre_dec_deref: + return nir_intrinsic_atomic_counter_pre_dec; + case nir_intrinsic_atomic_counter_post_dec_deref: + return nir_intrinsic_atomic_counter_post_dec; + case nir_intrinsic_atomic_counter_add_deref: + return nir_intrinsic_atomic_counter_add; + case nir_intrinsic_atomic_counter_min_deref: + return nir_intrinsic_atomic_counter_min; + case nir_intrinsic_atomic_counter_max_deref: + return nir_intrinsic_atomic_counter_max; + case nir_intrinsic_atomic_counter_and_deref: + return nir_intrinsic_atomic_counter_and; + case nir_intrinsic_atomic_counter_or_deref: + return nir_intrinsic_atomic_counter_or; + case nir_intrinsic_atomic_counter_xor_deref: + return nir_intrinsic_atomic_counter_xor; + case nir_intrinsic_atomic_counter_exchange_deref: + return nir_intrinsic_atomic_counter_exchange; + case nir_intrinsic_atomic_counter_comp_swap_deref: + return nir_intrinsic_atomic_counter_comp_swap; + default: + return nir_num_intrinsics; + } +} + +static bool +r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr, + nir_shader *shader) +{ + nir_intrinsic_op op = r600_map_atomic(instr->intrinsic); + if (nir_num_intrinsics == op) + return false; + + nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + if (var->data.mode != nir_var_uniform && + var->data.mode != nir_var_mem_ssbo && + var->data.mode != nir_var_mem_shared) + return false; /* atomics passed as function arguments can't be lowered */ + + const unsigned idx = var->data.binding; + + b->cursor = nir_before_instr(&instr->instr); + + nir_ssa_def *offset = nir_imm_int(b, var->data.index); + for 
(nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var; + d = nir_deref_instr_parent(d)) { + assert(d->deref_type == nir_deref_type_array); + assert(d->arr.index.is_ssa); + + unsigned array_stride = 1; + if (glsl_type_is_array(d->type)) + array_stride *= glsl_get_aoa_size(d->type); + + offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa, + nir_imm_int(b, array_stride))); + } + + /* Since the first source is a deref and the first source in the lowered + * instruction is the offset, we can just swap it out and change the + * opcode. + */ + instr->intrinsic = op; + nir_instr_rewrite_src(&instr->instr, &instr->src[0], + nir_src_for_ssa(offset)); + nir_intrinsic_set_base(instr, idx); + + nir_deref_instr_remove_if_unused(deref); + + return true; +} + +static bool +r600_nir_lower_atomics(nir_shader *shader) +{ + bool progress = false; + + /* First re-do the offsets, in Hardware we start at zero for each new + * binding, and we use an offset of one per counter */ + int current_binding = -1; + int current_offset = 0; + nir_foreach_variable_with_modes(var, shader, nir_var_uniform) { + if (!var->type->contains_atomic()) + continue; + + if (current_binding == (int)var->data.binding) { + var->data.index = current_offset; + current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE; + } else { + current_binding = var->data.binding; + var->data.index = 0; + current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE; + } + } + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + bool impl_progress = false; + + nir_builder build; + nir_builder_init(&build, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + impl_progress |= r600_lower_deref_instr(&build, + nir_instr_as_intrinsic(instr), shader); + } + } + + if (impl_progress) { + nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); + progress = true; + } + } + + return progress; +} +using r600::r600_nir_lower_int_tg4; +using r600::r600_nir_lower_pack_unpack_2x16; +using r600::r600_lower_scratch_addresses; +using r600::r600_lower_fs_out_to_vector; +using r600::r600_lower_ubo_to_align16; + +int +r600_glsl_type_size(const struct glsl_type *type, bool is_bindless) +{ + return glsl_count_vec4_slots(type, false, is_bindless); +} + +void +r600_get_natural_size_align_bytes(const struct glsl_type *type, + unsigned *size, unsigned *align) +{ + if (type->base_type != GLSL_TYPE_ARRAY) { + *align = 1; + *size = 1; + } else { + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(type->fields.array, + &elem_size, &elem_align); + *align = 1; + *size = type->length; + } +} + +static bool +r600_lower_shared_io_impl(nir_function *func) +{ + nir_builder b; + nir_builder_init(&b, func->impl); + + bool progress = false; + nir_foreach_block(block, func->impl) { + nir_foreach_instr_safe(instr, block) { + + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + if (op->intrinsic != nir_intrinsic_load_shared && + op->intrinsic != nir_intrinsic_store_shared) + continue; + + b.cursor = nir_before_instr(instr); + + if (op->intrinsic == nir_intrinsic_load_shared) { + nir_ssa_def *addr = op->src[0].ssa; + + switch (nir_dest_num_components(op->dest)) { + case 2: { + auto addr2 = nir_iadd_imm(&b, addr, 4); + addr = nir_vec2(&b, addr, addr2); + break; + } + case 3: { + auto addr2 = nir_iadd(&b, addr, 
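+                                 /* byte offsets of the second and third dwords */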
nir_imm_ivec2(&b, 4, 8)); + addr = nir_vec3(&b, addr, + nir_channel(&b, addr2, 0), + nir_channel(&b, addr2, 1)); + break; + } + case 4: { + addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12)); + break; + } + } + + auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600); + load->num_components = nir_dest_num_components(op->dest); + load->src[0] = nir_src_for_ssa(addr); + nir_ssa_dest_init(&load->instr, &load->dest, + load->num_components, 32, NULL); + nir_ssa_def_rewrite_uses(&op->dest.ssa, &load->dest.ssa); + nir_builder_instr_insert(&b, &load->instr); + } else { + nir_ssa_def *addr = op->src[1].ssa; + for (int i = 0; i < 2; ++i) { + unsigned test_mask = (0x3 << 2 * i); + if (!(nir_intrinsic_write_mask(op) & test_mask)) + continue; + + auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600); + unsigned writemask = nir_intrinsic_write_mask(op) & test_mask; + nir_intrinsic_set_write_mask(store, writemask); + store->src[0] = nir_src_for_ssa(op->src[0].ssa); + store->num_components = store->src[0].ssa->num_components; + bool start_even = (writemask & (1u << (2 * i))); + + auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4))); + store->src[1] = nir_src_for_ssa(addr2); + + nir_builder_instr_insert(&b, &store->instr); + } + } + nir_instr_remove(instr); + progress = true; + } + } + return progress; +} + +static bool +r600_lower_shared_io(nir_shader *nir) +{ + bool progress=false; + nir_foreach_function(function, nir) { + if (function->impl && + r600_lower_shared_io_impl(function)) + progress = true; + } + return progress; +} + + +static nir_ssa_def * +r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + auto old_ir = nir_instr_as_intrinsic(instr); + auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); + nir_ssa_dest_init(&load->instr, &load->dest, + old_ir->dest.ssa.num_components, old_ir->dest.ssa.bit_size, NULL); + nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir)); + + nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir)); + nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir)); + nir_intrinsic_set_dest_type(load, nir_type_float32); + load->num_components = old_ir->num_components; + load->src[0] = old_ir->src[1]; + nir_builder_instr_insert(b, &load->instr); + return &load->dest.ssa; +} + +bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto ir = nir_instr_as_intrinsic(instr); + if (ir->intrinsic != nir_intrinsic_load_interpolated_input) + return false; + + return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS; +} + +/* Strip the interpolator specification, it is not needed and irritates */ +bool r600_lower_fs_pos_input(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_lower_fs_pos_input_filter, + r600_lower_fs_pos_input_impl, + nullptr); +}; + +static bool +optimize_once(nir_shader *shader, bool vectorize) +{ + bool progress = false; + NIR_PASS(progress, shader, nir_lower_vars_to_ssa); + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_dce); + NIR_PASS(progress, shader, nir_opt_algebraic); + NIR_PASS(progress, shader, nir_opt_constant_folding); + NIR_PASS(progress, shader, nir_opt_copy_prop_vars); + if (vectorize) + NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL); + + NIR_PASS(progress, shader, nir_opt_remove_phis); + + if 
(nir_opt_trivial_continues(shader)) { + progress = true; + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_dce); + } + + NIR_PASS(progress, shader, nir_opt_if, false); + NIR_PASS(progress, shader, nir_opt_dead_cf); + NIR_PASS(progress, shader, nir_opt_cse); + NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true); + + NIR_PASS(progress, shader, nir_opt_conditional_discard); + NIR_PASS(progress, shader, nir_opt_dce); + NIR_PASS(progress, shader, nir_opt_undef); + return progress; +} + +bool has_saturate(const nir_function *func) +{ + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_alu) { + auto alu = nir_instr_as_alu(instr); + if (alu->dest.saturate) + return true; + } + } + } + return false; +} + +bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) +{ + if (instr->type != nir_instr_type_alu) + return true; + + auto alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_bany_fnequal3: + case nir_op_bany_fnequal4: + case nir_op_ball_fequal3: + case nir_op_ball_fequal4: + case nir_op_bany_inequal3: + case nir_op_bany_inequal4: + case nir_op_ball_iequal3: + case nir_op_ball_iequal4: + case nir_op_fdot2: + case nir_op_fdot3: + case nir_op_fdot4: + case nir_op_cube_r600: + return false; + case nir_op_bany_fnequal2: + case nir_op_ball_fequal2: + case nir_op_bany_inequal2: + case nir_op_ball_iequal2: + return nir_src_bit_size(alu->src[0].src) != 64; + default: + return true; + } +} + +int r600_shader_from_nir(struct r600_context *rctx, + struct r600_pipe_shader *pipeshader, + r600_shader_key *key) +{ + char filename[4000]; + struct r600_pipe_shader_selector *sel = pipeshader->selector; + + bool lower_64bit = ((sel->nir->options->lower_int64_options || + sel->nir->options->lower_doubles_options) && + (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64); + + r600::ShaderFromNir convert; + + if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) { + fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n"); + nir_print_shader(sel->nir, stderr); + fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n"); + } + + r600::sort_uniforms(sel->nir); + + NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE, + .allow_fp16 = true, + }; + NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options); + NIR_PASS_V(sel->nir, r600_lower_alu); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + + if (lower_64bit) + NIR_PASS_V(sel->nir, nir_lower_int64); + while(optimize_once(sel->nir, false)); + + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); + + NIR_PASS_V(sel->nir, r600_lower_shared_io); + NIR_PASS_V(sel->nir, r600_nir_lower_atomics); + + static const struct nir_lower_tex_options lower_tex_options = { + .lower_txp = ~0u, + }; + NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); + NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube); + NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray); + + NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16); + + if (sel->nir->info.stage == MESA_SHADER_VERTEX) + NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs); + + if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector); + } + + nir_variable_mode io_modes = nir_var_uniform | 
nir_var_shader_in;
+
+   //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
+   io_modes |= nir_var_shader_out;
+
+   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+
+      /* Lower IO to temporaries late, because otherwise we get into trouble
+       * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
+       * somewhere that results in the input always reading from the same temp
+       * regardless of interpolation when the lowering is done early. */
+      NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
+                 true, true);
+
+      /* Since we're doing nir_lower_io_to_temporaries late, we need
+       * to lower all the copy_deref's introduced by
+       * lower_io_to_temporaries before calling nir_lower_io.
+       */
+      NIR_PASS_V(sel->nir, nir_split_var_copies);
+      NIR_PASS_V(sel->nir, nir_lower_var_copies);
+      NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
+   }
+
+   NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
+              nir_lower_io_lower_64bit_to_32);
+
+   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
+
+   if (lower_64bit)
+      NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);
+
+   NIR_PASS_V(sel->nir, nir_opt_constant_folding);
+   NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);
+
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+   if (lower_64bit)
+      NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_copy_prop);
+   NIR_PASS_V(sel->nir, nir_opt_dce);
+
+   auto sh = nir_shader_clone(sel->nir, sel->nir);
+
+   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
+       sh->info.stage == MESA_SHADER_TESS_EVAL ||
+       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
+      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL ?
+ sh->info.tess.primitive_mode: key->tcs.prim_mode; + NIR_PASS_V(sh, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type)); + } + + if (sh->info.stage == MESA_SHADER_TESS_CTRL) + NIR_PASS_V(sh, r600_append_tcs_TF_emission, + (pipe_prim_type)key->tcs.prim_mode); + + if (sh->info.stage == MESA_SHADER_TESS_EVAL) + NIR_PASS_V(sh, r600_lower_tess_coord, + static_cast<pipe_prim_type>(sh->info.tess.primitive_mode)); + + NIR_PASS_V(sh, nir_lower_ubo_vec4); + if (lower_64bit) + NIR_PASS_V(sh, r600::r600_nir_64_to_vec2); + + /* Lower to scalar to let some optimization work out better */ + while(optimize_once(sh, false)); + + NIR_PASS_V(sh, r600::r600_merge_vec2_stores); + + NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL); + NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL); + + + NIR_PASS_V(sh, nir_lower_vars_to_scratch, + nir_var_function_temp, + 40, + r600_get_natural_size_align_bytes); + + while (optimize_once(sh, true)); + + NIR_PASS_V(sh, nir_lower_bool_to_int32); + NIR_PASS_V(sh, r600_nir_lower_int_tg4); + NIR_PASS_V(sh, nir_opt_algebraic_late); + + if (sh->info.stage == MESA_SHADER_FRAGMENT) + r600::sort_fsoutput(sh); + + NIR_PASS_V(sh, nir_lower_locals_to_regs); + + //NIR_PASS_V(sh, nir_opt_algebraic); + //NIR_PASS_V(sh, nir_copy_prop); + NIR_PASS_V(sh, nir_lower_to_source_mods, + (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods | + nir_lower_64bit_source_mods)); + NIR_PASS_V(sh, nir_convert_from_ssa, true); + NIR_PASS_V(sh, nir_opt_dce); + + if ((rctx->screen->b.debug_flags & DBG_NIR_PREFERRED) && + (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) { + fprintf(stderr, "-- NIR --------------------------------------------------------\n"); + struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions); + nir_index_ssa_defs(func->impl); + nir_print_shader(sh, stderr); + fprintf(stderr, "-- END --------------------------------------------------------\n"); + } + + memset(&pipeshader->shader, 0, sizeof(r600_shader)); + pipeshader->scratch_space_needed = sh->scratch_size; + + if (sh->info.stage == MESA_SHADER_TESS_EVAL || + sh->info.stage == MESA_SHADER_VERTEX || + sh->info.stage == MESA_SHADER_GEOMETRY) { + pipeshader->shader.clip_dist_write |= ((1 << sh->info.clip_distance_array_size) - 1); + pipeshader->shader.cull_dist_write = ((1 << sh->info.cull_distance_array_size) - 1) + << sh->info.clip_distance_array_size; + pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size + + sh->info.clip_distance_array_size)) - 1; + } + + struct r600_shader* gs_shader = nullptr; + if (rctx->gs_shader) + gs_shader = &rctx->gs_shader->current->shader; + r600_screen *rscreen = rctx->screen; + + bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class); + if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) { + static int shnr = 0; + + snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++); + + if (access(filename, F_OK) == -1) { + FILE *f = fopen(filename, "w"); + + if (f) { + fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name); + nir_print_shader(sh, f); + fprintf(f, ")\";\n"); + fclose(f); + } + } + if (!r) + return -2; + } + + auto shader = convert.shader(); + + r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family, + rscreen->has_compressed_msaa_texturing); + + r600::sfn_log << r600::SfnLog::shader_info + << "pipeshader->shader.processor_type = " + << pipeshader->shader.processor_type << "\n"; + + pipeshader->shader.bc.type = 
pipeshader->shader.processor_type; + pipeshader->shader.bc.isa = rctx->isa; + + r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key); + if (!afs.lower(shader.m_ir)) { + R600_ERR("%s: Lowering to assembly failed\n", __func__); + return -1; + } + + if (sh->info.stage == MESA_SHADER_GEOMETRY) { + r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n"; + generate_gs_copy_shader(rctx, pipeshader, &sel->so); + assert(pipeshader->gs_copy_shader); + } else { + r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n"; + } + if (pipeshader->shader.bc.ngpr < 6) + pipeshader->shader.bc.ngpr = 6; + + return 0; +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h new file mode 100644 index 000000000..d13accb3b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -0,0 +1,161 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_NIR_H +#define SFN_NIR_H + +#include "nir.h" +#include "nir_builder.h" + +#ifdef __cplusplus +#include "sfn_shader_base.h" +#include <vector> + +namespace r600 { + +class NirLowerInstruction { +public: + NirLowerInstruction(); + + bool run(nir_shader *shader); + +private: + static bool filter_instr(const nir_instr *instr, const void *data); + static nir_ssa_def *lower_instr(nir_builder *b, nir_instr *instr, void *data); + + void set_builder(nir_builder *_b) { b = _b;} + + virtual bool filter(const nir_instr *instr) const = 0; + virtual nir_ssa_def *lower(nir_instr *instr) = 0; +protected: + nir_builder *b; +}; + +bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader); + +bool r600_lower_scratch_addresses(nir_shader *shader); + +bool r600_lower_ubo_to_align16(nir_shader *shader); + +bool r600_nir_split_64bit_io(nir_shader *sh); + +bool r600_nir_64_to_vec2(nir_shader *sh); + +bool r600_merge_vec2_stores(nir_shader *shader); + +class Shader { +public: + std::vector<InstructionBlock>& m_ir; + ValueMap m_temp; +}; + +class ShaderFromNir { +public: + ShaderFromNir(); + ~ShaderFromNir(); + + unsigned ninputs() const; + + bool lower(const nir_shader *shader, r600_pipe_shader *sh, + r600_pipe_shader_selector *sel, r600_shader_key &key, + r600_shader *gs_shader, enum chip_class chip_class); + + bool process_declaration(); + + pipe_shader_type processor_type() const; + + bool emit_instruction(nir_instr *instr); + + const std::vector<InstructionBlock> &shader_ir() const; + + Shader shader() const; +private: + + bool process_block(); + bool process_cf_node(nir_cf_node *node); + bool process_if(nir_if *node); + bool process_loop(nir_loop *node); + bool process_block(nir_block *node); + + std::unique_ptr<ShaderFromNirProcessor> impl; + const nir_shader *sh; + + enum chip_class chip_class; + int m_current_if_id; + int m_current_loop_id; + std::stack<int> m_if_stack; + int scratch_size; +}; + +class AssemblyFromShader { +public: + virtual ~AssemblyFromShader(); + bool lower(const std::vector<InstructionBlock> &ir); +private: + virtual bool do_lower(const std::vector<InstructionBlock>& ir) = 0 ; +}; + +} + +static inline nir_ssa_def * +r600_imm_ivec3(nir_builder *build, int x, int y, int z) +{ + nir_const_value v[3] = { + nir_const_value_for_int(x, 32), + nir_const_value_for_int(y, 32), + nir_const_value_for_int(z, 32), + }; + + return nir_build_imm(build, 3, 32, v); +} + +bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type); +bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type); +bool r600_lower_tess_coord(nir_shader *sh, enum pipe_prim_type prim_type); + +#else +#include "gallium/drivers/r600/r600_shader.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +bool r600_vectorize_vs_inputs(nir_shader *shader); + + +int r600_shader_from_nir(struct r600_context *rctx, + struct r600_pipe_shader *pipeshader, + union r600_shader_key *key); + +bool r600_lower_alu(nir_shader *sh); + +#ifdef __cplusplus +} +#endif + + +#endif // SFN_NIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp new file mode 100644 index 000000000..4a177d15d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp @@ -0,0 +1,462 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_nir_lower_fs_out_to_vector.h" + +#include "nir_builder.h" +#include "nir_deref.h" +#include "util/u_math.h" + +#include <set> +#include <vector> +#include <array> +#include <algorithm> + +namespace r600 { + +using std::multiset; +using std::vector; +using std::array; + +struct nir_intrinsic_instr_less { + bool operator () (const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) const + { + nir_variable *vlhs = nir_deref_instr_get_variable(nir_src_as_deref(lhs->src[0])); + nir_variable *vrhs = nir_deref_instr_get_variable(nir_src_as_deref(rhs->src[0])); + + auto ltype = glsl_get_base_type(vlhs->type); + auto rtype = glsl_get_base_type(vrhs->type); + + if (ltype != rtype) + return ltype < rtype; + return vlhs->data.location < vrhs->data.location; + } +}; + +class NirLowerIOToVector { +public: + NirLowerIOToVector(int base_slot); + bool run(nir_function_impl *shader); + +protected: + bool var_can_merge(const nir_variable *lhs, const nir_variable *rhs); + bool var_can_rewrite(nir_variable *var) const; + void create_new_io_vars(nir_shader *shader); + void create_new_io_var(nir_shader *shader, unsigned location, unsigned comps); + + nir_deref_instr *clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, + const nir_deref_instr *src_head); + + bool vectorize_block(nir_builder *b, nir_block *block); + bool instr_can_rewrite(nir_instr *instr); + bool vec_instr_set_remove(nir_builder *b,nir_instr *instr); + + using InstrSet = multiset<nir_intrinsic_instr *, nir_intrinsic_instr_less>; + using InstrSubSet = std::pair<InstrSet::iterator, InstrSet::iterator>; + + bool vec_instr_stack_pop(nir_builder *b, InstrSubSet& ir_set, + nir_intrinsic_instr *instr); + + array<array<nir_variable *, 4>, 16> m_vars; + InstrSet m_block_io; + int m_next_index; +private: + virtual nir_variable_mode get_io_mode(nir_shader *shader) const = 0; + virtual bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const = 0; + virtual bool var_can_rewrite_slot(nir_variable *var) const = 0; + virtual void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var, + nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) = 0; + + int m_base_slot; +}; + +class NirLowerFSOutToVector : public NirLowerIOToVector { +public: + NirLowerFSOutToVector(); + +private: + nir_variable_mode get_io_mode(nir_shader *shader) const override; + bool var_can_rewrite_slot(nir_variable *var) const override; + void create_new_io(nir_builder *b, 
nir_intrinsic_instr *intr, nir_variable *var, + nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) override; + bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const override; + + nir_ssa_def *create_combined_vector(nir_builder *b, nir_ssa_def **srcs, + int first_comp, int num_comp); +}; + +bool r600_lower_fs_out_to_vector(nir_shader *shader) +{ + NirLowerFSOutToVector processor; + + assert(shader->info.stage == MESA_SHADER_FRAGMENT); + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= processor.run(function->impl); + } + return progress; +} + +NirLowerIOToVector::NirLowerIOToVector(int base_slot): + m_next_index(0), + m_base_slot(base_slot) +{ + for(auto& a : m_vars) + for(auto& aa : a) + aa = nullptr; +} + +bool NirLowerIOToVector::run(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_metadata_require(impl, nir_metadata_dominance); + create_new_io_vars(impl->function->shader); + + bool progress = vectorize_block(&b, nir_start_block(impl)); + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); + } + return progress; +} + +void NirLowerIOToVector::create_new_io_vars(nir_shader *shader) +{ + nir_variable_mode mode = get_io_mode(shader); + + bool can_rewrite_vars = false; + nir_foreach_variable_with_modes(var, shader, mode) { + if (var_can_rewrite(var)) { + can_rewrite_vars = true; + unsigned loc = var->data.location - m_base_slot; + m_vars[loc][var->data.location_frac] = var; + } + } + + if (!can_rewrite_vars) + return; + + /* We don't handle combining vars of different type e.g. different array + * lengths. + */ + for (unsigned i = 0; i < 16; i++) { + unsigned comps = 0; + + for (unsigned j = 0; j < 3; j++) { + if (!m_vars[i][j]) + continue; + + for (unsigned k = j + 1; k < 4; k++) { + if (!m_vars[i][k]) + continue; + + if (!var_can_merge(m_vars[i][j], m_vars[i][k])) + continue; + + /* Set comps */ + for (unsigned n = 0; n < glsl_get_components(m_vars[i][j]->type); ++n) + comps |= 1 << (m_vars[i][j]->data.location_frac + n); + + for (unsigned n = 0; n < glsl_get_components(m_vars[i][k]->type); ++n) + comps |= 1 << (m_vars[i][k]->data.location_frac + n); + + } + } + if (comps) + create_new_io_var(shader, i, comps); + } +} + +bool +NirLowerIOToVector::var_can_merge(const nir_variable *lhs, + const nir_variable *rhs) +{ + return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type)); +} + +void +NirLowerIOToVector::create_new_io_var(nir_shader *shader, + unsigned location, unsigned comps) +{ + unsigned num_comps = util_bitcount(comps); + assert(num_comps > 1); + + /* Note: u_bit_scan() strips a component of the comps bitfield here */ + unsigned first_comp = u_bit_scan(&comps); + + nir_variable *var = nir_variable_clone(m_vars[location][first_comp], shader); + var->data.location_frac = first_comp; + var->type = glsl_replace_vector_type(var->type, num_comps); + + nir_shader_add_variable(shader, var); + + m_vars[location][first_comp] = var; + + while (comps) { + const int comp = u_bit_scan(&comps); + if (m_vars[location][comp]) { + m_vars[location][comp] = var; + } + } +} + +bool NirLowerIOToVector::var_can_rewrite(nir_variable *var) const +{ + /* Skip complex types we don't split in the first place */ + if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type))) + return false; + + if (glsl_get_bit_size(glsl_without_array(var->type)) != 32) + return false; + + return var_can_rewrite_slot(var); +} + +bool 
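+/* Index the rewritable stores of this block, recurse into the blocks it
+ * dominates, then walk the block backwards and merge matching stores;
+ * vec_instr_stack_pop() keeps the last store of a set as the anchor. */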
+NirLowerIOToVector::vectorize_block(nir_builder *b, nir_block *block) +{ + bool progress = false; + + nir_foreach_instr_safe(instr, block) { + if (instr_can_rewrite(instr)) { + instr->index = m_next_index++; + nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr); + m_block_io.insert(ir); + } + } + + for (unsigned i = 0; i < block->num_dom_children; i++) { + nir_block *child = block->dom_children[i]; + progress |= vectorize_block(b, child); + } + + nir_foreach_instr_reverse_safe(instr, block) { + progress |= vec_instr_set_remove(b, instr); + } + m_block_io.clear(); + + return progress; +} + +bool NirLowerIOToVector::instr_can_rewrite(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->num_components > 3) + return false; + + return instr_can_rewrite_type(intr); +} + +bool NirLowerIOToVector::vec_instr_set_remove(nir_builder *b,nir_instr *instr) +{ + if (!instr_can_rewrite(instr)) + return false; + + nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr); + auto entry = m_block_io.equal_range(ir); + if (entry.first != m_block_io.end()) { + vec_instr_stack_pop(b, entry, ir); + } + return true; +} + +nir_deref_instr * +NirLowerIOToVector::clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, + const nir_deref_instr *src_head) +{ + const nir_deref_instr *parent = nir_deref_instr_parent(src_head); + + if (!parent) + return dst_tail; + + assert(src_head->deref_type == nir_deref_type_array); + + dst_tail = clone_deref_array(b, dst_tail, parent); + + return nir_build_deref_array(b, dst_tail, + nir_ssa_for_src(b, src_head->arr.index, 1)); +} + +NirLowerFSOutToVector::NirLowerFSOutToVector(): + NirLowerIOToVector(FRAG_RESULT_COLOR) +{ + +} + +bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable *var) const +{ + return ((var->data.mode == nir_var_shader_out) && + ((var->data.location == FRAG_RESULT_COLOR) || + ((var->data.location >= FRAG_RESULT_DATA0) && + (var->data.location <= FRAG_RESULT_DATA7)))); +} + +bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder *b, InstrSubSet &ir_set, + nir_intrinsic_instr *instr) +{ + vector< nir_intrinsic_instr *> ir_sorted_set(ir_set.first, ir_set.second); + std::sort(ir_sorted_set.begin(), ir_sorted_set.end(), + [](const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) { + return lhs->instr.index > rhs->instr.index; + } + ); + + nir_intrinsic_instr *intr = *ir_sorted_set.begin(); + nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + + unsigned loc = var->data.location - m_base_slot; + + nir_variable *new_var = m_vars[loc][var->data.location_frac]; + unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type)); + unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type)); + + /* Don't bother walking the stack if this component can't be vectorised. 
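+    * instr_can_rewrite() already rejected intrinsics with more than three
+    * components, so this check is on the width of the variable itself.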
*/
+   if (old_num_comps > 3) {
+      return false;
+   }
+
+   if (new_var == var) {
+      return false;
+   }
+
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_ssa_undef_instr *instr_undef =
+         nir_ssa_undef_instr_create(b->shader, 1, 32);
+   nir_builder_instr_insert(b, &instr_undef->instr);
+
+   nir_ssa_def *srcs[4];
+   for (int i = 0; i < 4; i++) {
+      srcs[i] = &instr_undef->def;
+   }
+   srcs[var->data.location_frac] = intr->src[1].ssa;
+
+   for (auto k = ir_sorted_set.begin() + 1; k != ir_sorted_set.end(); ++k) {
+      nir_intrinsic_instr *intr2 = *k;
+      nir_variable *var2 =
+            nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
+      unsigned loc2 = var2->data.location - m_base_slot;
+
+      if (m_vars[loc][var->data.location_frac] !=
+          m_vars[loc2][var2->data.location_frac]) {
+         continue;
+      }
+
+      assert(glsl_get_vector_elements(glsl_without_array(var2->type)) < 4);
+
+      if (srcs[var2->data.location_frac] == &instr_undef->def) {
+         assert(intr2->src[1].is_ssa);
+         assert(intr2->src[1].ssa);
+         srcs[var2->data.location_frac] = intr2->src[1].ssa;
+      }
+      nir_instr_remove(&intr2->instr);
+   }
+
+   create_new_io(b, intr, new_var, srcs, new_var->data.location_frac,
+                 num_comps);
+   return true;
+}
+
+nir_variable_mode NirLowerFSOutToVector::get_io_mode(nir_shader *shader) const
+{
+   return nir_var_shader_out;
+}
+
+void
+NirLowerFSOutToVector::create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+                                     nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps)
+{
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_intrinsic_instr *new_intr =
+         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+   new_intr->num_components = num_comps;
+
+   nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
+
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
+
+   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   new_intr->src[1] = nir_src_for_ssa(create_combined_vector(b, srcs, first_comp, num_comps));
+
+   nir_builder_instr_insert(b, &new_intr->instr);
+
+   /* Remove the old store intrinsic */
+   nir_instr_remove(&intr->instr);
+}
+
+bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr *intr) const
+{
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   if (!nir_deref_mode_is(deref, nir_var_shader_out))
+      return false;
+
+   return var_can_rewrite(nir_deref_instr_get_variable(deref));
+}
+
+nir_ssa_def *NirLowerFSOutToVector::create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
+                                                           int first_comp, int num_comp)
+{
+   nir_op op;
+   switch (num_comp) {
+   case 2: op = nir_op_vec2; break;
+   case 3: op = nir_op_vec3; break;
+   case 4: op = nir_op_vec4; break;
+   default:
+      unreachable("combined vector must have 2 to 4 components");
+   }
+   nir_alu_instr * instr = nir_alu_instr_create(b->shader, op);
+   instr->exact = b->exact;
+
+   int i = 0;
+   unsigned k = 0;
+   while (i < num_comp) {
+      nir_ssa_def *s = srcs[first_comp + k];
+      for(uint8_t kk = 0; kk < s->num_components && i < num_comp; ++kk) {
+         instr->src[i].src = nir_src_for_ssa(s);
+         instr->src[i].swizzle[0] = kk;
+         ++i;
+      }
+      k += s->num_components;
+   }
+
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_comp, 32, NULL);
+   instr->dest.write_mask = (1 << num_comp) - 1;
+   nir_builder_instr_insert(b, &instr->instr);
+   return &instr->dest.dest.ssa;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h
b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h new file mode 100644 index 000000000..016b7a222 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h @@ -0,0 +1,38 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H +#define SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H + +#include "nir.h" + +namespace r600 { + +bool r600_lower_fs_out_to_vector(nir_shader *sh); + +} + +#endif // SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp new file mode 100644 index 000000000..a830d0753 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp @@ -0,0 +1,575 @@ +#include "sfn_nir.h" + +bool r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + switch (op->intrinsic) { + case nir_intrinsic_load_input: + return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_load_patch_vertices_in: + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_tess_level_inner: + return true; + case nir_intrinsic_store_output: + return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX; + default: + ; + } + return false; +} + +static nir_ssa_def * +emit_load_param_base(nir_builder *b, nir_intrinsic_op op) +{ + nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op); + nir_ssa_dest_init(&result->instr, &result->dest, + 4, 32, NULL); + nir_builder_instr_insert(b, &result->instr); + return &result->dest.ssa; +} + +static int get_tcs_varying_offset(nir_intrinsic_instr *op) +{ + unsigned location = nir_intrinsic_io_semantics(op).location; + + switch (location) { + case VARYING_SLOT_POS: + return 0; + case VARYING_SLOT_PSIZ: + return 0x10; + case VARYING_SLOT_CLIP_DIST0: + return 0x20; + case VARYING_SLOT_CLIP_DIST1: + return 0x30; + case VARYING_SLOT_TESS_LEVEL_OUTER: + return 0; + case VARYING_SLOT_TESS_LEVEL_INNER: + return 0x10; + default: + if (location >= VARYING_SLOT_VAR0 && + location <= VARYING_SLOT_VAR31) + return 0x10 * (location - VARYING_SLOT_VAR0) + 0x40; + + if (location >= VARYING_SLOT_PATCH0) { + return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20; + } + } + return 0; +} + +static inline nir_ssa_def * +r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3) +{ + return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL); +} + +static inline nir_ssa_def * +r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id) +{ + return r600_umad_24(b, nir_channel(b, param_base, 0), + rel_patch_id, + nir_channel(b, param_base, 3)); +} + + +static nir_ssa_def * +emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op) +{ + nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24, + nir_channel(b, base, 0), + patch_id, NULL, NULL); + + auto idx1 = nir_src_as_const_value(op->src[0]); + if (!idx1 || idx1->u32 != 0) + addr = r600_umad_24(b, nir_channel(b, base, 1), + op->src[0].ssa, addr); + + auto offset = nir_imm_int(b, get_tcs_varying_offset(op)); + + auto idx2 = nir_src_as_const_value(op->src[1]); + if (!idx2 || idx2->u32 != 0) + offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4))); + + return nir_iadd(b, addr, offset); +} + +static nir_ssa_def * +emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset) +{ + + nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0), + patch_id, + nir_channel(b, base, 2)); + nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, 
base, 1), + op->src[src_offset].ssa, addr1); + int offset = get_tcs_varying_offset(op); + return nir_iadd(b, nir_iadd(b, addr2, + nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))), + nir_imm_int(b, offset)); +} + +static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents) +{ + switch (ncomponents) { + /* tess outer offsets */ + case 1: return nir_imm_int(b, 0); + case 2: return nir_imm_ivec2(b, 0, 4); + case 3: return r600_imm_ivec3(b, 0, 4, 8); + case 4: return nir_imm_ivec4(b, 0, 4, 8, 12); + /* tess inner offsets */ + case 5: return nir_imm_int(b, 16); + case 6: return nir_imm_ivec2(b, 16, 20); + default: + debug_printf("Got %d components\n", ncomponents); + unreachable("Unsupported component count"); + } +} + +static nir_ssa_def *load_offset_group_from_mask(nir_builder *b, uint32_t mask) +{ + auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12); + return nir_channels(b, full_mask, mask); +} + +struct MaskQuery { + uint32_t mask; + uint32_t ssa_index; + nir_alu_instr *alu; + int index; + uint32_t full_mask; +}; + +static bool update_alu_mask(nir_src *src, void *data) +{ + auto mq = reinterpret_cast<MaskQuery *>(data); + + if (mq->ssa_index == src->ssa->index) { + mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index); + } + ++mq->index; + + return mq->mask != mq->full_mask; +} + +static uint32_t get_dest_usee_mask(nir_intrinsic_instr *op) +{ + assert(op->dest.is_ssa); + + MaskQuery mq = {0}; + mq.full_mask = (1 << nir_dest_num_components(op->dest)) - 1; + + nir_foreach_use(use_src, &op->dest.ssa) { + auto use_instr = use_src->parent_instr; + mq.ssa_index = use_src->ssa->index; + + switch (use_instr->type) { + case nir_instr_type_alu: { + mq.alu = nir_instr_as_alu(use_instr); + mq.index = 0; + if (!nir_foreach_src(use_instr, update_alu_mask, &mq)) + return 0xf; + break; + } + case nir_instr_type_intrinsic: { + auto intr = nir_instr_as_intrinsic(use_instr); + switch (intr->intrinsic) { + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr); + break; + case nir_intrinsic_store_scratch: + case nir_intrinsic_store_local_shared_r600: + mq.mask |= nir_intrinsic_write_mask(intr); + break; + default: + return 0xf; + } + break; + } + default: + return 0xf; + } + + } + return mq.mask; +} + +static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) +{ + uint32_t mask = get_dest_usee_mask(op); + if (mask) { + nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask)); + if (nir_intrinsic_component(op)) + addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op))); + + auto new_load = nir_load_local_shared_r600(b, 32, addr_outer); + + auto undef = nir_ssa_undef(b, 1, 32); + int comps = nir_dest_num_components(op->dest); + nir_ssa_def *remix[4] = {undef, undef, undef, undef}; + + int chan = 0; + for (int i = 0; i < comps; ++i) { + if (mask & (1 << i)) { + remix[i] = nir_channel(b, new_load, chan++); + } + } + auto new_load_remixed = nir_vec(b, remix, comps); + nir_ssa_def_rewrite_uses(&op->dest.ssa, new_load_remixed); + } + nir_instr_remove(&op->instr); +} + +static nir_ssa_def * +r600_load_rel_patch_id(nir_builder *b) +{ + auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600); + nir_ssa_dest_init(&patch_id->instr, &patch_id->dest, + 1, 32, NULL); + nir_builder_instr_insert(b, &patch_id->instr); + return &patch_id->dest.ssa; +} + +static void 
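+/* Split one masked store into at most two LDS writes covering the
+ * component pairs (x,y) and (z,w); the second pair is written 8 bytes
+ * further on, plus 4 when only the odd component of a pair is written. */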
+emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) +{ + uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op); + + for (int i = 0; i < 2; ++i) { + unsigned test_mask = (0x3 << 2 * i); + if (!(orig_writemask & test_mask)) + continue; + + uint32_t writemask = test_mask >> nir_intrinsic_component(op); + + auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600); + nir_intrinsic_set_write_mask(store_tcs_out, writemask); + store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa); + store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components; + bool start_even = (orig_writemask & (1u << (2 * i))); + + auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4))); + store_tcs_out->src[1] = nir_src_for_ssa(addr2); + + nir_builder_instr_insert(b, &store_tcs_out->instr); + } +} + +static nir_ssa_def * +emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, int src_offset) +{ + int offset = get_tcs_varying_offset(op); + return nir_iadd(b, nir_iadd(b, addr, + nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))), + nir_imm_int(b, offset)); +} + + +inline unsigned +outer_tf_components(pipe_prim_type prim_type) +{ + switch (prim_type) { + case PIPE_PRIM_LINES: return 2; + case PIPE_PRIM_TRIANGLES: return 3; + case PIPE_PRIM_QUADS: return 4; + default: + return 0; + } +} + + + +static bool +r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type) +{ + static nir_ssa_def *load_in_param_base = nullptr; + static nir_ssa_def *load_out_param_base = nullptr; + + b->cursor = nir_before_instr(instr); + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) { + load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { + load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + } else if (b->shader->info.stage == MESA_SHADER_VERTEX) { + load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + } + + auto rel_patch_id = r600_load_rel_patch_id(b); + + unsigned tf_inner_address_offset = 0; + unsigned ncomps_correct = 0; + + switch (op->intrinsic) { + case nir_intrinsic_load_patch_vertices_in: { + nir_ssa_def *vertices_in; + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) + vertices_in = nir_channel(b, load_in_param_base, 2); + else { + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + vertices_in = nir_channel(b, base, 2); + } + nir_ssa_def_rewrite_uses(&op->dest.ssa, vertices_in); + nir_instr_remove(&op->instr); + return true; + } + case nir_intrinsic_load_per_vertex_input: { + nir_ssa_def *addr = + b->shader->info.stage == MESA_SHADER_TESS_CTRL ? 
+ emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) : + emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_store_per_vertex_output: { + nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1); + emit_store_lds(b, op, addr); + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_load_per_vertex_output: { + nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_store_output: { + nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ? + r600_tcs_base_address(b, load_out_param_base, rel_patch_id): + nir_build_alu(b, nir_op_umul24, + nir_channel(b, load_out_param_base, 1), + rel_patch_id, NULL, NULL); + addr = emil_tcs_io_offset(b, addr, op, 1); + emit_store_lds(b, op, addr); + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_load_output: { + nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id); + addr = emil_tcs_io_offset(b, addr, op, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_load_input: { + nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id); + addr = emil_tcs_io_offset(b, addr, op, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_load_tess_level_inner: + tf_inner_address_offset = 4; + ncomps_correct = 2; + FALLTHROUGH; + case nir_intrinsic_load_tess_level_outer: { + auto ncomps = outer_tf_components(prim_type); + if (!ncomps) + return false; + ncomps -= ncomps_correct; + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + auto rel_patch_id = r600_load_rel_patch_id(b); + nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id); + nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps)); + + auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf->num_components = ncomps; + tf->src[0] = nir_src_for_ssa(addr_outer); + nir_ssa_dest_init(&tf->instr, &tf->dest, + tf->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf->instr); + + nir_ssa_def_rewrite_uses(&op->dest.ssa, &tf->dest.ssa); + nir_instr_remove(instr); + return true; + } + default: + ; + } + + return false; +} + +bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type) +{ + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + if (r600_lower_tess_io_filter(instr, shader->info.stage)) + progress |= r600_lower_tess_io_impl(&b, instr, prim_type); + } + } + } + } + return progress; +} + +bool r600_emit_tf(nir_builder *b, nir_ssa_def *val) +{ + nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600); + store_tf->num_components = val->num_components; + store_tf->src[0] = nir_src_for_ssa(val); + nir_builder_instr_insert(b, &store_tf->instr); + return true; +} + +bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) { + if (shader->info.stage != MESA_SHADER_TESS_CTRL) + return false; + + nir_foreach_function(function, shader) { + 
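/* If a store_tf_r600 intrinsic is already present anywhere in the
+ * shader, the tess factors were emitted earlier, so bail out rather
+ * than appending them a second time. */
+ 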
nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_store_tf_r600) { + return false; + } + } + } + } + nir_builder builder; + nir_builder *b = &builder; + + assert(exec_list_length(&shader->functions) == 1); + nir_function *f = (nir_function *)shader->functions.get_head(); + nir_builder_init(b, f->impl); + + auto outer_comps = outer_tf_components(prim_type); + if (!outer_comps) + return false; + + unsigned inner_comps = outer_comps - 2; + unsigned stride = (inner_comps + outer_comps) * 4; + + b->cursor = nir_after_cf_list(&f->impl->body); + + auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id); + nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest, + 1, 32, NULL); + nir_builder_instr_insert(b, &invocation_id->instr); + + nir_push_if(b, nir_ieq_imm(b, &invocation_id->dest.ssa, 0)); + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + auto rel_patch_id = r600_load_rel_patch_id(b); + + nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id); + + nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps)); + auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf_outer->num_components = outer_comps; + tf_outer->src[0] = nir_src_for_ssa(addr_outer); + nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest, + tf_outer->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf_outer->instr); + + std::vector<nir_ssa_def *> tf_out; + + + auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600); + nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest, + 1, 32, NULL); + nir_builder_instr_insert(b, &tf_out_base->instr); + + auto out_addr0 = nir_build_alu(b, nir_op_umad24, + rel_patch_id, + nir_imm_int(b, stride), + &tf_out_base->dest.ssa, + NULL); + int chanx = 0; + int chany = 1; + + if (prim_type == PIPE_PRIM_LINES) + std::swap(chanx, chany); + + + auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx), + nir_iadd(b, out_addr0, nir_imm_int(b, 4)), + nir_channel(b, &tf_outer->dest.ssa, chany)); + + tf_out.push_back(v0); + if (outer_comps > 2) { + auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)), + nir_channel(b, &tf_outer->dest.ssa, 2), + nir_iadd(b, out_addr0, nir_imm_int(b, 12)), + nir_channel(b, &tf_outer->dest.ssa, 3)) : + nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)), + nir_channel(b, &tf_outer->dest.ssa, 2)); + tf_out.push_back(v1); + } + + if (inner_comps) { + nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps)); + auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf_inner->num_components = inner_comps; + tf_inner->src[0] = nir_src_for_ssa(addr1); + nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest, + tf_inner->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf_inner->instr); + + auto v2 = (inner_comps > 1) ? 
nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)), + nir_channel(b, &tf_inner->dest.ssa, 0), + nir_iadd(b, out_addr0, nir_imm_int(b, 20)), + nir_channel(b, &tf_inner->dest.ssa, 1)): + nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)), + nir_channel(b, &tf_inner->dest.ssa, 0)); + tf_out.push_back(v2); + } + + for (auto tf: tf_out) + r600_emit_tf(b, tf); + + nir_pop_if(b, nullptr); + + nir_metadata_preserve(f->impl, nir_metadata_none); + + return true; +} + +static bool +r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + auto intr = nir_instr_as_intrinsic(instr); + return intr->intrinsic == nir_intrinsic_load_tess_coord; +} + +static nir_ssa_def * +r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + pipe_prim_type prim_type = *(pipe_prim_type *)_options; + + auto tc_xy = nir_load_tess_coord_r600(b); + + auto tc_x = nir_channel(b, tc_xy, 0); + auto tc_y = nir_channel(b, tc_xy, 1); + + if (prim_type == PIPE_PRIM_TRIANGLES) + return nir_vec3(b, tc_x, tc_y, nir_fsub(b, nir_imm_float(b, 1.0), + nir_fadd(b, tc_x, tc_y))); + else + return nir_vec3(b, tc_x, tc_y, nir_imm_float(b, 0.0)); +} + + +bool r600_lower_tess_coord(nir_shader *sh, enum pipe_prim_type prim_type) +{ + return nir_shader_lower_instructions(sh, r600_lower_tess_coord_filter, + r600_lower_tess_coord_impl, &prim_type); +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c new file mode 100644 index 000000000..2ff60cf6a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c @@ -0,0 +1,466 @@ +/* + * Copyright © 2018 Timothy Arceri + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_deref.h" +#include "util/u_dynarray.h" +#include "util/u_math.h" +#define XXH_INLINE_ALL +#include "util/xxhash.h" + +/** @file nir_opt_vectorize_io.c + * + * Replaces scalar nir_load_input/nir_store_output operations with + * vectorized instructions. 
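+ *
+ * As an illustrative sketch (hypothetical NIR, variable names
+ * invented), two scalar generic-attribute loads such as
+ *
+ *   vec1 32 ssa_1 = load_deref &in_a   (VERT_ATTRIB_GENERIC0, frac 0)
+ *   vec1 32 ssa_2 = load_deref &in_b   (VERT_ATTRIB_GENERIC0, frac 1)
+ *
+ * become one vector load on a merged variable plus swizzles:
+ *
+ *   vec2 32 ssa_3 = load_deref &in_ab
+ *   vec1 32 ssa_1 = mov ssa_3.x
+ *   vec1 32 ssa_2 = mov ssa_3.y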
+ */ +bool +r600_vectorize_vs_inputs(nir_shader *shader); + +static nir_deref_instr * +r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, + const nir_deref_instr *src_head) +{ + const nir_deref_instr *parent = nir_deref_instr_parent(src_head); + + if (!parent) + return dst_tail; + + assert(src_head->deref_type == nir_deref_type_array); + + dst_tail = r600_clone_deref_array(b, dst_tail, parent); + + return nir_build_deref_array(b, dst_tail, + nir_ssa_for_src(b, src_head->arr.index, 1)); +} + +static bool +r600_variable_can_rewrite(nir_variable *var) +{ + + /* Skip complex types we don't split in the first place */ + if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type))) + return false; + + + /* TODO: add 64/16bit support ? */ + if (glsl_get_bit_size(glsl_without_array(var->type)) != 32) + return false; + + /* We only check VSand attribute imputs */ + return (var->data.location >= VERT_ATTRIB_GENERIC0 && + var->data.location <= VERT_ATTRIB_GENERIC15); +} + +static bool +r600_instr_can_rewrite(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->num_components > 3) + return false; + + if (intr->intrinsic != nir_intrinsic_load_deref) + return false; + + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + if (!nir_deref_mode_is(deref, nir_var_shader_in)) + return false; + + return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref)); +} + +static bool +r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2) +{ + assert(instr1->type == nir_instr_type_intrinsic && + instr2->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1); + nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2); + + nir_variable *var1 = + nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0])); + nir_variable *var2 = + nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0])); + + /* We don't handle combining vars of different base types, so skip those */ + if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type)) + return false; + + if (var1->data.location != var2->data.location) + return false; + + return true; +} + +static struct util_dynarray * +r600_vec_instr_stack_create(void *mem_ctx) +{ + struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray); + util_dynarray_init(stack, mem_ctx); + return stack; +} + +static void +r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr) +{ + util_dynarray_append(stack, nir_instr *, instr); +} + +static unsigned r600_correct_location(nir_variable *var) +{ + return var->data.location - VERT_ATTRIB_GENERIC0; +} + +static void +r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var, + unsigned comp, unsigned num_comps, unsigned old_num_comps) +{ + unsigned channels[4]; + + b->cursor = nir_before_instr(&intr->instr); + + assert(intr->dest.is_ssa); + + nir_intrinsic_instr *new_intr = + nir_intrinsic_instr_create(b->shader, intr->intrinsic); + nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps, + intr->dest.ssa.bit_size, NULL); + new_intr->num_components = num_comps; + + nir_deref_instr *deref = nir_build_deref_var(b, var); + deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0])); + + new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa); + + if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset || + intr->intrinsic == nir_intrinsic_interp_deref_at_sample) + 
nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr); + + nir_builder_instr_insert(b, &new_intr->instr); + + for (unsigned i = 0; i < old_num_comps; ++i) + channels[i] = comp - var->data.location_frac + i; + nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); + + /* Remove the old load intrinsic */ + nir_instr_remove(&intr->instr); +} + + +static bool +r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack, + nir_instr *instr, + nir_variable *updated_vars[16][4]) +{ + nir_instr *last = util_dynarray_pop(stack, nir_instr *); + + assert(last == instr); + assert(last->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last); + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + unsigned loc = r600_correct_location(var); + + nir_variable *new_var; + new_var = updated_vars[loc][var->data.location_frac]; + + unsigned num_comps = + glsl_get_vector_elements(glsl_without_array(new_var->type)); + + unsigned old_num_comps = + glsl_get_vector_elements(glsl_without_array(var->type)); + + /* Don't bother walking the stack if this component can't be vectorised. */ + if (old_num_comps > 3) { + return false; + } + + if (new_var == var) { + return false; + } + + r600_create_new_load(b, intr, new_var, var->data.location_frac, + num_comps, old_num_comps); + return true; +} + +static bool +r600_cmp_func(const void *data1, const void *data2) +{ + const struct util_dynarray *arr1 = data1; + const struct util_dynarray *arr2 = data2; + + const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1); + const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2); + + return r600_io_access_same_var(instr1, instr2); +} + +#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash)) + +static uint32_t +r600_hash_instr(const nir_instr *instr) +{ + assert(instr->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + + uint32_t hash = 0; + + hash = HASH(hash, var->type); + return HASH(hash, var->data.location); +} + +static uint32_t +r600_hash_stack(const void *data) +{ + const struct util_dynarray *stack = data; + const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack); + return r600_hash_instr(first); +} + +static struct set * +r600_vec_instr_set_create(void) +{ + return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func); +} + +static void +r600_vec_instr_set_destroy(struct set *instr_set) +{ + _mesa_set_destroy(instr_set, NULL); +} + +static void +r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr) +{ + if (!r600_instr_can_rewrite(instr)) { + return; + } + + struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set); + r600_vec_instr_stack_push(new_stack, instr); + + struct set_entry *entry = _mesa_set_search(instr_set, new_stack); + + if (entry) { + ralloc_free(new_stack); + struct util_dynarray *stack = (struct util_dynarray *) entry->key; + r600_vec_instr_stack_push(stack, instr); + return; + } + + _mesa_set_add(instr_set, new_stack); + + return; +} + +static bool +r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr, + nir_variable *updated_vars[16][4]) +{ + if (!r600_instr_can_rewrite(instr)) { + return false; + } + /* + * It's pretty unfortunate that we have to do this, but it's a side effect + * of the hash set interfaces. 
The hash set assumes that we're only + * interested in storing one equivalent element at a time, and if we try to + * insert a duplicate element it will remove the original. We could hack up + * the comparison function to "know" which input is an instruction we + * passed in and which is an array that's part of the entry, but that + * wouldn't work because we need to pass an array to _mesa_set_add() in + * vec_instr_add() above, and _mesa_set_add() will call our comparison + * function as well. + */ + struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set); + r600_vec_instr_stack_push(temp, instr); + struct set_entry *entry = _mesa_set_search(instr_set, temp); + ralloc_free(temp); + + if (entry) { + struct util_dynarray *stack = (struct util_dynarray *) entry->key; + bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars); + + if (!util_dynarray_num_elements(stack, nir_instr *)) + _mesa_set_remove(instr_set, entry); + + return progress; + } + + return false; +} + +static bool +r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set, + nir_variable *updated_vars[16][4]) +{ + bool progress = false; + + nir_foreach_instr_safe(instr, block) { + r600_vec_instr_set_add(instr_set, instr); + } + + for (unsigned i = 0; i < block->num_dom_children; i++) { + nir_block *child = block->dom_children[i]; + progress |= r600_vectorize_block(b, child, instr_set, updated_vars); + } + + nir_foreach_instr_reverse_safe(instr, block) { + progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars); + } + + return progress; +} + +static void +r600_create_new_io_var(nir_shader *shader, + nir_variable *vars[16][4], + unsigned location, unsigned comps) +{ + unsigned num_comps = util_bitcount(comps); + assert(num_comps > 1); + + /* Note: u_bit_scan() strips a component of the comps bitfield here */ + unsigned first_comp = u_bit_scan(&comps); + + nir_variable *var = nir_variable_clone(vars[location][first_comp], shader); + var->data.location_frac = first_comp; + var->type = glsl_replace_vector_type(var->type, num_comps); + + nir_shader_add_variable(shader, var); + + vars[location][first_comp] = var; + + while (comps) { + const int comp = u_bit_scan(&comps); + if (vars[location][comp]) { + vars[location][comp] = var; + } + } +} + +static inline bool +r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs) +{ + return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type)); +} + +static void +r600_create_new_io_vars(nir_shader *shader, nir_variable_mode mode, + nir_variable *vars[16][4]) +{ + bool can_rewrite_vars = false; + nir_foreach_variable_with_modes(var, shader, mode) { + if (r600_variable_can_rewrite(var)) { + can_rewrite_vars = true; + unsigned loc = r600_correct_location(var); + vars[loc][var->data.location_frac] = var; + } + } + + if (!can_rewrite_vars) + return; + + /* We don't handle combining vars of different type e.g. different array + * lengths. 
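+ *
+ * A worked example of the component mask built below (variables
+ * hypothetical): a float at location_frac 0 and a vec2 at
+ * location_frac 2 on the same location yield comps = 0b1101, so
+ * r600_create_new_io_var() merges them into one 3-component vector
+ * variable covering those slots.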
+ */ + for (unsigned i = 0; i < 16; i++) { + unsigned comps = 0; + + for (unsigned j = 0; j < 3; j++) { + + if (!vars[i][j]) + continue; + + for (unsigned k = j + 1; k < 4; k++) { + if (!vars[i][k]) + continue; + + if (!r600_variables_can_merge(vars[i][j], vars[i][k])) + continue; + + /* Set comps */ + for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n) + comps |= 1 << (vars[i][j]->data.location_frac + n); + + for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n) + comps |= 1 << (vars[i][k]->data.location_frac + n); + + } + } + if (comps) + r600_create_new_io_var(shader, vars, i, comps); + } +} + +static bool +r600_vectorize_io_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_metadata_require(impl, nir_metadata_dominance); + + nir_shader *shader = impl->function->shader; + nir_variable *updated_vars[16][4] = {0}; + + r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars); + + struct set *instr_set = r600_vec_instr_set_create(); + bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set, + updated_vars); + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + r600_vec_instr_set_destroy(instr_set); + return false; +} + +bool +r600_vectorize_vs_inputs(nir_shader *shader) +{ + bool progress = false; + + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= r600_vectorize_io_impl(function->impl); + } + + return progress; +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp new file mode 100644 index 000000000..e37e2732b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -0,0 +1,1179 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "../r600_pipe.h" +#include "../r600_shader.h" +#include "sfn_shader_vertex.h" + +#include "sfn_shader_compute.h" +#include "sfn_shader_fragment.h" +#include "sfn_shader_geometry.h" +#include "sfn_liverange.h" +#include "sfn_ir_to_assembly.h" +#include "sfn_nir.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_fetch.h" +#include "sfn_instruction_lds.h" + +#include <iostream> + +#define ENABLE_DEBUG 1 + +#ifdef ENABLE_DEBUG +#define DEBUG_SFN(X) \ + do {\ + X; \ + } while (0) +#else +#define DEBUG_SFN(X) +#endif + +namespace r600 { + +using namespace std; + + +ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, + r600_pipe_shader_selector& sel, + r600_shader &sh_info, int scratch_size, + enum chip_class chip_class, + int atomic_base): + m_processor_type(ptype), + m_nesting_depth(0), + m_block_number(0), + m_export_output(0, -1), + m_sh_info(sh_info), + m_chip_class(chip_class), + m_tex_instr(*this), + m_alu_instr(*this), + m_ssbo_instr(*this), + m_pending_else(nullptr), + m_scratch_size(scratch_size), + m_next_hwatomic_loc(0), + m_sel(sel), + m_atomic_base(atomic_base), + m_image_count(0), + last_emitted_alu(nullptr) +{ + m_sh_info.processor_type = ptype; + +} + + +ShaderFromNirProcessor::~ShaderFromNirProcessor() +{ +} + +bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_tex: { + nir_tex_instr *t = nir_instr_as_tex(instr); + if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF) + sh_info().uses_tex_buffers = true; + if (t->op == nir_texop_txs && + t->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + t->is_array) + sh_info().has_txq_cube_array_z_comp = true; + break; + } + case nir_instr_type_intrinsic: { + auto *i = nir_instr_as_intrinsic(instr); + switch (i->intrinsic) { + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_comp_swap: + m_sel.info.writes_memory = 1; + FALLTHROUGH; + case nir_intrinsic_image_load: + m_ssbo_instr.set_require_rat_return_address(); + break; + case nir_intrinsic_image_size: { + if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE && + nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2) + sh_info().has_txq_cube_array_z_comp = true; + } + + + + default: + ; + } + + + } + default: + ; + } + + return scan_sysvalue_access(instr); +} + +enum chip_class ShaderFromNirProcessor::get_chip_class(void) const +{ + return m_chip_class; +} + +bool ShaderFromNirProcessor::allocate_reserved_registers() +{ + bool retval = do_allocate_reserved_registers(); + m_ssbo_instr.load_rat_return_address(); + if (sh_info().uses_atomics) + m_ssbo_instr.load_atomic_inc_limits(); + m_ssbo_instr.set_ssbo_offset(m_image_count); + return retval; +} + +static void remap_shader_info(r600_shader& sh_info, + std::vector<rename_reg_pair>& map, + UNUSED ValueMap& 
values) +{ + for (unsigned i = 0; i < sh_info.num_arrays; ++i) { + auto new_index = map[sh_info.arrays[i].gpr_start]; + if (new_index.valid) + sh_info.arrays[i].gpr_start = new_index.new_reg; + map[sh_info.arrays[i].gpr_start].used = true; + } + + for (unsigned i = 0; i < sh_info.ninput; ++i) { + sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr + << " of map.size()\n"; + + assert(sh_info.input[i].gpr < map.size()); + auto new_index = map[sh_info.input[i].gpr]; + if (new_index.valid) + sh_info.input[i].gpr = new_index.new_reg; + map[sh_info.input[i].gpr].used = true; + } + + for (unsigned i = 0; i < sh_info.noutput; ++i) { + assert(sh_info.output[i].gpr < map.size()); + auto new_index = map[sh_info.output[i].gpr]; + if (new_index.valid) + sh_info.output[i].gpr = new_index.new_reg; + map[sh_info.output[i].gpr].used = true; + } +} + +void ShaderFromNirProcessor::remap_registers() +{ + // register renumbering + auto rc = register_count(); + if (!rc) + return; + + std::vector<register_live_range> register_live_ranges(rc); + + auto temp_register_map = get_temp_registers(); + + Shader sh{m_output, temp_register_map}; + LiverangeEvaluator().run(sh, register_live_ranges); + auto register_map = get_temp_registers_remapping(register_live_ranges); + + sfn_log << SfnLog::merge << "=========Mapping===========\n"; + for (size_t i = 0; i < register_map.size(); ++i) + if (register_map[i].valid) + sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n"; + + ValueRemapper vmap0(register_map, temp_register_map); + for (auto& block: m_output) + block.remap_registers(vmap0); + + remap_shader_info(m_sh_info, register_map, temp_register_map); + + /* Mark inputs as used registers, these registers should no be remapped */ + for (auto& v: sh.m_temp) { + if (v.second->type() == Value::gpr) { + const auto& g = static_cast<const GPRValue&>(*v.second); + if (g.is_input()) + register_map[g.sel()].used = true; + } + } + + int new_index = 0; + for (auto& i : register_map) { + i.valid = i.used; + if (i.used) + i.new_reg = new_index++; + } + + ValueRemapper vmap1(register_map, temp_register_map); + for (auto& ir: m_output) + ir.remap_registers(vmap1); + + remap_shader_info(m_sh_info, register_map, temp_register_map); +} + +bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform) +{ + // m_uniform_type_map + m_uniform_type_map[uniform->data.location] = uniform->type; + + if (uniform->type->contains_atomic()) { + int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE; + sh_info().nhwatomic += natomics; + + if (uniform->type->is_array()) + sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; + + sh_info().uses_atomics = 1; + + struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges]; + ++sh_info().nhwatomic_ranges; + atom.buffer_id = uniform->data.binding; + atom.hw_idx = m_atomic_base + m_next_hwatomic_loc; + + atom.start = uniform->data.offset >> 2; + atom.end = atom.start + natomics - 1; + + if (m_atomic_base_map.find(uniform->data.binding) == + m_atomic_base_map.end()) + m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc; + + m_next_hwatomic_loc += natomics; + + m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1; + + sfn_log << SfnLog::io << "HW_ATOMIC file count: " + << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n"; + } + + auto type = uniform->type->is_array() ? 
uniform->type->without_array(): uniform->type; + if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) { + sh_info().uses_images = 1; + if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo)) + sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE; + } + + if (uniform->type->is_image()) { + ++m_image_count; + } + + return true; +} + +bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh) +{ + return true; +} + +void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr) +{ + auto& dest = instr->dest; + unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index; + assert(util_bitcount(instr->modes) == 1); + m_var_mode[instr->var] = instr->modes; + m_var_derefs[index] = instr->var; + + sfn_log << SfnLog::io << "Add var deref:" << index + << " with DDL:" << instr->var->data.driver_location << "\n"; +} + +void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io) +{ + switch (io.name) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + case TGSI_SEMANTIC_CLIPVERTEX: + io.spi_sid = 0; + break; + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_PCOORD: + io.spi_sid = io.sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1; + } +} + +const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const +{ + unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index; + + sfn_log << SfnLog::io << "Search for deref:" << index << "\n"; + + auto v = m_var_derefs.find(index); + if (v != m_var_derefs.end()) + return v->second; + + fprintf(stderr, "R600: could not find deref with index %d\n", index); + + return nullptr; + + /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr); + return nir_deref_instr_get_variable(deref); */ +} + +bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr) +{ + return m_tex_instr.emit(instr); +} + +void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir) +{ + if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) { + for (unsigned i = 0; i < ir->n_sources(); ++i) { + auto& s = ir->src(i); + if (s.type() == Value::kconst) { + auto& c = static_cast<UniformValue&>(s); + if (c.addr()) { + last_emitted_alu->set_flag(alu_last_instr); + break; + } + } + } + } + last_emitted_alu = ir; + emit_instruction_internal(ir); +} + + +void ShaderFromNirProcessor::emit_instruction(Instruction *ir) +{ + + emit_instruction_internal(ir); + last_emitted_alu = nullptr; +} + +void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir) +{ + if (m_pending_else) { + append_block(-1); + m_output.back().emit(PInstruction(m_pending_else)); + append_block(1); + m_pending_else = nullptr; + } + + r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; + if (m_output.empty()) + append_block(0); + + m_output.back().emit(Instruction::Pointer(ir)); +} + +void ShaderFromNirProcessor::emit_shader_start() +{ + /* placeholder, may become an abstract method */ + m_ssbo_instr.set_ssbo_offset(m_image_count); +} + +bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: { + auto b = new LoopBreakInstruction(); + emit_instruction(b); + return true; + } + case nir_jump_continue: { + auto b = new LoopContInstruction(); + emit_instruction(b); + return true; + } + default: { + nir_instr 
*i = reinterpret_cast<nir_instr*>(instr); + sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n"; + return false; + } + } + return true; +} + +bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr) +{ + return m_alu_instr.emit(instr); +} + +bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr) +{ + return false; +} + +bool ShaderFromNirProcessor::emit_loop_start(int loop_id) +{ + LoopBeginInstruction *loop = new LoopBeginInstruction(); + emit_instruction(loop); + m_loop_begin_block_map[loop_id] = loop; + append_block(1); + return true; +} +bool ShaderFromNirProcessor::emit_loop_end(int loop_id) +{ + auto start = m_loop_begin_block_map.find(loop_id); + if (start == m_loop_begin_block_map.end()) { + sfn_log << SfnLog::err << "End loop: Loop start for " + << loop_id << " not found\n"; + return false; + } + m_nesting_depth--; + m_block_number++; + m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number)); + LoopEndInstruction *loop = new LoopEndInstruction(start->second); + emit_instruction(loop); + + m_loop_begin_block_map.erase(start); + return true; +} + +bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt) +{ + + auto value = from_nir(if_stmt->condition, 0, 0); + AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)), + value, Value::zero, EmitInstruction::last); + pred->set_flag(alu_update_exec); + pred->set_flag(alu_update_pred); + pred->set_cf_type(cf_alu_push_before); + + append_block(1); + + IfInstruction *ir = new IfInstruction(pred); + emit_instruction(ir); + assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end()); + m_if_block_start_map[if_id] = ir; + return true; +} + +bool ShaderFromNirProcessor::emit_else_start(int if_id) +{ + auto iif = m_if_block_start_map.find(if_id); + if (iif == m_if_block_start_map.end()) { + std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n"; + return false; + } + + if (iif->second->type() != Instruction::cond_if) { + std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n"; + return false; + } + IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second); + ElseInstruction *ir = new ElseInstruction(if_instr); + m_if_block_start_map[if_id] = ir; + m_pending_else = ir; + + return true; +} + +bool ShaderFromNirProcessor::emit_ifelse_end(int if_id) +{ + auto ifelse = m_if_block_start_map.find(if_id); + if (ifelse == m_if_block_start_map.end()) { + std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n"; + return false; + } + + if (ifelse->second->type() != Instruction::cond_if && + ifelse->second->type() != Instruction::cond_else) { + std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n"; + return false; + } + /* Clear pending else, if the else branch was empty, non will be emitted */ + + m_pending_else = nullptr; + + append_block(-1); + IfElseEndInstruction *ir = new IfElseEndInstruction(); + emit_instruction(ir); + + return true; +} + +bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset) +{ + PValue src = get_temp_register(); + emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr})); + + GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); + emit_instruction(new FetchTCSIOParam(dest, src, offset)); + + return true; + +} + +bool 
ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr) +{ + auto address = varvec_from_nir(instr->src[0], instr->num_components); + auto dest_value = varvec_from_nir(instr->dest, instr->num_components); + + emit_instruction(new LDSReadInstruction(address, dest_value)); + return true; +} + +static unsigned +lds_op_from_intrinsic(nir_intrinsic_op op) { + switch (op) { + case nir_intrinsic_shared_atomic_add: + return LDS_OP2_LDS_ADD_RET; + case nir_intrinsic_shared_atomic_and: + return LDS_OP2_LDS_AND_RET; + case nir_intrinsic_shared_atomic_or: + return LDS_OP2_LDS_OR_RET; + case nir_intrinsic_shared_atomic_imax: + return LDS_OP2_LDS_MAX_INT_RET; + case nir_intrinsic_shared_atomic_umax: + return LDS_OP2_LDS_MAX_UINT_RET; + case nir_intrinsic_shared_atomic_imin: + return LDS_OP2_LDS_MIN_INT_RET; + case nir_intrinsic_shared_atomic_umin: + return LDS_OP2_LDS_MIN_UINT_RET; + case nir_intrinsic_shared_atomic_xor: + return LDS_OP2_LDS_XOR_RET; + case nir_intrinsic_shared_atomic_exchange: + return LDS_OP2_LDS_XCHG_RET; + case nir_intrinsic_shared_atomic_comp_swap: + return LDS_OP3_LDS_CMP_XCHG_RET; + default: + unreachable("Unsupported shared atomic opcode"); + } +} + +bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr) +{ + auto address = from_nir(instr->src[0], 0); + auto dest_value = from_nir(instr->dest, 0); + auto value = from_nir(instr->src[1], 0); + auto op = lds_op_from_intrinsic(instr->intrinsic); + + if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) { + auto value2 = from_nir(instr->src[2], 0); + emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op)); + } else { + emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op)); + } + return true; +} + + +bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr) +{ + unsigned write_mask = nir_intrinsic_write_mask(instr); + + auto address = from_nir(instr->src[1], 0); + int swizzle_base = (write_mask & 0x3) ? 
0 : 2; + write_mask |= write_mask >> 2; + + auto value = from_nir(instr->src[0], swizzle_base); + if (!(write_mask & 2)) { + emit_instruction(new LDSWriteInstruction(address, 0, value)); + } else { + auto value1 = from_nir(instr->src[0], swizzle_base + 1); + emit_instruction(new LDSWriteInstruction(address, 0, value, value1)); + } + + return true; +} + +bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + if (emit_intrinsic_instruction_override(instr)) + return true; + + if (m_ssbo_instr.emit(&instr->instr)) { + m_sel.info.writes_memory = true; + return true; + } + + switch (instr->intrinsic) { + case nir_intrinsic_load_deref: { + auto var = get_deref_location(instr->src[0]); + if (!var) + return false; + auto mode_helper = m_var_mode.find(var); + if (mode_helper == m_var_mode.end()) { + cerr << "r600-nir: variable '" << var->name << "' not found\n"; + return false; + } + switch (mode_helper->second) { + case nir_var_function_temp: + return emit_load_function_temp(var, instr); + default: + cerr << "r600-nir: Unsupported mode" << mode_helper->second + << "for src variable\n"; + return false; + } + } + case nir_intrinsic_store_scratch: + return emit_store_scratch(instr); + case nir_intrinsic_load_scratch: + return emit_load_scratch(instr); + case nir_intrinsic_load_uniform: + return load_uniform(instr); + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + return emit_discard_if(instr); + case nir_intrinsic_load_ubo_vec4: + return emit_load_ubo_vec4(instr); + case nir_intrinsic_load_tcs_in_param_base_r600: + return emit_load_tcs_param_base(instr, 0); + case nir_intrinsic_load_tcs_out_param_base_r600: + return emit_load_tcs_param_base(instr, 16); + case nir_intrinsic_load_local_shared_r600: + case nir_intrinsic_load_shared: + return emit_load_local_shared(instr); + case nir_intrinsic_store_local_shared_r600: + case nir_intrinsic_store_shared: + return emit_store_local_shared(instr); + case nir_intrinsic_control_barrier: + case nir_intrinsic_memory_barrier_tcs_patch: + case nir_intrinsic_memory_barrier_shared: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_group_memory_barrier: + return emit_barrier(instr); + case nir_intrinsic_memory_barrier_atomic_counter: + return true; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + return emit_atomic_local_shared(instr); + case nir_intrinsic_shader_clock: + return emit_shader_clock(instr); + case nir_intrinsic_copy_deref: + case nir_intrinsic_load_constant: + case nir_intrinsic_load_input: + case nir_intrinsic_store_output: + + default: + fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic); + return false; + } + return false; +} + +bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr) +{ + return false; +} + +bool +ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr) +{ + return false; +} + +bool 
ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr) +{ + AluInstruction *ir = new AluInstruction(op0_group_barrier); + ir->set_flag(alu_last_instr); + emit_instruction(ir); + return true; +} + + +bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last) +{ + if (!dest.is_ssa) { + auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write}); + if (as_last) + ir->set_flag(alu_last_instr); + emit_instruction(ir); + } else { + inject_register(dest.ssa.index, chan, value, true); + } + return true; +} + +bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr) +{ + PValue address = from_nir(instr->src[1], 0, 0); + + auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1, + swizzle_from_comps(instr->num_components)); + + int writemask = nir_intrinsic_write_mask(instr); + int align = nir_intrinsic_align_mul(instr); + int align_offset = nir_intrinsic_align_offset(instr); + + WriteScratchInstruction *ir = nullptr; + if (address->type() == Value::literal) { + const auto& lv = static_cast<const LiteralValue&>(*address); + ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask); + } else { + address = from_nir_with_fetch_constant(instr->src[1], 0); + ir = new WriteScratchInstruction(address, value, align, align_offset, + writemask, m_scratch_size); + } + emit_instruction(ir); + sh_info().needs_scratch_space = 1; + return true; +} + +bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr) +{ + PValue address = from_nir_with_fetch_constant(instr->src[0], 0); + std::array<PValue, 4> dst_val; + for (int i = 0; i < 4; ++i) + dst_val[i] = from_nir(instr->dest, i < instr->num_components ? 
i : 7); + + GPRVector dst(dst_val); + auto ir = new LoadFromScratch(dst, address, m_scratch_size); + ir->prelude_append(new WaitAck(0)); + emit_instruction(ir); + sh_info().needs_scratch_space = 1; + return true; +} + +bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr) +{ + emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0), + PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write)); + emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1), + PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write)); + return true; +} + +GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src, + unsigned mask, + const GPRVector::Swizzle& swizzle, + bool match) +{ + bool use_same = true; + GPRVector::Values v; + + std::array<bool,4> used_swizzles = {false, false, false, false}; + + /* Check whether all sources come from a GPR, and, + * if requested, whether they are swizzled as expected */ + + for (int i = 0; i < 4 && use_same; ++i) { + if ((1 << i) & mask) { + if (swizzle[i] < 4) { + v[i] = from_nir(src, swizzle[i]); + assert(v[i]); + use_same &= (v[i]->type() == Value::gpr); + if (match) { + use_same &= (v[i]->chan() == swizzle[i]); + } + used_swizzles[v[i]->chan()] = true; + } + } + } + + + /* Now check whether all inputs come from the same GPR, and fill + * empty slots in the vector with unused swizzles, bail out if + * the sources are not from the same GPR + */ + + if (use_same) { + int next_free_swizzle = 0; + while (used_swizzles[next_free_swizzle] && next_free_swizzle < 4) + next_free_swizzle++; + + /* Find the first GPR index used */ + int i = 0; + while (!v[i] && i < 4) ++i; + assert(i < 4); + unsigned sel = v[i]->sel(); + + + for (i = 0; i < 4 && use_same; ++i) { + if (!v[i]) { + if (swizzle[i] >= 4) + v[i] = PValue(new GPRValue(sel, swizzle[i])); + else { + assert(next_free_swizzle < 4); + v[i] = PValue(new GPRValue(sel, next_free_swizzle)); + used_swizzles[next_free_swizzle] = true; + while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle]) + next_free_swizzle++; + } + } + else + use_same &= v[i]->sel() == sel; + } + } + + /* We can't re-use the source data because they either need re-swizzling, or + * they didn't come all from a GPR or the same GPR, so copy to a new vector + */ + if (!use_same) { + AluInstruction *ir = nullptr; + GPRVector result = get_temp_vec4(swizzle); + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 4 && (mask & (1 << i))) { + ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]), + EmitInstruction::write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return result; + } else + return GPRVector(v);; +} + +bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) +{ + auto bufid = nir_src_as_const_value(instr->src[0]); + auto buf_offset = nir_src_as_const_value(instr->src[1]); + + if (!buf_offset) { + /* TODO: if buf_offset is constant then this can also be solved by using the CF indes + * on the ALU block, and this would probably make sense when there are more then one + * loads with the same buffer ID. 
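+ *
+ * Rough shape of the fetch emitted below (mnemonics and register
+ * numbers illustrative only): the offset from src[1] is forced into
+ * a GPR and a vertex-cache fetch reads the constant buffer,
+ *
+ *   MOV       R0.x, <offset>
+ *   VTX_FETCH Rdst.xyzw, R0.x, cb[1 + bufid]
+ *
+ * falling back to buffer index mode zero when the buffer id is not a
+ * compile-time constant; the destination swizzle is shifted by
+ * nir_intrinsic_component().
+ 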
*/ + + PValue addr = from_nir_with_fetch_constant(instr->src[1], 0); + GPRVector trgt; + std::array<int, 4> swz = {7,7,7,7}; + for (unsigned i = 0; i < 4; ++i) { + if (i < nir_dest_num_components(instr->dest)) { + trgt.set_reg_i(i, from_nir(instr->dest, i)); + swz[i] = i + nir_intrinsic_component(instr); + } else { + trgt.set_reg_i(i, from_nir(instr->dest, 7)); + } + } + + FetchInstruction *ir; + if (bufid) { + ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + 1 + bufid->u32, nullptr, bim_none); + } else { + PValue bufid = from_nir(instr->src[0], 0, 0); + ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + 1, bufid, bim_zero); + } + ir->set_dest_swizzle(swz); + emit_instruction(ir); + m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; + } + + + if (bufid) { + int buf_cmp = nir_intrinsic_component(instr); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + int cmp = buf_cmp + i; + assert(cmp < 4); + auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1)); + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; + + } else { + int buf_cmp = nir_intrinsic_component(instr); + AluInstruction *ir = nullptr; + auto kc_id = from_nir(instr->src[0], 0); + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + int cmp = buf_cmp + i; + auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id)); + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; + } +} + +bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + if (instr->intrinsic == nir_intrinsic_discard_if) { + emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)), + {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr})); + + } else { + emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)), + {Value::zero, Value::zero}, {alu_last_instr})); + } + m_sh_info.uses_kill = 1; + return true; +} + +bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << __func__ << ": emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "'\n"; + + + /* If the target register is a SSA register and the loading is not + * indirect then we can do lazy loading, i.e. the uniform value can + * be used directly. Otherwise we have to load the data for real + * rigt away. 
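+ *
+ * A sketch of the two paths (constant values invented): for a
+ * load_uniform with src[0] = const 2 and base = 1, component i can
+ * alias kcache constant 512 + 2 + 1 directly, while a non-constant
+ * src[0] falls back to load_uniform_indirect() with a byte offset of
+ * 16 * base.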
+ */ + auto literal = nir_src_as_const_value(instr->src[0]); + int base = nir_intrinsic_base(instr); + + if (literal) { + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + PValue u = PValue(new UniformValue(512 + literal->u32 + base, i)); + sfn_log << SfnLog::io << "uniform " + << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n"; + + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), + u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + } else { + PValue addr = from_nir(instr->src[0], 0, 0); + return load_uniform_indirect(instr, addr, 16 * base, 0); + } + return true; +} + +bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid) +{ + if (!addr) { + std::cerr << "r600-nir: don't know how uniform is addressed\n"; + return false; + } + + GPRVector trgt; + std::array<int, 4> swz = {7,7,7,7}; + for (int i = 0; i < 4; ++i) { + trgt.set_reg_i(i, from_nir(instr->dest, i)); + swz[i] = i; + } + + if (addr->type() != Value::gpr) { + emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr}); + addr = trgt.reg_i(0); + } + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest, + bufferid, PValue(), bim_none); + ir->set_dest_swizzle(swz); + emit_instruction(ir); + m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; +} + +AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < literal->def.num_components ; ++i) { + if (writemask & (1 << i)){ + PValue lsrc; + switch (literal->def.bit_size) { + + case 1: + sfn_log << SfnLog::reg << "Got literal of bit size 1\n"; + lsrc = literal->value[i].b ? + PValue(new LiteralValue( 0xffffffff, i)) : + Value::zero; + break; + case 32: + sfn_log << SfnLog::reg << "Got literal of bit size 32\n"; + if (literal->value[i].u32 == 0) + lsrc = Value::zero; + else if (literal->value[i].u32 == 1) + lsrc = Value::one_i; + else if (literal->value[i].f32 == 1.0f) + lsrc = Value::one_f; + else if (literal->value[i].f32 == 0.5f) + lsrc = Value::zero_dot_5; + else + lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); + break; + default: + sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size + << " falling back to 32 bit\n"; + lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); + } + ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write); + + emit_instruction(ir); + } + } + return ir; +} + +PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) +{ + PValue value = from_nir(src, component); + if (value->type() != Value::gpr && + value->type() != Value::gpr_vector && + value->type() != Value::gpr_array_value) { + PValue retval = get_temp_register(channel); + emit_instruction(new AluInstruction(op1_mov, retval, value, + EmitInstruction::last_write)); + value = retval; + } + return value; +} + +bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr) +{ + r600::sfn_log << SfnLog::instr << __func__ << ": emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "'\n"; + + /* Give the specific shader type a chance to process this, i.e. 
+bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
+{
+   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+                 << *reinterpret_cast<nir_instr*>(instr)
+                 << "'\n";
+
+   /* Give the specific shader type a chance to process this, i.e. geometry
+    * and tessellation shaders need a specialized deref_array; for the other
+    * shaders it is lowered.
+    */
+   if (emit_deref_instruction_override(instr))
+      return true;
+
+   switch (instr->deref_type) {
+   case nir_deref_type_var:
+      set_var_address(instr);
+      return true;
+   case nir_deref_type_array:
+   case nir_deref_type_array_wildcard:
+   case nir_deref_type_struct:
+   case nir_deref_type_cast:
+   default:
+      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
+   }
+   return false;
+}
+
+bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
+                                              std::vector<PValue> srcs,
+                                              const std::set<AluModifiers>& m_flags)
+{
+   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
+   emit_instruction(ir);
+   return true;
+}
+
+void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
+{
+   m_output_register_map[loc] = gpr;
+}
+
+void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
+{
+   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
+   m_export_output.emit(PInstruction(ir));
+}
+
+const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
+{
+   const GPRVector *retval = nullptr;
+   auto val = m_output_register_map.find(location);
+   if (val != m_output_register_map.end())
+      retval = val->second;
+   return retval;
+}
+
+void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
+{
+   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
+   m_inputs[pos] = var;
+}
+
+void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
+{
+   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n";
+   m_outputs[pos] = sel;
+}
+
+void ShaderFromNirProcessor::append_block(int nesting_change)
+{
+   m_nesting_depth += nesting_change;
+   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
+}
+
+void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const
+{
+   shader.num_arrays = m_reg_arrays.size();
+   if (shader.num_arrays) {
+      shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array));
+      for (unsigned i = 0; i < shader.num_arrays; ++i) {
+         shader.arrays[i].comp_mask = m_reg_arrays[i]->mask();
+         shader.arrays[i].gpr_start = m_reg_arrays[i]->sel();
+         shader.arrays[i].gpr_count = m_reg_arrays[i]->size();
+      }
+      shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
+   }
+}
+
+void ShaderFromNirProcessor::finalize()
+{
+   do_finalize();
+
+   for (auto& i : m_inputs)
+      m_sh_info.input[i.first].gpr = i.second->sel();
+
+   for (auto& i : m_outputs)
+      m_sh_info.output[i.first].gpr = i.second;
+
+   m_output.push_back(m_export_output);
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h
new file mode 100644
index 000000000..a48674dab
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h
@@ -0,0 +1,224 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_shader_from_nir_h +#define sfn_shader_from_nir_h + + +#include "gallium/drivers/r600/r600_shader.h" + +#include "compiler/nir/nir.h" +#include "compiler/nir_types.h" + +#include "sfn_instruction_block.h" +#include "sfn_instruction_export.h" +#include "sfn_alu_defines.h" +#include "sfn_valuepool.h" +#include "sfn_debug.h" +#include "sfn_instruction_cf.h" +#include "sfn_emittexinstruction.h" +#include "sfn_emitaluinstruction.h" +#include "sfn_emitssboinstruction.h" + +#include <vector> +#include <set> +#include <stack> +#include <unordered_map> + +struct nir_instr; + +namespace r600 { + +extern SfnLog sfn_log; + +class ShaderFromNirProcessor : public ValuePool { +public: + ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel, + r600_shader& sh_info, int scratch_size, enum chip_class _chip_class, + int atomic_base); + virtual ~ShaderFromNirProcessor(); + + void emit_instruction(Instruction *ir); + + PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1); + GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle, bool match = false); + + bool emit_instruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags); + void emit_export_instruction(WriteoutInstruction *ir); + void emit_instruction(AluInstruction *ir); + + void split_constants(nir_alu_instr* instr); + void remap_registers(); + + const nir_variable *get_deref_location(const nir_src& src) const; + + r600_shader& sh_info() {return m_sh_info;} + void add_param_output_reg(int loc, const GPRVector *gpr); + void set_output(unsigned pos, int sel); + const GPRVector *output_register(unsigned location) const; + void evaluate_spi_sid(r600_shader_io &io); + + enum chip_class get_chip_class() const; + + int remap_atomic_base(int base) { + return m_atomic_base_map[base]; + } + + void get_array_info(r600_shader& shader) const; + + virtual bool scan_inputs_read(const nir_shader *sh); + +protected: + + void set_var_address(nir_deref_instr *instr); + void set_input(unsigned pos, PValue var); + + bool scan_instruction(nir_instr *instr); + + virtual bool scan_sysvalue_access(nir_instr *instr) = 0; + + bool emit_if_start(int if_id, nir_if *if_stmt); + bool emit_else_start(int if_id); + bool emit_ifelse_end(int if_id); + + bool emit_loop_start(int loop_id); + bool emit_loop_end(int loop_id); + bool emit_jump_instruction(nir_jump_instr *instr); + + bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset); + bool emit_load_local_shared(nir_intrinsic_instr* instr); + bool emit_store_local_shared(nir_intrinsic_instr* instr); + bool emit_atomic_local_shared(nir_intrinsic_instr* instr); + + bool emit_barrier(nir_intrinsic_instr* instr); + + bool load_preloaded_value(const nir_dest& dest, int chan, PValue value, + bool 
as_last = true);
+
+   void inc_atomic_file_count();
+
+   enum ESlots {
+      es_face,
+      es_instanceid,
+      es_invocation_id,
+      es_patch_id,
+      es_pos,
+      es_rel_patch_id,
+      es_sample_mask_in,
+      es_sample_id,
+      es_sample_pos,
+      es_tess_factor_base,
+      es_vertexid,
+      es_tess_coord,
+      es_primitive_id,
+      es_helper_invocation,
+      es_last
+   };
+
+   std::bitset<es_last> m_sv_values;
+
+   bool allocate_reserved_registers();
+
+private:
+   virtual bool do_allocate_reserved_registers() = 0;
+
+   void emit_instruction_internal(Instruction *ir);
+
+   bool emit_alu_instruction(nir_instr *instr);
+   bool emit_deref_instruction(nir_deref_instr* instr);
+   bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
+   virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
+   bool emit_tex_instruction(nir_instr* instr);
+   bool emit_discard_if(nir_intrinsic_instr* instr);
+   bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
+   bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
+   bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufid);
+
+   /* Code creating functions */
+   bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
+   AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
+
+   bool load_uniform(nir_intrinsic_instr* instr);
+   bool process_uniforms(nir_variable *uniform);
+
+   void append_block(int nesting_change);
+
+   virtual void emit_shader_start();
+   virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
+
+   bool emit_store_scratch(nir_intrinsic_instr* instr);
+   bool emit_load_scratch(nir_intrinsic_instr* instr);
+   bool emit_shader_clock(nir_intrinsic_instr* instr);
+   virtual void do_finalize() = 0;
+
+   void finalize();
+   friend class ShaderFromNir;
+
+   std::set<nir_variable*> m_arrays;
+
+   std::map<unsigned, PValue> m_inputs;
+   std::map<unsigned, int> m_outputs;
+
+   std::map<unsigned, nir_variable*> m_var_derefs;
+   std::map<const nir_variable *, nir_variable_mode> m_var_mode;
+
+   std::map<unsigned, const glsl_type*> m_uniform_type_map;
+   std::map<int, IfElseInstruction *> m_if_block_start_map;
+   std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
+
+   pipe_shader_type m_processor_type;
+
+   std::vector<InstructionBlock> m_output;
+   unsigned m_nesting_depth;
+   unsigned m_block_number;
+   InstructionBlock m_export_output;
+   r600_shader& m_sh_info;
+   enum chip_class m_chip_class;
+   EmitTexInstruction m_tex_instr;
+   EmitAluInstruction m_alu_instr;
+   EmitSSBOInstruction m_ssbo_instr;
+   OutputRegisterMap m_output_register_map;
+
+   IfElseInstruction *m_pending_else;
+   int m_scratch_size;
+   int m_next_hwatomic_loc;
+
+   r600_pipe_shader_selector& m_sel;
+   int m_atomic_base;
+   int m_image_count;
+
+   std::unordered_map<int, int> m_atomic_base_map;
+   AluInstruction *last_emitted_alu;
+};
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
new file mode 100644
index 000000000..26ac54981
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
@@ -0,0 +1,112 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use,
copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_shader_compute.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + UNUSED const r600_shader_key& key, + enum chip_class chip_class): + ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader, + sh->scratch_space_needed, chip_class, 0), + m_reserved_registers(0) +{ +} + +bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) +{ + return true; +} +bool ComputeShaderFromNir::do_allocate_reserved_registers() +{ + int thread_id_sel = m_reserved_registers++; + int wg_id_sel = m_reserved_registers++; + + for (int i = 0; i < 3; ++i) { + auto tmp = new GPRValue(thread_id_sel, i); + tmp->set_as_input(); + tmp->set_keep_alive(); + m_local_invocation_id[i] = PValue(tmp); + inject_register(tmp->sel(), i, m_local_invocation_id[i], false); + + tmp = new GPRValue(wg_id_sel, i); + tmp->set_as_input(); + tmp->set_keep_alive(); + m_workgroup_id[i] = PValue(tmp); + inject_register(tmp->sel(), i, m_workgroup_id[i], false); + } + return true; +} + +bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_local_invocation_id: + return emit_load_3vec(instr, m_local_invocation_id); + case nir_intrinsic_load_work_group_id: + return emit_load_3vec(instr, m_workgroup_id); + case nir_intrinsic_load_num_work_groups: + return emit_load_num_work_groups(instr); + default: + return false; + } +} + +bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr, + const std::array<PValue,3>& src) +{ + for (int i = 0; i < 3; ++i) + load_preloaded_value(instr->dest, i, src[i], i == 2); + return true; +} + +bool ComputeShaderFromNir::emit_load_num_work_groups(nir_intrinsic_instr* instr) +{ + PValue a_zero = get_temp_register(1); + emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write)); + GPRVector dest; + for (int i = 0; i < 3; ++i) + dest.set_reg_i(i, from_nir(instr->dest, i)); + dest.set_reg_i(3, from_nir(instr->dest, 7)); + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, + fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16, + false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0, + bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7}); + ir->set_flag(vtx_srf_mode); + emit_instruction(ir); + return true; +} + +void ComputeShaderFromNir::do_finalize() +{ + +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h new file mode 100644 index 000000000..fea6f0122 --- /dev/null +++ 
b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h @@ -0,0 +1,62 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H +#define SFN_COMPUTE_SHADER_FROM_NIR_H + +#include "sfn_shader_base.h" +#include "sfn_shaderio.h" +#include <bitset> + +namespace r600 { + +class ComputeShaderFromNir : public ShaderFromNirProcessor +{ +public: + ComputeShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + const r600_shader_key &key, + enum chip_class chip_class); + + bool scan_sysvalue_access(nir_instr *instr) override; + +private: + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + + bool do_allocate_reserved_registers() override; + void do_finalize() override; + + bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src); + bool emit_load_num_work_groups(nir_intrinsic_instr* instr); + + int m_reserved_registers; + std::array<PValue,3> m_workgroup_id; + std::array<PValue,3> m_local_invocation_id; +}; + +} + +#endif // SFN_COMPUTE_SHADER_FROM_NIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp new file mode 100644 index 000000000..b13cb8a8a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp @@ -0,0 +1,1085 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "tgsi/tgsi_from_mesa.h" +#include "sfn_shader_fragment.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir, + r600_shader& sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key, + enum chip_class chip_class): + ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0), + m_max_color_exports(MAX2(key.ps.nr_cbufs,1)), + m_max_counted_color_exports(0), + m_two_sided_color(key.ps.color_two_side), + m_last_pixel_export(nullptr), + m_nir(nir), + m_reserved_registers(0), + m_frag_pos_index(0), + m_need_back_color(false), + m_front_face_loaded(false), + m_depth_exports(0), + m_apply_sample_mask(key.ps.apply_sample_id_mask), + m_dual_source_blend(key.ps.dual_source_blend), + m_pos_input(nullptr) +{ + for (auto& i: m_interpolator) { + i.enabled = false; + i.ij_index= 0; + } + + sh_info().rat_base = key.ps.nr_cbufs; + sh_info().atomic_base = key.ps.first_atomic_counter; +} + +unsigned barycentric_ij_index(nir_intrinsic_instr *instr) +{ + unsigned index = 0; + switch (instr->intrinsic) { + case nir_intrinsic_load_barycentric_sample: + index = 0; + break; + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_pixel: + index = 1; + break; + case nir_intrinsic_load_barycentric_centroid: + index = 2; + break; + default: + unreachable("Unknown interpolator intrinsic"); + } + + switch (nir_intrinsic_interp_mode(instr)) { + case INTERP_MODE_NONE: + case INTERP_MODE_SMOOTH: + case INTERP_MODE_COLOR: + return index; + case INTERP_MODE_NOPERSPECTIVE: + return index + 3; + case INTERP_MODE_FLAT: + case INTERP_MODE_EXPLICIT: + default: + unreachable("unknown/unsupported mode for load_interpolated"); + } + return 0; +} + +bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr, + bool interpolated) +{ + sfn_log << SfnLog::io << "Parse " << instr->instr + << "\n"; + + auto index = nir_src_as_const_value(instr->src[interpolated ? 
1 : 0]); + assert(index); + + unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32; + auto semantic = r600_get_varying_semantic(location); + tgsi_semantic name = (tgsi_semantic)semantic.first; + unsigned sid = semantic.second; + + + if (location == VARYING_SLOT_POS) { + m_sv_values.set(es_pos); + m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32, + nir_intrinsic_component(instr), + nir_dest_num_components(instr->dest), + TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER); + m_shaderio.add_input(m_pos_input); + return true; + } + + if (location == VARYING_SLOT_FACE) { + m_sv_values.set(es_face); + return true; + } + + + tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT; + tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; + + bool uses_interpol_at_centroid = false; + + if (interpolated) { + + glsl_interp_mode mode = INTERP_MODE_NONE; + auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); + mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent); + switch (parent->intrinsic) { + case nir_intrinsic_load_barycentric_sample: + tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE; + break; + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_pixel: + tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; + break; + case nir_intrinsic_load_barycentric_centroid: + tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID; + uses_interpol_at_centroid = true; + break; + default: + std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of " + << nir_intrinsic_infos[instr->intrinsic].name + << " interpolator?\n"; + assert(0); + } + + switch (mode) { + case INTERP_MODE_NONE: + if (name == TGSI_SEMANTIC_COLOR) { + tgsi_interpolate = TGSI_INTERPOLATE_COLOR; + break; + } + FALLTHROUGH; + case INTERP_MODE_SMOOTH: + tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case INTERP_MODE_NOPERSPECTIVE: + tgsi_interpolate = TGSI_INTERPOLATE_LINEAR; + break; + case INTERP_MODE_FLAT: + break; + case INTERP_MODE_COLOR: + tgsi_interpolate = TGSI_INTERPOLATE_COLOR; + break; + case INTERP_MODE_EXPLICIT: + default: + assert(0); + } + + m_interpolators_used.set(barycentric_ij_index(parent)); + + } + + switch (name) { + case TGSI_SEMANTIC_COLOR: { + auto input = m_shaderio.find_varying(name, sid); + if (!input) { + m_shaderio.add_input(new ShaderInputColor(name, sid, + nir_intrinsic_base(instr) + index->u32, + nir_intrinsic_component(instr), + nir_dest_num_components(instr->dest), + tgsi_interpolate, tgsi_loc)); + } else { + if (uses_interpol_at_centroid) + input->set_uses_interpolate_at_centroid(); + + auto varying = static_cast<ShaderInputVarying&>(*input); + varying.update_mask(nir_dest_num_components(instr->dest), + nir_intrinsic_component(instr)); + } + + m_need_back_color = m_two_sided_color; + return true; + } + case TGSI_SEMANTIC_PRIMID: + sh_info().gs_prim_id_input = true; + sh_info().ps_prim_id_input = m_shaderio.inputs().size(); + FALLTHROUGH; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_LAYER: + case TGSI_SEMANTIC_PCOORD: + case TGSI_SEMANTIC_VIEWPORT_INDEX: + case TGSI_SEMANTIC_CLIPDIST: { + auto input = m_shaderio.find_varying(name, sid); + if (!input) { + m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32, + nir_intrinsic_component(instr), + nir_dest_num_components(instr->dest), + tgsi_interpolate, tgsi_loc)); + } 
else { + if (uses_interpol_at_centroid) + input->set_uses_interpolate_at_centroid(); + + auto varying = static_cast<ShaderInputVarying&>(*input); + varying.update_mask(nir_dest_num_components(instr->dest), + nir_intrinsic_component(instr)); + } + + return true; + } + default: + return false; + } +} + + +bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + + switch (ii->intrinsic) { + case nir_intrinsic_load_front_face: + m_sv_values.set(es_face); + break; + case nir_intrinsic_load_sample_mask_in: + m_sv_values.set(es_sample_mask_in); + break; + case nir_intrinsic_load_sample_pos: + m_sv_values.set(es_sample_pos); + FALLTHROUGH; + case nir_intrinsic_load_sample_id: + m_sv_values.set(es_sample_id); + break; + case nir_intrinsic_load_helper_invocation: + m_sv_values.set(es_helper_invocation); + sh_info().uses_helper_invocation = true; + break; + case nir_intrinsic_load_input: + return process_load_input(ii, false); + case nir_intrinsic_load_interpolated_input: { + return process_load_input(ii, true); + } + case nir_intrinsic_store_output: + return process_store_output(ii); + + default: + ; + } + } + default: + ; + } + return true; +} + +bool FragmentShaderFromNir::do_allocate_reserved_registers() +{ + assert(!m_reserved_registers); + + int face_reg_index = -1; + int sample_id_index = -1; + // enabled interpolators based on inputs + for (unsigned i = 0; i < s_max_interpolators; ++i) { + if (m_interpolators_used.test(i)) { + sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n"; + m_interpolator[i].enabled = true; + } + } + + // sort the varying inputs + m_shaderio.sort_varying_inputs(); + + // handle interpolators + int num_baryc = 0; + for (int i = 0; i < 6; ++i) { + if (m_interpolator[i].enabled) { + sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n"; + + m_interpolator[i].ij_index = num_baryc; + + unsigned sel = num_baryc / 2; + unsigned chan = 2 * (num_baryc % 2); + + auto ip_i = new GPRValue(sel, chan + 1); + ip_i->set_as_input(); + m_interpolator[i].i.reset(ip_i); + inject_register(sel, chan + 1, m_interpolator[i].i, false); + + auto ip_j = new GPRValue(sel, chan); + ip_j->set_as_input(); + m_interpolator[i].j.reset(ip_j); + inject_register(sel, chan, m_interpolator[i].j, false); + + ++num_baryc; + } + } + m_reserved_registers += (num_baryc + 1) >> 1; + + if (m_sv_values.test(es_pos)) { + m_frag_pos_index = m_reserved_registers++; + assert(m_pos_input); + m_pos_input->set_gpr(m_frag_pos_index); + } + + // handle system values + if (m_sv_values.test(es_face) || m_need_back_color) { + face_reg_index = m_reserved_registers++; + m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0); + m_front_face_reg->set_as_input(); + sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n"; + inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false); + + m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index)); + load_front_face(); + } + + if (m_sv_values.test(es_sample_mask_in)) { + if (face_reg_index < 0) + face_reg_index = m_reserved_registers++; + + m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2); + m_sample_mask_reg->set_as_input(); + sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n"; + sh_info().nsys_inputs = 1; + m_shaderio.add_input(new 
ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
+   }
+
+   if (m_sv_values.test(es_sample_id) ||
+       m_sv_values.test(es_sample_mask_in)) {
+      if (sample_id_index < 0)
+         sample_id_index = m_reserved_registers++;
+
+      m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
+      m_sample_id_reg->set_as_input();
+      sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
+      sh_info().nsys_inputs++;
+      m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
+   }
+
+   // The back color handling is not emitted in the code, so we have to add
+   // the inputs here, and later we also need to inject the code that sets
+   // the right color.
+   if (m_need_back_color) {
+      size_t ninputs = m_shaderio.inputs().size();
+      for (size_t k = 0; k < ninputs; ++k) {
+         ShaderInput& i = m_shaderio.input(k);
+
+         if (i.name() != TGSI_SEMANTIC_COLOR)
+            continue;
+
+         ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
+
+         size_t next_pos = m_shaderio.size();
+         auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
+         m_shaderio.add_input(bcol);
+         col.set_back_color(next_pos);
+      }
+      m_shaderio.set_two_sided();
+   }
+
+   m_shaderio.update_lds_pos();
+
+   set_reserved_registers(m_reserved_registers);
+
+   return true;
+}
+
+void FragmentShaderFromNir::emit_shader_start()
+{
+   if (m_sv_values.test(es_face))
+      load_front_face();
+
+   if (m_sv_values.test(es_pos)) {
+      for (int i = 0; i < 4; ++i) {
+         auto v = new GPRValue(m_frag_pos_index, i);
+         v->set_as_input();
+         auto reg = PValue(v);
+         if (i == 3)
+            emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
+         m_frag_pos[i] = reg;
+      }
+   }
+
+   if (m_sv_values.test(es_helper_invocation)) {
+      m_helper_invocation = get_temp_register();
+      auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
+      emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
+      GPRVector dst({dummy, dummy, dummy, dummy});
+      std::array<int,4> swz = {7,7,7,7};
+      dst.set_reg_i(m_helper_invocation->chan(), m_helper_invocation);
+      swz[m_helper_invocation->chan()] = 4;
+
+      auto vtx = new FetchInstruction(dst, m_helper_invocation,
+                                      R600_BUFFER_INFO_CONST_BUFFER, bim_none);
+      vtx->set_flag(vtx_vpm);
+      vtx->set_flag(vtx_use_tc);
+      vtx->set_dest_swizzle(swz);
+      emit_instruction(vtx);
+   }
+}
+
+bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
+{
+   auto semantic = nir_intrinsic_io_semantics(instr);
+   unsigned driver_loc = nir_intrinsic_base(instr);
+
+   if (sh_info().noutput <= driver_loc)
+      sh_info().noutput = driver_loc + 1;
+
+   r600_shader_io& io = sh_info().output[driver_loc];
+   tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(semantic.location),
+                                    &io.name, &io.sid);
+
+   unsigned component = nir_intrinsic_component(instr);
+   io.write_mask |= nir_intrinsic_write_mask(instr) << component;
+
+   if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
+      sh_info().fs_write_all = true;
+   }
+
+   if (semantic.location == FRAG_RESULT_COLOR ||
+       (semantic.location >= FRAG_RESULT_DATA0 &&
+        semantic.location <= FRAG_RESULT_DATA7)) {
+      ++m_max_counted_color_exports;
+
+      /* Hack: force dual source output handling if one color output has a
+       * dual_source_blend_index > 0 */
+      if (semantic.location == FRAG_RESULT_COLOR &&
+          semantic.dual_source_blend_index > 0)
+         m_dual_source_blend = true;
+
+      if (m_max_counted_color_exports > 1)
+         sh_info().fs_write_all = false;
+      return true;
+   }
+
+   if (semantic.location == FRAG_RESULT_DEPTH ||
+       semantic.location == FRAG_RESULT_STENCIL ||
+       semantic.location == FRAG_RESULT_SAMPLE_MASK) {
+      io.write_mask = 15;
+      return true;
+   }
+
+   return false;
+}
+
+bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
+{
+   auto dest = from_nir(instr->dest, 0);
+   assert(m_sample_id_reg);
+   assert(m_sample_mask_reg);
+
+   emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
+   emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
+   return true;
+}
+
+bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_sample_mask_in:
+      if (m_apply_sample_mask) {
+         return emit_load_sample_mask_in(instr);
+      } else
+         return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
+   case nir_intrinsic_load_sample_id:
+      return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
+   case nir_intrinsic_load_front_face:
+      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+   case nir_intrinsic_load_sample_pos:
+      return emit_load_sample_pos(instr);
+   case nir_intrinsic_load_helper_invocation:
+      return load_preloaded_value(instr->dest, 0, m_helper_invocation);
+   case nir_intrinsic_load_input:
+      return emit_load_input(instr);
+   case nir_intrinsic_load_barycentric_sample:
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_centroid: {
+      unsigned ij = barycentric_ij_index(instr);
+      return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
+             load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
+   }
+   case nir_intrinsic_load_barycentric_at_offset:
+      return load_barycentric_at_offset(instr);
+   case nir_intrinsic_load_barycentric_at_sample:
+      return load_barycentric_at_sample(instr);
+   case nir_intrinsic_load_interpolated_input: {
+      return emit_load_interpolated_input(instr);
+   }
+   case nir_intrinsic_store_output:
+      return emit_store_output(instr);
+   default:
+      return false;
+   }
+}
+
+bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+
+   if (location == FRAG_RESULT_COLOR)
+      return emit_export_pixel(instr, m_dual_source_blend ? 1 : m_max_color_exports);
+
+   if ((location >= FRAG_RESULT_DATA0 &&
+        location <= FRAG_RESULT_DATA7) ||
+       location == FRAG_RESULT_DEPTH ||
+       location == FRAG_RESULT_STENCIL ||
+       location == FRAG_RESULT_SAMPLE_MASK)
+      return emit_export_pixel(instr, 1);
+
+   sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << "\n";
+   return false;
+}
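
With m_apply_sample_mask set, emit_load_sample_mask_in() above narrows gl_SampleMaskIn to the single sample this invocation shades, using a shift and an AND (LSHL_INT followed by AND_INT). The equivalent scalar computation, shown only for illustration (the function name is ours):

   #include <cstdint>

   // gl_SampleMaskIn under forced per-sample shading: keep only the bit
   // of this invocation's own sample within the hardware coverage mask.
   static uint32_t narrowed_sample_mask(uint32_t hw_coverage, uint32_t sample_id)
   {
      return (UINT32_C(1) << sample_id) & hw_coverage;
   }
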
+bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
+{
+   unsigned loc = nir_intrinsic_io_semantics(instr).location;
+   switch (loc) {
+   case VARYING_SLOT_POS:
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         load_preloaded_value(instr->dest, i, m_frag_pos[i]);
+      }
+      return true;
+   case VARYING_SLOT_FACE:
+      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+   default:
+      ;
+   }
+
+   auto param = nir_src_as_const_value(instr->src[1]);
+   assert(param && "Indirect PS inputs not (yet) supported");
+
+   auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
+
+   auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
+
+   io.set_gpr(dst.sel());
+
+   Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
+
+   if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
+                          nir_intrinsic_component(instr)))
+      return false;
+
+   if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
+
+      auto& color_input = static_cast<ShaderInputColor&>(io);
+      auto& bgio = m_shaderio.input(color_input.back_color_input_index());
+
+      GPRVector bgcol = get_temp_vec4();
+      bgio.set_gpr(bgcol.sel());
+      load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
+
+      load_front_face();
+
+      AluInstruction *ir = nullptr;
+      for (unsigned i = 0; i < 4; ++i) {
+         ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+   }
+
+   AluInstruction *ir = nullptr;
+   if (nir_intrinsic_component(instr) != 0) {
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+   }
+
+   return true;
+}
+
+bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
+{
+   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+   PValue dummy(new GPRValue(interpolator.i->sel(), 0));
+
+   GPRVector help = get_temp_vec4();
+   GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
+
+   auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
+   getgradh->set_dest_swizzle({0,1,7,7});
+   getgradh->set_flag(TexInstruction::x_unnormalized);
+   getgradh->set_flag(TexInstruction::y_unnormalized);
+   getgradh->set_flag(TexInstruction::z_unnormalized);
+   getgradh->set_flag(TexInstruction::w_unnormalized);
+   getgradh->set_flag(TexInstruction::grad_fine);
+   emit_instruction(getgradh);
+
+   auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
+   getgradv->set_dest_swizzle({7,7,0,1});
+   getgradv->set_flag(TexInstruction::x_unnormalized);
+   getgradv->set_flag(TexInstruction::y_unnormalized);
+   getgradv->set_flag(TexInstruction::z_unnormalized);
+   getgradv->set_flag(TexInstruction::w_unnormalized);
+   getgradv->set_flag(TexInstruction::grad_fine);
+   emit_instruction(getgradv);
+
+   PValue ofs_x = from_nir(instr->src[0], 0);
+   PValue ofs_y = from_nir(instr->src[0], 1);
+   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
+   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
+   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
+
+   return true;
+}
+
+bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
+{
+   GPRVector slope = get_temp_vec4();
+
+   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
+                                     from_nir_with_fetch_constant(instr->src[0], 0),
+                                     0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
+   fetch->set_flag(vtx_srf_mode);
+   emit_instruction(fetch);
+
+   GPRVector grad = get_temp_vec4();
+
+   auto interpolator = 
m_interpolator[barycentric_ij_index(instr)]; + assert(interpolator.enabled); + PValue dummy(new GPRValue(interpolator.i->sel(), 0)); + + GPRVector src({interpolator.j, interpolator.i, dummy, dummy}); + + auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue()); + tex->set_flag(TexInstruction::grad_fine); + tex->set_flag(TexInstruction::x_unnormalized); + tex->set_flag(TexInstruction::y_unnormalized); + tex->set_flag(TexInstruction::z_unnormalized); + tex->set_flag(TexInstruction::w_unnormalized); + tex->set_dest_swizzle({0,1,7,7}); + emit_instruction(tex); + + tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue()); + tex->set_flag(TexInstruction::x_unnormalized); + tex->set_flag(TexInstruction::y_unnormalized); + tex->set_flag(TexInstruction::z_unnormalized); + tex->set_flag(TexInstruction::w_unnormalized); + tex->set_flag(TexInstruction::grad_fine); + tex->set_dest_swizzle({7,7,0,1}); + emit_instruction(tex); + + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr})); + + return true; +} + +bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr) +{ + unsigned loc = nir_intrinsic_io_semantics(instr).location; + auto param = nir_src_as_const_value(instr->src[0]); + assert(param && "Indirect PS inputs not (yet) supported"); + + auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr)); + + assert(nir_intrinsic_io_semantics(instr).num_slots == 1); + + unsigned num_components = nir_dest_num_components(instr->dest); + + switch (loc) { + case VARYING_SLOT_POS: + for (unsigned i = 0; i < num_components; ++i) { + load_preloaded_value(instr->dest, i, m_frag_pos[i]); + } + return true; + case VARYING_SLOT_FACE: + return load_preloaded_value(instr->dest, 0, m_front_face_reg); + default: + ; + } + + auto dst = nir_intrinsic_component(instr) ? 
get_temp_vec4() : vec_from_nir(instr->dest, 4); + + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op1_interp_load_p0, dst[i], + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + + io.lds_pos(), i)), + EmitInstruction::write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + + /* TODO: back color */ + if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) { + Interpolator ip = {false, 0, NULL, NULL}; + + auto & color_input = static_cast<ShaderInputColor&> (io); + auto& bgio = m_shaderio.input(color_input.back_color_input_index()); + + GPRVector bgcol = get_temp_vec4(); + bgio.set_gpr(bgcol.sel()); + load_interpolated(bgcol, bgio, ip, num_components, 0); + + load_front_face(); + + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + if (nir_intrinsic_component(instr) != 0) { + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + + return true; +} + +void FragmentShaderFromNir::load_front_face() +{ + assert(m_front_face_reg); + if (m_front_face_loaded) + return; + + auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg, + Value::zero, {alu_write, alu_last_instr}); + m_front_face_loaded = true; + emit_instruction(ir); +} + +bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr) +{ + GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); + auto fetch = new FetchInstruction(vc_fetch, + no_index_offset, + fmt_32_32_32_32_float, + vtx_nf_scaled, + vtx_es_none, + m_sample_id_reg, + dest, + 0, + false, + 0xf, + R600_BUFFER_INFO_CONST_BUFFER, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + PValue(), + {0,1,2,3}); + fetch->set_flag(vtx_srf_mode); + emit_instruction(fetch); + return true; +} + +bool FragmentShaderFromNir::load_interpolated(GPRVector &dest, + ShaderInput& io, const Interpolator &ip, + int num_components, int start_comp) +{ + // replace io with ShaderInputVarying + if (io.interpolate() > 0) { + + sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i << ")" << "\n"; + + if (num_components == 1) { + switch (start_comp) { + case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x); + case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); + case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z); + case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3); + default: + assert(0); + } + } + + if (num_components == 2) { + switch (start_comp) { + case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3); + case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc); + case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) && + load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); + default: + assert(0); + } + } + + if (num_components == 3 && start_comp == 0) + return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) && + load_interpolated_one_comp(dest, io, ip, op2_interp_z); + + int full_write_mask = ((1 << num_components) - 1) << start_comp; + + bool success = 
load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc); + success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3); + return success; + + } else { + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op1_interp_load_p0, dest[i], + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + EmitInstruction::write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + } + return true; +} + +bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, EAluOp op) +{ + for (unsigned i = 0; i < 2 ; ++i) { + int chan = i; + if (op == op2_interp_z) + chan += 2; + + + auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i, + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + i == 0 ? EmitInstruction::write : EmitInstruction::last); + dest.pin_to_channel(chan); + + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + return true; +} + +bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, + const Interpolator& ip, EAluOp op, int writemask) +{ + AluInstruction *ir = nullptr; + assert(ip.j); + assert(ip.i); + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty); + dest.pin_to_channel(i); + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, + EAluOp op, UNUSED int start, int comp) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + i == comp ? EmitInstruction::write : EmitInstruction::empty); + ir->set_bank_swizzle(alu_vec_210); + dest.pin_to_channel(i); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + + +bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs) +{ + std::array<uint32_t,4> swizzle; + unsigned writemask = nir_intrinsic_write_mask(instr); + auto semantics = nir_intrinsic_io_semantics(instr); + unsigned driver_location = nir_intrinsic_base(instr); + + switch (semantics.location) { + case FRAG_RESULT_DEPTH: + writemask = 1; + swizzle = {0,7,7,7}; + break; + case FRAG_RESULT_STENCIL: + writemask = 2; + swizzle = {7,0,7,7}; + break; + case FRAG_RESULT_SAMPLE_MASK: + writemask = 4; + swizzle = {7,7,0,7}; + break; + default: + for (int i = 0; i < 4; ++i) { + swizzle[i] = (i < instr->num_components) ? i : 7; + } + } + + auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle); + + set_output(driver_location, value.sel()); + + if (semantics.location == FRAG_RESULT_COLOR || + (semantics.location >= FRAG_RESULT_DATA0 && + semantics.location <= FRAG_RESULT_DATA7)) { + for (int k = 0 ; k < outputs; ++k) { + + unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR) + ? 
semantics.dual_source_blend_index : driver_location) + k - m_depth_exports; + + sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n"; + + if (location >= m_max_color_exports) { + sfn_log << SfnLog::io << "Pixel output loc:" << location + << " dl:" << driver_location + << " skipped because we have only " << m_max_color_exports << " CBs\n"; + continue; + } + + m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel); + + if (sh_info().ps_export_highest < location) + sh_info().ps_export_highest = location; + + sh_info().nr_ps_color_exports++; + + unsigned mask = (0xfu << (location * 4)); + sh_info().ps_color_export_mask |= mask; + + emit_export_instruction(m_last_pixel_export); + }; + } else if (semantics.location == FRAG_RESULT_DEPTH || + semantics.location == FRAG_RESULT_STENCIL || + semantics.location == FRAG_RESULT_SAMPLE_MASK) { + m_depth_exports++; + emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel)); + } else { + return false; + } + return true; +} + + +bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs) +{ + std::array<uint32_t,4> swizzle; + unsigned writemask = nir_intrinsic_write_mask(instr); + switch (out_var->data.location) { + case FRAG_RESULT_DEPTH: + writemask = 1; + swizzle = {0,7,7,7}; + break; + case FRAG_RESULT_STENCIL: + writemask = 2; + swizzle = {7,0,7,7}; + break; + case FRAG_RESULT_SAMPLE_MASK: + writemask = 4; + swizzle = {7,7,0,7}; + break; + default: + for (int i = 0; i < 4; ++i) { + swizzle[i] = (i < instr->num_components) ? i : 7; + } + } + + auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle); + + set_output(out_var->data.driver_location, value.sel()); + + if (out_var->data.location == FRAG_RESULT_COLOR || + (out_var->data.location >= FRAG_RESULT_DATA0 && + out_var->data.location <= FRAG_RESULT_DATA7)) { + for (int k = 0 ; k < outputs; ++k) { + + unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR) + ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports; + + sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n"; + + if (location >= m_max_color_exports) { + sfn_log << SfnLog::io << "Pixel output loc:" << location + << " dl:" << out_var->data.location + << " skipped because we have only " << m_max_color_exports << " CBs\n"; + continue; + } + + m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel); + + if (sh_info().ps_export_highest < location) + sh_info().ps_export_highest = location; + + sh_info().nr_ps_color_exports++; + + unsigned mask = (0xfu << (location * 4)); + sh_info().ps_color_export_mask |= mask; + + emit_export_instruction(m_last_pixel_export); + }; + } else if (out_var->data.location == FRAG_RESULT_DEPTH || + out_var->data.location == FRAG_RESULT_STENCIL || + out_var->data.location == FRAG_RESULT_SAMPLE_MASK) { + m_depth_exports++; + emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel)); + } else { + return false; + } + return true; +} + +void FragmentShaderFromNir::do_finalize() +{ + // update shader io info and set LDS etc. + sh_info().ninput = m_shaderio.inputs().size(); + + sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n"; + for (size_t i = 0; i < sh_info().ninput; ++i) { + ShaderInput& input = m_shaderio.input(i); + int ij_idx = (input.ij_index() < 6 && + input.ij_index() >= 0) ? 
input.ij_index() : 0; + input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index); + } + + sh_info().two_side = m_shaderio.two_sided(); + sh_info().nlds = m_shaderio.nlds(); + + sh_info().nr_ps_max_color_exports = m_max_counted_color_exports; + + if (sh_info().fs_write_all) { + sh_info().nr_ps_max_color_exports = m_max_color_exports; + } + + if (!m_last_pixel_export) { + GPRVector v(0, {7,7,7,7}); + m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel); + sh_info().nr_ps_color_exports++; + sh_info().ps_color_export_mask = 0xf; + emit_export_instruction(m_last_pixel_export); + } + + m_last_pixel_export->set_last(); + + if (sh_info().fs_write_all) + sh_info().nr_ps_max_color_exports = 8; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h new file mode 100644 index 000000000..4755afbfe --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h @@ -0,0 +1,117 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_fragment_shader_from_nir_h +#define sfn_fragment_shader_from_nir_h + +#include "sfn_shader_base.h" +#include "sfn_shaderio.h" +#include <bitset> + +namespace r600 { + +class FragmentShaderFromNir : public ShaderFromNirProcessor { +public: + FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info, + r600_pipe_shader_selector &sel, const r600_shader_key &key, + enum chip_class chip_class); + bool scan_sysvalue_access(nir_instr *instr) override; +private: + + struct Interpolator { + bool enabled; + unsigned ij_index; + PValue i; + PValue j; + }; + + void emit_shader_start() override; + bool do_allocate_reserved_registers() override; + bool process_store_output(nir_intrinsic_instr *instr); + + bool emit_store_output(nir_intrinsic_instr* instr); + + bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs); + bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs); + bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip, + int num_components, int start_comp); + bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op); + bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask); + bool load_interpolated_two_comp_for_one(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp); + + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + void do_finalize() override; + + void load_front_face(); + + bool emit_load_input(nir_intrinsic_instr* instr); + bool emit_load_front_face(nir_intrinsic_instr* instr); + bool emit_load_sample_mask_in(nir_intrinsic_instr* instr); + bool emit_load_sample_pos(nir_intrinsic_instr* instr); + bool emit_load_sample_id(nir_intrinsic_instr* instr); + + bool process_load_input(nir_intrinsic_instr *instr, bool interpolated); + bool emit_load_interpolated_input(nir_intrinsic_instr* instr); + bool load_barycentric_at_offset(nir_intrinsic_instr* instr); + bool load_barycentric_at_sample(nir_intrinsic_instr* instr); + + + unsigned m_max_color_exports; + unsigned m_max_counted_color_exports; + bool m_two_sided_color; + ExportInstruction *m_last_pixel_export; + const nir_shader& m_nir; + + + std::array<Interpolator, 6> m_interpolator; + unsigned m_reserved_registers; + unsigned m_frag_pos_index; + PGPRValue m_front_face_reg; + PGPRValue m_sample_mask_reg; + PGPRValue m_sample_id_reg; + PGPRValue m_helper_invocation; + GPRVector m_frag_pos; + bool m_need_back_color; + bool m_front_face_loaded; + ShaderIO m_shaderio; + unsigned m_depth_exports; + + std::map<unsigned, PValue> m_input_cache; + + static const int s_max_interpolators = 6; + + std::bitset<s_max_interpolators> m_interpolators_used; + + unsigned m_apply_sample_mask; + bool m_dual_source_blend; + ShaderInput *m_pos_input; + +}; + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp new file mode 100644 index 000000000..0541e0ad0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp @@ -0,0 +1,343 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_shader_geometry.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_fetch.h" +#include "sfn_shaderio.h" + +namespace r600 { + +GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key, + enum chip_class chip_class): + VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader, + sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter), + m_pipe_shader(sh), + m_so_info(&sel.so), + m_first_vertex_emitted(false), + m_offset(0), + m_next_input_ring_offset(0), + m_key(key), + m_clip_dist_mask(0), + m_cur_ring_output(0), + m_gs_tri_strip_adj_fix(false), + m_input_mask(0) +{ + sh_info().atomic_base = key.gs.first_atomic_counter; +} + +bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + uint32_t write_mask = nir_intrinsic_write_mask(instr); + GPRVector::Swizzle swz = swizzle_from_mask(write_mask); + + auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true); + + sh_info().output[driver_location].write_mask = write_mask; + + auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value, + 4 * driver_location, + instr->num_components, m_export_base[0]); + streamout_data[location] = ir; + + return true; +} + +bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return true; + + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + + switch (ii->intrinsic) { + case nir_intrinsic_store_output: + return process_store_output(ii); + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + return process_load_input(ii); + default: + return true; + } +} + +bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + if (location == VARYING_SLOT_COL0 || + location == VARYING_SLOT_COL1 || + (location >= VARYING_SLOT_VAR0 && + location <= VARYING_SLOT_VAR31) || + (location >= VARYING_SLOT_TEX0 && + location <= VARYING_SLOT_TEX7) || + location == VARYING_SLOT_BFC0 || + location == VARYING_SLOT_BFC1 || + location == VARYING_SLOT_PNTC || + location == VARYING_SLOT_CLIP_VERTEX || + location == VARYING_SLOT_CLIP_DIST0 || + location == VARYING_SLOT_CLIP_DIST1 
||
+       location == VARYING_SLOT_PRIMITIVE_ID ||
+       location == VARYING_SLOT_POS ||
+       location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_LAYER ||
+       location == VARYING_SLOT_VIEWPORT ||
+       location == VARYING_SLOT_FOGC) {
+      r600_shader_io& io = sh_info().output[driver_location];
+
+      auto semantic = r600_get_varying_semantic(location);
+      io.name = semantic.first;
+      io.sid = semantic.second;
+
+      evaluate_spi_sid(io);
+
+      if (sh_info().noutput <= driver_location)
+         sh_info().noutput = driver_location + 1;
+
+      if (location == VARYING_SLOT_CLIP_DIST0 ||
+          location == VARYING_SLOT_CLIP_DIST1) {
+         m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
+      }
+
+      if (location == VARYING_SLOT_VIEWPORT) {
+         sh_info().vs_out_viewport = 1;
+         sh_info().vs_out_misc_write = 1;
+      }
+      return true;
+   }
+   return false;
+}
+
+bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index);
+
+   auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+   if (location == VARYING_SLOT_POS ||
+       location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_FOGC ||
+       location == VARYING_SLOT_CLIP_VERTEX ||
+       location == VARYING_SLOT_CLIP_DIST0 ||
+       location == VARYING_SLOT_CLIP_DIST1 ||
+       location == VARYING_SLOT_COL0 ||
+       location == VARYING_SLOT_COL1 ||
+       location == VARYING_SLOT_BFC0 ||
+       location == VARYING_SLOT_BFC1 ||
+       location == VARYING_SLOT_PNTC ||
+       (location >= VARYING_SLOT_VAR0 &&
+        location <= VARYING_SLOT_VAR31) ||
+       (location >= VARYING_SLOT_TEX0 &&
+        location <= VARYING_SLOT_TEX7)) {
+
+      uint64_t bit = 1ull << location;
+      if (!(bit & m_input_mask)) {
+         r600_shader_io& io = sh_info().input[driver_location];
+         auto semantic = r600_get_varying_semantic(location);
+         io.name = semantic.first;
+         io.sid = semantic.second;
+
+         io.ring_offset = 16 * driver_location;
+         ++sh_info().ninput;
+         m_next_input_ring_offset += 16;
+         m_input_mask |= bit;
+      }
+      return true;
+   }
+   return false;
+}
+
+bool GeometryShaderFromNir::do_allocate_reserved_registers()
+{
+   const int sel[6] = {0, 0, 0, 1, 1, 1};
+   const int chan[6] = {0, 1, 3, 0, 1, 2};
+
+   increment_reserved_registers();
+   increment_reserved_registers();
+
+   /* Reserve registers used by the shaders (should check how many
+    * components are actually used) */
+   for (int i = 0; i < 6; ++i) {
+      auto reg = new GPRValue(sel[i], chan[i]);
+      reg->set_as_input();
+      m_per_vertex_offsets[i].reset(reg);
+      inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
+   }
+   auto reg = new GPRValue(0, 2);
+   reg->set_as_input();
+   m_primitive_id.reset(reg);
+   inject_register(0, 2, m_primitive_id, false);
+
+   reg = new GPRValue(1, 3);
+   reg->set_as_input();
+   m_invocation_id.reset(reg);
+   inject_register(1, 3, m_invocation_id, false);
+
+   m_export_base[0] = get_temp_register(0);
+   m_export_base[1] = get_temp_register(0);
+   m_export_base[2] = get_temp_register(0);
+   m_export_base[3] = get_temp_register(0);
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr}));
+
+   sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
+
+   if
(m_key.gs.tri_strip_adj_fix)
+      emit_adj_fix();
+
+   return true;
+}
+
+void GeometryShaderFromNir::emit_adj_fix()
+{
+   PValue adjhelp0(new GPRValue(m_export_base[0]->sel(), 1));
+   emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
+
+   int reg_indices[6];
+   int reg_channels[6] = {1, 2, 3, 1, 2, 3};
+
+   int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
+
+   reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
+   reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
+
+   std::array<PValue, 6> adjhelp;
+
+   AluInstruction *ir = nullptr;
+   for (int i = 0; i < 6; i++) {
+      adjhelp[i].reset(new GPRValue(reg_indices[i], reg_channels[i]));
+      ir = new AluInstruction(op3_cnde_int, adjhelp[i],
+                              {adjhelp0, m_per_vertex_offsets[i],
+                               m_per_vertex_offsets[rotate_indices[i]]},
+                              {alu_write});
+      if (i == 3)
+         ir->set_flag(alu_last_instr);
+      emit_instruction(ir);
+   }
+   ir->set_flag(alu_last_instr);
+
+   for (int i = 0; i < 6; i++)
+      m_per_vertex_offsets[i] = adjhelp[i];
+}
+
+
+bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_emit_vertex:
+      return emit_vertex(instr, false);
+   case nir_intrinsic_end_primitive:
+      return emit_vertex(instr, true);
+   case nir_intrinsic_load_primitive_id:
+      return load_preloaded_value(instr->dest, 0, m_primitive_id);
+   case nir_intrinsic_load_invocation_id:
+      return load_preloaded_value(instr->dest, 0, m_invocation_id);
+   case nir_intrinsic_store_output:
+      return emit_store(instr);
+   case nir_intrinsic_load_per_vertex_input:
+      return emit_load_per_vertex_input(instr);
+   default:
+      ;
+   }
+   return false;
+}
+
+bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
+{
+   int stream = nir_intrinsic_stream_id(instr);
+   assert(stream < 4);
+
+   for (auto v: streamout_data) {
+      if (stream == 0 || v.first != VARYING_SLOT_POS) {
+         v.second->patch_ring(stream, m_export_base[stream]);
+         emit_instruction(v.second);
+      } else
+         delete v.second;
+   }
+   streamout_data.clear();
+   emit_instruction(new EmitVertex(stream, cut));
+
+   if (!cut)
+      emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream],
+                                          PValue(new LiteralValue(sh_info().noutput)),
+                                          {alu_write, alu_last_instr}));
+
+   return true;
+}
+
+bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
+{
+   auto dest = vec_from_nir(instr->dest, 4);
+
+   std::array<int, 4> swz = {7,7,7,7};
+   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+      swz[i] = i + nir_intrinsic_component(instr);
+   }
+
+   auto literal_index = nir_src_as_const_value(instr->src[0]);
+
+   if (!literal_index) {
+      sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
+      return false;
+   }
+   assert(literal_index->u32 < 6);
+   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
+
+   PValue addr = m_per_vertex_offsets[literal_index->u32];
+   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
+                                     16 * nir_intrinsic_base(instr),
+                                     R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
+   fetch->set_dest_swizzle(swz);
+
+   emit_instruction(fetch);
+   return true;
+}
+
+void GeometryShaderFromNir::do_finalize()
+{
+   if (m_clip_dist_mask) {
+      int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask);
+      sh_info().cc_dist_mask = (1 << num_clip_dist) - 1;
+      sh_info().clip_dist_write = (1 << num_clip_dist) - 1;
+   }
+}
+
+}
diff --git
a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h new file mode 100644 index 000000000..b557b8f58 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h @@ -0,0 +1,81 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef SFN_GEOMETRYSHADERFROMNIR_H +#define SFN_GEOMETRYSHADERFROMNIR_H + +#include "sfn_vertexstageexport.h" + +namespace r600 { + +class GeometryShaderFromNir : public VertexStage +{ +public: + GeometryShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum chip_class chip_class); + + bool scan_sysvalue_access(nir_instr *instr) override; + PValue primitive_id() override {return m_primitive_id;} + +private: + + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + + bool emit_vertex(nir_intrinsic_instr* instr, bool cut); + void emit_adj_fix(); + + bool process_store_output(nir_intrinsic_instr* instr); + bool process_load_input(nir_intrinsic_instr* instr); + + bool emit_store(nir_intrinsic_instr* instr); + bool emit_load_per_vertex_input(nir_intrinsic_instr* instr); + + void do_finalize() override; + + r600_pipe_shader *m_pipe_shader; + const pipe_stream_output_info *m_so_info; + + std::array<PValue, 6> m_per_vertex_offsets; + PValue m_primitive_id; + PValue m_invocation_id; + PValue m_export_base[4]; + bool m_first_vertex_emitted; + + int m_offset; + int m_next_input_ring_offset; + r600_shader_key m_key; + int m_clip_dist_mask; + unsigned m_cur_ring_output; + bool m_gs_tri_strip_adj_fix; + uint64_t m_input_mask; + + std::map<int, MemRingOutIntruction *> streamout_data; +}; + +} + +#endif // SFN_GEOMETRYSHADERFROMNIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp new file mode 100644 index 000000000..fb76695c6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp @@ -0,0 +1,108 @@ +#include "sfn_shader_tcs.h" +#include "sfn_instruction_gds.h" +#include "tgsi/tgsi_from_mesa.h" + +namespace r600 { + +TcsShaderFromNir::TcsShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + const r600_shader_key& key, + enum chip_class chip_class): + 
ShaderFromNirProcessor (PIPE_SHADER_TESS_CTRL, sel, sh->shader, + sh->scratch_space_needed, chip_class, key.tcs.first_atomic_counter), + m_reserved_registers(0) +{ + sh_info().tcs_prim_mode = key.tcs.prim_mode; +} + +bool TcsShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return true; + + auto intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_primitive_id: + m_sv_values.set(es_primitive_id); + break; + case nir_intrinsic_load_invocation_id: + m_sv_values.set(es_invocation_id); + break; + case nir_intrinsic_load_tcs_rel_patch_id_r600: + m_sv_values.set(es_rel_patch_id); + break; + case nir_intrinsic_load_tcs_tess_factor_base_r600: + m_sv_values.set(es_tess_factor_base); + break; + default: + + ; + } + return true; +} + +bool TcsShaderFromNir::do_allocate_reserved_registers() +{ + if (m_sv_values.test(es_primitive_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,0); + gpr->set_as_input(); + m_primitive_id.reset(gpr); + } + + if (m_sv_values.test(es_invocation_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,2); + gpr->set_as_input(); + m_invocation_id.reset(gpr); + } + + if (m_sv_values.test(es_rel_patch_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,1); + gpr->set_as_input(); + m_rel_patch_id.reset(gpr); + } + + if (m_sv_values.test(es_tess_factor_base)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,3); + gpr->set_as_input(); + m_tess_factor_base.reset(gpr); + } + + set_reserved_registers(m_reserved_registers); + + return true; +} + +bool TcsShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_tcs_rel_patch_id_r600: + return load_preloaded_value(instr->dest, 0, m_rel_patch_id); + case nir_intrinsic_load_invocation_id: + return load_preloaded_value(instr->dest, 0, m_invocation_id); + case nir_intrinsic_load_primitive_id: + return load_preloaded_value(instr->dest, 0, m_primitive_id); + case nir_intrinsic_load_tcs_tess_factor_base_r600: + return load_preloaded_value(instr->dest, 0, m_tess_factor_base); + case nir_intrinsic_store_tf_r600: + return store_tess_factor(instr); + default: + return false; + } +} + +bool TcsShaderFromNir::store_tess_factor(nir_intrinsic_instr* instr) +{ + const GPRVector::Swizzle& swizzle = (instr->src[0].ssa->num_components == 4) ? 
+ GPRVector::Swizzle({0, 1, 2, 3}) : GPRVector::Swizzle({0, 1, 7, 7}); + auto val = vec_from_nir_with_fetch_constant(instr->src[0], + (1 << instr->src[0].ssa->num_components) - 1, swizzle); + emit_instruction(new GDSStoreTessFactor(val)); + return true; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h new file mode 100644 index 000000000..051078104 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h @@ -0,0 +1,33 @@ +#ifndef TCSSHADERFROMNIR_H +#define TCSSHADERFROMNIR_H + +#include "sfn_shader_base.h" + +namespace r600 { + +class TcsShaderFromNir : public ShaderFromNirProcessor +{ +public: + TcsShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum chip_class chip_class); + bool scan_sysvalue_access(nir_instr *instr) override; + +private: + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + bool store_tess_factor(nir_intrinsic_instr* instr); + + void do_finalize() override {} + + int m_reserved_registers; + PValue m_patch_id; + PValue m_rel_patch_id; + PValue m_invocation_id; + PValue m_primitive_id; + PValue m_tess_factor_base; + + +}; + +} + +#endif // TCSSHADERFROMNIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp new file mode 100644 index 000000000..d1c75515a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp @@ -0,0 +1,123 @@ +#include "sfn_shader_tess_eval.h" +#include "tgsi/tgsi_from_mesa.h" + +namespace r600 { + +TEvalShaderFromNir::TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, + const r600_shader_key& key, r600_shader *gs_shader, + enum chip_class chip_class): + VertexStage(PIPE_SHADER_TESS_EVAL, sel, sh->shader, + sh->scratch_space_needed, chip_class, key.tes.first_atomic_counter), + m_reserved_registers(0), + m_key(key) + +{ + sh->shader.tes_as_es = key.tes.as_es; + if (key.tes.as_es) + m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader)); + else + m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key)); +} + +bool TEvalShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return true; + + auto ir = nir_instr_as_intrinsic(instr); + + switch (ir->intrinsic) { + case nir_intrinsic_load_tess_coord_r600: + m_sv_values.set(es_tess_coord); + break; + case nir_intrinsic_load_primitive_id: + m_sv_values.set(es_primitive_id); + break; + case nir_intrinsic_load_tcs_rel_patch_id_r600: + m_sv_values.set(es_rel_patch_id); + break; + case nir_intrinsic_store_output: + m_export_processor->scan_store_output(ir); + break; + default: + ; + } + return true; +} + +void TEvalShaderFromNir::emit_shader_start() +{ + m_export_processor->emit_shader_start(); +} + +bool TEvalShaderFromNir::do_allocate_reserved_registers() +{ + if (m_sv_values.test(es_tess_coord)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,0); + gpr->set_as_input(); + m_tess_coord[0].reset(gpr); + gpr = new GPRValue(0,1); + gpr->set_as_input(); + m_tess_coord[1].reset(gpr); + } + + if (m_sv_values.test(es_rel_patch_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,2); + gpr->set_as_input(); + m_rel_patch_id.reset(gpr); + } + + if (m_sv_values.test(es_primitive_id) || + m_key.vs.as_gs_a) { + m_reserved_registers = 1; + auto gpr = new 
GPRValue(0,3); + gpr->set_as_input(); + m_primitive_id.reset(gpr); + if (m_key.vs.as_gs_a) + inject_register(0, 3, m_primitive_id, false); + } + set_reserved_registers(m_reserved_registers); + return true; +} + +bool TEvalShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_tess_coord_r600: + return load_preloaded_value(instr->dest, 0, m_tess_coord[0]) && + load_preloaded_value(instr->dest, 1, m_tess_coord[1]); + case nir_intrinsic_load_primitive_id: + return load_preloaded_value(instr->dest, 0, m_primitive_id); + case nir_intrinsic_load_tcs_rel_patch_id_r600: + return load_preloaded_value(instr->dest, 0, m_rel_patch_id); + case nir_intrinsic_store_output: + return m_export_processor->store_output(instr); + default: + return false; + } +} + +void TEvalShaderFromNir::do_finalize() +{ + m_export_processor->finalize_exports(); +} + + +bool TEvalShaderFromNir::emit_load_tess_coord(nir_intrinsic_instr* instr) +{ + bool result = load_preloaded_value(instr->dest, 0, m_tess_coord[0]) && + load_preloaded_value(instr->dest, 1, m_tess_coord[1]); + + m_tess_coord[2] = from_nir(instr->dest, 2); + + + emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2], + m_tess_coord[0], {alu_last_instr, alu_write, alu_src0_neg})); + emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2], + m_tess_coord[1], {alu_last_instr, alu_write, alu_src0_neg})); + return result; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h new file mode 100644 index 000000000..a1b7d3a9c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h @@ -0,0 +1,39 @@ +#ifndef TEVALSHADERFROMNIR_H +#define TEVALSHADERFROMNIR_H + +#include "sfn_shader_base.h" +#include "sfn_vertexstageexport.h" + +namespace r600 { + +class TEvalShaderFromNir : public VertexStage +{ +public: + TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, + const r600_shader_key& key, r600_shader *gs_shader, + enum chip_class chip_class); + bool scan_sysvalue_access(nir_instr *instr) override; + PValue primitive_id() override {return m_primitive_id;} + private: + void emit_shader_start() override; + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + bool emit_load_tess_coord(nir_intrinsic_instr* instr); + bool load_tess_z_coord(nir_intrinsic_instr* instr); + + void do_finalize() override; + + + unsigned m_reserved_registers; + PValue m_tess_coord[3]; + PValue m_rel_patch_id; + PValue m_primitive_id; + + std::unique_ptr<VertexStageExportBase> m_export_processor; + const r600_shader_key& m_key; +}; + + +} + +#endif // TEVALSHADERFROMNIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp new file mode 100644 index 000000000..f2c4de3fa --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp @@ -0,0 +1,230 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + 
* license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "sfn_shader_vertex.h"
+#include "sfn_instruction_lds.h"
+
+#include <queue>
+
+
+namespace r600 {
+
+using std::priority_queue;
+
+VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
+                                         r600_pipe_shader_selector& sel,
+                                         const r600_shader_key& key,
+                                         struct r600_shader* gs_shader,
+                                         enum chip_class chip_class):
+   VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader,
+               sh->scratch_space_needed, chip_class, key.vs.first_atomic_counter),
+   m_num_clip_dist(0),
+   m_last_param_export(nullptr),
+   m_last_pos_export(nullptr),
+   m_pipe_shader(sh),
+   m_enabled_stream_buffers_mask(0),
+   m_so_info(&sel.so),
+   m_vertex_id(),
+   m_key(key),
+   m_max_attrib(0)
+{
+   // reg 0 is used in the fetch shader
+   increment_reserved_registers();
+
+   sh_info().atomic_base = key.vs.first_atomic_counter;
+   sh_info().vs_as_gs_a = m_key.vs.as_gs_a;
+
+   if (key.vs.as_es) {
+      sh->shader.vs_as_es = true;
+      m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
+   } else if (key.vs.as_ls) {
+      sh->shader.vs_as_ls = true;
+      sfn_log << SfnLog::trans << "Start VS for LS\n";
+      m_export_processor.reset(new VertexStageExportForES(*this));
+   } else {
+      m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
+   }
+}
+
+bool VertexShaderFromNir::scan_inputs_read(const nir_shader *sh)
+{
+   uint64_t inputs = sh->info.inputs_read;
+
+   while (inputs) {
+      unsigned i = u_bit_scan64(&inputs);
+      if (i < VERT_ATTRIB_MAX) {
+         ++sh_info().ninput;
+      }
+   }
+   m_max_attrib = sh_info().ninput;
+   return true;
+}
+
+bool VertexShaderFromNir::do_allocate_reserved_registers()
+{
+   /* Since the vertex ID is nearly always used, we add it here as an input so
+    * that the registers used for vertex attributes don't get clobbered by the
+    * register merge step */
+   auto R0x = new GPRValue(0,0);
+   R0x->set_as_input();
+   m_vertex_id.reset(R0x);
+   inject_register(0, 0, m_vertex_id, false);
+
+   if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) {
+      auto R0z = new GPRValue(0,2);
+      R0z->set_as_input();
+      m_primitive_id.reset(R0z);
+      inject_register(0, 2, m_primitive_id, false);
+   }
+
+   if (m_sv_values.test(es_instanceid)) {
+      auto R0w = new GPRValue(0,3);
+      R0w->set_as_input();
+      m_instance_id.reset(R0w);
+      inject_register(0, 3, m_instance_id, false);
+   }
+
+   if (m_sv_values.test(es_rel_patch_id)) {
+      auto R0y = new GPRValue(0,1);
+      R0y->set_as_input();
+      m_rel_vertex_id.reset(R0y);
+      inject_register(0, 1, m_rel_vertex_id, false);
+   }
+
+   m_attribs.resize(4 * m_max_attrib + 4);
+   for (unsigned i = 0; i < m_max_attrib + 1; ++i) {
+      for (unsigned k = 0; k < 4; ++k) {
+         auto gpr = std::make_shared<GPRValue>(i + 1, k);
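+         /* Register layout sketch (derived from the allocations above, for
+          * illustration only): the fetch shader preloads vertex attribute i
+          * into R(i+1), one channel per component, while R0 carries the
+          * system values injected earlier in this function:
+          *
+          *   R0.x vertex_id      R0.y rel patch id (when used)
+          *   R0.z primitive_id   R0.w instance_id  (when used)
+          *   R1.xyzw attribute 0, R2.xyzw attribute 1, ...
+          */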
gpr->set_as_input();
+         m_attribs[4 * i + k] = gpr;
+         inject_register(i + 1, k, gpr, false);
+      }
+   }
+
+   return true;
+}
+
+void VertexShaderFromNir::emit_shader_start()
+{
+   m_export_processor->emit_shader_start();
+}
+
+bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+      switch (ii->intrinsic) {
+      case nir_intrinsic_load_vertex_id:
+         m_sv_values.set(es_vertexid);
+         break;
+      case nir_intrinsic_load_instance_id:
+         m_sv_values.set(es_instanceid);
+         break;
+      case nir_intrinsic_load_tcs_rel_patch_id_r600:
+         m_sv_values.set(es_rel_patch_id);
+         break;
+      case nir_intrinsic_store_output:
+         m_export_processor->scan_store_output(ii);
+         break;
+      default:
+         ;
+      }
+      break;
+   }
+   default:
+      ;
+   }
+   return true;
+}
+
+bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_vertex_id:
+      return load_preloaded_value(instr->dest, 0, m_vertex_id);
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      return load_preloaded_value(instr->dest, 0, m_rel_vertex_id);
+   case nir_intrinsic_load_instance_id:
+      return load_preloaded_value(instr->dest, 0, m_instance_id);
+   case nir_intrinsic_store_local_shared_r600:
+      return emit_store_local_shared(instr);
+   case nir_intrinsic_store_output:
+      return m_export_processor->store_output(instr);
+   case nir_intrinsic_load_input:
+      return load_input(instr);
+   default:
+      return false;
+   }
+}
+
+bool VertexShaderFromNir::load_input(nir_intrinsic_instr* instr)
+{
+   unsigned location = nir_intrinsic_base(instr);
+
+   if (location < VERT_ATTRIB_MAX) {
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         auto src = m_attribs[4 * location + i];
+
+         if (i == 0)
+            set_input(location, src);
+
+         load_preloaded_value(instr->dest, i, src, i == (unsigned)(instr->num_components - 1));
+      }
+      return true;
+   }
+   fprintf(stderr, "r600-NIR: Unimplemented load_input for %u\n", location);
+   return false;
+}
+
+bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr)
+{
+   unsigned write_mask = nir_intrinsic_write_mask(instr);
+
+   auto address = from_nir(instr->src[1], 0);
+   int swizzle_base = (write_mask & 0x3) ?
0 : 2; + write_mask |= write_mask >> 2; + + auto value = from_nir(instr->src[0], swizzle_base); + if (!(write_mask & 2)) { + emit_instruction(new LDSWriteInstruction(address, 1, value)); + } else { + auto value1 = from_nir(instr->src[0], swizzle_base + 1); + emit_instruction(new LDSWriteInstruction(address, 1, value, value1)); + } + + return true; +} + +void VertexShaderFromNir::do_finalize() +{ + m_export_processor->finalize_exports(); +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h new file mode 100644 index 000000000..c1ba251de --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h @@ -0,0 +1,83 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_vertex_shader_from_nir_h +#define sfn_vertex_shader_from_nir_h + +#include "sfn_shader_base.h" +#include "sfn_vertexstageexport.h" + +namespace r600 { + +class VertexShaderFromNir : public VertexStage { +public: + VertexShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key, r600_shader *gs_shader, + enum chip_class chip_class); + + bool scan_sysvalue_access(nir_instr *instr) override; + + PValue primitive_id() override {return m_primitive_id;} +protected: + + // todo: encapsulate + unsigned m_num_clip_dist; + ExportInstruction *m_last_param_export; + ExportInstruction *m_last_pos_export; + r600_pipe_shader *m_pipe_shader; + unsigned m_enabled_stream_buffers_mask; + const pipe_stream_output_info *m_so_info; + void do_finalize() override; + + std::map<unsigned, unsigned> m_param_map; + + bool scan_inputs_read(const nir_shader *sh) override; + +private: + bool load_input(nir_intrinsic_instr* instr); + + void finalize_exports(); + + void emit_shader_start() override; + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + bool emit_store_local_shared(nir_intrinsic_instr* instr); + + PValue m_vertex_id; + PValue m_instance_id; + PValue m_rel_vertex_id; + PValue m_primitive_id; + std::vector<PGPRValue> m_attribs; + r600_shader_key m_key; + + std::unique_ptr<VertexStageExportBase> m_export_processor; + unsigned m_max_attrib; +}; + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp new file mode 100644 index 000000000..1ac94ccc7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp @@ -0,0 +1,448 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_shaderio.h" +#include "sfn_debug.h" +#include "tgsi/tgsi_from_mesa.h" + +#include <queue> + +namespace r600 { + +using std::vector; +using std::priority_queue; + +ShaderIO::ShaderIO(): + m_two_sided(false), + m_lds_pos(0) +{ + +} + +ShaderInput::ShaderInput(tgsi_semantic name): + m_name(name), + m_gpr(0), + m_uses_interpolate_at_centroid(false) +{ +} + +ShaderInput::~ShaderInput() +{ +} + +void ShaderInput::set_lds_pos(UNUSED int lds_pos) +{ +} + +int ShaderInput::ij_index() const +{ + return -1; +} + +bool ShaderInput::interpolate() const +{ + return false; +} + +int ShaderInput::lds_pos() const +{ + return 0; +} + +bool ShaderInput::is_varying() const +{ + return false; +} + +void ShaderInput::set_uses_interpolate_at_centroid() +{ + m_uses_interpolate_at_centroid = true; +} + +void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const +{ + io.name = m_name; + io.gpr = m_gpr; + io.ij_index = translated_ij_index; + io.lds_pos = lds_pos(); + io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid; + + set_specific_ioinfo(io); +} + +void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const +{ +} + +ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr): + ShaderInput(name), + m_gpr(gpr) +{ +} + +void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const +{ + io.gpr = m_gpr; + io.ij_index = 0; +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location, + unsigned frac, unsigned components, + tgsi_interpolate_mode interpolate, + tgsi_interpolate_loc interp_loc): + ShaderInput(_name), + m_driver_location(driver_location), + m_location_frac(frac), + m_sid(sid), + m_interpolate(interpolate), + m_interpolate_loc(interp_loc), + m_ij_index(-10), + m_lds_pos(0), + m_mask(((1 << components) - 1) << frac) +{ + evaluate_spi_sid(); + + m_ij_index = interpolate == TGSI_INTERPOLATE_LINEAR ? 
3 : 0; + switch (interp_loc) { + case TGSI_INTERPOLATE_LOC_CENTROID: m_ij_index += 2; break; + case TGSI_INTERPOLATE_LOC_CENTER: m_ij_index += 1; break; + default: + ; + } +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input): + ShaderInput(_name), + m_driver_location(input->data.driver_location), + m_location_frac(input->data.location_frac), + m_sid(sid), + m_ij_index(-10), + m_lds_pos(0), + m_mask(((1 << input->type->components()) - 1) << input->data.location_frac) +{ + sfn_log << SfnLog::io << __func__ + << "name:" << _name + << " sid: " << sid + << " op: " << input->data.interpolation; + + evaluate_spi_sid(); + + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(input->type)); + + switch (input->data.interpolation) { + case INTERP_MODE_NONE: + if (glsl_base_type_is_integer(base_type)) { + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (name() == TGSI_SEMANTIC_COLOR) { + m_interpolate = TGSI_INTERPOLATE_COLOR; + m_ij_index = 0; + break; + } + FALLTHROUGH; + + case INTERP_MODE_SMOOTH: + assert(!glsl_base_type_is_integer(base_type)); + + m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + m_ij_index = 0; + break; + + case INTERP_MODE_NOPERSPECTIVE: + assert(!glsl_base_type_is_integer(base_type)); + + m_interpolate = TGSI_INTERPOLATE_LINEAR; + m_ij_index = 3; + break; + + case INTERP_MODE_FLAT: + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + + default: + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (input->data.sample) { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE; + } else if (input->data.centroid) { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID; + m_ij_index += 2; + } else { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER; + m_ij_index += 1; + } + sfn_log << SfnLog::io + << " -> IP:" << m_interpolate + << " IJ:" << m_ij_index + << "\n"; +} + +bool ShaderInputVarying::is_varying() const +{ + return true; +} + +void ShaderInputVarying::update_mask(int additional_comps, int frac) +{ + m_mask |= ((1 << additional_comps) - 1) << frac; +} + +void ShaderInputVarying::evaluate_spi_sid() +{ + switch (name()) { + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + assert(0 && "System value used as varying"); + break; + case TGSI_SEMANTIC_POSITION: + m_spi_sid = 0; + break; + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_PCOORD: + m_spi_sid = m_sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1; + } +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic name, + const ShaderInputVarying& orig, size_t location): + ShaderInput(name), + m_driver_location(location), + m_location_frac(orig.location_frac()), + + m_sid(orig.m_sid), + m_spi_sid(orig.m_spi_sid), + m_interpolate(orig.m_interpolate), + m_interpolate_loc(orig.m_interpolate_loc), + m_ij_index(orig.m_ij_index), + m_lds_pos(0), + m_mask(0) +{ + evaluate_spi_sid(); +} + +bool ShaderInputVarying::interpolate() const +{ + return m_interpolate > 0; +} + +int ShaderInputVarying::ij_index() const +{ + return m_ij_index; +} + +void ShaderInputVarying::set_lds_pos(int lds_pos) +{ + m_lds_pos = lds_pos; +} + +int ShaderInputVarying::lds_pos() const +{ + return m_lds_pos; +} + +void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const +{ + io.interpolate = m_interpolate; + io.interpolate_location = m_interpolate_loc; + io.sid = m_sid; + 
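+   /* The spi_sid values assigned in evaluate_spi_sid() above follow a small
+    * fixed scheme; as a minimal standalone sketch (pack_spi_sid is a name
+    * invented only for this illustration):
+    *
+    *   unsigned pack_spi_sid(unsigned name, unsigned sid) {
+    *      if (name == TGSI_SEMANTIC_POSITION)
+    *         return 0;                             // position is not a param
+    *      if (name == TGSI_SEMANTIC_GENERIC ||
+    *          name == TGSI_SEMANTIC_TEXCOORD ||
+    *          name == TGSI_SEMANTIC_PCOORD)
+    *         return sid + 1;                       // generic params: sid + 1
+    *      return (0x80 | (name << 3) | sid) + 1;   // pack name and sid in 8 bits
+    *   }
+    *
+    * e.g. TGSI_SEMANTIC_COLOR (name 1, sid 0) yields (0x80 | 0x08 | 0) + 1 = 0x89.
+    */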
io.spi_sid = m_spi_sid;
+   set_color_ioinfo(io);
+}
+
+void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const
+{
+   sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input):
+   ShaderInputVarying(name, sid, input),
+   m_back_color_input_idx(0)
+{
+   sfn_log << SfnLog::io << __func__ << " name: " << name << " sid: " << sid << "\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location,
+                                   unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
+                                   tgsi_interpolate_loc interp_loc):
+   ShaderInputVarying(_name, sid, driver_location, frac, components, interpolate, interp_loc),
+   m_back_color_input_idx(0)
+{
+   sfn_log << SfnLog::io << __func__ << " name: " << _name << " sid: " << sid << "\n";
+}
+
+void ShaderInputColor::set_back_color(unsigned back_color_input_idx)
+{
+   sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n";
+   m_back_color_input_idx = back_color_input_idx;
+}
+
+void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const
+{
+   sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n";
+   io.back_color_input = m_back_color_input_idx;
+}
+
+size_t ShaderIO::add_input(ShaderInput *input)
+{
+   m_inputs.push_back(PShaderInput(input));
+   return m_inputs.size() - 1;
+}
+
+PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid)
+{
+   for (auto& a : m_inputs) {
+      if (a->name() == name) {
+         assert(a->is_varying());
+         auto& v = static_cast<ShaderInputVarying&>(*a);
+         if (v.sid() == sid)
+            return a;
+      }
+   }
+   return nullptr;
+}
+
+struct VaryingShaderIOLess {
+   bool operator () (PShaderInput lhs, PShaderInput rhs) const
+   {
+      const ShaderInputVarying& l = static_cast<ShaderInputVarying&>(*lhs);
+      const ShaderInputVarying& r = static_cast<ShaderInputVarying&>(*rhs);
+      return l.location() > r.location();
+   }
+};
+
+void ShaderIO::sort_varying_inputs()
+{
+   priority_queue<PShaderInput, vector<PShaderInput>, VaryingShaderIOLess> q;
+
+   vector<int> idx;
+
+   for (auto i = 0u; i < m_inputs.size(); ++i) {
+      if (m_inputs[i]->is_varying()) {
+         q.push(m_inputs[i]);
+         idx.push_back(i);
+      }
+   }
+
+   auto next_index = idx.begin();
+   while (!q.empty()) {
+      auto si = q.top();
+      q.pop();
+      m_inputs[*next_index++] = si;
+   }
+}
+
+void ShaderIO::update_lds_pos()
+{
+   m_lds_pos = -1;
+   m_ldspos.resize(m_inputs.size());
+   for (auto& i : m_inputs) {
+      if (!i->is_varying())
+         continue;
+
+      auto& v = static_cast<ShaderInputVarying&>(*i);
+      /* There are shaders that miss an input ... */
+      if (m_ldspos.size() <= static_cast<unsigned>(v.location()))
+         m_ldspos.resize(v.location() + 1);
+   }
+
+   std::fill(m_ldspos.begin(), m_ldspos.end(), -1);
+   for (auto& i : m_inputs) {
+      if (!i->is_varying())
+         continue;
+
+      auto& v = static_cast<ShaderInputVarying&>(*i);
+      if (v.name() == TGSI_SEMANTIC_POSITION)
+         continue;
+
+      if (m_ldspos[v.location()] < 0) {
+         ++m_lds_pos;
+         m_ldspos[v.location()] = m_lds_pos;
+      }
+      v.set_lds_pos(m_lds_pos);
+   }
+   ++m_lds_pos;
+}
+
+std::vector<PShaderInput> &ShaderIO::inputs()
+{
+   return m_inputs;
+}
+
+ShaderInput& ShaderIO::input(size_t k)
+{
+   assert(k < m_inputs.size());
+   return *m_inputs[k];
+}
+
+ShaderInput& ShaderIO::input(size_t driver_loc, int frac)
+{
+   for (auto& i: m_inputs) {
+      if (!i->is_varying())
+         continue;
+
+      auto& v = static_cast<ShaderInputVarying&>(*i);
+      if (v.location() == driver_loc && v.location_frac() == frac)
+         return v;
+
} + return input(driver_loc); +} + +void ShaderIO::set_two_sided() +{ + m_two_sided = true; +} + +std::pair<unsigned, unsigned> +r600_get_varying_semantic(unsigned varying_location) +{ + std::pair<unsigned, unsigned> result; + tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location), + true, &result.first, &result.second); + + if (result.first == TGSI_SEMANTIC_GENERIC) { + result.second += 9; + } else if (result.first == TGSI_SEMANTIC_PCOORD) { + result.second = 8; + } + return result; +} + + + +} + diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h new file mode 100644 index 000000000..855bbe143 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h @@ -0,0 +1,176 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_SHADERIO_H +#define SFN_SHADERIO_H + +#include "compiler/nir/nir.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallium/drivers/r600/r600_shader.h" + +#include <vector> +#include <memory> + +namespace r600 { + +class ShaderInput { +public: + ShaderInput(); + virtual ~ShaderInput(); + + ShaderInput(tgsi_semantic name); + tgsi_semantic name() const {return m_name;} + + void set_gpr(int gpr) {m_gpr = gpr;} + int gpr() const {return m_gpr;} + void set_ioinfo(r600_shader_io& io, int translated_ij_index) const; + + virtual void set_lds_pos(int lds_pos); + virtual int ij_index() const; + virtual bool interpolate() const; + virtual int lds_pos() const; + void set_uses_interpolate_at_centroid(); + + virtual bool is_varying() const; + +private: + virtual void set_specific_ioinfo(r600_shader_io& io) const; + + tgsi_semantic m_name; + int m_gpr; + bool m_uses_interpolate_at_centroid; +}; + +using PShaderInput = std::shared_ptr<ShaderInput>; + +class ShaderInputSystemValue: public ShaderInput { +public: + ShaderInputSystemValue(tgsi_semantic name, int gpr); + void set_specific_ioinfo(r600_shader_io& io) const; + int m_gpr; +}; + +class ShaderInputVarying : public ShaderInput { +public: + ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location, + unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, + tgsi_interpolate_loc interp_loc); + ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input); + ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig, + size_t location); + + void set_lds_pos(int lds_pos) override; + + int ij_index() const override; + + bool interpolate() const override; + + int lds_pos() const override; + + int sid() const {return m_sid;} + + void update_mask(int additional_comps, int frac); + + size_t location() const {return m_driver_location;} + int location_frac() const {return m_location_frac;} + + bool is_varying() const override; + +private: + void evaluate_spi_sid(); + + virtual void set_color_ioinfo(r600_shader_io& io) const; + void set_specific_ioinfo(r600_shader_io& io) const override; + size_t m_driver_location; + int m_location_frac; + int m_sid; + int m_spi_sid; + tgsi_interpolate_mode m_interpolate; + tgsi_interpolate_loc m_interpolate_loc; + int m_ij_index; + int m_lds_pos; + int m_mask; +}; + +class ShaderInputColor: public ShaderInputVarying { +public: + ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location, + unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, + tgsi_interpolate_loc interp_loc); + ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input); + void set_back_color(unsigned back_color_input_idx); + unsigned back_color_input_index() const { + return m_back_color_input_idx; + } +private: + void set_color_ioinfo(UNUSED r600_shader_io& io) const override; + unsigned m_back_color_input_idx; + +}; + +class ShaderIO +{ +public: + ShaderIO(); + + size_t add_input(ShaderInput *input); + + std::vector<PShaderInput>& inputs(); + ShaderInput& input(size_t k); + + ShaderInput& input(size_t driver_loc, int frac); + + void set_two_sided(); + bool two_sided() {return m_two_sided;} + + int nlds() const { + return m_lds_pos; + } + + void sort_varying_inputs(); + + size_t size() const {return m_inputs.size();} + + PShaderInput find_varying(tgsi_semantic name, int sid); + + void update_lds_pos(); + +private: + std::vector<PShaderInput> m_inputs; + std::vector<int> m_ldspos; + bool m_two_sided; + int m_lds_pos; + +}; + 
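+/* A usage sketch for r600_get_varying_semantic() (declared just below,
+ * defined in sfn_shaderio.cpp earlier in this patch): it wraps
+ * tgsi_get_gl_varying_semantic() and then remaps the sid range, so, for
+ * example (values follow from that definition, shown here for illustration):
+ *
+ *   auto sem = r600_get_varying_semantic(VARYING_SLOT_VAR0);
+ *   // sem.first  == TGSI_SEMANTIC_GENERIC
+ *   // sem.second == 9   (generic sids are shifted up by 9)
+ *
+ *   sem = r600_get_varying_semantic(VARYING_SLOT_PNTC);
+ *   // sem.first  == TGSI_SEMANTIC_PCOORD, sem.second == 8 (fixed slot)
+ */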
+std::pair<unsigned, unsigned> +r600_get_varying_semantic(unsigned varying_location); + + +} + +#endif // SFN_SHADERIO_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp new file mode 100644 index 000000000..3228b75fb --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp @@ -0,0 +1,242 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_value.h" +#include "util/macros.h" + +#include <iostream> +#include <iomanip> +#include <cassert> + +namespace r600 { + +using std::unique_ptr; +using std::make_shared; + +const char *Value::component_names = "xyzw01?_!"; + +Value::Value(): + m_type(gpr), + m_chan(0) +{ +} + +Value::Value(Type type, uint32_t chan): + m_type(type), + m_chan(chan) +{ + +} + + + +Value::Value(Type type): + Value(type, 0) +{ +} + +Value::Type Value::type() const +{ + return m_type; +} + +void Value::set_chan(uint32_t chan) +{ + m_chan = chan; +} + +void Value::print(std::ostream& os) const +{ + do_print(os); +} + +void Value::print(std::ostream& os, const PrintFlags& flags) const +{ + if (flags.flags & PrintFlags::has_neg) os << '-'; + if (flags.flags & PrintFlags::has_abs) os << '|'; + do_print(os, flags); + if (flags.flags & PrintFlags::has_abs) os << '|'; +} + +void Value::do_print(std::ostream& os, const PrintFlags& flags) const +{ + (void)flags; + do_print(os); +} + +bool Value::operator < (const Value& lhs) const +{ + return sel() < lhs.sel() || + (sel() == lhs.sel() && chan() < lhs.chan()); +} + + +LiteralValue::LiteralValue(float value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.f=value; +} + + +LiteralValue::LiteralValue(uint32_t value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.u=value; +} + +LiteralValue::LiteralValue(int value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.u=value; +} + +uint32_t LiteralValue::sel() const +{ + return ALU_SRC_LITERAL; +} + +uint32_t LiteralValue::value() const +{ + return m_value.u; +} + +float LiteralValue::value_float() const +{ + return m_value.f; +} + +void LiteralValue::do_print(std::ostream& os) const +{ + os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10) + << m_value.f << "]."; + os << component_names[chan()]; +} + +void LiteralValue::do_print(std::ostream& os, UNUSED 
const PrintFlags& flags) const +{ + os << "[0x" << std::setbase(16) << m_value.u << " " + << std::setbase(10); + + os << m_value.f << "f"; + + os<< "]"; +} + +bool LiteralValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::literal); + const auto& rhs = static_cast<const LiteralValue&>(other); + return (sel() == rhs.sel() && + value() == rhs.value()); +} + +InlineConstValue::InlineConstValue(int value, int chan): + Value(Value::cinline, chan), + m_value(static_cast<AluInlineConstants>(value)) +{ +} + +uint32_t InlineConstValue::sel() const +{ + return m_value; +} + +void InlineConstValue::do_print(std::ostream& os) const +{ + auto sv_info = alu_src_const.find(m_value); + if (sv_info != alu_src_const.end()) { + os << sv_info->second.descr; + if (sv_info->second.use_chan) + os << '.' << component_names[chan()]; + else if (chan() > 0) + os << "." << component_names[chan()] + << " (W: Channel ignored)"; + } else { + if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32) + os << " Param" << m_value - ALU_SRC_PARAM_BASE; + else + os << " E: unknown inline constant " << m_value; + } +} + +bool InlineConstValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::cinline); + const auto& rhs = static_cast<const InlineConstValue&>(other); + return sel() == rhs.sel(); +} + +PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0)); +PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0)); +PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0)); +PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0)); + +UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank): + Value(Value::kconst, chan) +{ + m_index = sel; + m_kcache_bank = kcache_bank; +} + +UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr): + Value(Value::kconst, chan), + m_index(sel), + m_kcache_bank(1), + m_addr(addr) +{ + +} + +uint32_t UniformValue::sel() const +{ + const int bank_base[4] = {128, 160, 256, 288}; + return m_index < 512 ? m_index + bank_base[m_kcache_bank] : m_index; +} + +uint32_t UniformValue::kcache_bank() const +{ + return m_kcache_bank; +} + +bool UniformValue::is_equal_to(const Value& other) const +{ + const UniformValue& o = static_cast<const UniformValue&>(other); + return sel() == o.sel() && + m_kcache_bank == o.kcache_bank(); +} + +void UniformValue::do_print(std::ostream& os) const +{ + if (m_index < 512) + os << "KC" << m_kcache_bank << "[" << m_index; + else if (m_addr) + os << "KC[" << *m_addr << "][" << m_index; + else + os << "KCx[" << m_index; + os << "]." 
<< component_names[chan()]; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h new file mode 100644 index 000000000..7bc4528f9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h @@ -0,0 +1,194 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_VALUE_H +#define SFN_VALUE_H + +#include "sfn_alu_defines.h" +#include "nir.h" + +#include <memory> +#include <set> +#include <bitset> +#include <iostream> + +namespace r600 { + +class Value { +public: + using Pointer=std::shared_ptr<Value>; + + struct PrintFlags { + PrintFlags():index_mode(0), + flags(0) + { + } + PrintFlags(int im, int f):index_mode(im), + flags(f) + { + } + int index_mode; + int flags; + static const int is_rel = 1; + static const int has_abs = 2; + static const int has_neg = 4; + static const int literal_is_float = 8; + static const int index_ar = 16; + static const int index_loopidx = 32; + }; + + enum Type { + gpr, + kconst, + literal, + cinline, + lds_direct, + gpr_vector, + gpr_array_value, + unknown + }; + + static const char *component_names; + + using LiteralFlags=std::bitset<4>; + + Value(); + + Value(Type type); + + virtual ~Value(){} + + Type type() const; + virtual uint32_t sel() const = 0; + uint32_t chan() const {return m_chan;} + + void set_chan(uint32_t chan); + virtual void set_pin_to_channel() { assert(0 && "Only GPRs can be pinned to a channel ");} + void print(std::ostream& os, const PrintFlags& flags) const; + + void print(std::ostream& os) const; + + bool operator < (const Value& lhs) const; + + static Value::Pointer zero; + static Value::Pointer one_f; + static Value::Pointer zero_dot_5; + static Value::Pointer one_i; + +protected: + Value(Type type, uint32_t chan); + +private: + virtual void do_print(std::ostream& os) const = 0; + virtual void do_print(std::ostream& os, const PrintFlags& flags) const; + + virtual bool is_equal_to(const Value& other) const = 0; + + Type m_type; + uint32_t m_chan; + + friend bool operator == (const Value& lhs, const Value& rhs); +}; + + +inline std::ostream& operator << (std::ostream& os, const Value& v) +{ + v.print(os); + return os; +} + + +inline bool operator == (const Value& lhs, const Value& rhs) +{ + if (lhs.type() == rhs.type()) + return lhs.is_equal_to(rhs); + return false; 
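+   /* Equality is a two-step dispatch: values of different dynamic type never
+    * compare equal, and same-type values defer to the virtual is_equal_to().
+    * A minimal sketch of the resulting behaviour (GPRValue comes from
+    * sfn_value_gpr.h; operator!= is defined just below):
+    *
+    *   PValue a(new GPRValue(1, 0));   // R1.x
+    *   PValue b(new GPRValue(1, 0));   // R1.x, distinct object
+    *   assert(*a == *b);               // same type, same sel and chan
+    *   assert(*a != *Value::one_i);    // gpr vs. inline constant
+    */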
+} + +inline bool operator != (const Value& lhs, const Value& rhs) +{ + return !(lhs == rhs); +} + +using PValue=Value::Pointer; + +struct value_less { + inline bool operator () (PValue lhs, PValue rhs) const { + return *lhs < *rhs; + } +}; + +using ValueSet = std::set<PValue, value_less>; + + +class LiteralValue: public Value { +public: + LiteralValue(float value, uint32_t chan= 0); + LiteralValue(uint32_t value, uint32_t chan= 0); + LiteralValue(int value, uint32_t chan= 0); + uint32_t sel() const override final; + uint32_t value() const; + float value_float() const; +private: + void do_print(std::ostream& os) const override; + void do_print(std::ostream& os, const PrintFlags& flags) const override; + bool is_equal_to(const Value& other) const override; + union { + uint32_t u; + float f; + } m_value; +}; + +class InlineConstValue: public Value { +public: + InlineConstValue(int value, int chan); + uint32_t sel() const override final; +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + AluInlineConstants m_value; +}; + +class UniformValue: public Value { +public: + UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0); + UniformValue(uint32_t sel, uint32_t chan, PValue addr); + uint32_t sel() const override; + uint32_t kcache_bank() const; + PValue addr() const {return m_addr;} + void reset_addr(PValue v) {m_addr = v;} +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + + uint32_t m_index; + uint32_t m_kcache_bank; + PValue m_addr; +}; + +} // end ns r600 + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp new file mode 100644 index 000000000..c53b32527 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp @@ -0,0 +1,380 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "sfn_value_gpr.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+#include "sfn_liverange.h"
+
+namespace r600 {
+
+using std::vector;
+using std::array;
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset):
+   Value(Value::gpr, chan),
+   m_sel(sel),
+   m_base_offset(base_offset),
+   m_input(false),
+   m_pin_to_channel(false),
+   m_keep_alive(false)
+{
+}
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan):
+   Value(Value::gpr, chan),
+   m_sel(sel),
+   m_base_offset(0),
+   m_input(false),
+   m_pin_to_channel(false),
+   m_keep_alive(false)
+{
+}
+
+uint32_t GPRValue::sel() const
+{
+   return m_sel;
+}
+
+void GPRValue::do_print(std::ostream& os) const
+{
+   os << 'R';
+   os << m_sel;
+   os << '.' << component_names[chan()];
+}
+
+bool GPRValue::is_equal_to(const Value& other) const
+{
+   assert(other.type() == Value::Type::gpr);
+   const auto& rhs = static_cast<const GPRValue&>(other);
+   return (sel() == rhs.sel() &&
+           chan() == rhs.chan());
+}
+
+void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
+{
+   os << 'R';
+   os << m_sel;
+   os << '.' << component_names[chan()];
+}
+
+GPRVector::GPRVector(const GPRVector& orig):
+   Value(gpr_vector),
+   m_elms(orig.m_elms),
+   m_valid(orig.m_valid)
+{
+}
+
+GPRVector::GPRVector(std::array<PValue,4> elms):
+   Value(gpr_vector),
+   m_elms(elms),
+   m_valid(false)
+{
+   for (unsigned i = 0; i < 4; ++i)
+      if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) {
+         assert(0 && "GPR vector not valid because element missing or not a GPR");
+         return;
+      }
+   unsigned sel = m_elms[0]->sel();
+   for (unsigned i = 1; i < 4; ++i)
+      if (m_elms[i]->sel() != sel) {
+         assert(0 && "GPR vector not valid because sel is not equal for all elements");
+         return;
+      }
+   m_valid = true;
+}
+
+GPRVector::GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle):
+   Value (gpr_vector),
+   m_valid(true)
+{
+   for (int i = 0; i < 4; ++i)
+      m_elms[i] = PValue(new GPRValue(sel, swizzle[i]));
+}
+
+GPRVector::GPRVector(const GPRVector& orig, const std::array<uint8_t,4>& swizzle)
+{
+   for (int i = 0; i < 4; ++i)
+      m_elms[i] = orig.reg_i(swizzle[i]);
+   m_valid = orig.m_valid;
+}
+
+void GPRVector::validate() const
+{
+   assert(m_elms[0]);
+   uint32_t sel = m_elms[0]->sel();
+   if (sel >= 124)
+      return;
+
+   for (unsigned i = 1; i < 4; ++i) {
+      assert(m_elms[i]);
+      if (sel != m_elms[i]->sel())
+         return;
+   }
+
+   m_valid = true;
+}
+
+uint32_t GPRVector::sel() const
+{
+   validate();
+   assert(m_valid);
+   return m_elms[0] ? m_elms[0]->sel() : 999;
+}
+
+void GPRVector::set_reg_i(int i, PValue reg)
+{
+   m_elms[i] = reg;
+}
+
+void GPRVector::pin_to_channel(int i)
+{
+   auto& v = static_cast<GPRValue&>(*m_elms[i]);
+   v.set_pin_to_channel();
+}
+
+void GPRVector::pin_all_to_channel()
+{
+   for (auto& v: m_elms) {
+      auto& c = static_cast<GPRValue&>(*v);
+      c.set_pin_to_channel();
+   }
+}
+
+void GPRVector::do_print(std::ostream& os) const
+{
+   os << "R" << sel() << ".";
+   for (int i = 0; i < 4; ++i)
+      os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? m_elms[i]->chan() : 8] : '?');
+}
+
+void GPRVector::swizzle(const Swizzle& swz)
+{
+   Values v(m_elms);
+   for (uint32_t i = 0; i < 4; ++i)
+      if (i != swz[i]) {
+         assert(swz[i] < 4);
+         m_elms[i] = v[swz[i]];
+      }
+}
+
+bool GPRVector::is_equal_to(const Value& other) const
+{
+   if (other.type() != gpr_vector) {
+      std::cerr << "t";
+      return false;
+   }
+
+   const GPRVector& o = static_cast<const GPRVector&>(other);
+
+   for (int i = 0; i < 4; ++i) {
+      if (*m_elms[i] != *o.m_elms[i]) {
+         std::cerr << "elm" << i;
+         return false;
+      }
+   }
+   return true;
+}
+
+
+GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array):
+   Value(gpr_array_value, value->chan()),
+   m_value(value),
+   m_addr(addr),
+   m_array(array)
+{
+}
+
+GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array):
+   Value(gpr_array_value, value->chan()),
+   m_value(value),
+   m_array(array)
+{
+}
+
+static const char *swz_char = "xyzw01_";
+
+void GPRArrayValue::do_print(std::ostream& os) const
+{
+   assert(m_array);
+   os << "R" << m_value->sel();
+   if (m_addr) {
+      os << "[" << *m_addr << "] ";
+   }
+   os << swz_char[m_value->chan()];
+
+   os << "(" << *m_array << ")";
+}
+
+bool GPRArrayValue::is_equal_to(const Value& other) const
+{
+   const GPRArrayValue& v = static_cast<const GPRArrayValue&>(other);
+
+   return *m_value == *v.m_value &&
+         *m_array == *v.m_array;
+}
+
+void GPRArrayValue::record_read(LiverangeEvaluator& ev) const
+{
+   if (m_addr) {
+      ev.record_read(*m_addr);
+      unsigned chan = m_value->chan();
+      assert(m_array);
+      m_array->record_read(ev, chan);
+   } else
+      ev.record_read(*m_value);
+}
+
+void GPRArrayValue::record_write(LiverangeEvaluator& ev) const
+{
+   if (m_addr) {
+      ev.record_read(*m_addr);
+      unsigned chan = m_value->chan();
+      assert(m_array);
+      m_array->record_write(ev, chan);
+   } else
+      ev.record_write(*m_value);
+}
+
+void GPRArrayValue::reset_value(PValue new_value)
+{
+   m_value = new_value;
+}
+
+void GPRArrayValue::reset_addr(PValue new_addr)
+{
+   m_addr = new_addr;
+}
+
+
+GPRArray::GPRArray(int base, int size, int mask, int frac):
+   Value (gpr_vector),
+   m_base_index(base),
+   m_component_mask(mask),
+   m_frac(frac)
+{
+   m_values.resize(size);
+   for (int i = 0; i < size; ++i) {
+      for (int j = 0; j < 4; ++j) {
+         if (mask & (1 << j)) {
+            auto gpr = new GPRValue(base + i, j);
+            /* If we want to use sb, we have to keep arrays
+             * alive for the whole shader range, otherwise the sb scheduler
+             * thinks it is not able to rename non-array uses of these registers */
+            gpr->set_as_input();
+            gpr->set_keep_alive();
+            m_values[i].set_reg_i(j, PValue(gpr));
+
+         }
+      }
+   }
+}
+
+uint32_t GPRArray::sel() const
+{
+   return m_base_index;
+}
+
+static const char *compchar = "xyzw";
+void GPRArray::do_print(std::ostream& os) const
+{
+   os << "ARRAY[R" << sel() << "..R" << sel() + m_values.size() - 1 << "].";
+   for (int j = 0; j < 4; ++j) {
+      if (m_component_mask & (1 << j))
+         os << compchar[j];
+   }
+}
+
+bool GPRArray::is_equal_to(const Value& other) const
+{
+   const GPRArray& o = static_cast<const GPRArray&>(other);
+   return o.sel() == sel() &&
+         o.m_values.size() == m_values.size() &&
+         o.m_component_mask == m_component_mask;
+}
+
+uint32_t GPRArrayValue::sel() const
+{
+   return m_value->sel();
+}
+
+PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component)
+{
+   assert(index < m_values.size());
+   assert(m_component_mask & (1 << (component + m_frac)));
+
+   sfn_log << SfnLog::reg << "Create indirect register from " << *this;
+
+   PValue v = m_values[index].reg_i(component + m_frac);
+   assert(v);
+
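+   /* The two indirect cases resolved below: a literal index selects the
+    * element directly (the literal value replaces the base index), while
+    * a GPR index wraps the value in a GPRArrayValue so the address
+    * register stays live for run-time relative addressing. */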
+ sfn_log << SfnLog::reg << " -> " << *v; + + if (indirect) { + sfn_log << SfnLog::reg << "[" << *indirect << "]"; + switch (indirect->type()) { + case Value::literal: { + const LiteralValue& lv = static_cast<const LiteralValue&>(*indirect); + v = m_values[lv.value()].reg_i(component + m_frac); + break; + } + case Value::gpr: { + v = PValue(new GPRArrayValue(v, indirect, this)); + sfn_log << SfnLog::reg << "(" << *v << ")"; + break; + } + default: + assert(0 && !"Indirect addressing must be literal value or GPR"); + } + } + sfn_log << SfnLog::reg <<" -> " << *v << "\n"; + return v; +} + +void GPRArray::record_read(LiverangeEvaluator& ev, int chan) const +{ + for (auto& v: m_values) + ev.record_read(*v.reg_i(chan), true); +} + +void GPRArray::record_write(LiverangeEvaluator& ev, int chan) const +{ + for (auto& v: m_values) + ev.record_write(*v.reg_i(chan), true); +} + +void GPRArray::collect_registers(ValueMap& output) const +{ + for (auto& v: m_values) { + for (int i = 0; i < 4; ++i) { + auto vv = v.reg_i(i); + if (vv) + output.insert(vv); + } + } +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h new file mode 100644 index 000000000..789348875 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h @@ -0,0 +1,208 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_GPRARRAY_H +#define SFN_GPRARRAY_H + +#include "sfn_value.h" +#include <vector> +#include <array> + +namespace r600 { + +class ValuePool; +class ValueMap; +class LiverangeEvaluator; + +class GPRValue : public Value { +public: + GPRValue() = default; + GPRValue(GPRValue&& orig) = default; + GPRValue(const GPRValue& orig) = default; + + GPRValue(uint32_t sel, uint32_t chan, int base_offset); + + GPRValue(uint32_t sel, uint32_t chan); + + GPRValue& operator = (const GPRValue& orig) = default; + GPRValue& operator = (GPRValue&& orig) = default; + + uint32_t sel() const override final; + + void set_as_input(){ m_input = true; } + bool is_input() const {return m_input; } + void set_keep_alive() { m_keep_alive = true; } + bool keep_alive() const {return m_keep_alive; } + void set_pin_to_channel() override { m_pin_to_channel = true;} + bool pin_to_channel() const { return m_pin_to_channel;} + +private: + void do_print(std::ostream& os) const override; + void do_print(std::ostream& os, const PrintFlags& flags) const override; + bool is_equal_to(const Value& other) const override; + uint32_t m_sel; + bool m_base_offset; + bool m_input; + bool m_pin_to_channel; + bool m_keep_alive; +}; + +using PGPRValue = std::shared_ptr<GPRValue>; + +class GPRVector : public Value { +public: + using Swizzle = std::array<uint32_t,4>; + using Values = std::array<PValue,4>; + GPRVector() = default; + GPRVector(GPRVector&& orig) = default; + GPRVector(const GPRVector& orig); + + GPRVector(const GPRVector& orig, const std::array<uint8_t, 4>& swizzle); + GPRVector(std::array<PValue,4> elms); + GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle); + + GPRVector& operator = (const GPRVector& orig) = default; + GPRVector& operator = (GPRVector&& orig) = default; + + void swizzle(const Swizzle& swz); + + uint32_t sel() const override final; + + void set_reg_i(int i, PValue reg); + + unsigned chan_i(int i) const {return m_elms[i]->chan();} + PValue reg_i(int i) const {return m_elms[i];} + PValue operator [] (int i) const {return m_elms[i];} + PValue& operator [] (int i) {return m_elms[i];} + + void pin_to_channel(int i); + void pin_all_to_channel(); + + PValue x() const {return m_elms[0];} + PValue y() const {return m_elms[1];} + PValue z() const {return m_elms[2];} + PValue w() const {return m_elms[3];} + + Values& values() { return m_elms;} + +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + void validate() const; + + Values m_elms; + mutable bool m_valid; +}; + + +class GPRArray : public Value +{ +public: + using Pointer = std::shared_ptr<GPRArray>; + + GPRArray(int base, int size, int comp_mask, int frac); + + uint32_t sel() const override; + + uint32_t mask() const { return m_component_mask; }; + + size_t size() const {return m_values.size();} + + PValue get_indirect(unsigned index, PValue indirect, unsigned component); + + void record_read(LiverangeEvaluator& ev, int chan)const; + void record_write(LiverangeEvaluator& ev, int chan)const; + + void collect_registers(ValueMap& output) const; + +private: + void do_print(std::ostream& os) const override; + + bool is_equal_to(const Value& other) const override; + + int m_base_index; + int m_component_mask; + int m_frac; + + std::vector<GPRVector> m_values; +}; + +using PGPRArray = GPRArray::Pointer; + +class GPRArrayValue :public Value { +public: + GPRArrayValue(PValue value, GPRArray *array); + GPRArrayValue(PValue value, PValue index, GPRArray *array); + + void 
record_read(LiverangeEvaluator& ev) const; + void record_write(LiverangeEvaluator& ev) const; + + size_t array_size() const; + uint32_t sel() const override; + + PValue value() {return m_value;} + + void reset_value(PValue new_value); + void reset_addr(PValue new_addr); + + Value::Pointer indirect() const {return m_addr;} + +private: + + void do_print(std::ostream& os) const override; + + bool is_equal_to(const Value& other) const override; + + PValue m_value; + PValue m_addr; + GPRArray *m_array; +}; + +inline size_t GPRArrayValue::array_size() const +{ + return m_array->size(); +} + +inline GPRVector::Swizzle swizzle_from_comps(unsigned ncomp) +{ + GPRVector::Swizzle swz = {0,1,2,3}; + for (int i = ncomp; i < 4; ++i) + swz[i] = 7; + return swz; +} + +inline GPRVector::Swizzle swizzle_from_mask(unsigned mask) +{ + GPRVector::Swizzle swz; + for (int i = 0; i < 4; ++i) + swz[i] = ((1 << i) & mask) ? i : 7; + return swz; +} + + +} + +#endif // SFN_GPRARRAY_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp new file mode 100644 index 000000000..efc9efdca --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp @@ -0,0 +1,526 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_debug.h" +#include "sfn_value_gpr.h" +#include "sfn_valuepool.h" + +#include <iostream> +#include <queue> + +namespace r600 { + +using std::vector; +using std::pair; +using std::make_pair; +using std::queue; + +ValuePool::ValuePool(): + m_next_register_index(0), + current_temp_reg_index(0), + next_temp_reg_comp(4) +{ +} + +PValue ValuePool::m_undef = Value::zero; + +GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components) +{ + std::array<PValue, 4> result; + for (int i = 0; i < 4; ++i) + result[i] = from_nir(dst, i < num_components ? 
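+                        /* components beyond num_components get swizzle 7,
+                           i.e. "unused": a vec2 dest reads as {x, y, 7, 7} */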
i : 7);
+   return GPRVector(result);
+}
+
+std::vector<PValue> ValuePool::varvec_from_nir(const nir_dest& dst, int num_components)
+{
+   std::vector<PValue> result(num_components);
+   for (int i = 0; i < num_components; ++i)
+      result[i] = from_nir(dst, i);
+   return result;
+}
+
+
+std::vector<PValue> ValuePool::varvec_from_nir(const nir_src& src, int num_components)
+{
+   std::vector<PValue> result(num_components);
+   int i;
+   for (i = 0; i < num_components; ++i)
+      result[i] = from_nir(src, i);
+
+   return result;
+}
+
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
+{
+   sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ")
+           << (v.is_ssa ? v.ssa->index : v.reg.reg->index);
+
+   if (!v.is_ssa) {
+      int idx = lookup_register_index(v);
+      sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+      if (idx >= 0) {
+         auto reg = lookup_register(idx, swizzled, false);
+         if (reg) {
+            if (reg->type() == Value::gpr_vector) {
+               auto& array = static_cast<GPRArray&>(*reg);
+               reg = array.get_indirect(v.reg.base_offset,
+                                        v.reg.indirect ?
+                                           from_nir(*v.reg.indirect, 0, 0) : nullptr,
+                                        component);
+            }
+            return reg;
+         }
+      }
+      assert(0 && "local registers should always be found");
+   }
+
+   unsigned index = v.ssa->index;
+   /* For undefs we use zero and let the (yet to be implemented) DCE deal with it */
+   if (m_ssa_undef.find(index) != m_ssa_undef.end())
+      return Value::zero;
+
+
+   int idx = lookup_register_index(v);
+   sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+   if (idx >= 0) {
+      auto reg = lookup_register(idx, swizzled, false);
+      if (reg)
+         return reg;
+   }
+
+   auto literal_val = nir_src_as_const_value(v);
+   if (literal_val) {
+      assert(v.is_ssa);
+      switch (v.ssa->bit_size) {
+      case 1:
+         return PValue(new LiteralValue(literal_val[swizzled].b ? 0xffffffff : 0, component));
+      case 32:
+         return literal(literal_val[swizzled].u32);
+      default:
+         sfn_log << SfnLog::reg << "Unsupported bit size " << v.ssa->bit_size
+                 << " fall back to 32\n";
+         return PValue(new LiteralValue(literal_val[swizzled].u32, component));
+      }
+   }
+
+   return PValue();
+}
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component)
+{
+   return from_nir(v, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component)
+{
+   return from_nir(v.src, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component)
+{
+   return from_nir(v.src, component, v.swizzle[component]);
+}
+
+PGPRValue ValuePool::get_temp_register(int channel)
+{
+   /* Skip to next register to get the channel we want */
+   if (channel >= 0) {
+      if (next_temp_reg_comp <= channel)
+         next_temp_reg_comp = channel;
+      else
+         next_temp_reg_comp = 4;
+   }
+
+   if (next_temp_reg_comp > 3) {
+      current_temp_reg_index = allocate_temp_register();
+      next_temp_reg_comp = 0;
+   }
+   return std::make_shared<GPRValue>(current_temp_reg_index, next_temp_reg_comp++);
+}
+
+GPRVector ValuePool::get_temp_vec4(const GPRVector::Swizzle& swizzle)
+{
+   int sel = allocate_temp_register();
+   return GPRVector(sel, swizzle);
+}
+
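+/* Note on the register index space used by the lookups below: SSA values
+ * keep their nir index, while nir local registers are stored with bit 31
+ * set (index | 0x80000000, see get_local_register_index), so both kinds
+ * can share the same lookup maps. */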
+PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp)
+{
+   int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa):
+                          get_local_register_index(*src.reg.reg);
+
+   auto retval = lookup_register(idx, comp, false);
+   if (!retval || (retval->type() != Value::gpr &&
+                   retval->type() != Value::gpr_array_value))
+      retval = create_register(idx, comp);
+   return retval;
+}
+
+PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component)
+{
+   //assert(v->write_mask & (1 << component));
+   return from_nir(v.dest, component);
+}
+
+int ValuePool::lookup_register_index(const nir_dest& dst)
+{
+   return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa):
+                       get_local_register_index(*dst.reg.reg);
+}
+
+int ValuePool::lookup_register_index(const nir_src& src) const
+{
+   int index = 0;
+
+   index = src.is_ssa ?
+              get_ssa_register_index(*src.ssa) :
+              get_local_register_index(*src.reg.reg);
+
+   sfn_log << SfnLog::reg << " LIDX:" << index;
+
+   auto r = m_register_map.find(index);
+   if (r == m_register_map.end()) {
+      return -1;
+   }
+   return static_cast<int>(r->second.index);
+}
+
+
+int ValuePool::allocate_temp_register()
+{
+   return m_next_register_index++;
+}
+
+
+PValue ValuePool::from_nir(const nir_dest& v, unsigned component)
+{
+   int idx = lookup_register_index(v);
+   sfn_log << SfnLog::reg << __func__ << ": ";
+   if (v.is_ssa)
+      sfn_log << "ssa_" << v.ssa.index;
+   else
+      sfn_log << "r" << v.reg.reg->index;
+   sfn_log << " -> " << idx << "\n";
+
+   auto retval = lookup_register(idx, component, false);
+   if (!retval)
+      retval = create_register(idx, component);
+
+   if (retval->type() == Value::gpr_vector) {
+      assert(!v.is_ssa);
+      auto& array = static_cast<GPRArray&>(*retval);
+      retval = array.get_indirect(v.reg.base_offset,
+                                  v.reg.indirect ?
+                                     from_nir(*v.reg.indirect, 0, 0) : nullptr,
+                                  component);
+   }
+
+   return retval;
+}
+
+ValueMap ValuePool::get_temp_registers() const
+{
+   ValueMap result;
+
+   for (auto& v : m_registers) {
+      if (v.second->type() == Value::gpr)
+         result.insert(v.second);
+      else if (v.second->type() == Value::gpr_vector) {
+         auto& array = static_cast<GPRArray&>(*v.second);
+         array.collect_registers(result);
+      }
+   }
+   return result;
+}
+
+static const char swz[] = "xyzw01?_";
+
+PValue ValuePool::create_register(unsigned sel, unsigned swizzle)
+{
+   sfn_log << SfnLog::reg
+           << "Create register " << sel << '.' << swz[swizzle] << "\n";
+   auto retval = PValue(new GPRValue(sel, swizzle));
+   m_registers[(sel << 3) + swizzle] = retval;
+   return retval;
+}
+
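+/* The map key packs register index and component into one slot:
+ * (sel << 3) + swizzle, so e.g. R5.y is keyed as (5 << 3) + 1 = 41. */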
+bool ValuePool::inject_register(unsigned sel, unsigned swizzle,
+                                const PValue& reg, bool map)
+{
+   uint32_t ssa_index = sel;
+
+   if (map) {
+      auto pos = m_ssa_register_map.find(sel);
+      if (pos == m_ssa_register_map.end())
+         ssa_index = m_next_register_index++;
+      else
+         ssa_index = pos->second;
+   }
+
+   sfn_log << SfnLog::reg
+           << "Inject register " << sel << '.' << swz[swizzle]
+           << " at index " << ssa_index << " ...";
+
+   if (map)
+      m_ssa_register_map[sel] = ssa_index;
+
+   allocate_with_mask(ssa_index, swizzle, true);
+
+   unsigned idx = (ssa_index << 3) + swizzle;
+   auto p = m_registers.find(idx);
+   if ( (p != m_registers.end()) && *p->second != *reg) {
+      std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n";
+      assert(0);
+      return false;
+   }
+   sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n";
+   m_registers[idx] = reg;
+
+   if (m_next_register_index <= ssa_index)
+      m_next_register_index = ssa_index + 1;
+   return true;
+}
+
+
+PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle,
+                                  bool required)
+{
+
+   PValue retval;
+   sfn_log << SfnLog::reg
+           << "lookup register " << sel << '.' << swz[swizzle] << "("
+           << ((sel << 3) + swizzle) << ")...";
+
+
+   auto reg = m_registers.find((sel << 3) + swizzle);
+   if (reg != m_registers.end()) {
+      sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n";
+      retval = reg->second;
+   } else if (swizzle == 7) {
+      retval = create_register(sel, swizzle);
+      sfn_log << SfnLog::reg << " -> Created " << *retval << "\n";
+   } else if (required) {
+      sfn_log << SfnLog::reg << "Register (" << sel << ", "
+              << swizzle << ") not found but required\n";
+      assert(0 && "Unallocated register value requested\n");
+   } else {
+      sfn_log << SfnLog::reg << " -> Not required and not allocated\n";
+   }
+   return retval;
+}
+
+unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa)
+{
+   sfn_log << SfnLog::reg << __func__ << ": search dst ssa "
+           << ssa.index;
+
+   auto pos = m_ssa_register_map.find(ssa.index);
+   if (pos == m_ssa_register_map.end()) {
+      sfn_log << SfnLog::reg << " Need to allocate ...";
+      allocate_ssa_register(ssa);
+      pos = m_ssa_register_map.find(ssa.index);
+      assert(pos != m_ssa_register_map.end());
+   }
+   sfn_log << SfnLog::reg << "... got " << pos->second << "\n";
+   return pos->second;
+}
+
got " << pos->second << "\n"; + return pos->second; +} + +unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const +{ + sfn_log << SfnLog::reg << __func__ << ": search ssa " + << ssa.index; + + auto pos = m_ssa_register_map.find(ssa.index); + sfn_log << SfnLog::reg << " got " << pos->second<< "\n"; + if (pos == m_ssa_register_map.end()) { + sfn_log << SfnLog::reg << __func__ << ": ssa register " + << ssa.index << " lookup failed\n"; + return -1; + } + return pos->second; +} + +unsigned ValuePool::get_local_register_index(const nir_register& reg) +{ + unsigned index = reg.index | 0x80000000; + + auto pos = m_ssa_register_map.find(index); + if (pos == m_ssa_register_map.end()) { + allocate_local_register(reg); + pos = m_ssa_register_map.find(index); + assert(pos != m_ssa_register_map.end()); + } + return pos->second; +} + +unsigned ValuePool::get_local_register_index(const nir_register& reg) const +{ + unsigned index = reg.index | 0x80000000; + auto pos = m_ssa_register_map.find(index); + if (pos == m_ssa_register_map.end()) { + sfn_log << SfnLog::err << __func__ << ": local register " + << reg.index << " lookup failed"; + return -1; + } + return pos->second; +} + +void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa) +{ + sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index + << " as " << m_next_register_index << "\n"; + int index = m_next_register_index++; + m_ssa_register_map[ssa.index] = index; + allocate_with_mask(index, 0xf, true); +} + +void ValuePool::allocate_arrays(array_list& arrays) +{ + int ncomponents = 0; + int current_index = m_next_register_index; + unsigned instance = 0; + + while (!arrays.empty()) { + auto a = arrays.top(); + arrays.pop(); + + /* This is a bit hackish, return an id that encodes the array merge. 
+       * that the mapping doesn't go wrong we have to make sure the array is longer than
+       * the number of instances in this array slot */
+      if (a.ncomponents + ncomponents > 4 ||
+          a.length < instance) {
+         current_index = m_next_register_index;
+         ncomponents = 0;
+         instance = 0;
+      }
+
+      if (ncomponents == 0)
+         m_next_register_index += a.length;
+
+      uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents;
+
+      PGPRArray array = PGPRArray(new GPRArray(current_index, a.length, mask, ncomponents));
+
+      m_reg_arrays.push_back(array);
+
+      sfn_log << SfnLog::reg << "Add array at "<< current_index
+              << " of size " << a.length << " with " << a.ncomponents
+              << " components, mask " << mask << "\n";
+
+      m_ssa_register_map[a.index | 0x80000000] = current_index + instance;
+
+      for (unsigned i = 0; i < a.ncomponents; ++i)
+         m_registers[((current_index + instance) << 3) + i] = array;
+
+      VRec next_reg = {current_index + instance, mask, mask};
+      m_register_map[current_index + instance] = next_reg;
+
+      ncomponents += a.ncomponents;
+      ++instance;
+   }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg)
+{
+   int index = m_next_register_index++;
+   m_ssa_register_map[reg.index | 0x80000000] = index;
+   allocate_with_mask(index, 0xf, true);
+
+   /* Create actual register and map it */
+   for (int i = 0; i < 4; ++i) {
+      int k = (index << 3) + i;
+      m_registers[k] = std::make_shared<GPRValue>(index, i);
+   }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays)
+{
+   sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index
+           << " as " << m_next_register_index << "\n";
+
+   if (reg.num_array_elems) {
+      array_entry ae = {reg.index, reg.num_array_elems, reg.num_components};
+      arrays.push(ae);
+   }
+   else
+      allocate_local_register(reg);
+}
+
+bool ValuePool::create_undef(nir_ssa_undef_instr* instr)
+{
+   m_ssa_undef.insert(instr->def.index);
+   return true;
+}
+
+int ValuePool::allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc)
+{
+   int retval;
+   VRec next_register = { index, mask };
+
+   sfn_log << SfnLog::reg << (pre_alloc ?
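+                      /* mask bookkeeping below: re-allocating a component is
+                         only accepted if that component was pre-allocated;
+                         e.g. a pre-alloc of mask 0x3 tolerates a later alloc
+                         of 0x1, while two plain allocations of 0x1 collide */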
"Pre-alloc" : "Allocate") + << " register (" << index << ", " << mask << ")\n"; + retval = index; + auto r = m_register_map.find(index); + + if (r != m_register_map.end()) { + if ((r->second.mask & next_register.mask) && + !(r->second.pre_alloc_mask & next_register.mask)) { + std::cerr << "r600 ERR: register (" + << index << ", " << mask + << ") already allocated as (" << r->second.index << ", " + << r->second.mask << ", " << r->second.pre_alloc_mask + << ") \n"; + retval = -1; + } else { + r->second.mask |= next_register.mask; + if (pre_alloc) + r->second.pre_alloc_mask |= next_register.mask; + retval = r->second.index; + } + } else { + if (pre_alloc) + next_register.pre_alloc_mask = mask; + m_register_map[index] = next_register; + retval = next_register.index; + } + + sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R" + << retval << "\n"; + + return retval; +} + +PValue ValuePool::literal(uint32_t value) +{ + auto l = m_literals.find(value); + if (l != m_literals.end()) + return l->second; + + m_literals[value] = PValue(new LiteralValue(value)); + return m_literals[value]; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h new file mode 100644 index 000000000..fa1e5507f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h @@ -0,0 +1,242 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#ifndef SFN_VALUEPOOL_H
+#define SFN_VALUEPOOL_H
+
+#include "sfn_value.h"
+#include "sfn_value_gpr.h"
+
+#include <set>
+#include <queue>
+
+namespace r600 {
+
+using LiteralBuffer = std::map<unsigned, const nir_load_const_instr *>;
+
+class ValueMap {
+public:
+   void insert(const PValue& v) {
+      auto idx = index_from(v->sel(), v->chan());
+      m_map[idx] = v;
+   }
+   PValue get_or_inject(uint32_t index, uint32_t chan) {
+      auto idx = index_from(index, chan);
+      auto v = m_map.find(idx);
+      if (v == m_map.end()) {
+         insert(PValue(new GPRValue(index, chan)));
+         v = m_map.find(idx);
+      }
+      return v->second;
+   }
+   std::map<uint32_t, PValue>::const_iterator begin() const {return m_map.begin();}
+   std::map<uint32_t, PValue>::const_iterator end() const {return m_map.end();}
+
+private:
+   uint32_t index_from(uint32_t index, uint32_t chan) {
+      return (index << 3) + chan;
+   }
+   std::map<uint32_t, PValue> m_map;
+};
+
+/** \brief Class to keep track of registers, uniforms, and literals
+ * This class holds the references to the uniforms and the literals
+ * and is responsible for allocating the registers.
+ */
+class ValuePool
+{
+public:
+
+   struct array_entry {
+      unsigned index;
+      unsigned length;
+      unsigned ncomponents;
+
+      bool operator ()(const array_entry& a, const array_entry& b) const {
+         return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents);
+      }
+   };
+
+   using array_list = std::priority_queue<array_entry, std::vector<array_entry>,
+                                          array_entry>;
+
+   ValuePool();
+
+
+   GPRVector vec_from_nir(const nir_dest& dst, int num_components);
+
+   std::vector<PValue> varvec_from_nir(const nir_dest& src, int num_components);
+   std::vector<PValue> varvec_from_nir(const nir_src& src, int num_components);
+
+   PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
+
+   PValue from_nir(const nir_src& v, unsigned component);
+   /** Get a register that is used as source register in an ALU instruction.
+    * The PValue holds one component as specified. If the register refers to
+    * a GPR it must already have been allocated; uniforms and literals on
+    * the other hand might be pre-loaded.
+    */
+   PValue from_nir(const nir_alu_src& v, unsigned component);
+
+   /** Get a register that is used as source register in a texture instruction.
+    * The PValue holds one component as specified.
+    */
+   PValue from_nir(const nir_tex_src& v, unsigned component);
+
+   /** Allocate a register that is used as destination register in an ALU
+    * instruction. The PValue holds one component as specified.
+    */
+   PValue from_nir(const nir_alu_dest& v, unsigned component);
+
+   /** Allocate a register that is used as destination register in any
+    * instruction. The PValue holds one component as specified.
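+    * (A usage sketch, assuming an intrinsic instruction ir:
+    *    PValue y = from_nir(ir->dest, 1);
+    * returns the value backing component .y, allocating the backing
+    * register on first use.)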
+    */
+   PValue from_nir(const nir_dest& v, unsigned component);
+
+
+   /** Inject a register into a given ssa index position
+    * This is used to redirect loads from system values and vertex attributes
+    * that are already loaded into registers */
+   bool inject_register(unsigned sel, unsigned swizzle, const PValue &reg, bool map);
+
+   /** Reserve space for a local register */
+   void allocate_local_register(const nir_register& reg);
+   void allocate_local_register(const nir_register &reg, array_list& arrays);
+
+   void allocate_arrays(array_list& arrays);
+
+
+   void increment_reserved_registers() {
+      ++m_next_register_index;
+   }
+
+   void set_reserved_registers(unsigned rr) {
+      m_next_register_index = rr;
+   }
+
+   /** Reserve an undef register, currently it uses (0,7),
+    * \todo should be eliminated in the final pass
+    */
+   bool create_undef(nir_ssa_undef_instr* instr);
+
+   /** Create a new register with the given index and store it in the
+    * lookup map
+    */
+   PValue create_register_from_nir_src(const nir_src& sel, int comp);
+
+   ValueMap get_temp_registers() const;
+
+   PValue lookup_register(unsigned sel, unsigned swizzle, bool required);
+
+   size_t register_count() const {return m_next_register_index;}
+
+   PValue literal(uint32_t value);
+
+   PGPRValue get_temp_register(int channel = -1);
+
+   GPRVector get_temp_vec4(const GPRVector::Swizzle &swizzle = {0,1,2,3});
+
+protected:
+   std::vector<PGPRArray> m_reg_arrays;
+
+private:
+
+   /** Get the register index mapped from the NIR code to the r600 ir
+    * \param index NIR index of register
+    * \returns r600 IR index
+    */
+   int lookup_register_index(const nir_src& src) const;
+
+   /** Get the register index mapped from the NIR code to the r600 ir
+    * \param index NIR index of register
+    * \returns r600 IR index
+    */
+   int lookup_register_index(const nir_dest& dst);
+
+   /** Allocate a register that is needed for lowering an instruction
+    * that requires complex calculations.
+    */
+   int allocate_temp_register();
+
+
+   PValue create_register(unsigned index, unsigned swizzle);
+
+   unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa);
+
+   unsigned get_ssa_register_index(const nir_ssa_def& ssa) const;
+
+   unsigned get_local_register_index(const nir_register& reg);
+
+   unsigned get_local_register_index(const nir_register& reg) const;
+
+   void allocate_ssa_register(const nir_ssa_def& ssa);
+
+   void allocate_array(const nir_register& reg);
+
+
+   /** Allocate a register index with the given component mask.
+    * If one of the components has already been allocated the function
+    * will signal an error by returning -1, otherwise a register index is
+    * returned.
+    */
+   int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc);
+
+   /** search for a new register with the given index in the
+    * lookup map.
+ * \param sel register sel value + * \param swizzle register component, can also be 4,5, and 7 + * \param required true: in debug mode assert when register doesn't exist + * false: return nullptr on failure + */ + + std::set<unsigned> m_ssa_undef; + + std::map<unsigned, unsigned> m_ssa_register_map; + + std::map<unsigned, PValue> m_registers; + + static PValue m_undef; + + struct VRec { + unsigned index; + unsigned mask; + unsigned pre_alloc_mask; + }; + std::map<unsigned, VRec> m_register_map; + + unsigned m_next_register_index; + + + std::map<uint32_t, PValue> m_literals; + + int current_temp_reg_index; + int next_temp_reg_comp; +}; + +} + +#endif // SFN_VALUEPOOL_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp new file mode 100644 index 000000000..ff49216a9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp @@ -0,0 +1,535 @@ +#include "sfn_vertexstageexport.h" + +#include "sfn_shaderio.h" + +namespace r600 { + +using std::priority_queue; + +VertexStageExportBase::VertexStageExportBase(VertexStage& proc): + m_proc(proc), + m_cur_clip_pos(1) +{ + +} + +VertexStageExportBase::~VertexStageExportBase() +{ + +} + +bool VertexStageExportBase::do_process_outputs(nir_variable *output) +{ + return true; +} + +void VertexStageExportBase::emit_shader_start() +{ + +} + +void VertexStageExportBase::scan_store_output(nir_intrinsic_instr* instr) +{ + +} + +bool VertexStageExportBase::store_output(nir_intrinsic_instr* instr) +{ + auto index = nir_src_as_const_value(instr->src[1]); + assert(index && "Indirect outputs not supported"); + + const store_loc store_info = { + nir_intrinsic_component(instr), + nir_intrinsic_io_semantics(instr).location, + (unsigned)nir_intrinsic_base(instr) + index->u32, + 0 + }; + + return do_store_output(store_info, instr); +} + +VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc, + const pipe_stream_output_info *so_info, + r600_pipe_shader *pipe_shader, const r600_shader_key &key): + VertexStageWithOutputInfo(proc), + m_last_param_export(nullptr), + m_last_pos_export(nullptr), + m_num_clip_dist(0), + m_enabled_stream_buffers_mask(0), + m_so_info(so_info), + m_pipe_shader(pipe_shader), + m_key(key) +{ +} + +bool VertexStageWithOutputInfo::do_process_outputs(nir_variable *output) +{ + if (output->data.location == VARYING_SLOT_COL0 || + output->data.location == VARYING_SLOT_COL1 || + (output->data.location >= VARYING_SLOT_VAR0 && + output->data.location <= VARYING_SLOT_VAR31) || + (output->data.location >= VARYING_SLOT_TEX0 && + output->data.location <= VARYING_SLOT_TEX7) || + output->data.location == VARYING_SLOT_BFC0 || + output->data.location == VARYING_SLOT_BFC1 || + output->data.location == VARYING_SLOT_CLIP_VERTEX || + output->data.location == VARYING_SLOT_CLIP_DIST0 || + output->data.location == VARYING_SLOT_CLIP_DIST1 || + output->data.location == VARYING_SLOT_POS || + output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_FOGC || + output->data.location == VARYING_SLOT_LAYER || + output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_VIEWPORT + ) { + + r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location]; + auto semantic = r600_get_varying_semantic(output->data.location); + io.name = semantic.first; + io.sid = semantic.second; + + m_proc.evaluate_spi_sid(io); + io.write_mask = ((1 << glsl_get_components(output->type)) - 1) + << 
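+                        /* e.g. a vec2 output at location_frac 1 yields
+                           write_mask ((1 << 2) - 1) << 1 = 0x6 (y and z) */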
output->data.location_frac; + ++m_proc.sh_info().noutput; + + if (output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_LAYER) // VIEWPORT? + m_cur_clip_pos = 2; + + if (output->data.location != VARYING_SLOT_POS && + output->data.location != VARYING_SLOT_EDGE && + output->data.location != VARYING_SLOT_PSIZ && + output->data.location != VARYING_SLOT_CLIP_VERTEX) + m_param_driver_locations.push(output->data.driver_location); + + return true; + } + return false; +} + +bool VertexStageExportForFS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) +{ + switch (store_info.location) { + case VARYING_SLOT_PSIZ: + m_proc.sh_info().vs_out_point_size = 1; + m_proc.sh_info().vs_out_misc_write = 1; + FALLTHROUGH; + case VARYING_SLOT_POS: + return emit_varying_pos(store_info, instr); + case VARYING_SLOT_EDGE: { + std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7}; + return emit_varying_pos(store_info, instr, &swizzle_override); + } + case VARYING_SLOT_VIEWPORT: { + std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0}; + return emit_varying_pos(store_info, instr, &swizzle_override) && + emit_varying_param(store_info, instr); + } + case VARYING_SLOT_CLIP_VERTEX: + return emit_clip_vertices(store_info, instr); + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + m_num_clip_dist += 4; + return emit_varying_param(store_info, instr) && emit_varying_pos(store_info, instr); + case VARYING_SLOT_LAYER: { + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_layer = 1; + std::array<uint32_t, 4> swz = {7,7,0,7}; + return emit_varying_pos(store_info, instr, &swz) && + emit_varying_param(store_info, instr); + } + case VARYING_SLOT_VIEW_INDEX: + return emit_varying_pos(store_info, instr) && + emit_varying_param(store_info, instr); + + default: + return emit_varying_param(store_info, instr); + } + + fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", + store_info.location); + return false; +} + +bool VertexStageExportForFS::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr* instr, + std::array<uint32_t, 4> *swizzle_override) +{ + std::array<uint32_t,4> swizzle; + uint32_t write_mask = 0; + + if (swizzle_override) { + swizzle = *swizzle_override; + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 6) + write_mask |= 1 << i; + } + } else { + write_mask = nir_intrinsic_write_mask(instr) << store_info.frac; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? 
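+                     /* e.g. write_mask 0b1100 with frac 2 maps to swizzle
+                        {7, 7, 0, 1}; channels not written read as 7 */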
i - store_info.frac : 7; + } + + m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask; + + GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle); + m_proc.set_output(store_info.driver_location, value.sel()); + + int export_slot = 0; + + switch (store_info.location) { + case VARYING_SLOT_EDGE: { + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_edgeflag = 1; + m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); + m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr}); + m_proc.sh_info().output[store_info.driver_location].write_mask = 0xf; + } + FALLTHROUGH; + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + export_slot = 1; + break; + case VARYING_SLOT_VIEWPORT: + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_viewport = 1; + export_slot = 1; + break; + case VARYING_SLOT_POS: + break; + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + export_slot = m_cur_clip_pos++; + break; + default: + sfn_log << SfnLog::err << __func__ << "Unsupported location " + << store_info.location << "\n"; + return false; + } + + m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + m_proc.add_param_output_reg(store_info.driver_location, m_last_pos_export->gpr_ptr()); + return true; +} + +bool VertexStageExportForFS::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr* instr) +{ + assert(store_info.driver_location < m_proc.sh_info().noutput); + sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n"; + + int write_mask = nir_intrinsic_write_mask(instr) << store_info.frac; + std::array<uint32_t,4> swizzle; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7; + + //m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask; + + GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle, true); + m_proc.sh_info().output[store_info.driver_location].gpr = value.sel(); + + /* This should use the registers!! */ + m_proc.set_output(store_info.driver_location, value.sel()); + + m_last_param_export = new ExportInstruction(param_id(store_info.driver_location), + value, ExportInstruction::et_param); + m_proc.emit_export_instruction(m_last_param_export); + m_proc.add_param_output_reg(store_info.driver_location, m_last_param_export->gpr_ptr()); + return true; +} + +bool VertexStageExportForFS::emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr) +{ + m_proc.sh_info().cc_dist_mask = 0xff; + m_proc.sh_info().clip_dist_write = 0xff; + + m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], 0xf, {0,1,2,3}); + m_proc.add_param_output_reg(store_info.driver_location, &m_clip_vertex); + + for (int i = 0; i < 4; ++i) + m_proc.sh_info().output[store_info.driver_location].write_mask |= 1 << i; + + GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()}; + + for (int i = 0; i < 8; i++) { + int oreg = i >> 2; + int ochan = i & 3; + AluInstruction *ir = nullptr; + for (int j = 0; j < 4; j++) { + ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j), + PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)), + (j == ochan) ? 
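+                                    /* every lane of the DOT4 computes the same
+                                       dot product; only lane ochan (i & 3) is
+                                       written, so clip distance i lands in
+                                       component i & 3 of export reg i >> 2 */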
EmitInstruction::write : EmitInstruction::empty); + m_proc.emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + } + + m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + + m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + + return true; +} + +VertexStageWithOutputInfo::VertexStageWithOutputInfo(VertexStage& proc): + VertexStageExportBase(proc), + m_current_param(0) +{ + +} + +void VertexStageWithOutputInfo::scan_store_output(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto driver_location = nir_intrinsic_base(instr); + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + + unsigned noutputs = driver_location + index->u32 + 1; + if (m_proc.sh_info().noutput < noutputs) + m_proc.sh_info().noutput = noutputs; + + r600_shader_io& io = m_proc.sh_info().output[driver_location + index->u32]; + auto semantic = r600_get_varying_semantic(location + index->u32); + io.name = semantic.first; + io.sid = semantic.second; + m_proc.evaluate_spi_sid(io); + io.write_mask = nir_intrinsic_write_mask(instr); + + if (location == VARYING_SLOT_PSIZ || + location == VARYING_SLOT_EDGE || + location == VARYING_SLOT_LAYER) // VIEWPORT? + m_cur_clip_pos = 2; + + if (location != VARYING_SLOT_POS && + location != VARYING_SLOT_EDGE && + location != VARYING_SLOT_PSIZ && + location != VARYING_SLOT_CLIP_VERTEX) { + m_param_driver_locations.push(driver_location + index->u32); + } +} + +unsigned VertexStageWithOutputInfo::param_id(unsigned driver_location) +{ + auto param_loc = m_param_map.find(driver_location); + assert(param_loc != m_param_map.end()); + return param_loc->second; +} + +void VertexStageWithOutputInfo::emit_shader_start() +{ + while (!m_param_driver_locations.empty()) { + auto loc = m_param_driver_locations.top(); + m_param_driver_locations.pop(); + m_param_map[loc] = m_current_param++; + } +} + +unsigned VertexStageWithOutputInfo::current_param() const +{ + return m_current_param; +} + +void VertexStageExportForFS::finalize_exports() +{ + if (m_key.vs.as_gs_a) { + PValue o(new GPRValue(0,PIPE_SWIZZLE_0)); + GPRVector primid({m_proc.primitive_id(), o,o,o}); + m_last_param_export = new ExportInstruction(current_param(), primid, ExportInstruction::et_param); + m_proc.emit_export_instruction(m_last_param_export); + int i; + i = m_proc.sh_info().noutput++; + auto& io = m_proc.sh_info().output[i]; + io.name = TGSI_SEMANTIC_PRIMID; + io.sid = 0; + io.gpr = 0; + io.interpolate = TGSI_INTERPOLATE_CONSTANT; + io.write_mask = 0x1; + io.spi_sid = m_key.vs.prim_id_out; + m_proc.sh_info().vs_as_gs_a = 1; + } + + if (m_so_info && m_so_info->num_outputs) + emit_stream(-1); + + m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask; + + if (!m_last_param_export) { + GPRVector value(0,{7,7,7,7}); + m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param); + m_proc.emit_export_instruction(m_last_param_export); + } + m_last_param_export->set_last(); + + if (!m_last_pos_export) { + GPRVector value(0,{7,7,7,7}); + m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + } + m_last_pos_export->set_last(); +} + +bool VertexStageExportForFS::emit_stream(int stream) +{ + assert(m_so_info); + if (m_so_info->num_outputs > 
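+       /* stream == -1 emits the outputs of all streams; each output must
+          target one of the four stream output buffers checked just below */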
PIPE_MAX_SO_OUTPUTS) { + R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs); + return false; + } + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + if (m_so_info->output[i].output_buffer >= 4) { + R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", + m_so_info->output[i].output_buffer); + return false; + } + } + const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS]; + unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; + std::vector<GPRVector> tmp(m_so_info->num_outputs); + + /* Initialize locations where the outputs are stored. */ + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + if (stream != -1 && stream != m_so_info->output[i].stream) + continue; + + sfn_log << SfnLog::instr << "Emit stream " << i + << " with register index " << m_so_info->output[i].register_index << " so_gpr:"; + + + so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index); + + if (!so_gpr[i]) { + sfn_log << SfnLog::err << "\nERR: register index " + << m_so_info->output[i].register_index + << " doesn't correspond to an output register\n"; + return false; + } + start_comp[i] = m_so_info->output[i].start_component; + /* Lower outputs with dst_offset < start_component. + * + * We can only output 4D vectors with a write mask, e.g. we can + * only output the W component at offset 3, etc. If we want + * to store Y, Z, or W at buffer offset 0, we need to use MOV + * to move it to X and output X. */ + if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) { + + GPRVector::Swizzle swizzle = {0,1,2,3}; + for (auto j = m_so_info->output[i].num_components; j < 4; ++j) + swizzle[j] = 7; + tmp[i] = m_proc.get_temp_vec4(swizzle); + + int sc = m_so_info->output[i].start_component; + AluInstruction *alu = nullptr; + for (int j = 0; j < m_so_info->output[i].num_components; j++) { + alu = new AluInstruction(op1_mov, tmp[i][j], so_gpr[i]->reg_i(j + sc), {alu_write}); + m_proc.emit_instruction(alu); + } + if (alu) + alu->set_flag(alu_last_instr); + + start_comp[i] = 0; + so_gpr[i] = &tmp[i]; + } + sfn_log << SfnLog::instr << *so_gpr[i] << "\n"; + } + + /* Write outputs to buffers. 
*/ + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + sfn_log << SfnLog::instr << "Write output buffer " << i + << " with register index " << m_so_info->output[i].register_index << "\n"; + + StreamOutIntruction *out_stream = + new StreamOutIntruction(*so_gpr[i], + m_so_info->output[i].num_components, + m_so_info->output[i].dst_offset - start_comp[i], + ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i], + m_so_info->output[i].output_buffer, + m_so_info->output[i].stream); + m_proc.emit_export_instruction(out_stream); + m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4; + } + return true; +} + + +VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc, + const r600_shader *gs_shader): + VertexStageWithOutputInfo(proc), + m_num_clip_dist(0), + m_gs_shader(gs_shader) +{ + +} + +bool VertexStageExportForGS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) +{ + int ring_offset = -1; + const r600_shader_io& out_io = m_proc.sh_info().output[store_info.driver_location]; + + sfn_log << SfnLog::io << "check output " << store_info.driver_location + << " name=" << out_io.name<< " sid=" << out_io.sid << "\n"; + for (unsigned k = 0; k < m_gs_shader->ninput; ++k) { + auto& in_io = m_gs_shader->input[k]; + sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n"; + + if (in_io.name == out_io.name && + in_io.sid == out_io.sid) { + ring_offset = in_io.ring_offset; + break; + } + } + + if (store_info.location == VARYING_SLOT_VIEWPORT) { + m_proc.sh_info().vs_out_viewport = 1; + m_proc.sh_info().vs_out_misc_write = 1; + return true; + } + + if (ring_offset == -1) { + sfn_log << SfnLog::err << "VS defines output at " + << store_info.driver_location << "name=" << out_io.name + << " sid=" << out_io.sid << " that is not consumed as GS input\n"; + return true; + } + + uint32_t write_mask = (1 << instr->num_components) - 1; + + GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, + swizzle_from_comps(instr->num_components), true); + + auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value, + ring_offset >> 2, 4, PValue()); + m_proc.emit_export_instruction(ir); + + m_proc.sh_info().output[store_info.driver_location].write_mask |= write_mask; + if (store_info.location == VARYING_SLOT_CLIP_DIST0 || + store_info.location == VARYING_SLOT_CLIP_DIST1) + m_num_clip_dist += 4; + + return true; +} + +void VertexStageExportForGS::finalize_exports() +{ + +} + +VertexStageExportForES::VertexStageExportForES(VertexStage& proc): + VertexStageExportBase(proc) +{ +} + +bool VertexStageExportForES::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) +{ + return true; +} + +void VertexStageExportForES::finalize_exports() +{ + +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h new file mode 100644 index 000000000..46aee8071 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h @@ -0,0 +1,116 @@ +#ifndef VERTEXSTAGEEXPORT_H +#define VERTEXSTAGEEXPORT_H + +#include "sfn_shader_base.h" +#include <queue> + +namespace r600 { + +class VertexStage : public ShaderFromNirProcessor { +public: + using ShaderFromNirProcessor::ShaderFromNirProcessor; + + virtual PValue primitive_id() = 0; +}; + +class VertexStageExportBase +{ +public: + VertexStageExportBase(VertexStage& proc); + virtual 
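+   /* the concrete subclasses below choose the export path: the FS variant
+      emits pos/param exports, the GS variant writes the GS ring, and the
+      ES variant stores nothing at this point */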
~VertexStageExportBase(); + virtual void finalize_exports() = 0; + virtual bool do_process_outputs(nir_variable *output); + + virtual void emit_shader_start(); + + virtual void scan_store_output(nir_intrinsic_instr* instr); + bool store_output(nir_intrinsic_instr* instr); +protected: + + struct store_loc { + unsigned frac; + unsigned location; + unsigned driver_location; + int data_loc; + }; + virtual bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) = 0; + + VertexStage& m_proc; + int m_cur_clip_pos; + GPRVector m_clip_vertex; +}; + + +class VertexStageWithOutputInfo: public VertexStageExportBase +{ +protected: + VertexStageWithOutputInfo(VertexStage& proc); + void scan_store_output(nir_intrinsic_instr* instr) override; + void emit_shader_start() override; + bool do_process_outputs(nir_variable *output) override; +protected: + unsigned param_id(unsigned driver_location); + unsigned current_param() const; +private: + std::priority_queue<unsigned, std::vector<unsigned>, std::greater<unsigned> > m_param_driver_locations; + std::map<unsigned, unsigned> m_param_map; + unsigned m_current_param; +}; + + +class VertexStageExportForFS : public VertexStageWithOutputInfo +{ +public: + VertexStageExportForFS(VertexStage& proc, + const pipe_stream_output_info *so_info, + r600_pipe_shader *pipe_shader, + const r600_shader_key& key); + + void finalize_exports() override; +private: + bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; + + bool emit_varying_param(const store_loc& store_info, nir_intrinsic_instr* instr); + bool emit_varying_pos(const store_loc& store_info, nir_intrinsic_instr* instr, + std::array<uint32_t, 4> *swizzle_override = nullptr); + bool emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr); + bool emit_stream(int stream); + + ExportInstruction *m_last_param_export; + ExportInstruction *m_last_pos_export; + + int m_num_clip_dist; + int m_enabled_stream_buffers_mask; + const pipe_stream_output_info *m_so_info; + r600_pipe_shader *m_pipe_shader; + const r600_shader_key& m_key; + + +}; + +class VertexStageExportForGS : public VertexStageWithOutputInfo +{ +public: + VertexStageExportForGS(VertexStage& proc, + const r600_shader *gs_shader); + void finalize_exports() override; + +private: + bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; + unsigned m_num_clip_dist; + const r600_shader *m_gs_shader; +}; + +class VertexStageExportForES : public VertexStageExportBase +{ +public: + VertexStageExportForES(VertexStage& proc); + void finalize_exports() override; +private: + bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; +}; + + +} + +#endif // VERTEXSTAGEEXPORT_H |