author     Jonathan Gray <jsg@cvs.openbsd.org>  2021-07-22 10:50:50 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>  2021-07-22 10:50:50 +0000
commit     9130ec005fbc78a62420643414d8354d0929ca50 (patch)
tree       6762777acdd2d4eee17ef87290e80dc7afe2b73d /lib/mesa/src/gallium/drivers/r600
parent     ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (diff)
Merge Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/r600')
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/Makefile.sources | 78
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/eg_asm.c | 2
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/eg_debug.c | 10
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/evergreen_compute.c | 71
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c | 4
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/evergreen_state.c | 151
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_asm.c | 29
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_asm.h | 6
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_blit.c | 3
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_hw_context.c | 26
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_pipe.c | 72
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_pipe.h | 10
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_shader.c | 260
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_shader.h | 10
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_state.c | 107
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_state_common.c | 211
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/r600_uvd.c | 11
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h | 41
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp | 34
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp | 4
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h | 12
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp | 37
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp | 5
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig | 2
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp | 325
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h | 377
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp | 139
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h | 47
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp | 195
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h | 69
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp | 139
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h | 121
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h | 318
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt | 45
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp | 985
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h | 115
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp | 164
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h | 101
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp | 644
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h | 57
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp | 671
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h | 96
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp | 183
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h | 144
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp | 187
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h | 155
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp | 57
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h | 82
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp | 195
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h | 142
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp | 341
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h | 185
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp | 480
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h | 187
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp | 180
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h | 225
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp | 151
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h | 82
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp | 68
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h | 69
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp | 414
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h | 143
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp | 1450
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h | 45
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp | 1006
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h | 314
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp | 1076
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h | 161
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp | 462
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h | 38
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp | 575
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c | 466
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp | 1179
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h | 224
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp | 112
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h | 62
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp | 1085
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h | 117
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp | 343
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h | 81
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp | 108
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h | 33
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp | 123
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h | 39
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp | 230
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h | 83
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp | 448
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h | 176
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp | 242
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h | 194
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp | 380
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h | 208
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp | 526
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h | 242
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp | 535
-rw-r--r--  lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h | 116
96 files changed, 21571 insertions, 382 deletions
diff --git a/lib/mesa/src/gallium/drivers/r600/Makefile.sources b/lib/mesa/src/gallium/drivers/r600/Makefile.sources
index 763a49a07..52563b257 100644
--- a/lib/mesa/src/gallium/drivers/r600/Makefile.sources
+++ b/lib/mesa/src/gallium/drivers/r600/Makefile.sources
@@ -87,7 +87,81 @@ CXX_SOURCES = \
sb/sb_shader.cpp \
sb/sb_shader.h \
sb/sb_ssa_builder.cpp \
- sb/sb_valtable.cpp
+ sb/sb_valtable.cpp \
+ sfn/sfn_alu_defines.cpp \
+ sfn/sfn_alu_defines.h \
+ sfn/sfn_callstack.cpp \
+ sfn/sfn_callstack.h \
+ sfn/sfn_conditionaljumptracker.cpp \
+ sfn/sfn_conditionaljumptracker.h \
+ sfn/sfn_defines.h \
+ sfn/sfn_debug.cpp \
+ sfn/sfn_debug.h \
+ sfn/sfn_emitaluinstruction.cpp \
+ sfn/sfn_emitaluinstruction.h \
+ sfn/sfn_emitinstruction.cpp \
+ sfn/sfn_emitinstruction.h \
+ sfn/sfn_emitssboinstruction.cpp \
+ sfn/sfn_emitssboinstruction.h \
+ sfn/sfn_emittexinstruction.cpp \
+ sfn/sfn_emittexinstruction.h \
+ sfn/sfn_emitinstruction.h \
+ sfn/sfn_instruction_alu.cpp \
+ sfn/sfn_instruction_alu.h \
+ sfn/sfn_instruction_base.cpp \
+ sfn/sfn_instruction_base.h \
+ sfn/sfn_instruction_block.cpp \
+ sfn/sfn_instruction_block.h \
+ sfn/sfn_instruction_cf.cpp \
+ sfn/sfn_instruction_cf.h \
+ sfn/sfn_instruction_export.cpp \
+ sfn/sfn_instruction_export.h \
+ sfn/sfn_instruction_fetch.cpp \
+ sfn/sfn_instruction_fetch.h \
+ sfn/sfn_instruction_lds.cpp \
+ sfn/sfn_instruction_lds.h \
+ sfn/sfn_instruction_gds.cpp \
+ sfn/sfn_instruction_gds.h \
+ sfn/sfn_instruction_misc.cpp \
+ sfn/sfn_instruction_misc.h \
+ sfn/sfn_instruction_tex.cpp \
+ sfn/sfn_instruction_tex.h \
+ sfn/sfn_ir_to_assembly.cpp \
+ sfn/sfn_ir_to_assembly.h \
+ sfn/sfn_liverange.cpp \
+ sfn/sfn_liverange.h \
+ sfn/sfn_nir.cpp \
+ sfn/sfn_nir.h \
+ sfn/sfn_nir_lower_64bit.cpp \
+ sfn/sfn_nir_lower_fs_out_to_vector.cpp \
+ sfn/sfn_nir_lower_fs_out_to_vector.h \
+ sfn/sfn_nir_lower_tess_io.cpp \
+ sfn/sfn_nir_vectorize_vs_inputs.c \
+ sfn/sfn_shader_base.cpp \
+ sfn/sfn_shader_base.h \
+ sfn/sfn_shader_compute.cpp \
+ sfn/sfn_shader_compute.h \
+ sfn/sfn_shader_fragment.cpp \
+ sfn/sfn_shader_fragment.h \
+ sfn/sfn_shader_geometry.cpp \
+ sfn/sfn_shader_geometry.h \
+ sfn/sfn_shader_tcs.cpp \
+ sfn/sfn_shader_tcs.h \
+ sfn/sfn_shader_tess_eval.cpp \
+ sfn/sfn_shader_tess_eval.h \
+ sfn/sfn_shader_vertex.cpp \
+ sfn/sfn_shader_vertex.h \
+ sfn/sfn_shaderio.cpp \
+ sfn/sfn_shaderio.h \
+ sfn/sfn_value.cpp \
+ sfn/sfn_value.h \
+ sfn/sfn_value_gpr.cpp \
+ sfn/sfn_value_gpr.h \
+ sfn/sfn_valuepool.cpp \
+ sfn/sfn_valuepool.h \
+ sfn/sfn_vertexstageexport.cpp \
+ sfn/sfn_vertexstageexport.h
R600_GENERATED_FILES = \
- egd_tables.h
\ No newline at end of file
+ egd_tables.h \
+ sfn_nir_algebraic.c
diff --git a/lib/mesa/src/gallium/drivers/r600/eg_asm.c b/lib/mesa/src/gallium/drivers/r600/eg_asm.c
index acf3fd374..9468e4b01 100644
--- a/lib/mesa/src/gallium/drivers/r600/eg_asm.c
+++ b/lib/mesa/src/gallium/drivers/r600/eg_asm.c
@@ -189,7 +189,7 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_MOVA_INT;
alu.src[0].sel = bc->index_reg[id];
- alu.src[0].chan = 0;
+ alu.src[0].chan = bc->index_reg_chan[id];
if (bc->chip_class == CAYMAN)
alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
diff --git a/lib/mesa/src/gallium/drivers/r600/eg_debug.c b/lib/mesa/src/gallium/drivers/r600/eg_debug.c
index 56195df29..853b61044 100644
--- a/lib/mesa/src/gallium/drivers/r600/eg_debug.c
+++ b/lib/mesa/src/gallium/drivers/r600/eg_debug.c
@@ -256,7 +256,7 @@ static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
COLOR_RESET "\n");
break;
}
- /* fall through, print all dwords */
+ FALLTHROUGH; /* print all dwords */
default:
for (i = 0; i < count+1; i++) {
print_spaces(f, INDENT_PKT);
@@ -305,7 +305,7 @@ static void eg_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
num_dw--;
break;
}
- /* fall through */
+ FALLTHROUGH;
default:
fprintf(f, "Unknown packet type %i\n", type);
return;
@@ -332,10 +332,10 @@ static void eg_dump_last_ib(struct r600_context *rctx, FILE *f)
* waited for the context, so this buffer should be idle.
* If the GPU is hung, there is no point in waiting for it.
*/
- uint32_t *map = rctx->b.ws->buffer_map(rctx->last_trace_buf->buf,
+ uint32_t *map = rctx->b.ws->buffer_map(rctx->b.ws, rctx->last_trace_buf->buf,
NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED |
- PIPE_TRANSFER_READ);
+ PIPE_MAP_UNSYNCHRONIZED |
+ PIPE_MAP_READ);
if (map)
last_trace_id = *map;
}
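
[Editorial note] Two renames recur throughout this merge and are easiest to read as one pattern: the PIPE_TRANSFER_* map flags became PIPE_MAP_* (likewise RADEON_TRANSFER_TEMPORARY became RADEON_MAP_TEMPORARY), and the winsys buffer_map()/buffer_unmap() hooks now take the winsys itself as their first argument. A condensed before/after sketch modeled on the hunk above; the NULL argument is the optional command stream used for synchronization:

	/* Sketch of the new call shape; the old one is shown in the comment. */
	static void *map_for_read_sketch(struct radeon_winsys *ws, struct pb_buffer *buf)
	{
	        /* was: ws->buffer_map(buf, NULL,
	         *                     PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ); */
	        return ws->buffer_map(ws, buf, NULL,
	                              PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ);
	}
	/* ...and symmetrically: ws->buffer_unmap(ws, buf);  (was: ws->buffer_unmap(buf);) */
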
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c b/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
index 419738eec..0602a54dc 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
@@ -193,7 +193,7 @@ static void evergreen_cs_set_constant_buffer(struct r600_context *rctx,
cb.buffer = buffer;
cb.user_buffer = NULL;
- rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, &cb);
+ rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, false, &cb);
}
/* We need to define these R600 registers here, because we can't include
@@ -441,8 +441,9 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
shader->ir_type = cso->ir_type;
- if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
- shader->sel = r600_create_shader_state_tokens(ctx, cso->prog, PIPE_SHADER_COMPUTE);
+ if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+ shader->ir_type == PIPE_SHADER_IR_NIR) {
+ shader->sel = r600_create_shader_state_tokens(ctx, cso->prog, cso->ir_type, PIPE_SHADER_COMPUTE);
return shader;
}
#ifdef HAVE_OPENCL
@@ -457,10 +458,10 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
shader->bc.ndw * 4);
p = r600_buffer_map_sync_with_rings(
&rctx->b, shader->code_bo,
- PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+ PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
//TODO: use util_memcpy_cpu_to_le32 ?
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
- rctx->b.ws->buffer_unmap(shader->code_bo->buf);
+ rctx->b.ws->buffer_unmap(rctx->b.ws, shader->code_bo->buf);
#endif
return shader;
@@ -476,7 +477,8 @@ static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state
if (!shader)
return;
- if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
+ if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+ shader->ir_type == PIPE_SHADER_IR_NIR) {
r600_delete_shader_selector(ctx, shader->sel);
} else {
#ifdef HAVE_OPENCL
@@ -500,12 +502,14 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx, void *state)
return;
}
- if (cstate->ir_type == PIPE_SHADER_IR_TGSI) {
+ if (cstate->ir_type == PIPE_SHADER_IR_TGSI ||
+ cstate->ir_type == PIPE_SHADER_IR_NIR) {
bool compute_dirty;
-
- r600_shader_select(ctx, cstate->sel, &compute_dirty);
+ cstate->sel->ir_type = cstate->ir_type;
+ if (r600_shader_select(ctx, cstate->sel, &compute_dirty))
+ R600_ERR("Failed to select compute shader\n");
}
-
+
rctx->cs_shader_state.shader = (struct r600_pipe_compute *)state;
}
@@ -553,7 +557,7 @@ static void evergreen_compute_upload_input(struct pipe_context *ctx,
u_box_1d(0, input_size, &box);
num_work_groups_start = ctx->transfer_map(ctx,
(struct pipe_resource*)shader->kernel_param,
- 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
+ 0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
&box, &transfer);
global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
@@ -594,7 +598,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
uint32_t indirect_grid[3])
{
int i;
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
unsigned num_waves;
@@ -604,9 +608,10 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
int grid_size = 1;
unsigned lds_size = shader->local_size / 4;
- if (shader->ir_type != PIPE_SHADER_IR_TGSI)
+ if (shader->ir_type != PIPE_SHADER_IR_TGSI &&
+ shader->ir_type != PIPE_SHADER_IR_NIR)
lds_size += shader->bc.nlds_dw;
-
+
/* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) {
group_size *= info->block[i];
@@ -673,7 +678,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
static void compute_setup_cbs(struct r600_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned i;
/* Emit colorbuffers. */
@@ -715,7 +720,7 @@ static void compute_setup_cbs(struct r600_context *rctx)
static void compute_emit_cs(struct r600_context *rctx,
const struct pipe_grid_info *info)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
bool compute_dirty = false;
struct r600_pipe_shader *current;
struct r600_shader_atomic combined_atomics[8];
@@ -723,7 +728,7 @@ static void compute_emit_cs(struct r600_context *rctx,
uint32_t indirect_grid[3] = { 0, 0, 0 };
/* make sure that the gfx ring is only one active */
- if (radeon_emitted(rctx->b.dma.cs, 0)) {
+ if (radeon_emitted(&rctx->b.dma.cs, 0)) {
rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
}
@@ -734,8 +739,13 @@ static void compute_emit_cs(struct r600_context *rctx,
rctx->cmd_buf_is_compute = true;
}
- if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
- r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
+ if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI||
+ rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR) {
+ if (r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty)) {
+ R600_ERR("Failed to select compute shader\n");
+ return;
+ }
+
current = rctx->cs_shader_state.shader->sel->current;
if (compute_dirty) {
rctx->cs_shader_state.atom.num_dw = current->command_buffer.num_dw;
@@ -748,7 +758,7 @@ static void compute_emit_cs(struct r600_context *rctx,
if (info->indirect) {
struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect;
- unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_TRANSFER_READ);
+ unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_MAP_READ);
unsigned offset = info->indirect_offset / 4;
indirect_grid[0] = data[offset];
indirect_grid[1] = data[offset + 1];
@@ -786,7 +796,8 @@ static void compute_emit_cs(struct r600_context *rctx,
/* emit config state */
if (rctx->b.chip_class == EVERGREEN) {
- if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
+ if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI||
+ rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR) {
radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
radeon_emit(cs, 0);
@@ -799,7 +810,8 @@ static void compute_emit_cs(struct r600_context *rctx,
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
r600_flush_emit(rctx);
- if (rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_TGSI) {
+ if (rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_TGSI &&
+ rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_NIR) {
compute_setup_cbs(rctx);
@@ -855,7 +867,8 @@ static void compute_emit_cs(struct r600_context *rctx,
radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0));
radeon_emit(cs, 0);
}
- if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI)
+ if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI ||
+ rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR)
evergreen_emit_atomic_buffer_save(rctx, true, combined_atomics, &atomic_used_mask);
#if 0
@@ -877,12 +890,13 @@ void evergreen_emit_cs_shader(struct r600_context *rctx,
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint64_t va;
struct r600_resource *code_bo;
unsigned ngpr, nstack;
- if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
+ if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+ shader->ir_type == PIPE_SHADER_IR_NIR) {
code_bo = shader->sel->current->bo;
va = shader->sel->current->bo->gpu_address;
ngpr = shader->sel->current->shader.bc.ngpr;
@@ -916,7 +930,8 @@ static void evergreen_launch_grid(struct pipe_context *ctx,
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
boolean use_kill;
- if (shader->ir_type != PIPE_SHADER_IR_TGSI) {
+ if (shader->ir_type != PIPE_SHADER_IR_TGSI &&
+ shader->ir_type != PIPE_SHADER_IR_NIR) {
rctx->cs_shader_state.pc = info->pc;
/* Get the config information for this kernel. */
r600_shader_binary_read_config(&shader->binary, &shader->bc,
@@ -1243,7 +1258,7 @@ static void *r600_compute_global_transfer_map(struct pipe_context *ctx,
dst = (struct pipe_resource*)item->real_buffer;
- if (usage & PIPE_TRANSFER_READ)
+ if (usage & PIPE_MAP_READ)
buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
@@ -1273,7 +1288,7 @@ static void r600_compute_global_transfer_unmap(struct pipe_context *ctx,
* to an offset within the compute memory pool. The function
* r600_compute_global_transfer_map() maps the memory pool
* resource rather than the struct r600_resource_global passed to
- * it as an argument and then initalizes ptransfer->resource with
+ * it as an argument and then initializes ptransfer->resource with
* the memory pool resource (via pipe_buffer_map_range).
* When transfer_unmap is called it uses the memory pool's
* vtable which calls r600_buffer_transfer_map() rather than
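
[Editorial note] The comment above (cut short by the hunk boundary) describes an indirection worth spelling out: mapping a "global" compute buffer actually maps the backing compute memory pool at the item's offset, so the transfer records the pool resource and the eventual unmap dispatches through the pool's vtable. A minimal sketch of that flow, assuming the pool layout names used here (chunk, pool, bo, start_in_dw) match the driver's compute_memory_pool structures; illustrative only, not the driver's exact code:

	static void *global_transfer_map_sketch(struct pipe_context *ctx,
	                                        struct r600_resource_global *buffer,
	                                        unsigned usage, const struct pipe_box *box,
	                                        struct pipe_transfer **ptransfer)
	{
	        /* The item's storage lives inside the pool's buffer object (assumed layout). */
	        struct pipe_resource *pool_res = (struct pipe_resource *)buffer->chunk->pool->bo;
	        unsigned item_offset = buffer->chunk->start_in_dw * 4; /* assumed field */

	        /* pipe_buffer_map_range() stores pool_res in (*ptransfer)->resource, so the
	         * later transfer_unmap goes through the pool's vtable and lands in
	         * r600_buffer_transfer_unmap(), not the _global variant. */
	        return pipe_buffer_map_range(ctx, pool_res, item_offset + box->x,
	                                     box->width, usage, ptransfer);
	}
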
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c b/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
index da8553886..54bd19fbc 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
uint64_t src_offset,
uint64_t size)
{
- struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
unsigned i, ncopy, csize, sub_cmd, shift;
struct r600_resource *rdst = (struct r600_resource*)dst;
struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -85,7 +85,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
unsigned size, uint32_t clear_value,
enum r600_coherency coher)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
assert(size);
assert(rctx->screen->b.has_cp_dma);
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
index 9c103c590..f76b1e331 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
@@ -427,11 +427,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
dsa->valuemask[1] = state->stencil[1].valuemask;
dsa->writemask[0] = state->stencil[0].writemask;
dsa->writemask[1] = state->stencil[1].writemask;
- dsa->zwritemask = state->depth.writemask;
+ dsa->zwritemask = state->depth_writemask;
- db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
- S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
+ db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) |
+ S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
+ S_028800_ZFUNC(state->depth_func);
/* stencil */
if (state->stencil[0].enabled) {
@@ -453,10 +453,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
/* alpha */
alpha_test_control = 0;
alpha_ref = 0;
- if (state->alpha.enabled) {
- alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
+ if (state->alpha_enabled) {
+ alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func);
alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
- alpha_ref = fui(state->alpha.ref_value);
+ alpha_ref = fui(state->alpha_ref_value);
}
dsa->sx_alpha_test_control = alpha_test_control & 0xff;
dsa->alpha_ref = alpha_ref;
@@ -514,15 +514,13 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
}
spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
- if (state->sprite_coord_enable) {
- spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(2) |
- S_0286D4_PNT_SPRITE_OVRD_Y(3) |
- S_0286D4_PNT_SPRITE_OVRD_Z(0) |
- S_0286D4_PNT_SPRITE_OVRD_W(1);
- if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
- spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
- }
+ spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(2) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(1);
+ if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+ spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
}
r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3);
@@ -576,6 +574,8 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
: state->max_anisotropy;
unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
+ bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
float max_lod = state->max_lod;
if (!ss) {
@@ -610,6 +610,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
ss->tex_sampler_words[2] =
S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
(state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
+ S_03C008_TRUNCATE_COORD(trunc_coord) |
S_03C008_TYPE(1);
if (ss->border_color_use) {
@@ -755,7 +756,7 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
case PIPE_FORMAT_X32_S8X24_UINT:
params->pipe_format = PIPE_FORMAT_S8_UINT;
tile_split = tmp->surface.u.legacy.stencil_tile_split;
- surflevel = tmp->surface.u.legacy.stencil_level;
+ surflevel = tmp->surface.u.legacy.zs.stencil_level;
break;
default:;
}
@@ -846,7 +847,7 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
S_030004_TEX_DEPTH(depth - 1) |
S_030004_ARRAY_MODE(array_mode));
- tex_resource_words[2] = (surflevel[base_level].offset + va) >> 8;
+ tex_resource_words[2] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
*skip_mip_address_reloc = false;
/* TEX_RESOURCE_WORD3.MIP_ADDRESS */
@@ -860,9 +861,9 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
tex_resource_words[3] = (tmp->fmask.offset + va) >> 8;
}
} else if (last_level && texture->nr_samples <= 1) {
- tex_resource_words[3] = (surflevel[1].offset + va) >> 8;
+ tex_resource_words[3] = ((uint64_t)surflevel[1].offset_256B * 256 + va) >> 8;
} else {
- tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
+ tex_resource_words[3] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
}
last_layer = params->last_layer;
@@ -974,7 +975,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_config_state *a = (struct r600_config_state*)atom;
radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
@@ -1001,7 +1002,7 @@ static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_a
static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct pipe_clip_state *state = &rctx->clip_state.state;
radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
@@ -1123,7 +1124,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;
int i;
- color->offset = rtex->surface.u.legacy.level[level].offset;
+ color->offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
color->view = S_028C6C_SLICE_START(first_layer) |
S_028C6C_SLICE_MAX(last_layer);
@@ -1251,7 +1252,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
color->info |= S_028C70_COMPRESSION(1);
}
- /* EXPORT_NORM is an optimzation that can be enabled for better
+ /* EXPORT_NORM is an optimization that can be enabled for better
* performance in certain cases.
* EXPORT_NORM can be enabled if:
* - 11-bit or smaller UNORM/SNORM/SRGB
@@ -1281,7 +1282,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
}
/**
- * This function intializes the CB* register values for RATs. It is meant
+ * This function initializes the CB* register values for RATs. It is meant
* to be used for 1D aligned buffers that do not have an associated
* radeon_surf.
*/
@@ -1360,7 +1361,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
assert(format != ~0);
offset = rtex->resource.gpu_address;
- offset += rtex->surface.u.legacy.level[level].offset;
+ offset += (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
switch (rtex->surface.u.legacy.level[level].mode) {
case RADEON_SURF_MODE_2D:
@@ -1410,7 +1411,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
stile_split = eg_tile_split(stile_split);
- stencil_offset = rtex->surface.u.legacy.stencil_level[level].offset;
+ stencil_offset = (uint64_t)rtex->surface.u.legacy.zs.stencil_level[level].offset_256B * 256;
stencil_offset += rtex->resource.gpu_address;
surf->db_stencil_base = stencil_offset >> 8;
@@ -1657,7 +1658,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned max_dist = 0;
switch (nr_samples) {
@@ -1706,7 +1707,7 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at
{
struct r600_image_state *state = (struct r600_image_state *)atom;
struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state;
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_texture *rtex;
struct r600_resource *resource;
int i;
@@ -1833,7 +1834,7 @@ static void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struc
static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
unsigned nr_cbufs = state->nr_cbufs;
unsigned i, tl, br;
@@ -1972,7 +1973,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
float offset_units = state->offset_units;
float offset_scale = state->offset_scale;
@@ -2030,7 +2031,7 @@ uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_
static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
unsigned fb_colormask = a->bound_cbufs_target_mask;
unsigned ps_colormask = a->ps_color_export_mask;
@@ -2045,7 +2046,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_db_state *a = (struct r600_db_state*)atom;
if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -2068,7 +2069,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
unsigned db_render_control = 0;
unsigned db_count_control = 0;
@@ -2123,7 +2124,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
unsigned resource_offset,
unsigned pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -2182,7 +2183,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
unsigned reg_alu_const_cache,
unsigned pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -2334,7 +2335,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
unsigned resource_id_base, unsigned pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -2443,7 +2444,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
unsigned border_index_reg,
unsigned pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
union pipe_color_union border_color = {{0,0,0,1}};
union pipe_color_union *border_color_ptr = &border_color;
@@ -2527,14 +2528,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
uint8_t mask = s->sample_mask;
- radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+ radeon_set_context_reg(&rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
mask | (mask << 8) | (mask << 16) | (mask << 24));
}
static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
{
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint16_t mask = s->sample_mask;
radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -2544,7 +2545,7 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
@@ -2561,7 +2562,7 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0;
@@ -2665,7 +2666,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
@@ -3389,8 +3390,9 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
tmp |= S_028644_FLAT_SHADE(1);
}
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- (sprite_coord_enable & (1 << rshader->input[i].sid))) {
+ if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD ||
+ (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD &&
+ (sprite_coord_enable & (1 << rshader->input[i].sid)))) {
tmp |= S_028644_PT_SPRITE_TEX(1);
}
@@ -3712,7 +3714,7 @@ void *evergreen_create_fastclear_blend(struct r600_context *rctx)
void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
{
- struct pipe_depth_stencil_alpha_state dsa = {{0}};
+ struct pipe_depth_stencil_alpha_state dsa = {{{0}}};
return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa);
}
@@ -3774,7 +3776,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
unsigned pitch,
unsigned bpp)
{
- struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -3811,8 +3813,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
x = src_x;
y = src_y;
z = src_z;
- base = rsrc->surface.u.legacy.level[src_level].offset;
- addr = rdst->surface.u.legacy.level[dst_level].offset;
+ base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
+ addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
addr += dst_y * pitch + dst_x * bpp;
bank_h = eg_bank_wh(rsrc->surface.u.legacy.bankh);
@@ -3836,8 +3838,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
x = dst_x;
y = dst_y;
z = dst_z;
- base = rdst->surface.u.legacy.level[dst_level].offset;
- addr = rsrc->surface.u.legacy.level[src_level].offset;
+ base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
+ addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z;
addr += src_y * pitch + src_x * bpp;
bank_h = eg_bank_wh(rdst->surface.u.legacy.bankh);
@@ -3896,7 +3898,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
unsigned src_x, src_y;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (rctx->b.dma.cs == NULL) {
+ if (rctx->b.dma.cs.priv == NULL) {
goto fallback;
}
@@ -3959,10 +3961,10 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
* dst_x/y == 0
* dst_pitch == src_pitch
*/
- src_offset= rsrc->surface.u.legacy.level[src_level].offset;
+ src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z;
src_offset += src_y * src_pitch + src_x * bpp;
- dst_offset = rdst->surface.u.legacy.level[dst_level].offset;
+ dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
dst_offset += dst_y * dst_pitch + dst_x * bpp;
evergreen_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset,
@@ -4148,7 +4150,7 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
static void evergreen_set_shader_images(struct pipe_context *ctx,
enum pipe_shader_type shader, unsigned start_slot,
- unsigned count,
+ unsigned count, unsigned unbind_num_trailing_slots,
const struct pipe_image_view *images)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -4162,7 +4164,9 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
unsigned old_mask;
struct r600_image_state *istate = NULL;
int idx;
- if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE && count == 0)
+ if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE)
+ return;
+ if (!count && !unbind_num_trailing_slots)
return;
if (shader == PIPE_SHADER_FRAGMENT)
@@ -4305,6 +4309,16 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
istate->enabled_mask |= (1 << i);
}
+ for (i = start_slot + count, idx = 0;
+ i < start_slot + count + unbind_num_trailing_slots; i++, idx++) {
+ rview = &istate->views[i];
+
+ pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
+ istate->enabled_mask &= ~(1 << i);
+ istate->compressed_colortex_mask &= ~(1 << i);
+ istate->compressed_depthtex_mask &= ~(1 << i);
+ }
+
istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
istate->dirty_buffer_constants = TRUE;
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
@@ -4523,11 +4537,11 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
if (!rctx->tes_shader) {
rctx->lds_alloc = 0;
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
- R600_LDS_INFO_CONST_BUFFER, NULL);
+ R600_LDS_INFO_CONST_BUFFER, false, NULL);
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
- R600_LDS_INFO_CONST_BUFFER, NULL);
+ R600_LDS_INFO_CONST_BUFFER, false, NULL);
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
- R600_LDS_INFO_CONST_BUFFER, NULL);
+ R600_LDS_INFO_CONST_BUFFER, false, NULL);
return;
}
@@ -4587,12 +4601,11 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
constbuf.buffer_size = 8 * 4;
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
- R600_LDS_INFO_CONST_BUFFER, &constbuf);
+ R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
- R600_LDS_INFO_CONST_BUFFER, &constbuf);
+ R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
- R600_LDS_INFO_CONST_BUFFER, &constbuf);
- pipe_resource_reference(&constbuf.buffer, NULL);
+ R600_LDS_INFO_CONST_BUFFER, true, &constbuf);
}
uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
@@ -4750,7 +4763,7 @@ bool evergreen_adjust_gprs(struct r600_context *rctx)
void eg_trace_emit(struct r600_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned reloc;
if (rctx->b.chip_class < EVERGREEN)
@@ -4780,7 +4793,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
struct r600_resource *resource,
uint32_t pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
resource,
RADEON_USAGE_READ,
@@ -4803,7 +4816,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx,
struct r600_resource *resource,
uint32_t pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t event = EVENT_TYPE_PS_DONE;
uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0;
uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
@@ -4830,7 +4843,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx,
struct r600_resource *resource,
uint32_t pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t event = EVENT_TYPE_PS_DONE;
uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
resource,
@@ -4856,7 +4869,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
struct r600_resource *resource,
uint32_t pkt_flags)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
resource,
RADEON_USAGE_READ,
@@ -4951,7 +4964,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
struct r600_shader_atomic *combined_atomics,
uint8_t *atomic_used_mask_p)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
uint32_t pkt_flags = 0;
uint32_t event = EVENT_TYPE_PS_DONE;
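
[Editorial note] One more pattern repeated through the evergreen_state.c hunks: legacy surface levels now store their offset in 256-byte units (offset_256B) instead of bytes, so every former level[n].offset read becomes a widened multiply. Schematically:

	static uint64_t level_offset_sketch(const struct r600_texture *rtex, unsigned level)
	{
	        /* was: return rtex->surface.u.legacy.level[level].offset; */
	        return (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
	}

The stencil variant moved as well, from u.legacy.stencil_level[] to u.legacy.zs.stencil_level[].
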
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_asm.c b/lib/mesa/src/gallium/drivers/r600/r600_asm.c
index 6affa3d3a..6a9690f69 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_asm.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_asm.c
@@ -362,7 +362,7 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
}
assignment[4] = alu;
} else {
- if (assignment[chan]) {
+ if (assignment[chan]) {
assert(0); /* ALU.chan has already been allocated. */
return -1;
}
@@ -686,7 +686,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
return 0;
}
-void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
{
switch(value) {
case 0:
@@ -704,14 +704,6 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne
case 0x3F000000: /* 0.5f */
*sel = V_SQ_ALU_SRC_0_5;
break;
- case 0xBF800000: /* -1.0f */
- *sel = V_SQ_ALU_SRC_1;
- *neg ^= !abs;
- break;
- case 0xBF000000: /* -0.5f */
- *sel = V_SQ_ALU_SRC_0_5;
- *neg ^= !abs;
- break;
default:
*sel = V_SQ_ALU_SRC_LITERAL;
break;
@@ -1232,7 +1224,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
/* Load index register if required */
if (bc->chip_class >= EVERGREEN) {
for (i = 0; i < 3; i++)
- if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
+ if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
egcm_load_index_reg(bc, 0, true);
}
@@ -1261,7 +1253,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
}
if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
r600_bytecode_special_constants(nalu->src[i].value,
- &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
+ &nalu->src[i].sel);
}
if (nalu->dst.sel >= bc->ngpr) {
bc->ngpr = nalu->dst.sel + 1;
@@ -1450,7 +1442,9 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t
bc->cf_last->op == CF_OP_TEX) {
struct r600_bytecode_tex *ttex;
LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
- if (ttex->dst_gpr == ntex->src_gpr) {
+ if (ttex->dst_gpr == ntex->src_gpr &&
+ (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
+ ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
bc->force_add_cf = 1;
break;
}
@@ -2638,7 +2632,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
uint32_t *bytecode;
int i, j, r, fs_size;
struct r600_fetch_shader *shader;
- unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB;
+ unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB ||
+ (rctx->screen->b.debug_flags & DBG_NIR);
unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
assert(count < 32);
@@ -2763,7 +2758,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
return NULL;
}
- u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
+ u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
&shader->offset,
(struct pipe_resource**)&shader->buffer);
if (!shader->buffer) {
@@ -2774,7 +2769,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
bytecode = r600_buffer_map_sync_with_rings
(&rctx->b, shader->buffer,
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
+ PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {
@@ -2784,7 +2779,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
} else {
memcpy(bytecode, bc.bytecode, fs_size);
}
- rctx->b.ws->buffer_unmap(shader->buffer->buf);
+ rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);
r600_bytecode_clear(&bc);
return shader;
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_asm.h b/lib/mesa/src/gallium/drivers/r600/r600_asm.h
index 71a3ae1ba..a526993b3 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_asm.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_asm.h
@@ -214,6 +214,8 @@ struct r600_bytecode_cf {
struct r600_bytecode_alu *prev_bs_head;
struct r600_bytecode_alu *prev2_bs_head;
unsigned isa[2];
+ unsigned nlds_read;
+ unsigned nqueue_read;
};
#define FC_NONE 0
@@ -276,6 +278,7 @@ struct r600_bytecode {
unsigned r6xx_nop_after_rel_dst;
bool index_loaded[2];
unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
+ unsigned index_reg_chan[2]; /* indexing register channel CF_INDEX_[01] */
unsigned debug_id;
struct r600_isa* isa;
struct r600_bytecode_output pending_outputs[5];
@@ -318,8 +321,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
unsigned op);
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
const struct r600_bytecode_alu *alu, unsigned type);
-void r600_bytecode_special_constants(uint32_t value,
- unsigned *sel, unsigned *neg, unsigned abs);
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
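
[Editorial note] The r600_asm.c/h hunks above shrink r600_bytecode_special_constants() to a pure value-to-sel mapping: the removed -1.0f/-0.5f branches used to fold a negative literal by flipping the neg modifier (*neg ^= !abs), which is why the old signature needed neg and abs at all. What remains is, in sketch form (the 1.0f case is not visible in the hunk and is assumed from the removed -1.0f branch, which mapped onto V_SQ_ALU_SRC_1):

	void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
	{
	        switch (value) {
	        case 0:          *sel = V_SQ_ALU_SRC_0;       break; /* 0.0f */
	        case 0x3F800000: *sel = V_SQ_ALU_SRC_1;       break; /* 1.0f (assumed) */
	        case 0x3F000000: *sel = V_SQ_ALU_SRC_0_5;     break; /* 0.5f */
	        default:         *sel = V_SQ_ALU_SRC_LITERAL; break;
	        }
	}
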
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_blit.c b/lib/mesa/src/gallium/drivers/r600/r600_blit.c
index 606b3892e..b8924f826 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_blit.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_blit.c
@@ -463,6 +463,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
}
static void r600_clear(struct pipe_context *ctx, unsigned buffers,
+ const struct pipe_scissor_state *scissor_state,
const union pipe_color_union *color,
double depth, unsigned stencil)
{
@@ -660,7 +661,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds
r600_blitter_end(ctx);
} else {
uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst),
- PIPE_TRANSFER_WRITE);
+ PIPE_MAP_WRITE);
map += offset / 4;
size /= 4;
for (unsigned i = 0; i < size; i++)
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c b/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
index 494b7ed69..de032c6dc 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
@@ -34,17 +34,17 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in, unsigned num_atomics)
{
/* Flush the DMA IB if it's not empty. */
- if (radeon_emitted(ctx->b.dma.cs, 0))
+ if (radeon_emitted(&ctx->b.dma.cs, 0))
ctx->b.dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
- if (!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs,
+ if (!radeon_cs_memory_below_limit(ctx->b.screen, &ctx->b.gfx.cs,
ctx->b.vram, ctx->b.gtt)) {
ctx->b.gtt = 0;
ctx->b.vram = 0;
ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
return;
}
- /* all will be accounted once relocation are emited */
+ /* all will be accounted once relocations are emitted */
ctx->b.gtt = 0;
ctx->b.vram = 0;
@@ -84,14 +84,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += 10;
/* Flush if there's not enough space. */
- if (!ctx->b.ws->cs_check_space(ctx->b.gfx.cs, num_dw, false)) {
+ if (!ctx->b.ws->cs_check_space(&ctx->b.gfx.cs, num_dw, false)) {
ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
}
}
void r600_flush_emit(struct r600_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned cp_coher_cntl = 0;
unsigned wait_until = 0;
@@ -260,7 +260,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_context *ctx = context;
- struct radeon_cmdbuf *cs = ctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &ctx->b.gfx.cs;
struct radeon_winsys *ws = ctx->b.ws;
if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
@@ -345,7 +345,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->b.vram = 0;
/* Begin a new CS. */
- r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);
+ r600_emit_command_buffer(&ctx->b.gfx.cs, &ctx->start_cs_cmd);
/* Re-emit states. */
r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
@@ -430,13 +430,13 @@ void r600_begin_new_cs(struct r600_context *ctx)
ctx->last_rast_prim = -1;
ctx->current_rast_prim = -1;
- assert(!ctx->b.gfx.cs->prev_dw);
- ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
+ assert(!ctx->b.gfx.cs.prev_dw);
+ ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs.current.cdw;
}
void r600_emit_pfp_sync_me(struct r600_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
if (rctx->b.chip_class >= EVERGREEN &&
rctx->b.screen->info.drm_minor >= 46) {
@@ -451,7 +451,7 @@ void r600_emit_pfp_sync_me(struct r600_context *rctx)
uint64_t va;
/* 16-byte address alignment is required by WAIT_REG_MEM. */
- u_suballocator_alloc(rctx->b.allocator_zeroed_memory, 4, 16,
+ u_suballocator_alloc(&rctx->b.allocator_zeroed_memory, 4, 16,
&offset, (struct pipe_resource**)&buf);
if (!buf) {
/* This is too heavyweight, but will work. */
@@ -502,7 +502,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
struct pipe_resource *src, uint64_t src_offset,
unsigned size)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
assert(size);
assert(rctx->screen->b.has_cp_dma);
@@ -584,7 +584,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
uint64_t src_offset,
uint64_t size)
{
- struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
unsigned i, ncopy, csize;
struct r600_resource *rdst = (struct r600_resource*)dst;
struct r600_resource *rsrc = (struct r600_resource*)src;
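
[Editorial note] The most frequent mechanical change in this merge, visible in nearly every file: the gfx and dma command buffers are now embedded in the common context instead of heap-allocated by the winsys, so every rctx->b.gfx.cs read becomes &rctx->b.gfx.cs, existence checks test the inner priv pointer (rctx->b.dma.cs.priv == NULL above), and cs_create() fills caller-provided storage. A rough sketch of the shape of that change, with a simplified struct name:

	/* Sketch (assumed simplified type): the cmdbuf is a member, not a pointer. */
	struct r600_gfx_ring { struct radeon_cmdbuf cs; };  /* was: struct radeon_cmdbuf *cs */

	static void create_cs_sketch(struct radeon_winsys *ws, struct radeon_winsys_ctx *ctx,
	                             struct r600_gfx_ring *ring,
	                             void (*flush_fn)(void *, unsigned, struct pipe_fence_handle **),
	                             void *user)
	{
	        /* was: ring->cs = ws->cs_create(ctx, RING_GFX, flush_fn, user, false); */
	        ws->cs_create(&ring->cs, ctx, RING_GFX, flush_fn, user, false);
	}
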
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe.c b/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
index f26da31d2..9e11c7442 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
@@ -55,6 +55,7 @@ static const struct debug_named_value r600_debug_options[] = {
{ "sbnofallback", DBG_SB_NO_FALLBACK, "Abort on errors instead of fallback" },
{ "sbdisasm", DBG_SB_DISASM, "Use sb disassembler for shader dumps" },
{ "sbsafemath", DBG_SB_SAFEMATH, "Disable unsafe math optimizations" },
+ { "nirsb", DBG_NIR_SB, "Enable NIR with SB optimizer"},
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -81,7 +82,7 @@ static void r600_destroy_context(struct pipe_context *context)
if (rctx->append_fence)
pipe_resource_reference((struct pipe_resource**)&rctx->append_fence, NULL);
for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
- rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, NULL);
+ rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, NULL);
free(rctx->driver_consts[sh].constants);
}
@@ -113,14 +114,12 @@ static void r600_destroy_context(struct pipe_context *context)
for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh)
for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i)
- rctx->b.b.set_constant_buffer(context, sh, i, NULL);
+ rctx->b.b.set_constant_buffer(context, sh, i, false, NULL);
if (rctx->blitter) {
util_blitter_destroy(rctx->blitter);
}
- if (rctx->allocator_fetch_shader) {
- u_suballocator_destroy(rctx->allocator_fetch_shader);
- }
+ u_suballocator_destroy(&rctx->allocator_fetch_shader);
r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -211,15 +210,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
goto fail;
}
- rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
- r600_context_gfx_flush, rctx, false);
+ ws->cs_create(&rctx->b.gfx.cs, rctx->b.ctx, RING_GFX,
+ r600_context_gfx_flush, rctx, false);
rctx->b.gfx.flush = r600_context_gfx_flush;
- rctx->allocator_fetch_shader =
- u_suballocator_create(&rctx->b.b, 64 * 1024,
- 0, PIPE_USAGE_DEFAULT, 0, FALSE);
- if (!rctx->allocator_fetch_shader)
- goto fail;
+ u_suballocator_init(&rctx->allocator_fetch_shader, &rctx->b.b, 64 * 1024,
+ 0, PIPE_USAGE_DEFAULT, 0, FALSE);
rctx->isa = calloc(1, sizeof(struct r600_isa));
if (!rctx->isa || r600_isa_init(rctx, rctx->isa))
@@ -249,6 +245,12 @@ fail:
return NULL;
}
+static bool is_nir_enabled(struct r600_common_screen *screen) {
+ return ((screen->debug_flags & DBG_NIR_PREFERRED) &&
+ screen->family >= CHIP_CEDAR &&
+ screen->family < CHIP_CAYMAN);
+}
+
/*
* pipe_screen
*/
@@ -282,6 +284,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_SHADER_SATURATE:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
@@ -317,8 +320,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_NIR_ATOMICS_AS_DEREF:
return 1;
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ return 0;
+
case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
/* Optimal number for good TexSubImage performance on Polaris10. */
return 64 * 1024 * 1024;
@@ -333,8 +340,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return rscreen->b.chip_class > R700;
case PIPE_CAP_TGSI_TEXCOORD:
- return 0;
+ return 1;
+ case PIPE_CAP_NIR_IMAGES_AS_DEREF:
case PIPE_CAP_FAKE_SW_MSAA:
return 0;
@@ -348,11 +356,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 256;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- return 1;
+ return 4;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (family >= CHIP_CEDAR)
- return 430;
+ return is_nir_enabled(&rscreen->b) ? 450 : 430;
/* pre-evergreen geom shaders need newer kernel */
if (rscreen->b.info.drm_minor >= 37)
return 330;
@@ -403,13 +411,21 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
return 0;
+ case PIPE_CAP_INT64:
case PIPE_CAP_DOUBLES:
if (rscreen->b.family == CHIP_ARUBA ||
rscreen->b.family == CHIP_CAYMAN ||
rscreen->b.family == CHIP_CYPRESS ||
rscreen->b.family == CHIP_HEMLOCK)
return 1;
+ if (is_nir_enabled(&rscreen->b))
+ return 1;
return 0;
+ case PIPE_CAP_INT64_DIVMOD:
+ /* it is actually not supported, but the NIR lowering handles this correctly,
+ * whereas the GLSL lowering path seems to not initialize the builtins correctly.
+ */
+ return is_nir_enabled(&rscreen->b);
case PIPE_CAP_CULL_DISTANCE:
return 1;
@@ -542,7 +558,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
{
case PIPE_SHADER_FRAGMENT:
case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_COMPUTE:
break;
case PIPE_SHADER_GEOMETRY:
if (rscreen->b.family >= CHIP_CEDAR)
@@ -553,8 +568,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
return 0;
case PIPE_SHADER_TESS_CTRL:
case PIPE_SHADER_TESS_EVAL:
+ case PIPE_SHADER_COMPUTE:
if (rscreen->b.family >= CHIP_CEDAR)
break;
+ FALLTHROUGH;
default:
return 0;
}
@@ -576,9 +593,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
if (shader == PIPE_SHADER_COMPUTE) {
uint64_t max_const_buffer_size;
- pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
- PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
- &max_const_buffer_size);
+ enum pipe_shader_ir ir_type = is_nir_enabled(&rscreen->b) ?
+ PIPE_SHADER_IR_NIR: PIPE_SHADER_IR_TGSI;
+ pscreen->get_compute_param(pscreen, ir_type,
+ PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+ &max_const_buffer_size);
return MIN2(max_const_buffer_size, INT_MAX);
} else {
@@ -598,6 +617,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
case PIPE_SHADER_CAP_SUBROUTINES:
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_FP16:
+ case PIPE_SHADER_CAP_FP16_DERIVATIVES:
+ case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
+ case PIPE_SHADER_CAP_INT16:
+ case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
return 0;
case PIPE_SHADER_CAP_INTEGERS:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
@@ -605,14 +628,19 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
- case PIPE_SHADER_CAP_PREFERRED_IR:
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ if (is_nir_enabled(&rscreen->b))
+ return PIPE_SHADER_IR_NIR;
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS: {
int ir = 0;
if (shader == PIPE_SHADER_COMPUTE)
ir = 1 << PIPE_SHADER_IR_NATIVE;
- if (rscreen->b.family >= CHIP_CEDAR)
+ if (rscreen->b.family >= CHIP_CEDAR) {
ir |= 1 << PIPE_SHADER_IR_TGSI;
+ if (is_nir_enabled(&rscreen->b))
+ ir |= 1 << PIPE_SHADER_IR_NIR;
+ }
return ir;
}
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
@@ -791,7 +819,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
templ.usage = PIPE_USAGE_DEFAULT;
struct r600_resource *res = r600_resource(rscreen->screen.resource_create(&rscreen->screen, &templ));
- unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_TRANSFER_WRITE);
+ unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_MAP_WRITE);
memset(map, 0, 256);
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe.h b/lib/mesa/src/gallium/drivers/r600/r600_pipe.h
index 11c16957a..3cb171a0d 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe.h
@@ -268,6 +268,9 @@ struct r600_gs_rings_state {
#define DBG_SB_NO_FALLBACK (1 << 26)
#define DBG_SB_DISASM (1 << 27)
#define DBG_SB_SAFEMATH (1 << 28)
+#define DBG_NIR_SB (1 << 28)
+
+#define DBG_NIR_PREFERRED (DBG_NIR_SB | DBG_NIR)
struct r600_screen {
struct r600_common_screen b;
@@ -343,12 +346,14 @@ struct r600_pipe_shader_selector {
struct r600_pipe_shader *current;
struct tgsi_token *tokens;
+ struct nir_shader *nir;
struct pipe_stream_output_info so;
struct tgsi_shader_info info;
unsigned num_shaders;
enum pipe_shader_type type;
+ enum pipe_shader_ir ir_type;
/* geometry shader properties */
enum pipe_prim_type gs_output_prim;
@@ -488,7 +493,7 @@ struct r600_context {
struct r600_common_context b;
struct r600_screen *screen;
struct blitter_context *blitter;
- struct u_suballocator *allocator_fetch_shader;
+ struct u_suballocator allocator_fetch_shader;
/* Hardware info. */
boolean has_vertex_cache;
@@ -1055,7 +1060,8 @@ void eg_dump_debug_state(struct pipe_context *ctx, FILE *f,
unsigned flags);
struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx,
- const struct tgsi_token *tokens,
+ const void *tokens,
+ enum pipe_shader_ir,
unsigned pipe_shader_type);
int r600_shader_select(struct pipe_context *ctx,
struct r600_pipe_shader_selector* sel,
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.c b/lib/mesa/src/gallium/drivers/r600/r600_shader.c
index 85e584baf..c23adf2ea 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_shader.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.c
@@ -24,7 +24,9 @@
#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600_shader.h"
+#include "r600_dump.h"
#include "r600d.h"
+#include "sfn/sfn_nir.h"
#include "sb/sb_public.h"
@@ -33,6 +35,10 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "nir/tgsi_to_nir.h"
+#include "nir/nir_to_tgsi_info.h"
+#include "compiler/nir/nir.h"
#include "util/u_bitcast.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@@ -143,7 +149,7 @@ static int store_shader(struct pipe_context *ctx,
}
ptr = r600_buffer_map_sync_with_rings(
&rctx->b, shader->bo,
- PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+ PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
@@ -151,12 +157,14 @@ static int store_shader(struct pipe_context *ctx,
} else {
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
}
- rctx->b.ws->buffer_unmap(shader->bo->buf);
+ rctx->b.ws->buffer_unmap(rctx->b.ws, shader->bo->buf);
}
return 0;
}
+extern const struct nir_shader_compiler_options r600_nir_options;
+static int nshader = 0;
int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_pipe_shader *shader,
union r600_shader_key key)
@@ -164,27 +172,76 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_shader_selector *sel = shader->selector;
int r;
- bool dump = r600_can_dump_shader(&rctx->screen->b,
- tgsi_get_processor_type(sel->tokens));
- unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
+ struct r600_screen *rscreen = (struct r600_screen *)ctx->screen;
+
+ int processor = sel->ir_type == PIPE_SHADER_IR_TGSI ?
+ tgsi_get_processor_type(sel->tokens):
+ pipe_shader_type_from_mesa(sel->nir->info.stage);
+
+ bool dump = r600_can_dump_shader(&rctx->screen->b, processor);
+ unsigned use_sb = !(rctx->screen->b.debug_flags & (DBG_NO_SB | DBG_NIR)) ||
+ (rctx->screen->b.debug_flags & DBG_NIR_SB);
unsigned sb_disasm;
unsigned export_shader;
-
+
shader->shader.bc.isa = rctx->isa;
+
+ if (!(rscreen->b.debug_flags & DBG_NIR_PREFERRED)) {
+ assert(sel->ir_type == PIPE_SHADER_IR_TGSI);
+ r = r600_shader_from_tgsi(rctx, shader, key);
+ if (r) {
+ R600_ERR("translation from TGSI failed !\n");
+ goto error;
+ }
+ } else {
+ if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
+ sel->nir = tgsi_to_nir(sel->tokens, ctx->screen, true);
+ const nir_shader_compiler_options *nir_options =
+ (const nir_shader_compiler_options *)
+ ctx->screen->get_compiler_options(ctx->screen,
+ PIPE_SHADER_IR_NIR,
+ shader->shader.processor_type);
+ /* Lower int64 ops because we have some r600 built-in shaders that use them */
+ if (nir_options->lower_int64_options) {
+ NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
+ NIR_PASS_V(sel->nir, nir_lower_int64);
+ NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL);
+ }
+ NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false);
+ }
+ nir_tgsi_scan_shader(sel->nir, &sel->info, true);
+ r = r600_shader_from_nir(rctx, shader, &key);
+ if (r) {
+ fprintf(stderr, "--Failed shader--------------------------------------------------\n");
+
+ if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
+ fprintf(stderr, "--TGSI--------------------------------------------------------\n");
+ tgsi_dump(sel->tokens, 0);
+ }
+
+ if (rscreen->b.debug_flags & (DBG_NIR_PREFERRED)) {
+ fprintf(stderr, "--NIR --------------------------------------------------------\n");
+ nir_print_shader(sel->nir, stderr);
+ }
+
+ R600_ERR("translation from NIR failed !\n");
+ goto error;
+ }
+ }
+
if (dump) {
- fprintf(stderr, "--------------------------------------------------------------\n");
- tgsi_dump(sel->tokens, 0);
-
+ if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
+ fprintf(stderr, "--TGSI--------------------------------------------------------\n");
+ tgsi_dump(sel->tokens, 0);
+ }
+
if (sel->so.num_outputs) {
r600_dump_streamout(&sel->so);
}
}
- r = r600_shader_from_tgsi(rctx, shader, key);
- if (r) {
- R600_ERR("translation from TGSI failed !\n");
- goto error;
- }
+
if (shader->shader.processor_type == PIPE_SHADER_VERTEX) {
/* only disable for vertex shaders in tess paths */
if (key.vs.as_ls)
@@ -216,7 +273,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
r600_bytecode_disasm(&shader->shader.bc);
fprintf(stderr, "______________________________________________________________\n");
} else if ((dump && sb_disasm) || use_sb) {
- r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
+ r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
dump, use_sb);
if (r) {
R600_ERR("r600_sb_bytecode_process failed !\n");
@@ -224,6 +281,30 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
}
}
+ if (dump) {
+ FILE *f;
+ char fname[1024];
+ snprintf(fname, 1024, "shader_from_%s_%d.cpp",
+ (sel->ir_type == PIPE_SHADER_IR_TGSI ?
+ (rscreen->b.debug_flags & DBG_NIR_PREFERRED ? "tgsi-nir" : "tgsi")
+ : "nir"), nshader);
+ f = fopen(fname, "w");
+ print_shader_info(f, nshader++, &shader->shader);
+ print_shader_info(stderr, nshader++, &shader->shader);
+ print_pipe_info(stderr, &sel->info);
+ if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
+ fprintf(f, "/****TGSI**********************************\n");
+ tgsi_dump_to_file(sel->tokens, 0, f);
+ }
+
+ if (rscreen->b.debug_flags & DBG_NIR_PREFERRED){
+ fprintf(f, "/****NIR **********************************\n");
+ nir_print_shader(sel->nir, f);
+ }
+ fprintf(f, "******************************************/\n");
+ fclose(f);
+ }
+
if (shader->gs_copy_shader) {
if (dump) {
// dump copy shader
@@ -301,7 +382,8 @@ error:
void r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader)
{
r600_resource_reference(&shader->bo, NULL);
- r600_bytecode_clear(&shader->shader.bc);
+ if (list_is_linked(&shader->shader.bc.cf))
+ r600_bytecode_clear(&shader->shader.bc);
r600_release_command_buffer(&shader->command_buffer);
}
@@ -433,24 +515,26 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
#endif
for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
if (i->Src[j].Register.Dimension) {
- switch (i->Src[j].Register.File) {
- case TGSI_FILE_CONSTANT:
- case TGSI_FILE_HW_ATOMIC:
- break;
- case TGSI_FILE_INPUT:
- if (ctx->type == PIPE_SHADER_GEOMETRY ||
- ctx->type == PIPE_SHADER_TESS_CTRL ||
- ctx->type == PIPE_SHADER_TESS_EVAL)
- break;
- case TGSI_FILE_OUTPUT:
- if (ctx->type == PIPE_SHADER_TESS_CTRL)
- break;
- default:
- R600_ERR("unsupported src %d (file %d, dimension %d)\n", j,
- i->Src[j].Register.File,
- i->Src[j].Register.Dimension);
- return -EINVAL;
- }
+ switch (i->Src[j].Register.File) {
+ case TGSI_FILE_CONSTANT:
+ case TGSI_FILE_HW_ATOMIC:
+ break;
+ case TGSI_FILE_INPUT:
+ if (ctx->type == PIPE_SHADER_GEOMETRY ||
+ ctx->type == PIPE_SHADER_TESS_CTRL ||
+ ctx->type == PIPE_SHADER_TESS_EVAL)
+ break;
+ FALLTHROUGH;
+ case TGSI_FILE_OUTPUT:
+ if (ctx->type == PIPE_SHADER_TESS_CTRL)
+ break;
+ FALLTHROUGH;
+ default:
+ R600_ERR("unsupported src %d (file %d, dimension %d)\n", j,
+ i->Src[j].Register.File,
+ i->Src[j].Register.Dimension);
+ return -EINVAL;
+ }
}
}
for (j = 0; j < i->Instruction.NumDstRegs; j++) {
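This hunk (and several below) replaces bare or comment-only case fallthroughs with Mesa's FALLTHROUGH macro, which keeps -Wimplicit-fallthrough quiet while making the intent machine-checkable. A self-contained sketch of the idea; the guard shown here is an assumption, Mesa's actual definition lives in its util headers and covers more compilers:

/* Assumed portable definition; illustrative only. */
#if defined(__GNUC__) && __GNUC__ >= 7
#define FALLTHROUGH __attribute__((fallthrough))
#else
#define FALLTHROUGH do { } while (0)
#endif

static int accumulate(int v, int stage)
{
   switch (stage) {
   case 2:
      v *= 3;
      FALLTHROUGH;      /* deliberate: stage 2 also does stage 1's work */
   case 1:
      v += 1;
      break;
   default:
      break;
   }
   return v;
}

int main(void)
{
   return accumulate(5, 2) == 16 ? 0 : 1;   /* (5*3)+1 */
}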
@@ -620,6 +704,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
else {
if (name == TGSI_SEMANTIC_GENERIC) {
/* For generic params simply use sid from tgsi */
+ index = 9 + io->sid;
+ } else if (name == TGSI_SEMANTIC_TEXCOORD) {
index = io->sid;
} else {
/* For non-generic params - pack name and sid into 8 bits */
@@ -646,9 +732,11 @@ int r600_get_lds_unique_index(unsigned semantic_name, unsigned index)
case TGSI_SEMANTIC_CLIPDIST:
assert(index <= 1);
return 2 + index;
+ case TGSI_SEMANTIC_TEXCOORD:
+ return 4 + index;
case TGSI_SEMANTIC_GENERIC:
if (index <= 63-4)
- return 4 + index - 9;
+ return 4 + index;
else
/* same explanation as in the default statement,
* the only user hitting this is st/nine.
@@ -1614,7 +1702,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
- r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
+ r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel);
if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
return;
}
@@ -2469,9 +2557,9 @@ static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
r600_bytecode_add_alu(ctx->bc, &alu);
}
-static int generate_gs_copy_shader(struct r600_context *rctx,
- struct r600_pipe_shader *gs,
- struct pipe_stream_output_info *so)
+int generate_gs_copy_shader(struct r600_context *rctx,
+ struct r600_pipe_shader *gs,
+ struct pipe_stream_output_info *so)
{
struct r600_shader_ctx ctx = {};
struct r600_shader *gs_shader = &gs->shader;
@@ -2969,7 +3057,8 @@ static int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
for (i = 0; i < ctx->shader->noutput; i++) {
struct r600_bytecode_alu alu;
- int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid);
+ int param = r600_get_lds_unique_index(ctx->shader->output[i].name,
+ ctx->shader->output[i].sid);
if (param) {
r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
@@ -4625,6 +4714,14 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
op = ALU_OP2_MUL;
+ /* nir_to_tgsi lowers nir_op_isub to UADD + negate; since r600 doesn't support
+ * source modifiers with integer ops, we switch back to SUB_INT */
+ bool src1_neg = ctx->src[1].neg;
+ if (op == ALU_OP2_ADD_INT && src1_neg) {
+ src1_neg = false;
+ op = ALU_OP2_SUB_INT;
+ }
+
for (i = 0; i <= lasti; i++) {
if (!(write_mask & (1 << i)))
continue;
@@ -4642,6 +4739,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
}
+ alu.src[1].neg = src1_neg;
} else {
r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
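The rewrite above leans on a simple identity: an integer add whose second source carries a negate modifier is exactly a subtract, so the modifier (which r600's integer ALU ops do not honor, per the comment) can be folded into SUB_INT without changing results. A one-line check of the identity, including the unsigned wraparound case:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   int32_t a = 37, b = 91;
   /* UADD with src1 negated == SUB_INT: a + (-b) == a - b */
   assert(a + (-b) == a - b);
   /* and the two wrap identically in 32-bit unsigned arithmetic */
   assert((uint32_t)a + (uint32_t)-b == (uint32_t)(a - b));
   return 0;
}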
@@ -8090,7 +8188,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
- /* fall through */
+ FALLTHROUGH;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_SHADOW2D:
@@ -8111,7 +8209,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
- /* fall through */
+ FALLTHROUGH;
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
@@ -8135,7 +8233,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_3D:
offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
- /* fallthrough */
+ FALLTHROUGH;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_RECT:
@@ -8143,7 +8241,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
- /* fallthrough */
+ FALLTHROUGH;
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_1D_ARRAY:
@@ -10346,7 +10444,7 @@ static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
* elements */
elements += 2;
- /* fallthrough */
+ FALLTHROUGH;
/* FIXME: do the two elements added above cover the cases for the
* r8xx+ below? */
@@ -11050,6 +11148,76 @@ static int egcm_u64add(struct r600_shader_ctx *ctx)
return 0;
}
+
+static int egcm_i64neg(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r;
+ int treg = ctx->temp_reg;
+ const int op = ALU_OP2_SUB_INT;
+ const int opc = ALU_OP2_SUBB_UINT;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ alu.dst.sel = treg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
+ alu.src[1].neg = 0;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ alu.dst.sel = treg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], 1);
+ alu.src[1].neg = 0;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = opc;
+ alu.dst.sel = treg;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
+ alu.src[1].neg = 0;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+ alu.src[0].sel = treg;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = treg;
+ alu.src[1].chan = 2;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+ alu.src[0].sel = treg;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ return 0;
+}
+
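egcm_i64neg computes a 64-bit two's-complement negate using only 32-bit ALU ops: SUB_INT produces 0 - lo and 0 - hi per channel, SUBB_UINT recovers the borrow out of 0 - lo, and a final SUB_INT folds that borrow into the high word. A host-side model of the same sequence, a sketch for checking the math rather than driver code:

#include <assert.h>
#include <stdint.h>

/* Mirrors the ALU sequence above: channels 0/1 hold 0 - lo and 0 - hi,
 * channel 2 holds the borrow of 0 - lo (SUBB_UINT), and the result's
 * high word is (0 - hi) - borrow. */
static uint64_t i64neg_model(uint64_t v)
{
   uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
   uint32_t t0 = 0u - lo;                /* SUB_INT, chan 0 */
   uint32_t t1 = 0u - hi;                /* SUB_INT, chan 1 */
   uint32_t t2 = lo ? 1u : 0u;           /* SUBB_UINT: borrow of 0 - lo */
   uint32_t res_hi = t1 - t2;            /* SUB_INT into dst chan 1 */
   return ((uint64_t)res_hi << 32) | t0; /* MOV into dst chan 0 */
}

int main(void)
{
   assert(i64neg_model(0) == 0);
   assert(i64neg_model(1) == (uint64_t)-1);
   assert(i64neg_model(0x123456789abcdef0ull) ==
          (uint64_t)-(int64_t)0x123456789abcdef0ull);
   return 0;
}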
/* result.y = mul_high a, b
result.x = mul a,b
result.y += a.x * b.y + a.y * b.x;
@@ -12007,6 +12175,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add },
[TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul },
[TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div },
+ [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg },
[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};
@@ -12233,5 +12402,6 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add },
[TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul },
[TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div },
+ [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg },
[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.h b/lib/mesa/src/gallium/drivers/r600/r600_shader.h
index 7dffd592a..8acd9a3af 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_shader.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.h
@@ -72,8 +72,8 @@ struct r600_shader {
unsigned nhwatomic;
unsigned nlds;
unsigned nsys_inputs;
- struct r600_shader_io input[64];
- struct r600_shader_io output[64];
+ struct r600_shader_io input[PIPE_MAX_SHADER_INPUTS];
+ struct r600_shader_io output[PIPE_MAX_SHADER_OUTPUTS];
struct r600_shader_atomic atomics[8];
unsigned nhwatomic_ranges;
boolean uses_kill;
@@ -136,6 +136,8 @@ union r600_shader_key {
unsigned image_size_const_offset:5;
unsigned color_two_side:1;
unsigned alpha_to_one:1;
+ unsigned apply_sample_id_mask:1;
+ unsigned dual_source_blend:1;
} ps;
struct {
unsigned prim_id_out:8;
@@ -191,6 +193,10 @@ int eg_get_interpolator_index(unsigned interpolate, unsigned location);
int r600_get_lds_unique_index(unsigned semantic_name, unsigned index);
+int generate_gs_copy_shader(struct r600_context *rctx,
+ struct r600_pipe_shader *gs,
+ struct pipe_stream_output_info *so);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state.c b/lib/mesa/src/gallium/drivers/r600/r600_state.c
index b20a9d2a2..6eb2bd42b 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_state.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_state.c
@@ -246,7 +246,7 @@ bool r600_is_format_supported(struct pipe_screen *screen,
static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
float offset_units = state->offset_units;
float offset_scale = state->offset_scale;
@@ -415,11 +415,11 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
dsa->valuemask[1] = state->stencil[1].valuemask;
dsa->writemask[0] = state->stencil[0].writemask;
dsa->writemask[1] = state->stencil[1].writemask;
- dsa->zwritemask = state->depth.writemask;
+ dsa->zwritemask = state->depth_writemask;
- db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
- S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
+ db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) |
+ S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
+ S_028800_ZFUNC(state->depth_func);
/* stencil */
if (state->stencil[0].enabled) {
@@ -441,10 +441,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
/* alpha */
alpha_test_control = 0;
alpha_ref = 0;
- if (state->alpha.enabled) {
- alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
+ if (state->alpha_enabled) {
+ alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func);
alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
- alpha_ref = fui(state->alpha.ref_value);
+ alpha_ref = fui(state->alpha_ref_value);
}
dsa->sx_alpha_test_control = alpha_test_control & 0xff;
dsa->alpha_ref = alpha_ref;
@@ -520,15 +520,13 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
}
spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
- if (state->sprite_coord_enable) {
- spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(2) |
- S_0286D4_PNT_SPRITE_OVRD_Y(3) |
- S_0286D4_PNT_SPRITE_OVRD_Z(0) |
- S_0286D4_PNT_SPRITE_OVRD_W(1);
- if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
- spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
- }
+ spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(2) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(1);
+ if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+ spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
}
r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3);
@@ -757,11 +755,11 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) |
S_038004_TEX_DEPTH(depth - 1) |
S_038004_DATA_FORMAT(format));
- view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset >> 8;
+ view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset_256B;
if (offset_level >= tmp->resource.b.b.last_level) {
- view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset >> 8;
+ view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset_256B;
} else {
- view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset >> 8;
+ view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset_256B;
}
view->tex_resource_words[4] = (word4 |
S_038010_REQUEST_SIZE(1) |
@@ -792,7 +790,7 @@ r600_create_sampler_view(struct pipe_context *ctx,
static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct pipe_clip_state *state = &rctx->clip_state.state;
radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
@@ -826,7 +824,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
assert(rtex);
}
- offset = rtex->surface.u.legacy.level[level].offset;
+ offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
color_view = S_028080_SLICE_START(surf->base.u.tex.first_layer) |
S_028080_SLICE_MAX(surf->base.u.tex.last_layer);
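Mip-level offsets are now stored in 256-byte units (offset_256B), so every consumer widens to 64 bits before multiplying back to bytes. The cast on the left of the multiply is load-bearing; a minimal demonstration of the overflow it avoids (values are illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint32_t offset_256B = 0x01000000;   /* 16M units = 4 GiB */

   /* widened first: correct */
   uint64_t ok = (uint64_t)offset_256B * 256;
   assert(ok == 0x100000000ull);

   /* multiplied in 32 bits, then widened: wraps to 0 */
   uint64_t bad = (uint64_t)(offset_256B * 256u);
   assert(bad == 0);
   return 0;
}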
@@ -910,7 +908,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
S_0280A0_NUMBER_TYPE(ntype) |
S_0280A0_ENDIAN(endian);
- /* EXPORT_NORM is an optimzation that can be enabled for better
+ /* EXPORT_NORM is an optimization that can be enabled for better
* performance in certain cases
*/
if (rctx->b.chip_class == R600) {
@@ -984,7 +982,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
/* CMASK. */
if (!rctx->dummy_cmask ||
rctx->dummy_cmask->b.b.width0 < cmask.size ||
- rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
+ (1 << rctx->dummy_cmask->buf->alignment_log2) % cmask.alignment != 0) {
struct pipe_transfer *transfer;
void *ptr;
@@ -1000,7 +998,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
}
/* Set the contents to 0xCC. */
- ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_TRANSFER_WRITE, &transfer);
+ ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_MAP_WRITE, &transfer);
memset(ptr, 0xCC, cmask.size);
pipe_buffer_unmap(&rctx->b.b, transfer);
}
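buf->alignment is likewise gone, replaced by alignment_log2; callers shift to recover the byte value. A small check of the reuse test in the two hunks here, with illustrative numbers:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint32_t alignment_log2 = 9;               /* buffer aligned to 512 bytes */
   uint32_t alignment = 1u << alignment_log2; /* recover the byte value */

   uint32_t cmask_alignment = 128;            /* what CMASK requires here */
   /* the dummy-buffer reuse test: the existing alignment must be a
    * multiple of the required one, otherwise the buffer is recreated */
   assert(alignment % cmask_alignment == 0);
   return 0;
}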
@@ -1009,7 +1007,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
/* FMASK. */
if (!rctx->dummy_fmask ||
rctx->dummy_fmask->b.b.width0 < fmask.size ||
- rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
+ (1 << rctx->dummy_fmask->buf->alignment_log2) % fmask.alignment != 0) {
r600_resource_reference(&rctx->dummy_fmask, NULL);
rctx->dummy_fmask = (struct r600_resource*)
r600_aligned_buffer_create(&rscreen->b.b, 0,
@@ -1043,7 +1041,7 @@ static void r600_init_depth_surface(struct r600_context *rctx,
unsigned level, pitch, slice, format, offset, array_mode;
level = surf->base.u.tex.level;
- offset = rtex->surface.u.legacy.level[level].offset;
+ offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
pitch = rtex->surface.u.legacy.level[level].nblk_x / 8 - 1;
slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64;
if (slice) {
@@ -1284,7 +1282,7 @@ static void r600_get_sample_position(struct pipe_context *ctx,
static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned max_dist = 0;
if (rctx->b.family == CHIP_R600) {
@@ -1351,7 +1349,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
unsigned nr_cbufs = state->nr_cbufs;
struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0];
@@ -1517,7 +1515,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
@@ -1547,7 +1545,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_db_state *a = (struct r600_db_state*)atom;
if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -1568,7 +1566,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
unsigned db_render_control = 0;
unsigned db_render_override =
@@ -1653,7 +1651,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_config_state *a = (struct r600_config_state*)atom;
radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
@@ -1662,7 +1660,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
while (dirty_mask) {
@@ -1702,7 +1700,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
unsigned reg_alu_constbuf_size,
unsigned reg_alu_const_cache)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -1776,7 +1774,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
unsigned resource_id_base)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
while (dirty_mask) {
@@ -1823,7 +1821,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
unsigned resource_id_base,
unsigned border_color_reg)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
while (dirty_mask) {
@@ -1883,7 +1881,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a
static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
unsigned tmp;
tmp = S_009508_DISABLE_CUBE_ANISO(1) |
@@ -1901,13 +1899,13 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
struct r600_sample_mask *s = (struct r600_sample_mask*)a;
uint8_t mask = s->sample_mask;
- radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK,
+ radeon_set_context_reg(&rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK,
mask | (mask << 8) | (mask << 16) | (mask << 24));
}
static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_cso_state *state = (struct r600_cso_state*)a;
struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
@@ -1923,7 +1921,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600
static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
uint32_t v2 = 0, primid = 0;
@@ -1958,7 +1956,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
struct r600_resource *rbuffer;
@@ -2474,8 +2472,9 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
rctx->rasterizer && rctx->rasterizer->flatshade))
tmp |= S_028644_FLAT_SHADE(1);
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- sprite_coord_enable & (1 << rshader->input[i].sid)) {
+ if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD ||
+ (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD &&
+ sprite_coord_enable & (1 << rshader->input[i].sid))) {
tmp |= S_028644_PT_SPRITE_TEX(1);
}
@@ -2777,8 +2776,8 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx)
memset(&dsa, 0, sizeof(dsa));
if (quirk) {
- dsa.depth.enabled = 1;
- dsa.depth.func = PIPE_FUNC_LEQUAL;
+ dsa.depth_enabled = 1;
+ dsa.depth_func = PIPE_FUNC_LEQUAL;
dsa.stencil[0].enabled = 1;
dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
@@ -2855,7 +2854,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
unsigned pitch,
unsigned bpp)
{
- struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
struct r600_texture *rsrc = (struct r600_texture*)src;
struct r600_texture *rdst = (struct r600_texture*)dst;
unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -2885,8 +2884,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
x = src_x;
y = src_y;
z = src_z;
- base = rsrc->surface.u.legacy.level[src_level].offset;
- addr = rdst->surface.u.legacy.level[dst_level].offset;
+ base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
+ addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
addr += dst_y * pitch + dst_x * bpp;
} else {
@@ -2904,8 +2903,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
x = dst_x;
y = dst_y;
z = dst_z;
- base = rdst->surface.u.legacy.level[dst_level].offset;
- addr = rsrc->surface.u.legacy.level[src_level].offset;
+ base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
+ addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z;
addr += src_y * pitch + src_x * bpp;
}
@@ -2959,7 +2958,7 @@ static void r600_dma_copy(struct pipe_context *ctx,
unsigned src_x, src_y;
unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
- if (rctx->b.dma.cs == NULL) {
+ if (rctx->b.dma.cs.priv == NULL) {
goto fallback;
}
@@ -3008,10 +3007,10 @@ static void r600_dma_copy(struct pipe_context *ctx,
* dst_x/y == 0
* dst_pitch == src_pitch
*/
- src_offset= rsrc->surface.u.legacy.level[src_level].offset;
+ src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z;
src_offset += src_y * src_pitch + src_x * bpp;
- dst_offset = rdst->surface.u.legacy.level[dst_level].offset;
+ dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
dst_offset += dst_y * dst_pitch + dst_x * bpp;
size = src_box->height * src_pitch;
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c
index 4718286bd..2ded6c822 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c
@@ -29,6 +29,7 @@
#include "r600d.h"
#include "util/format/u_format_s3tc.h"
+#include "util/u_draw.h"
#include "util/u_index_modify.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
@@ -37,6 +38,10 @@
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_ureg.h"
+#include "nir.h"
+#include "nir/nir_to_tgsi_info.h"
+#include "tgsi/tgsi_from_mesa.h"
+
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
{
assert(!cb->buf);
@@ -72,12 +77,12 @@ void r600_init_atom(struct r600_context *rctx,
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
{
- r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb);
+ r600_emit_command_buffer(&rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb);
}
void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
unsigned alpha_ref = a->sx_alpha_ref;
@@ -245,7 +250,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct pipe_blend_color *state = &rctx->blend_color.state;
radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
@@ -257,7 +262,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
@@ -281,17 +286,17 @@ static void r600_set_clip_state(struct pipe_context *ctx,
}
static void r600_set_stencil_ref(struct pipe_context *ctx,
- const struct r600_stencil_ref *state)
+ const struct r600_stencil_ref state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- rctx->stencil_ref.state = *state;
+ rctx->stencil_ref.state = state;
r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom);
}
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
@@ -306,25 +311,25 @@ void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
}
static void r600_set_pipe_stencil_ref(struct pipe_context *ctx,
- const struct pipe_stencil_ref *state)
+ const struct pipe_stencil_ref state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_dsa_state *dsa = (struct r600_dsa_state*)rctx->dsa_state.cso;
struct r600_stencil_ref ref;
- rctx->stencil_ref.pipe_state = *state;
+ rctx->stencil_ref.pipe_state = state;
if (!dsa)
return;
- ref.ref_value[0] = state->ref_value[0];
- ref.ref_value[1] = state->ref_value[1];
+ ref.ref_value[0] = state.ref_value[0];
+ ref.ref_value[1] = state.ref_value[1];
ref.valuemask[0] = dsa->valuemask[0];
ref.valuemask[1] = dsa->valuemask[1];
ref.writemask[0] = dsa->writemask[0];
ref.writemask[1] = dsa->writemask[1];
- r600_set_stencil_ref(ctx, &ref);
+ r600_set_stencil_ref(ctx, ref);
}
static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
@@ -357,7 +362,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
}
}
- r600_set_stencil_ref(ctx, &ref);
+ r600_set_stencil_ref(ctx, ref);
/* Update alphatest state. */
if (rctx->alphatest_state.sx_alpha_test_control != dsa->sx_alpha_test_control ||
@@ -562,6 +567,8 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx)
static void r600_set_vertex_buffers(struct pipe_context *ctx,
unsigned start_slot, unsigned count,
+ unsigned unbind_num_trailing_slots,
+ bool take_ownership,
const struct pipe_vertex_buffer *input)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -582,7 +589,13 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
if (input[i].buffer.resource) {
vb[i].stride = input[i].stride;
vb[i].buffer_offset = input[i].buffer_offset;
- pipe_resource_reference(&vb[i].buffer.resource, input[i].buffer.resource);
+ if (take_ownership) {
+ pipe_resource_reference(&vb[i].buffer.resource, NULL);
+ vb[i].buffer.resource = input[i].buffer.resource;
+ } else {
+ pipe_resource_reference(&vb[i].buffer.resource,
+ input[i].buffer.resource);
+ }
new_buffer_mask |= 1 << i;
r600_context_add_resource_size(ctx, input[i].buffer.resource);
} else {
@@ -598,6 +611,11 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx,
disable_mask = ((1ull << count) - 1);
}
+ for (i = 0; i < unbind_num_trailing_slots; i++) {
+ pipe_resource_reference(&vb[count + i].buffer.resource, NULL);
+ }
+ disable_mask |= ((1ull << unbind_num_trailing_slots) - 1) << count;
+
disable_mask <<= start_slot;
new_buffer_mask <<= start_slot;
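take_ownership is new in this interface revision for both vertex and constant buffers (see r600_set_constant_buffer below): when set, the caller's reference is adopted rather than incremented, saving an atomic per bind. A sketch of the contract on a toy refcounted type; the names are hypothetical, not the pipe_resource machinery:

#include <stdbool.h>
#include <stdlib.h>

struct res { int refcount; };

/* pipe_resource_reference-style helper: retain src, release *dst. */
static void res_reference(struct res **dst, struct res *src)
{
   if (src)
      src->refcount++;
   if (*dst && --(*dst)->refcount == 0)
      free(*dst);
   *dst = src;
}

static void bind_slot(struct res **slot, struct res *in, bool take_ownership)
{
   if (take_ownership) {
      res_reference(slot, NULL);  /* drop only the old binding */
      *slot = in;                 /* adopt the caller's reference as-is */
   } else {
      res_reference(slot, in);    /* classic: take our own reference */
   }
}

int main(void)
{
   struct res *r = calloc(1, sizeof(*r));
   r->refcount = 1;                /* the caller's reference */
   struct res *slot = NULL;
   bind_slot(&slot, r, true);      /* refcount stays 1; the slot owns it */
   res_reference(&slot, NULL);     /* unbinding frees it */
   return 0;
}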
@@ -622,6 +640,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
static void r600_set_sampler_views(struct pipe_context *pipe,
enum pipe_shader_type shader,
unsigned start, unsigned count,
+ unsigned unbind_num_trailing_slots,
struct pipe_sampler_view **views)
{
struct r600_context *rctx = (struct r600_context *) pipe;
@@ -815,9 +834,12 @@ static inline void r600_shader_selector_key(const struct pipe_context *ctx,
rctx->rasterizer && rctx->rasterizer->multisample_enable &&
!rctx->framebuffer.cb0_is_integer;
key->ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs;
+ key->ps.apply_sample_id_mask = (rctx->ps_iter_samples > 1) || !rctx->rasterizer->multisample_enable;
/* Dual-source blending only makes sense with nr_cbufs == 1. */
- if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend)
+ if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) {
key->ps.nr_cbufs = 2;
+ key->ps.dual_source_blend = 1;
+ }
break;
}
case PIPE_SHADER_TESS_EVAL:
@@ -906,14 +928,19 @@ int r600_shader_select(struct pipe_context *ctx,
}
struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx,
- const struct tgsi_token *tokens,
+ const void *prog, enum pipe_shader_ir ir,
unsigned pipe_shader_type)
{
struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector);
sel->type = pipe_shader_type;
- sel->tokens = tgsi_dup_tokens(tokens);
- tgsi_scan_shader(tokens, &sel->info);
+ if (ir == PIPE_SHADER_IR_TGSI) {
+ sel->tokens = tgsi_dup_tokens((const struct tgsi_token *)prog);
+ tgsi_scan_shader(sel->tokens, &sel->info);
+ } else if (ir == PIPE_SHADER_IR_NIR){
+ sel->nir = nir_shader_clone(NULL, (const nir_shader *)prog);
+ nir_tgsi_scan_shader(sel->nir, &sel->info, true);
+ }
return sel;
}
@@ -922,8 +949,16 @@ static void *r600_create_shader_state(struct pipe_context *ctx,
unsigned pipe_shader_type)
{
int i;
- struct r600_pipe_shader_selector *sel = r600_create_shader_state_tokens(ctx, state->tokens, pipe_shader_type);
-
+ struct r600_pipe_shader_selector *sel;
+
+ if (state->type == PIPE_SHADER_IR_TGSI)
+ sel = r600_create_shader_state_tokens(ctx, state->tokens, state->type, pipe_shader_type);
+ else if (state->type == PIPE_SHADER_IR_NIR) {
+ sel = r600_create_shader_state_tokens(ctx, state->ir.nir, state->type, pipe_shader_type);
+ } else
+ assert(0 && "Unknown shader type\n");
+
+ sel->ir_type = state->type;
sel->so = state->stream_output;
switch (pipe_shader_type) {
@@ -1082,7 +1117,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx,
p = c;
}
- free(sel->tokens);
+ if (sel->ir_type == PIPE_SHADER_IR_TGSI) {
+ free(sel->tokens);
+ /* We might have converted the TGSI shader to a NIR shader */
+ if (sel->nir)
+ ralloc_free(sel->nir);
+ }
+ else if (sel->ir_type == PIPE_SHADER_IR_NIR)
+ ralloc_free(sel->nir);
free(sel);
}
@@ -1159,6 +1201,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf
static void r600_set_constant_buffer(struct pipe_context *ctx,
enum pipe_shader_type shader, uint index,
+ bool take_ownership,
const struct pipe_constant_buffer *input)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -1166,7 +1209,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
struct pipe_constant_buffer *cb;
const uint8_t *ptr;
- /* Note that the state tracker can unbind constant buffers by
+ /* Note that the gallium frontend can unbind constant buffers by
* passing NULL here.
*/
if (unlikely(!input || (!input->buffer && !input->user_buffer))) {
@@ -1209,7 +1252,12 @@ static void r600_set_constant_buffer(struct pipe_context *ctx,
} else {
/* Setup the hw buffer. */
cb->buffer_offset = input->buffer_offset;
- pipe_resource_reference(&cb->buffer, input->buffer);
+ if (take_ownership) {
+ pipe_resource_reference(&cb->buffer, NULL);
+ cb->buffer = input->buffer;
+ } else {
+ pipe_resource_reference(&cb->buffer, input->buffer);
+ }
r600_context_add_resource_size(ctx, input->buffer);
}
@@ -1315,7 +1363,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
cb.user_buffer = ptr;
cb.buffer_offset = 0;
cb.buffer_size = size;
- rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb);
+ rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, &cb);
pipe_resource_reference(&cb.buffer, NULL);
}
}
@@ -1504,21 +1552,21 @@ static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
if (enable) {
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY,
- R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring);
+ R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.esgs_ring);
if (rctx->tes_shader) {
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
- R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring);
+ R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring);
} else {
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
- R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring);
+ R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring);
}
} else {
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY,
- R600_GS_RING_CONST_BUFFER, NULL);
+ R600_GS_RING_CONST_BUFFER, false, NULL);
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
- R600_GS_RING_CONST_BUFFER, NULL);
+ R600_GS_RING_CONST_BUFFER, false, NULL);
r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
- R600_GS_RING_CONST_BUFFER, NULL);
+ R600_GS_RING_CONST_BUFFER, false, NULL);
}
}
}
@@ -1638,7 +1686,7 @@ void r600_setup_scratch_area_for_shader(struct r600_context *rctx,
if (scratch->dirty ||
unlikely(shader->scratch_space_needed != scratch->item_size ||
size > scratch->size)) {
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
scratch->dirty = false;
@@ -1846,7 +1894,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
* to LS slots and won't reflect what is dirty as VS stage even if the
* TES didn't overwrite it. The story for re-enabled TES is similar.
* In any case, we're not allowed to submit any TES state when
- * TES is disabled (the state tracker may not do this but this looks
+ * TES is disabled (the gallium frontend may not do this but this looks
* like an optimization to me, not something which can be relied on).
*/
@@ -1982,7 +2030,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_clip_misc_state *state = &rctx->clip_misc_state;
radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -2002,7 +2050,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
/* rast_prim is the primitive type after GS. */
static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
enum pipe_prim_type rast_prim = rctx->current_rast_prim;
/* Skip this if not rendering lines. */
@@ -2025,21 +2073,35 @@ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx)
rctx->last_rast_prim = rast_prim;
}
-static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
+static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count *draws,
+ unsigned num_draws)
{
+ if (num_draws > 1) {
+ util_draw_multi(ctx, info, indirect, draws, num_draws);
+ return;
+ }
+
struct r600_context *rctx = (struct r600_context *)ctx;
- struct pipe_resource *indexbuf = info->has_user_indices ? NULL : info->index.resource;
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct pipe_resource *indexbuf = !info->index_size || info->has_user_indices ? NULL : info->index.resource;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
- bool has_user_indices = info->has_user_indices;
+ bool has_user_indices = info->index_size && info->has_user_indices;
uint64_t mask;
unsigned num_patches, dirty_tex_counter, index_offset = 0;
unsigned index_size = info->index_size;
int index_bias;
struct r600_shader_atomic combined_atomics[8];
- uint8_t atomic_used_mask;
+ uint8_t atomic_used_mask = 0;
+ struct pipe_stream_output_target *count_from_so = NULL;
+
+ if (indirect && indirect->count_from_stream_output) {
+ count_from_so = indirect->count_from_stream_output;
+ indirect = NULL;
+ }
- if (!info->indirect && !info->count && (index_size || !info->count_from_stream_output)) {
+ if (!indirect && !draws[0].count && (index_size || !count_from_so)) {
return;
}
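The draw entry point now receives an array of (start, count) pairs; r600 handles only one and punts anything larger to util_draw_multi. The fallback's shape is roughly a replay loop, sketched here with stand-in types rather than the helper's actual source:

/* Stand-in for pipe_draw_start_count; illustrative only. */
struct draw_range { unsigned start, count; };

static void draw_single(const struct draw_range *d)
{
   /* stand-in for the driver's single-draw path */
   (void)d;
}

/* Roughly what a util_draw_multi-style fallback does: replay each
 * range through the single-draw path, skipping empty ones. */
static void draw_multi_fallback(const struct draw_range *draws,
                                unsigned num_draws)
{
   for (unsigned i = 0; i < num_draws; i++)
      if (draws[i].count)
         draw_single(&draws[i]);
}

int main(void)
{
   struct draw_range draws[] = { { 0, 3 }, { 3, 0 }, { 6, 3 } };
   draw_multi_fallback(draws, 3);   /* the empty middle range is skipped */
   return 0;
}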
@@ -2054,7 +2116,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
/* make sure that the gfx ring is only one active */
- if (radeon_emitted(rctx->b.dma.cs, 0)) {
+ if (radeon_emitted(&rctx->b.dma.cs, 0)) {
rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
}
@@ -2101,7 +2163,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
if (index_size) {
- index_offset += info->start * index_size;
+ index_offset += draws[0].start * index_size;
/* Translate 8-bit indices to 16-bit. */
if (unlikely(index_size == 1)) {
@@ -2110,17 +2172,17 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
void *ptr;
unsigned start, count;
- if (likely(!info->indirect)) {
+ if (likely(!indirect)) {
start = 0;
- count = info->count;
+ count = draws[0].count;
}
else {
/* Have to get start/count from indirect buffer, slow path ahead... */
- struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect->buffer;
+ struct r600_resource *indirect_resource = (struct r600_resource *)indirect->buffer;
unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource,
- PIPE_TRANSFER_READ);
+ PIPE_MAP_READ);
if (data) {
- data += info->indirect->offset / sizeof(unsigned);
+ data += indirect->offset / sizeof(unsigned);
start = data[2] * index_size;
count = data[0];
}
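The 8-bit path exists because the index fetch has no byte type, so byte indices are staged into a 16-bit buffer first, as the comment above says. The widening itself is a trivial copy loop; Mesa's helper also deals with offsets and primitive restart, so this is only a sketch:

#include <assert.h>
#include <stdint.h>

/* Widen byte indices to the 16-bit type the index fetch understands. */
static void widen_indices_u8_to_u16(const uint8_t *in, uint16_t *out,
                                    unsigned start, unsigned count)
{
   for (unsigned i = 0; i < count; i++)
      out[i] = in[start + i];
}

int main(void)
{
   const uint8_t idx8[] = { 0, 1, 2, 2, 3, 0 };
   uint16_t idx16[6];
   widen_indices_u8_to_u16(idx8, idx16, 0, 6);
   assert(idx16[4] == 3);
   return 0;
}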
@@ -2149,25 +2211,28 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
* and the indices are emitted via PKT3_DRAW_INDEX_IMMD.
* Indirect draws never use immediate indices.
* Note: Instanced rendering in combination with immediate indices hangs. */
- if (has_user_indices && (R600_BIG_ENDIAN || info->indirect ||
+ if (has_user_indices && (R600_BIG_ENDIAN || indirect ||
info->instance_count > 1 ||
- info->count*index_size > 20)) {
+ draws[0].count*index_size > 20)) {
+ unsigned start_offset = draws[0].start * index_size;
indexbuf = NULL;
- u_upload_data(ctx->stream_uploader, 0,
- info->count * index_size, 256,
- info->index.user, &index_offset, &indexbuf);
+ u_upload_data(ctx->stream_uploader, start_offset,
+ draws[0].count * index_size, 256,
+ (char*)info->index.user + start_offset,
+ &index_offset, &indexbuf);
+ index_offset -= start_offset;
has_user_indices = false;
}
index_bias = info->index_bias;
} else {
- index_bias = info->start;
+ index_bias = indirect ? 0 : draws[0].start;
}
/* Set the index offset and primitive restart. */
if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info->primitive_restart ||
rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info->restart_index ||
rctx->vgt_state.vgt_indx_offset != index_bias ||
- (rctx->vgt_state.last_draw_was_indirect && !info->indirect)) {
+ (rctx->vgt_state.last_draw_was_indirect && !indirect)) {
rctx->vgt_state.vgt_multi_prim_ib_reset_en = info->primitive_restart;
rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info->restart_index;
rctx->vgt_state.vgt_indx_offset = index_bias;
@@ -2247,7 +2312,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
/* Update start instance. */
- if (!info->indirect && rctx->last_start_instance != info->start_instance) {
+ if (!indirect && rctx->last_start_instance != info->start_instance) {
radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info->start_instance);
rctx->last_start_instance = info->start_instance;
}
@@ -2262,11 +2327,11 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
/* Draw packets. */
- if (likely(!info->indirect)) {
+ if (likely(!indirect)) {
radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
radeon_emit(cs, info->instance_count);
} else {
- uint64_t va = r600_resource(info->indirect->buffer)->gpu_address;
+ uint64_t va = r600_resource(indirect->buffer)->gpu_address;
assert(rctx->b.chip_class >= EVERGREEN);
// Invalidate so non-indirect draw calls reset this state
@@ -2280,7 +2345,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
- (struct r600_resource*)info->indirect->buffer,
+ (struct r600_resource*)indirect->buffer,
RADEON_USAGE_READ,
RADEON_PRIO_DRAW_INDIRECT));
}
@@ -2292,20 +2357,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
(VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0)));
if (has_user_indices) {
- unsigned size_bytes = info->count*index_size;
+ unsigned size_bytes = draws[0].count*index_size;
unsigned size_dw = align(size_bytes, 4) / 4;
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit));
- radeon_emit(cs, info->count);
+ radeon_emit(cs, draws[0].count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_IMMEDIATE);
- radeon_emit_array(cs, info->index.user, size_dw);
+ radeon_emit_array(cs, info->index.user + draws[0].start * index_size, size_dw);
} else {
uint64_t va = r600_resource(indexbuf)->gpu_address + index_offset;
- if (likely(!info->indirect)) {
+ if (likely(!indirect)) {
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
- radeon_emit(cs, info->count);
+ radeon_emit(cs, draws[0].count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
@@ -2330,13 +2395,13 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
radeon_emit(cs, max_size);
radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit));
- radeon_emit(cs, info->indirect->offset);
+ radeon_emit(cs, indirect->offset);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
}
}
} else {
- if (unlikely(info->count_from_stream_output)) {
- struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output;
+ if (unlikely(count_from_so)) {
+ struct r600_so_target *t = (struct r600_so_target*)count_from_so;
uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset;
radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
@@ -2354,16 +2419,16 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
RADEON_PRIO_SO_FILLED_SIZE));
}
- if (likely(!info->indirect)) {
+ if (likely(!indirect)) {
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
- radeon_emit(cs, info->count);
+ radeon_emit(cs, draws[0].count);
}
else {
radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit));
- radeon_emit(cs, info->indirect->offset);
+ radeon_emit(cs, indirect->offset);
}
radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
- (info->count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0));
+ (count_from_so ? S_0287F0_USE_OPAQUE(1) : 0));
}
/* SMX returns CONTEXT_DONE too early workaround */
@@ -2549,7 +2614,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
{
- struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+ struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;
if (!shader)
@@ -2757,6 +2822,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
case PIPE_FORMAT_RGTC1_SNORM:
case PIPE_FORMAT_LATC1_SNORM:
word4 |= sign_bit[0];
+ FALLTHROUGH;
case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_LATC1_UNORM:
result = FMT_BC4;
@@ -2764,6 +2830,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
case PIPE_FORMAT_RGTC2_SNORM:
case PIPE_FORMAT_LATC2_SNORM:
word4 |= sign_bit[0] | sign_bit[1];
+ FALLTHROUGH;
case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_LATC2_UNORM:
result = FMT_BC5;
@@ -2809,7 +2876,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
goto out_word4;
case PIPE_FORMAT_BPTC_RGB_FLOAT:
word4 |= sign_bit[0] | sign_bit[1] | sign_bit[2];
- /* fall through */
+ FALLTHROUGH;
case PIPE_FORMAT_BPTC_RGB_UFLOAT:
result = FMT_BC6;
goto out_word4;
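[The three hunks above swap bare fall-through comments for the FALLTHROUGH macro, so compilers that enforce -Wimplicit-fallthrough accept the intentional case fall-throughs. In Mesa the macro comes from util/macros.h; a minimal sketch of the usual definition, where the exact feature-test cascade is an assumption:

  #if defined(__has_attribute) && __has_attribute(fallthrough)
  #define FALLTHROUGH __attribute__((fallthrough))
  #else
  #define FALLTHROUGH do { } while (0) /* no-op where the attribute is missing */
  #endif
]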
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_uvd.c b/lib/mesa/src/gallium/drivers/r600/r600_uvd.c
index 2e7d7ee4d..18ac073da 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_uvd.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_uvd.c
@@ -66,6 +66,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
struct pipe_video_buffer template;
struct pipe_resource templ;
unsigned i, array_size;
+ enum pipe_video_chroma_format chroma_format =
+ pipe_format_to_chroma_format(tmpl->buffer_format);
assert(pipe);
@@ -77,7 +79,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH);
template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT);
- vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_DEFAULT, 0);
+ vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size,
+ PIPE_USAGE_DEFAULT, 0, chroma_format);
if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING)
templ.bind = PIPE_BIND_LINEAR;
resources[0] = (struct r600_texture *)
@@ -86,7 +89,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
goto error;
if (resource_formats[1] != PIPE_FORMAT_NONE) {
- vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_DEFAULT, 1);
+ vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size,
+ PIPE_USAGE_DEFAULT, 1, chroma_format);
if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING)
templ.bind = PIPE_BIND_LINEAR;
resources[1] = (struct r600_texture *)
@@ -96,7 +100,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
}
if (resource_formats[2] != PIPE_FORMAT_NONE) {
- vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_DEFAULT, 2);
+ vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size,
+ PIPE_USAGE_DEFAULT, 2, chroma_format);
if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING)
templ.bind = PIPE_BIND_LINEAR;
resources[2] = (struct r600_texture *)
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h
index e7231702d..ef2f39855 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h
@@ -495,6 +495,15 @@ struct bc_alu_src {
unsigned abs:1;
unsigned rel:1;
literal value;
+
+ void clear() {
+ sel = 0;
+ chan = 0;
+ neg = 0;
+ abs = 0;
+ rel = 0;
+ value = 0;
+ }
};
struct bc_alu {
@@ -529,6 +538,31 @@ struct bc_alu {
this->op = op;
op_ptr = r600_isa_alu(op);
}
+ void clear() {
+ op_ptr = nullptr;
+ op = 0;
+ for (int i = 0; i < 3; ++i)
+ src[i].clear();
+ dst_gpr = 0;
+ dst_chan = 0;
+ dst_rel = 0;
+ clamp = 0;
+ omod = 0;
+ bank_swizzle = 0;
+ index_mode = 0;
+ last = 0;
+ pred_sel = 0;
+ fog_merge = 0;
+ write_mask = 0;
+ update_exec_mask = 0;
+ update_pred = 0;
+ slot = 0;
+ lds_idx_offset = 0;
+ slot_flags = AF_NONE;
+ }
+ bc_alu() {
+ clear();
+ }
};
struct bc_fetch {
@@ -658,7 +692,12 @@ public:
static unsigned dskip_mode;
sb_context() : src_stats(), opt_stats(), isa(0),
- hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
+ hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN),
+ alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0),
+ num_slots(0), uses_mova_gpr(false),
+ r6xx_gpr_index_workaround(false), stack_workaround_8xx(false),
+ stack_workaround_9xx(false), wavefront_size(0),
+ stack_entry_size(0) {}
int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
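[The new clear() members and the fully initialized sb_context constructor replace raw memset()s and default-uninitialized fields: zeroing through typed code stays well defined even if the structs later gain non-trivial members, and it silences uninitialized-read warnings. A minimal usage sketch:

  bc_alu alu;              // the added default constructor calls clear()
  alu.set_op(ALU_OP2_ADD); // afterwards set only what differs from zero
  alu.src[1].clear();      // reset one source slot without a raw memset
]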
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp
index 05674ff24..36361a251 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -326,7 +326,7 @@ void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
const bc_alu_src &s = bc.src[src];
if (s.abs)
- v = fabs(v.f);
+ v = fabsf(v.f);
if (s.neg)
v = -v.f;
}
@@ -424,21 +424,21 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
apply_alu_src_mod(n.bc, 0, cv);
switch (n.bc.op) {
- case ALU_OP1_CEIL: dv = ceil(cv.f); break;
+ case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
- case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break;
- case ALU_OP1_FLOOR: dv = floor(cv.f); break;
+ case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
+ case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
- case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break;
- case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break;
- case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break;
+ case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
+ case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
+ case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
- case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break;
+ case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
case ALU_OP1_LOG_CLAMPED:
case ALU_OP1_LOG_IEEE:
if (cv.f != 0.0f)
- dv = log2(cv.f);
+ dv = log2f(cv.f);
else
// don't fold to NAN, let the GPU handle it for now
// (prevents degenerate LIT tests from failing)
@@ -454,7 +454,7 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
case ALU_OP1_RECIPSQRT_CLAMPED:
case ALU_OP1_RECIPSQRT_FF:
- case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break;
+ case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
case ALU_OP1_RECIP_CLAMPED:
case ALU_OP1_RECIP_FF:
case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
@@ -462,8 +462,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break;
// case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
- case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break;
- case ALU_OP1_TRUNC: dv = trunc(cv.f); break;
+ case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
+ case ALU_OP1_TRUNC: dv = truncf(cv.f); break;
default:
return false;
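[Folding with the f-suffixed libm calls keeps the computation in single precision, which matches the float ALU the folded constant will stand in for; evaluating in double and rounding back can differ in the final ulp. A small illustration, for exposition only:

  float v          = 0.7f;
  float native     = log2f(v);               /* single precision throughout */
  float via_double = (float)log2((double)v); /* double result rounded back to
                                                float; can be one ulp off */
]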
@@ -719,7 +719,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
n->src[0] = n->src[2];
n->bc.src[0] = n->bc.src[2];
n->src[1] = sh.get_const_value(cr);
- memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
+ n->bc.src[1].clear();
n->src.resize(2);
n->bc.set_op(ALU_OP2_ADD);
@@ -729,7 +729,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
n->bc.src[0] = a->bc.src[last_arg];
n->bc.src[0].neg ^= cur_neg;
n->src[1] = sh.get_const_value(cr);
- memset(&n->bc.src[1], 0, sizeof(bc_alu_src));
+ n->bc.src[1].clear();
}
return false;
@@ -770,7 +770,7 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
if (!sh.safe_math) {
n.src[1] = sh.get_const_value(2.0f);
- memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
+ n.bc.src[1].clear();
n.bc.set_op(ALU_OP2_MUL);
return fold_alu_op2(n);
}
@@ -1070,7 +1070,7 @@ bool expr_handler::fold_alu_op3(alu_node& n) {
}
n.src[1] = t;
- memset(&n.bc.src[1], 0, sizeof(bc_alu_src));
+ n.bc.src[1].clear();
n.src.resize(2);
@@ -1101,7 +1101,7 @@ bool expr_handler::fold_alu_op3(alu_node& n) {
dv = cv0.f * cv1.f;
n.bc.set_op(ALU_OP2_ADD);
n.src[0] = sh.get_const_value(dv);
- memset(&n.bc.src[0], 0, sizeof(bc_alu_src));
+ n.bc.src[0].clear();
n.src[1] = n.src[2];
n.bc.src[1] = n.bc.src[2];
n.src.resize(2);
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
index 017153434..48355e8d6 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp
@@ -99,8 +99,8 @@ void if_conversion::convert_kill_instructions(region_node *r,
a->src[0] = cnd;
a->src[1] = sh.get_const_value(0);
// clear modifiers
- memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
- memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
+ a->bc.src[0].clear();
+ a->bc.src[1].clear();
} else {
// kill with constant 'false' condition, this shouldn't happen
// but remove it anyway
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h
index ef0fbd4e6..eecf17d28 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h
@@ -713,7 +713,8 @@ enum node_flags {
NF_SCHEDULE_EARLY = (1 << 9),
// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
- NF_ALU_STACK_WORKAROUND = (1 << 10)
+ NF_ALU_STACK_WORKAROUND = (1 << 10),
+ NF_ALU_2SLOT = (1 << 11),
};
inline node_flags operator |(node_flags l, node_flags r) {
@@ -929,7 +930,7 @@ public:
bool empty() { assert(first != NULL || first == last); return !first; }
unsigned count();
- // used with node containers that represent shceduling queues
+ // used with node containers that represent scheduling queues
// ignores copies and takes into account alu_packed_node items
unsigned real_alu_count();
@@ -1012,7 +1013,7 @@ public:
class alu_node : public node {
protected:
- alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); }
+ alu_node() : node(NT_OP, NST_ALU_INST) { }
public:
bc_alu bc;
@@ -1021,8 +1022,9 @@ public:
virtual bool fold_dispatch(expr_handler *ex);
unsigned forced_bank_swizzle() {
- return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ?
- VEC_210 : 0;
+ return ((bc.op_ptr->flags & AF_INTERP) &&
+ ((bc.slot_flags == AF_4V) ||
+ (bc.slot_flags == AF_2V))) ? VEC_210 : 0;
}
// return param index + 1 if instruction references interpolation param,
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp
index c557b8687..e14b187de 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp
@@ -313,24 +313,26 @@ int ra_init::run() {
alloc_arrays();
- ra_node(sh.root);
- return 0;
+ return ra_node(sh.root) ? 0 : 1;
}
-void ra_init::ra_node(container_node* c) {
+bool ra_init::ra_node(container_node* c) {
for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
node *n = *I;
if (n->type == NT_OP) {
- process_op(n);
+ if (!process_op(n))
+ return false;
}
if (n->is_container() && !n->is_alu_packed()) {
- ra_node(static_cast<container_node*>(n));
+ if (!ra_node(static_cast<container_node*>(n)))
+ return false;
}
}
+ return true;
}
-void ra_init::process_op(node* n) {
+bool ra_init::process_op(node* n) {
bool copy = n->is_copy_mov();
@@ -355,7 +357,8 @@ void ra_init::process_op(node* n) {
for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
value *v = *I;
if (v && v->is_sgpr())
- color(v);
+ if (!color(v))
+ return false;
}
}
@@ -372,10 +375,12 @@ void ra_init::process_op(node* n) {
assign_color(v, s->gpr);
}
} else
- color(v);
+ if (!color(v))
+ return false;
}
}
}
+ return true;
}
void ra_init::color_bs_constraint(ra_constraint* c) {
@@ -476,15 +481,15 @@ void ra_init::color_bs_constraint(ra_constraint* c) {
}
}
-void ra_init::color(value* v) {
+bool ra_init::color(value* v) {
if (v->constraint && v->constraint->kind == CK_PACKED_BS) {
color_bs_constraint(v->constraint);
- return;
+ return true;
}
if (v->chunk && v->chunk->is_fixed())
- return;
+ return true;
RA_DUMP(
sblog << "coloring ";
@@ -497,24 +502,24 @@ void ra_init::color(value* v) {
if (v->is_reg_pinned()) {
assert(v->is_chan_pinned());
assign_color(v, v->pin_gpr);
- return;
+ return true;
}
regbits rb(sh, v->interferences);
sel_chan c;
if (v->is_chan_pinned()) {
- RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << " "; );
unsigned mask = 1 << v->pin_gpr.chan();
c = rb.find_free_chans(mask) + v->pin_gpr.chan();
} else {
unsigned cm = get_preferable_chan_mask();
- RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; );
c = rb.find_free_chan_by_mask(cm);
- }
+ }
- assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed");
+ if (!c || c.sel() >= 128 - ctx.alu_temp_gprs)
+ return false;
assign_color(v, c);
+ return true;
}
void ra_init::assign_color(value* v, sel_chan c) {
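[Turning the hard assert into a bool return lets a failed coloring propagate out of ra_init::run() as a nonzero status instead of aborting. The caller-side shape this implies, with a hypothetical name for the fallback path, which this diff does not show:

  if (ra.run() != 0) {
     /* out of GPRs: drop the sb-optimized IR and keep the original
        bytecode rather than crashing (fallback name is hypothetical) */
     return use_unoptimized_bytecode();
  }
]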
diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp
index fe887c84c..2d5fbfdb2 100644
--- a/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -1950,7 +1950,10 @@ void post_scheduler::release_src_vec(vvec& vv, bool src) {
}
void literal_tracker::reset() {
- memset(lt, 0, sizeof(lt));
+ lt[0].u = 0;
+ lt[1].u = 0;
+ lt[2].u = 0;
+ lt[3].u = 0;
memset(uc, 0, sizeof(uc));
}
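[Here lt holds four literal values, so the patch clears them member-wise through the .u view rather than byte-blasting C++ objects with memset. An equivalent loop form, assuming literal exposes the .u integer member used above:

  for (unsigned i = 0; i < 4; ++i)
     lt[i].u = 0;
]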
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig b/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig
new file mode 100644
index 000000000..9cb67618b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig
@@ -0,0 +1,2 @@
+[*.{cpp,c,h}]
+indent_style = space
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
new file mode 100644
index 000000000..8690fc269
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp
@@ -0,0 +1,325 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_alu_defines.h"
+
+namespace r600 {
+
+const std::map<EAluOp, AluOp> alu_ops = {
+ {op0_nop ,AluOp(0, 0, AluOp::a,"NOP")},
+ {op0_group_barrier ,AluOp(0, 0, AluOp::a,"GROUP_BARRIER")},
+ {op0_group_seq_begin ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_BEGIN")},
+ {op0_group_seq_end ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_END")},
+ {op0_pred_set_clr ,AluOp(0, 1, AluOp::a,"PRED_SET_CLR")},
+ {op0_store_flags ,AluOp(0, 0, AluOp::v,"STORE_FLAGS")},
+ {op0_lds_1a ,AluOp(0, 0, AluOp::v,"LDS_1A")},
+ {op0_lds_1a1d ,AluOp(0, 0, AluOp::v,"LDS_1A1D")},
+ {op0_lds_2a ,AluOp(0, 0, AluOp::v,"LDS_2A")},
+
+ {op1_bcnt_int ,AluOp(1, 0, AluOp::v,"BCNT_INT")},
+ {op1_bcnt_accum_prev_int ,AluOp(1, 0, AluOp::v,"BCNT_ACCUM_PREV_INT")},
+ {op1_bfrev_int ,AluOp(1, 0, AluOp::a,"BFREV_INT")},
+ {op1_ceil ,AluOp(1, 1, AluOp::a,"CEIL")},
+ {op1_cos ,AluOp(1, 1, AluOp::t,"COS")},
+ {op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")},
+ {op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")},
+ {op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")},
+ {op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")},
+ {op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")},
+ {op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")},
+ {op1_flt16_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT16_TO_FLT32")},
+ {op1_flt32_to_flt16 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT16")},
+ {op1_flt32_to_flt64 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT64")},
+ {op1_flt64_to_flt32 ,AluOp(1, 1, AluOp::a,"FLT64_TO_FLT32")},
+ {op1_fract ,AluOp(1, 1, AluOp::a,"FRACT")},
+ {op1_fract_64 ,AluOp(1, 1, AluOp::v,"FRACT_64")},
+ {op1_frexp_64 ,AluOp(1, 1, AluOp::v,"FREXP_64")},
+ {op1_int_to_flt ,AluOp(1, 0, AluOp::t,"INT_TO_FLT")},
+ {op1_ldexp_64 ,AluOp(1, 1, AluOp::v,"LDEXP_64")},
+ {op1_interp_load_p0 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P0")},
+ {op1_interp_load_p10 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P10")},
+ {op1_interp_load_p20 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P20")},
+ {op1_load_store_flags ,AluOp(1, 0, AluOp::v,"LOAD_STORE_FLAGS")},
+ {op1_log_clamped ,AluOp(1, 1, AluOp::t,"LOG_CLAMPED")},
+ {op1_log_ieee ,AluOp(1, 1, AluOp::t,"LOG_IEEE")},
+ {op1_max4 ,AluOp(1, 1, AluOp::v,"MAX4")},
+ {op1_mbcnt_32hi_int ,AluOp(1, 0, AluOp::v,"MBCNT_32HI_INT")},
+ {op1_mbcnt_32lo_accum_prev_int ,AluOp(1, 0, AluOp::v,"MBCNT_32LO_ACCUM_PREV_INT")},
+ {op1_mov ,AluOp(1, 0, AluOp::a,"MOV")},
+ {op1_mova_int ,AluOp(1, 0, AluOp::v,"MOVA_INT")},
+ {op1_not_int ,AluOp(1, 0, AluOp::a,"NOT_INT")},
+ {op1_offset_to_flt ,AluOp(1, 0, AluOp::v,"OFFSET_TO_FLT")},
+ {op1_pred_set_inv ,AluOp(1, 1, AluOp::a,"PRED_SET_INV")},
+ {op1_pred_set_restore ,AluOp(1, 1, AluOp::a,"PRED_SET_RESTORE")},
+ {op1_set_cf_idx0 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX0")}, /* Reads from AR register? */
+ {op1_set_cf_idx1 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX1")}, /* Reads from AR register? */
+ {op1_recip_clamped ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED")},
+ {op1_recip_ff ,AluOp(1, 1, AluOp::t,"RECIP_FF")},
+ {op1_recip_ieee ,AluOp(1, 1, AluOp::t,"RECIP_IEEE")},
+ {op1_recipsqrt_clamped ,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED")},
+ {op1_recipsqrt_ff ,AluOp(1, 1, AluOp::t,"RECIPSQRT_FF")},
+ {op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")},
+ {op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")},
+ {op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")},
+ {op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")},
+ {op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")},
+ {op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")},
+ {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")},
+ {op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")},
+ {op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")},
+ {op1_sin ,AluOp(1, 1, AluOp::t,"SIN")},
+ {op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")},
+ {op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")},
+ {op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")},
+ {op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")},
+ {op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")},
+ {op1_ubyte3_flt ,AluOp(1, 1, AluOp::v,"UBYTE3_FLT")},
+ {op1_uint_to_flt ,AluOp(1, 0, AluOp::t,"UINT_TO_FLT")},
+ {op1_ffbh_uint ,AluOp(1, 0, AluOp::v,"FFBH_UINT")},
+ {op1_ffbl_int ,AluOp(1, 0, AluOp::v,"FFBL_INT")},
+ {op1_ffbh_int ,AluOp(1, 0, AluOp::v,"FFBH_INT")},
+ {op1_flt_to_uint4 ,AluOp(1, 1, AluOp::v,"FLT_TO_UINT4")},
+ {op1v_flt32_to_flt64 ,AluOp(1, 1, AluOp::a,"FLT32_TO_FLT64")},
+ {op1v_flt64_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT64_TO_FLT32")},
+
+ {op2_add ,AluOp(2, 1, AluOp::a,"ADD")},
+ {op2_bfm_int ,AluOp(2, 0, AluOp::v,"BFM_INT")},
+ {op2_mul ,AluOp(2, 1, AluOp::a,"MUL")},
+ {op2_mul_ieee ,AluOp(2, 1, AluOp::a,"MUL_IEEE")},
+ {op2_max ,AluOp(2, 1, AluOp::a,"MAX")},
+ {op2_min ,AluOp(2, 1, AluOp::a,"MIN")},
+ {op2_max_dx10 ,AluOp(2, 1, AluOp::a,"MAX_DX10")},
+ {op2_min_dx10 ,AluOp(2, 1, AluOp::a,"MIN_DX10")},
+ {op2_sete ,AluOp(2, 1, AluOp::a,"SETE")},
+ {op2_setgt ,AluOp(2, 1, AluOp::a,"SETGT")},
+ {op2_setge ,AluOp(2, 1, AluOp::a,"SETGE")},
+ {op2_setne ,AluOp(2, 1, AluOp::a,"SETNE")},
+ {op2_sete_dx10 ,AluOp(2, 1, AluOp::a,"SETE_DX10")},
+ {op2_setgt_dx10 ,AluOp(2, 1, AluOp::a,"SETGT_DX10")},
+ {op2_setge_dx10 ,AluOp(2, 1, AluOp::a,"SETGE_DX10")},
+ {op2_setne_dx10 ,AluOp(2, 1, AluOp::a,"SETNE_DX10")},
+ {op2_ashr_int ,AluOp(2, 0, AluOp::a,"ASHR_INT")},
+ {op2_lshr_int ,AluOp(2, 0, AluOp::a,"LSHR_INT")},
+ {op2_lshl_int ,AluOp(2, 0, AluOp::a,"LSHL_INT")},
+ {op2_mul_64 ,AluOp(2, 1, AluOp::a,"MUL_64")},
+ {op2_pred_setgt_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGT_UINT")},
+ {op2_pred_setge_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGE_UINT")},
+ {op2_pred_sete ,AluOp(2, 1, AluOp::a,"PRED_SETE")},
+ {op2_pred_setgt ,AluOp(2, 1, AluOp::a,"PRED_SETGT")},
+ {op2_pred_setge ,AluOp(2, 1, AluOp::a,"PRED_SETGE")},
+ {op2_pred_setne ,AluOp(2, 1, AluOp::a,"PRED_SETNE")},
+ {op2_pred_set_pop ,AluOp(2, 1, AluOp::a,"PRED_SET_POP")},
+ {op2_pred_sete_push ,AluOp(2, 1, AluOp::a,"PRED_SETE_PUSH")},
+ {op2_pred_setgt_push ,AluOp(2, 1, AluOp::a,"PRED_SETGT_PUSH")},
+ {op2_pred_setge_push ,AluOp(2, 1, AluOp::a,"PRED_SETGE_PUSH")},
+ {op2_pred_setne_push ,AluOp(2, 1, AluOp::a,"PRED_SETNE_PUSH")},
+ {op2_kille ,AluOp(2, 1, AluOp::a,"KILLE")},
+ {op2_killgt ,AluOp(2, 1, AluOp::a,"KILLGT")},
+ {op2_killge ,AluOp(2, 1, AluOp::a,"KILLGE")},
+ {op2_killne ,AluOp(2, 1, AluOp::a,"KILLNE")},
+ {op2_and_int ,AluOp(2, 0, AluOp::a,"AND_INT")},
+ {op2_or_int ,AluOp(2, 0, AluOp::a,"OR_INT")},
+ {op2_xor_int ,AluOp(2, 0, AluOp::a,"XOR_INT")},
+ {op2_add_int ,AluOp(2, 0, AluOp::a,"ADD_INT")},
+ {op2_sub_int ,AluOp(2, 0, AluOp::a,"SUB_INT")},
+ {op2_max_int ,AluOp(2, 0, AluOp::a,"MAX_INT")},
+ {op2_min_int ,AluOp(2, 0, AluOp::a,"MIN_INT")},
+ {op2_max_uint ,AluOp(2, 0, AluOp::a,"MAX_UINT")},
+ {op2_min_uint ,AluOp(2, 0, AluOp::a,"MIN_UINT")},
+ {op2_sete_int ,AluOp(2, 0, AluOp::a,"SETE_INT")},
+ {op2_setgt_int ,AluOp(2, 0, AluOp::a,"SETGT_INT")},
+ {op2_setge_int ,AluOp(2, 0, AluOp::a,"SETGE_INT")},
+ {op2_setne_int ,AluOp(2, 0, AluOp::a,"SETNE_INT")},
+ {op2_setgt_uint ,AluOp(2, 0, AluOp::a,"SETGT_UINT")},
+ {op2_setge_uint ,AluOp(2, 0, AluOp::a,"SETGE_UINT")},
+ {op2_killgt_uint ,AluOp(2, 0, AluOp::a,"KILLGT_UINT")},
+ {op2_killge_uint ,AluOp(2, 0, AluOp::a,"KILLGE_UINT")},
+ {op2_prede_int ,AluOp(2, 0, AluOp::a,"PREDE_INT")},
+ {op2_pred_setgt_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_INT")},
+ {op2_pred_setge_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_INT")},
+ {op2_pred_setne_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_INT")},
+ {op2_kille_int ,AluOp(2, 0, AluOp::a,"KILLE_INT")},
+ {op2_killgt_int ,AluOp(2, 0, AluOp::a,"KILLGT_INT")},
+ {op2_killge_int ,AluOp(2, 0, AluOp::a,"KILLGE_INT")},
+ {op2_killne_int ,AluOp(2, 0, AluOp::a,"KILLNE_INT")},
+ {op2_pred_sete_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETE_PUSH_INT")},
+ {op2_pred_setgt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_PUSH_INT")},
+ {op2_pred_setge_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_PUSH_INT")},
+ {op2_pred_setne_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_PUSH_INT")},
+ {op2_pred_setlt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLT_PUSH_INT")},
+ {op2_pred_setle_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLE_PUSH_INT")},
+ {op2_addc_uint ,AluOp(2, 0, AluOp::a,"ADDC_UINT")},
+ {op2_subb_uint ,AluOp(2, 0, AluOp::a,"SUBB_UINT")},
+ {op2_set_mode ,AluOp(2, 0, AluOp::a,"SET_MODE")},
+ {op2_set_lds_size ,AluOp(2, 0, AluOp::a,"SET_LDS_SIZE")},
+ {op2_mullo_int ,AluOp(2, 0, AluOp::t,"MULLO_INT")},
+ {op2_mulhi_int ,AluOp(2, 0, AluOp::t,"MULHI_INT")},
+ {op2_mullo_uint ,AluOp(2, 0, AluOp::t,"MULLO_UINT")},
+ {op2_mulhi_uint ,AluOp(2, 0, AluOp::t,"MULHI_UINT")},
+ {op2_dot_ieee ,AluOp(2, 1, AluOp::v,"DOT_IEEE")},
+ {op2_mulhi_uint24 ,AluOp(2, 0, AluOp::v,"MULHI_UINT24")},
+ {op2_mul_uint24 ,AluOp(2, 0, AluOp::v,"MUL_UINT24")},
+ {op2_sete_64 ,AluOp(2, 1, AluOp::v,"SETE_64")},
+ {op2_setne_64 ,AluOp(2, 1, AluOp::v,"SETNE_64")},
+ {op2_setgt_64 ,AluOp(2, 1, AluOp::v,"SETGT_64")},
+ {op2_setge_64 ,AluOp(2, 1, AluOp::v,"SETGE_64")},
+ {op2_min_64 ,AluOp(2, 1, AluOp::v,"MIN_64")},
+ {op2_max_64 ,AluOp(2, 1, AluOp::v,"MAX_64")},
+ {op2_dot4 ,AluOp(2, 1, AluOp::v,"DOT4")},
+ {op2_dot4_ieee ,AluOp(2, 1, AluOp::v,"DOT4_IEEE")},
+ {op2_cube ,AluOp(2, 1, AluOp::v,"CUBE")},
+ {op2_pred_setgt_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGT_64")},
+ {op2_pred_sete_64 ,AluOp(2, 1, AluOp::v,"PRED_SETE_64")},
+ {op2_pred_setge_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGE_64")},
+ {OP2V_MUL_64 ,AluOp(2, 1, AluOp::v,"MUL_64")},
+ {op2_add_64 ,AluOp(2, 1, AluOp::v,"ADD_64")},
+ {op2_sad_accum_prev_uint ,AluOp(2, 0, AluOp::v,"SAD_ACCUM_PREV_UINT")},
+ {op2_dot ,AluOp(2, 1, AluOp::v,"DOT")},
+ {op2_mul_prev ,AluOp(2, 1, AluOp::v,"MUL_PREV")},
+ {op2_mul_ieee_prev ,AluOp(2, 1, AluOp::v,"MUL_IEEE_PREV")},
+ {op2_add_prev ,AluOp(2, 1, AluOp::v,"ADD_PREV")},
+ {op2_muladd_prev ,AluOp(2, 1, AluOp::v,"MULADD_PREV")},
+ {op2_muladd_ieee_prev ,AluOp(2, 1, AluOp::v,"MULADD_IEEE_PREV")},
+ {op2_interp_xy ,AluOp(2, 1, AluOp::v,"INTERP_XY")},
+ {op2_interp_zw ,AluOp(2, 1, AluOp::v,"INTERP_ZW")},
+ {op2_interp_x ,AluOp(2, 1, AluOp::v,"INTERP_X")},
+ {op2_interp_z ,AluOp(2, 1, AluOp::v,"INTERP_Z")},
+
+ {op3_bfe_uint ,AluOp(3, 0, AluOp::v,"BFE_UINT")},
+ {op3_bfe_int ,AluOp(3, 0, AluOp::v,"BFE_INT")},
+ {op3_bfi_int ,AluOp(3, 0, AluOp::v,"BFI_INT")},
+ {op3_fma ,AluOp(3, 1, AluOp::v,"FMA")},
+ {op3_cndne_64 ,AluOp(3, 1, AluOp::v,"CNDNE_64")},
+ {op3_fma_64 ,AluOp(3, 1, AluOp::v,"FMA_64")},
+ {op3_lerp_uint ,AluOp(3, 0, AluOp::v,"LERP_UINT")},
+ {op3_bit_align_int ,AluOp(3, 0, AluOp::v,"BIT_ALIGN_INT")},
+ {op3_byte_align_int ,AluOp(3, 0, AluOp::v,"BYTE_ALIGN_INT")},
+ {op3_sad_accum_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_UINT")},
+ {op3_sad_accum_hi_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_HI_UINT")},
+ {op3_muladd_uint24 ,AluOp(3, 0, AluOp::v,"MULADD_UINT24")},
+ {op3_lds_idx_op ,AluOp(3, 0, AluOp::x,"LDS_IDX_OP")},
+ {op3_muladd ,AluOp(3, 1, AluOp::a,"MULADD")},
+ {op3_muladd_m2 ,AluOp(3, 1, AluOp::a,"MULADD_M2")},
+ {op3_muladd_m4 ,AluOp(3, 1, AluOp::a,"MULADD_M4")},
+ {op3_muladd_d2 ,AluOp(3, 1, AluOp::a,"MULADD_D2")},
+ {op3_muladd_ieee ,AluOp(3, 1, AluOp::a,"MULADD_IEEE")},
+ {op3_cnde ,AluOp(3, 1, AluOp::a,"CNDE")},
+ {op3_cndgt ,AluOp(3, 1, AluOp::a,"CNDGT")},
+ {op3_cndge ,AluOp(3, 1, AluOp::a,"CNDGE")},
+ {op3_cnde_int ,AluOp(3, 0, AluOp::a,"CNDE_INT")},
+ {op3_cndgt_int ,AluOp(3, 0, AluOp::a,"CNDGT_INT")},
+ {op3_cndge_int ,AluOp(3, 0, AluOp::a,"CNDGE_INT")},
+ {op3_mul_lit ,AluOp(3, 1, AluOp::t,"MUL_LIT")}
+};
+
+const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = {
+ {ALU_SRC_LDS_OQ_A, {false, "LDS_OQ_A"}},
+ {ALU_SRC_LDS_OQ_B, {false, "LDS_OQ_B"}},
+ {ALU_SRC_LDS_OQ_A_POP, {false, "LDS_OQ_A_POP"}},
+ {ALU_SRC_LDS_OQ_B_POP, {false, "LDS_OQ_B_POP"}},
+ {ALU_SRC_LDS_DIRECT_A, {false, "LDS_DIRECT_A"}},
+ {ALU_SRC_LDS_DIRECT_B, {false, "LDS_DIRECT_B"}},
+ {ALU_SRC_TIME_HI, {false, "TIME_HI"}},
+ {ALU_SRC_TIME_LO, {false, "TIME_LO"}},
+ {ALU_SRC_MASK_HI, {false, "MASK_HI"}},
+ {ALU_SRC_MASK_LO, {false, "MASK_LO"}},
+ {ALU_SRC_HW_WAVE_ID, {false, "HW_WAVE_ID"}},
+ {ALU_SRC_SIMD_ID, {false, "SIMD_ID"}},
+ {ALU_SRC_SE_ID, {false, "SE_ID"}},
+ {ALU_SRC_HW_THREADGRP_ID, {false, "HW_THREADGRP_ID"}},
+ {ALU_SRC_WAVE_ID_IN_GRP, {false, "WAVE_ID_IN_GRP"}},
+ {ALU_SRC_NUM_THREADGRP_WAVES, {false, "NUM_THREADGRP_WAVES"}},
+ {ALU_SRC_HW_ALU_ODD, {false, "HW_ALU_ODD"}},
+ {ALU_SRC_LOOP_IDX, {false, "LOOP_IDX"}},
+ {ALU_SRC_PARAM_BASE_ADDR, {false, "PARAM_BASE_ADDR"}},
+ {ALU_SRC_NEW_PRIM_MASK, {false, "NEW_PRIM_MASK"}},
+ {ALU_SRC_PRIM_MASK_HI, {false, "PRIM_MASK_HI"}},
+ {ALU_SRC_PRIM_MASK_LO, {false, "PRIM_MASK_LO"}},
+ {ALU_SRC_1_DBL_L, {false, "1.0L"}},
+ {ALU_SRC_1_DBL_M, {false, "1.0H"}},
+ {ALU_SRC_0_5_DBL_L, {false, "0.5L"}},
+ {ALU_SRC_0_5_DBL_M, {false, "0.5H"}},
+ {ALU_SRC_0, {false, "0"}},
+ {ALU_SRC_1, {false, "1.0"}},
+ {ALU_SRC_1_INT, {false, "1"}},
+ {ALU_SRC_M_1_INT, {false, "-1"}},
+ {ALU_SRC_0_5, {false, "0.5"}},
+ {ALU_SRC_LITERAL, {true, "ALU_SRC_LITERAL"}},
+ {ALU_SRC_PV, {true, "PV"}},
+ {ALU_SRC_PS, {false, "PS"}}
+};
+
+const std::map<ESDOp, LDSOp> lds_ops = {
+ {DS_OP_ADD , {2, "DS_ADD"}},
+ {DS_OP_SUB , {2, "DS_SUB"}},
+ {DS_OP_RSUB , {2, "DS_RSUB"}},
+ {DS_OP_INC , {2, "DS_INC"}},
+ {DS_OP_DEC , {2, "DS_DEC"}},
+ {DS_OP_MIN_INT , {2, "DS_MIN_INT"}},
+ {DS_OP_MAX_INT , {2, "DS_MAX_INT"}},
+ {DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}},
+ {DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}},
+ {DS_OP_AND , {2, "DS_AND"}},
+ {DS_OP_OR , {2, "DS_OR"}},
+ {DS_OP_XOR , {2, "DS_XOR"}},
+ {DS_OP_MSKOR , {3, "DS_MSKOR"}},
+ {DS_OP_WRITE , {2, "DS_WRITE"}},
+ {DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}},
+ {DS_OP_WRITE2 , {3, "DS_WRITE2"}},
+ {DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}},
+ {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}},
+ {DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}},
+ {DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}},
+ {DS_OP_ADD_RET , {2, "DS_ADD_RET"}},
+ {DS_OP_SUB_RET , {2, "DS_SUB_RET"}},
+ {DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}},
+ {DS_OP_INC_RET , {2, "DS_INC_RET"}},
+ {DS_OP_DEC_RET , {2, "DS_DEC_RET"}},
+ {DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}},
+ {DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}},
+ {DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}},
+ {DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}},
+ {DS_OP_AND_RET , {2, "DS_AND_RET"}},
+ {DS_OP_OR_RET , {2, "DS_OR_RET"}},
+ {DS_OP_XOR_RET , {2, "DS_XOR_RET"}},
+ {DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}},
+ {DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}},
+ {DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}},
+ {DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}},
+ {DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}},
+ {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}},
+ {DS_OP_READ_RET , {1, "DS_READ_RET"}},
+ {DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}},
+ {DS_OP_READ2_RET , {2, "DS_READ2_RET"}},
+ {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}},
+ {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}},
+ {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}},
+ {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}},
+ {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}},
+ {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}}
+};
+
+}
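[A short usage sketch for the alu_ops table above; can_channel() is defined with the AluOp struct in the header that follows:

  #include "sfn_alu_defines.h"

  /* true if the transcendental unit may execute this op */
  bool runs_in_trans_unit(r600::EAluOp op)
  {
     auto it = r600::alu_ops.find(op);
     return it != r600::alu_ops.end() &&
            it->second.can_channel(r600::AluOp::t);
  }
]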
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
new file mode 100644
index 000000000..4481c49db
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h
@@ -0,0 +1,377 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef r600_sfn_alu_defines_h
+#define r600_sfn_alu_defines_h
+
+#include <map>
+#include <bitset>
+
+namespace r600 {
+
+/* ALU op2 instructions use bits 17:7; the top three bits are always zero. */
+enum EAluOp {
+ op2_add = 0,
+ op2_mul = 1,
+ op2_mul_ieee = 2,
+ op2_max = 3,
+ op2_min = 4,
+ op2_max_dx10 = 5,
+ op2_min_dx10 = 6,
+ op2_sete = 8,
+ op2_setgt = 9,
+ op2_setge = 10,
+ op2_setne = 11,
+ op2_sete_dx10 = 12,
+ op2_setgt_dx10 = 13,
+ op2_setge_dx10 = 14,
+ op2_setne_dx10 = 15,
+ op1_fract = 16,
+ op1_trunc = 17,
+ op1_ceil = 18,
+ op1_rndne = 19,
+ op1_floor = 20,
+ op2_ashr_int = 21,
+ op2_lshr_int = 22,
+ op2_lshl_int = 23,
+ op1_mov = 25,
+ op0_nop = 26,
+ op2_mul_64 = 27,
+ op1_flt64_to_flt32 = 28,
+ op1_flt32_to_flt64 = 29,
+ op2_pred_setgt_uint = 30,
+ op2_pred_setge_uint = 31,
+ op2_pred_sete = 32,
+ op2_pred_setgt = 33,
+ op2_pred_setge = 34,
+ op2_pred_setne = 35,
+ op1_pred_set_inv = 36,
+ op2_pred_set_pop = 37,
+ op0_pred_set_clr = 38,
+ op1_pred_set_restore = 39,
+ op2_pred_sete_push = 40,
+ op2_pred_setgt_push = 41,
+ op2_pred_setge_push = 42,
+ op2_pred_setne_push = 43,
+ op2_kille = 44,
+ op2_killgt = 45,
+ op2_killge = 46,
+ op2_killne = 47,
+ op2_and_int = 48,
+ op2_or_int = 49,
+ op2_xor_int = 50,
+ op1_not_int = 51,
+ op2_add_int = 52,
+ op2_sub_int = 53,
+ op2_max_int = 54,
+ op2_min_int = 55,
+ op2_max_uint = 56,
+ op2_min_uint = 57,
+ op2_sete_int = 58,
+ op2_setgt_int = 59,
+ op2_setge_int = 60,
+ op2_setne_int = 61,
+ op2_setgt_uint = 62,
+ op2_setge_uint = 63,
+ op2_killgt_uint = 64,
+ op2_killge_uint = 65,
+ op2_prede_int = 66,
+ op2_pred_setgt_int = 67,
+ op2_pred_setge_int = 68,
+ op2_pred_setne_int = 69,
+ op2_kille_int = 70,
+ op2_killgt_int = 71,
+ op2_killge_int = 72,
+ op2_killne_int = 73,
+ op2_pred_sete_push_int = 74,
+ op2_pred_setgt_push_int = 75,
+ op2_pred_setge_push_int = 76,
+ op2_pred_setne_push_int = 77,
+ op2_pred_setlt_push_int = 78,
+ op2_pred_setle_push_int = 79,
+ op1_flt_to_int = 80,
+ op1_bfrev_int = 81,
+ op2_addc_uint = 82,
+ op2_subb_uint = 83,
+ op0_group_barrier = 84,
+ op0_group_seq_begin = 85,
+ op0_group_seq_end = 86,
+ op2_set_mode = 87,
+ op1_set_cf_idx0 = 88,
+ op1_set_cf_idx1 = 89,
+ op2_set_lds_size = 90,
+ op1_exp_ieee = 129,
+ op1_log_clamped = 130,
+ op1_log_ieee = 131,
+ op1_recip_clamped = 132,
+ op1_recip_ff = 133,
+ op1_recip_ieee = 134,
+ op1_recipsqrt_clamped = 135,
+ op1_recipsqrt_ff = 136,
+ op1_recipsqrt_ieee1 = 137,
+ op1_sqrt_ieee = 138,
+ op1_sin = 141,
+ op1_cos = 142,
+ op2_mullo_int = 143,
+ op2_mulhi_int = 144,
+ op2_mullo_uint = 145,
+ op2_mulhi_uint = 146,
+ op1_recip_int = 147,
+ op1_recip_uint = 148,
+ op1_recip_64 = 149,
+ op1_recip_clamped_64 = 150,
+ op1_recipsqrt_64 = 151,
+ op1_recipsqrt_clamped_64 = 152,
+ op1_sqrt_64 = 153,
+ op1_flt_to_uint = 154,
+ op1_int_to_flt = 155,
+ op1_uint_to_flt = 156,
+ op2_bfm_int = 160,
+ op1_flt32_to_flt16 = 162,
+ op1_flt16_to_flt32 = 163,
+ op1_ubyte0_flt = 164,
+ op1_ubyte1_flt = 165,
+ op1_ubyte2_flt = 166,
+ op1_ubyte3_flt = 167,
+ op1_bcnt_int = 170,
+ op1_ffbh_uint = 171,
+ op1_ffbl_int = 172,
+ op1_ffbh_int = 173,
+ op1_flt_to_uint4 = 174,
+ op2_dot_ieee = 175,
+ op1_flt_to_int_rpi = 176,
+ op1_flt_to_int_floor = 177,
+ op2_mulhi_uint24 = 178,
+ op1_mbcnt_32hi_int = 179,
+ op1_offset_to_flt = 180,
+ op2_mul_uint24 = 181,
+ op1_bcnt_accum_prev_int = 182,
+ op1_mbcnt_32lo_accum_prev_int = 183,
+ op2_sete_64 = 184,
+ op2_setne_64 = 185,
+ op2_setgt_64 = 186,
+ op2_setge_64 = 187,
+ op2_min_64 = 188,
+ op2_max_64 = 189,
+ op2_dot4 = 190,
+ op2_dot4_ieee = 191,
+ op2_cube = 192,
+ op1_max4 = 193,
+ op1_frexp_64 = 196,
+ op1_ldexp_64 = 197,
+ op1_fract_64 = 198,
+ op2_pred_setgt_64 = 199,
+   op2_pred_sete_64 = 200,
+ op2_pred_setge_64 = 201,
+ OP2V_MUL_64 = 202,
+ op2_add_64 = 203,
+ op1_mova_int = 204,
+ op1v_flt64_to_flt32 = 205,
+ op1v_flt32_to_flt64 = 206,
+ op2_sad_accum_prev_uint = 207,
+ op2_dot = 208,
+ op2_mul_prev = 209,
+ op2_mul_ieee_prev = 210,
+ op2_add_prev = 211,
+ op2_muladd_prev = 212,
+ op2_muladd_ieee_prev = 213,
+ op2_interp_xy = 214,
+ op2_interp_zw = 215,
+ op2_interp_x = 216,
+ op2_interp_z = 217,
+ op0_store_flags = 218,
+ op1_load_store_flags = 219,
+ op0_lds_1a = 220,
+ op0_lds_1a1d = 221,
+ op0_lds_2a = 223,
+ op1_interp_load_p0 = 224,
+   op1_interp_load_p10 = 225,
+   op1_interp_load_p20 = 226,
+   // op3 opcodes are stored left-shifted by 6 bits
+ op3_bfe_uint = 4<< 6,
+ op3_bfe_int = 5<< 6,
+ op3_bfi_int = 6<< 6,
+ op3_fma = 7<< 6,
+ op3_cndne_64 = 9<< 6,
+ op3_fma_64 = 10<< 6,
+ op3_lerp_uint = 11<< 6,
+ op3_bit_align_int = 12<< 6,
+ op3_byte_align_int = 13<< 6,
+ op3_sad_accum_uint = 14<< 6,
+ op3_sad_accum_hi_uint = 15<< 6,
+ op3_muladd_uint24 = 16<< 6,
+ op3_lds_idx_op = 17<< 6,
+ op3_muladd = 20<< 6,
+ op3_muladd_m2 = 21<< 6,
+ op3_muladd_m4 = 22<< 6,
+ op3_muladd_d2 = 23<< 6,
+ op3_muladd_ieee = 24<< 6,
+ op3_cnde = 25<< 6,
+ op3_cndgt = 26<< 6,
+ op3_cndge = 27<< 6,
+ op3_cnde_int = 28<< 6,
+ op3_cndgt_int = 29<< 6,
+ op3_cndge_int = 30<< 6,
+ op3_mul_lit = 31<< 6
+};
+
+
+
+using AluOpFlags=std::bitset<32>;
+
+struct AluOp {
+ static constexpr int x = 1;
+ static constexpr int y = 2;
+ static constexpr int z = 4;
+ static constexpr int w = 8;
+ static constexpr int v = 15;
+ static constexpr int t = 16;
+ static constexpr int a = 31;
+
+ AluOp(int ns, int f, int um, const char *n):
+ nsrc(ns), is_float(f), unit_mask(um), name(n)
+ {
+ }
+
+ bool can_channel(int flags) const {
+ return flags & unit_mask;
+ }
+
+ int nsrc: 4;
+ int is_float:1;
+ int unit_mask: 5;
+ const char *name;
+};
+
+extern const std::map<EAluOp, AluOp> alu_ops;
+
+enum AluInlineConstants {
+ ALU_SRC_LDS_OQ_A = 219,
+ ALU_SRC_LDS_OQ_B = 220,
+ ALU_SRC_LDS_OQ_A_POP = 221,
+ ALU_SRC_LDS_OQ_B_POP = 222,
+ ALU_SRC_LDS_DIRECT_A = 223,
+ ALU_SRC_LDS_DIRECT_B = 224,
+ ALU_SRC_TIME_HI = 227,
+ ALU_SRC_TIME_LO = 228,
+ ALU_SRC_MASK_HI = 229,
+ ALU_SRC_MASK_LO = 230,
+ ALU_SRC_HW_WAVE_ID = 231,
+ ALU_SRC_SIMD_ID = 232,
+ ALU_SRC_SE_ID = 233,
+ ALU_SRC_HW_THREADGRP_ID = 234,
+ ALU_SRC_WAVE_ID_IN_GRP = 235,
+ ALU_SRC_NUM_THREADGRP_WAVES = 236,
+ ALU_SRC_HW_ALU_ODD = 237,
+ ALU_SRC_LOOP_IDX = 238,
+ ALU_SRC_PARAM_BASE_ADDR = 240,
+ ALU_SRC_NEW_PRIM_MASK = 241,
+ ALU_SRC_PRIM_MASK_HI = 242,
+ ALU_SRC_PRIM_MASK_LO = 243,
+ ALU_SRC_1_DBL_L = 244,
+ ALU_SRC_1_DBL_M = 245,
+ ALU_SRC_0_5_DBL_L = 246,
+ ALU_SRC_0_5_DBL_M = 247,
+ ALU_SRC_0 = 248,
+ ALU_SRC_1 = 249,
+ ALU_SRC_1_INT = 250,
+ ALU_SRC_M_1_INT = 251,
+ ALU_SRC_0_5 = 252,
+ ALU_SRC_LITERAL = 253,
+ ALU_SRC_PV = 254,
+ ALU_SRC_PS = 255,
+ ALU_SRC_PARAM_BASE = 0x1C0,
+ ALU_SRC_UNKNOWN
+};
+
+struct AluInlineConstantDescr {
+ bool use_chan;
+ const char *descr;
+};
+
+extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
+
+enum ESDOp {
+ DS_OP_ADD = 0,
+ DS_OP_SUB = 1,
+ DS_OP_RSUB = 2,
+ DS_OP_INC = 3,
+ DS_OP_DEC = 4,
+ DS_OP_MIN_INT = 5,
+ DS_OP_MAX_INT = 6,
+ DS_OP_MIN_UINT = 7,
+ DS_OP_MAX_UINT = 8,
+ DS_OP_AND = 9,
+ DS_OP_OR = 10,
+ DS_OP_XOR = 11,
+ DS_OP_MSKOR = 12,
+ DS_OP_WRITE = 13,
+ DS_OP_WRITE_REL = 14,
+ DS_OP_WRITE2 = 15,
+ DS_OP_CMP_STORE = 16,
+ DS_OP_CMP_STORE_SPF = 17,
+ DS_OP_BYTE_WRITE = 18,
+ DS_OP_SHORT_WRITE = 19,
+ DS_OP_ADD_RET = 32,
+ DS_OP_SUB_RET = 33,
+ DS_OP_RSUB_RET = 34,
+ DS_OP_INC_RET = 35,
+ DS_OP_DEC_RET = 36,
+ DS_OP_MIN_INT_RET = 37,
+ DS_OP_MAX_INT_RET = 38,
+ DS_OP_MIN_UINT_RET = 39,
+ DS_OP_MAX_UINT_RET = 40,
+ DS_OP_AND_RET = 41,
+ DS_OP_OR_RET = 42,
+ DS_OP_XOR_RET = 43,
+ DS_OP_MSKOR_RET = 44,
+ DS_OP_XCHG_RET = 45,
+ DS_OP_XCHG_REL_RET = 46,
+ DS_OP_XCHG2_RET = 47,
+ DS_OP_CMP_XCHG_RET = 48,
+ DS_OP_CMP_XCHG_SPF_RET = 49,
+ DS_OP_READ_RET = 50,
+ DS_OP_READ_REL_RET = 51,
+ DS_OP_READ2_RET = 52,
+ DS_OP_READWRITE_RET = 53,
+ DS_OP_BYTE_READ_RET = 54,
+ DS_OP_UBYTE_READ_RET = 55,
+ DS_OP_SHORT_READ_RET = 56,
+ DS_OP_USHORT_READ_RET = 57,
+ DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
+ DS_OP_INVALID = 64
+};
+
+struct LDSOp {
+ int nsrc;
+ const char *name;
+};
+
+extern const std::map<ESDOp, LDSOp> lds_ops;
+
+}
+
+#endif // r600_sfn_alu_defines_h
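[The unit-mask constants above encode lane sets; these compile-time checks restate the implied relations (v covers the four vector lanes, a allows vector or trans):

  static_assert(r600::AluOp::v == (r600::AluOp::x | r600::AluOp::y |
                                   r600::AluOp::z | r600::AluOp::w),
                "v covers the four vector lanes");
  static_assert(r600::AluOp::a == (r600::AluOp::v | r600::AluOp::t),
                "a allows any unit");
]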
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp
new file mode 100644
index 000000000..681b89d86
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp
@@ -0,0 +1,139 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_callstack.h"
+
+namespace r600 {
+
+CallStack::CallStack(r600_bytecode& bc):
+ m_bc(bc)
+{
+
+}
+
+CallStack::~CallStack()
+{
+}
+
+int CallStack::push(unsigned type)
+{
+ switch (type) {
+ case FC_PUSH_VPM:
+ ++m_bc.stack.push;
+ break;
+ case FC_PUSH_WQM:
+ ++m_bc.stack.push_wqm;
+ break;
+ case FC_LOOP:
+ ++m_bc.stack.loop;
+ break;
+ default:
+ assert(0);
+ }
+
+ return update_max_depth(type);
+}
+
+void CallStack::pop(unsigned type)
+{
+ switch(type) {
+ case FC_PUSH_VPM:
+ --m_bc.stack.push;
+ assert(m_bc.stack.push >= 0);
+ break;
+ case FC_PUSH_WQM:
+ --m_bc.stack.push_wqm;
+ assert(m_bc.stack.push_wqm >= 0);
+ break;
+ case FC_LOOP:
+ --m_bc.stack.loop;
+ assert(m_bc.stack.loop >= 0);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+int CallStack::update_max_depth(unsigned type)
+{
+
+ r600_stack_info& stack = m_bc.stack;
+ int elements;
+ int entries;
+
+ int entry_size = stack.entry_size;
+
+ elements = (stack.loop + stack.push_wqm ) * entry_size;
+ elements += stack.push;
+
+ switch (m_bc.chip_class) {
+ case R600:
+ case R700:
+ /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
+ * the stack must be reserved to hold the current active/continue
+ * masks */
+ if (type == FC_PUSH_VPM || stack.push > 0) {
+ elements += 2;
+ }
+ break;
+ case CAYMAN:
+ /* r9xx: any stack operation on empty stack consumes 2 additional
+ * elements */
+ elements += 2;
+ break;
+ case EVERGREEN:
+ /* r8xx+: 2 extra elements are not always required, but one extra
+ * element must be added for each of the following cases:
+ * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
+ * stack usage.
+ * (Currently we don't use ALU_ELSE_AFTER.)
+ * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
+    *    PUSH instruction is executed.
+ *
+ * NOTE: it seems we also need to reserve additional element in some
+ * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
+ * then STACK_SIZE should be 2 instead of 1 */
+ if (type == FC_PUSH_VPM || stack.push > 0) {
+ elements += 1;
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ entry_size = 4;
+
+ entries = (elements + (entry_size - 1)) / entry_size;
+
+ if (entries > stack.max_entries)
+ stack.max_entries = entries;
+
+ return elements;
+}
+
+}
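[A worked example of update_max_depth() for the NOTE above, on Evergreen where entry_size is forced to 4: with four nested PUSH_VPM frames,

  elements = (loop + push_wqm) * entry_size + push = (0 + 0) * 4 + 4 = 4
  elements += 1                         /* non-WQM push extra on r8xx+ -> 5 */
  entries  = (5 + 3) / 4 = 2

so STACK_SIZE comes out as 2 rather than 1, matching the NOTE.]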
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h
new file mode 100644
index 000000000..e1babb7c1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h
@@ -0,0 +1,47 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_CALLSTACK_HH
+#define SFN_CALLSTACK_HH
+
+#include "gallium/drivers/r600/r600_asm.h"
+
+namespace r600 {
+
+class CallStack {
+public:
+ CallStack(r600_bytecode& bc);
+ ~CallStack();
+ int push(unsigned type);
+ void pop(unsigned type);
+ int update_max_depth(unsigned type);
+private:
+ r600_bytecode& m_bc;
+};
+
+}
+
+#endif // SFN_CALLSTACK_HH
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp
new file mode 100644
index 000000000..ad9a03f8f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp
@@ -0,0 +1,195 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_conditionaljumptracker.h"
+#include "sfn_debug.h"
+
+#include <stack>
+#include <vector>
+#include <memory>
+#include <iostream>
+
+namespace r600 {
+
+using std::stack;
+using std::vector;
+using std::shared_ptr;
+
+struct StackFrame {
+
+ StackFrame(r600_bytecode_cf *s, JumpType t):
+ type(t),
+ start(s)
+ {}
+
+ virtual ~StackFrame();
+
+ JumpType type;
+ r600_bytecode_cf *start;
+ vector<r600_bytecode_cf *> mid;
+
+ virtual void fixup_mid(r600_bytecode_cf *cf) = 0;
+ virtual void fixup_pop(r600_bytecode_cf *final) = 0;
+};
+
+using PStackFrame = shared_ptr<StackFrame>;
+
+struct IfFrame : public StackFrame {
+ IfFrame(r600_bytecode_cf *s);
+ void fixup_mid(r600_bytecode_cf *cf) override;
+ void fixup_pop(r600_bytecode_cf *final) override;
+};
+
+struct LoopFrame : public StackFrame {
+ LoopFrame(r600_bytecode_cf *s);
+ void fixup_mid(r600_bytecode_cf *cf) override;
+ void fixup_pop(r600_bytecode_cf *final) override;
+};
+
+struct ConditionalJumpTrackerImpl {
+ ConditionalJumpTrackerImpl();
+ stack<PStackFrame> m_jump_stack;
+ stack<PStackFrame> m_loop_stack;
+ int m_current_loop_stack_pos;
+};
+
+ConditionalJumpTrackerImpl::ConditionalJumpTrackerImpl():
+ m_current_loop_stack_pos(0)
+{
+
+}
+
+ConditionalJumpTracker::~ConditionalJumpTracker()
+{
+ delete impl;
+}
+
+ConditionalJumpTracker::ConditionalJumpTracker()
+{
+ impl = new ConditionalJumpTrackerImpl();
+}
+
+void ConditionalJumpTracker::push(r600_bytecode_cf *start, JumpType type)
+{
+ PStackFrame f;
+ switch (type) {
+ case jt_if:
+ f.reset(new IfFrame(start));
+ break;
+ case jt_loop:
+ f.reset(new LoopFrame(start));
+ impl->m_loop_stack.push(f);
+ break;
+ }
+ impl->m_jump_stack.push(f);
+}
+
+bool ConditionalJumpTracker::pop(r600_bytecode_cf *final, JumpType type)
+{
+ if (impl->m_jump_stack.empty())
+ return false;
+
+ auto& frame = *impl->m_jump_stack.top();
+ if (frame.type != type)
+ return false;
+
+ frame.fixup_pop(final);
+ if (frame.type == jt_loop)
+ impl->m_loop_stack.pop();
+ impl->m_jump_stack.pop();
+ return true;
+}
+
+bool ConditionalJumpTracker::add_mid(r600_bytecode_cf *source, JumpType type)
+{
+ if (impl->m_jump_stack.empty()) {
+ sfn_log << "Jump stack empty\n";
+ return false;
+ }
+
+ PStackFrame pframe;
+ if (type == jt_loop) {
+ if (impl->m_loop_stack.empty()) {
+ sfn_log << "Loop jump stack empty\n";
+ return false;
+ }
+ pframe = impl->m_loop_stack.top();
+ } else {
+ pframe = impl->m_jump_stack.top();
+ }
+
+ pframe->mid.push_back(source);
+ pframe->fixup_mid(source);
+ return true;
+}
+
+IfFrame::IfFrame(r600_bytecode_cf *s):
+ StackFrame (s, jt_if)
+{
+}
+
+StackFrame::~StackFrame()
+{
+}
+
+void IfFrame::fixup_mid(r600_bytecode_cf *source)
+{
+ /* JUMP target is ELSE */
+ start->cf_addr = source->id;
+}
+
+void IfFrame::fixup_pop(r600_bytecode_cf *final)
+{
+ /* JUMP or ELSE target is one past last CF instruction */
+ unsigned offset = final->eg_alu_extended ? 4 : 2;
+ auto src = mid.empty() ? start : mid[0];
+ src->cf_addr = final->id + offset;
+ src->pop_count = 1;
+}
+
+LoopFrame::LoopFrame(r600_bytecode_cf *s):
+ StackFrame(s, jt_loop)
+{
+}
+
+void LoopFrame::fixup_mid(UNUSED r600_bytecode_cf *mid)
+{
+}
+
+void LoopFrame::fixup_pop(r600_bytecode_cf *final)
+{
+ /* LOOP END address is past LOOP START */
+ final->cf_addr = start->id + 2;
+
+   /* LOOP START address is past LOOP END */
+ start->cf_addr = final->id + 2;
+
+   /* BREAK and CONTINUE point at LOOP END */
+ for (auto m : mid)
+ m->cf_addr = final->id;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
new file mode 100644
index 000000000..76cc02a27
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h
@@ -0,0 +1,69 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_CONDITIONALJUMPTRACKER_H
+#define SFN_CONDITIONALJUMPTRACKER_H
+
+#include "gallium/drivers/r600/r600_asm.h"
+
+namespace r600 {
+
+enum JumpType {
+ jt_loop,
+ jt_if
+};
+
+/** Tracks the jump locations of conditional branches and loops and
+ *  fixes them up to point at their targets once those are known.
+ */
+
+class ConditionalJumpTracker
+{
+public:
+ ConditionalJumpTracker();
+ ~ConditionalJumpTracker();
+
+   /* Mark the start of a loop or an if/else */
+
+ void push(r600_bytecode_cf *start, JumpType type);
+
+   /* Mark the end of a loop or an if/else and fix up the jump sites */
+ bool pop(r600_bytecode_cf *final, JumpType type);
+
+   /* Add middle sites to the current frame, i.e. continue and
+    * break inside loops, and else in if-then-else constructs.
+    */
+ bool add_mid(r600_bytecode_cf *source, JumpType type);
+
+private:
+ struct ConditionalJumpTrackerImpl * impl;
+};
+
+}
+
+#endif // SFN_CONDITIONALJUMPTRACKER_H
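[A usage sketch for the tracker; the cf pointers would come from r600_bytecode_add_cf() during emission, and the variable names here are invented:

  ConditionalJumpTracker jt;
  jt.push(jump_cf, jt_if);    // CF_OP_JUMP opens the frame
  jt.add_mid(else_cf, jt_if); // the ELSE becomes the JUMP's fixup site
  jt.pop(pop_cf, jt_if);      // rewrites cf_addr/pop_count on JUMP and ELSE
]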
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp
new file mode 100644
index 000000000..d993d42af
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp
@@ -0,0 +1,139 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_debug.h"
+#include "sfn_debug.h"
+
+namespace r600 {
+
+class stderr_streambuf : public std::streambuf
+{
+public:
+ stderr_streambuf();
+protected:
+ int sync();
+ int overflow(int c);
+ std::streamsize xsputn ( const char *s, std::streamsize n );
+};
+
+stderr_streambuf::stderr_streambuf()
+{
+
+}
+
+int stderr_streambuf::sync()
+{
+ fflush(stderr);
+ return 0;
+}
+
+int stderr_streambuf::overflow(int c)
+{
+ fputc(c, stderr);
+ return 0;
+}
+
+static const struct debug_named_value sfn_debug_options[] = {
+ {"instr", SfnLog::instr, "Log all consumed nir instructions"},
+ {"ir", SfnLog::r600ir, "Log created R600 IR"},
+ {"cc", SfnLog::cc, "Log R600 IR to assembly code creation"},
+ {"noerr", SfnLog::err, "Don't log shader conversion errors"},
+ {"si", SfnLog::shader_info, "Log shader info (non-zero values)"},
+ {"ts", SfnLog::test_shader, "Log shaders in tests"},
+ {"reg", SfnLog::reg, "Log register allocation and lookup"},
+ {"io", SfnLog::io, "Log shader in and output"},
+ {"ass", SfnLog::assembly, "Log IR to assembly conversion"},
+ {"flow", SfnLog::flow, "Log Flow instructions"},
+ {"merge", SfnLog::merge, "Log register merge operations"},
+ {"nomerge", SfnLog::nomerge, "Skip register merge step"},
+ {"tex", SfnLog::tex, "Log texture ops"},
+ {"trans", SfnLog::trans, "Log generic translation messages"},
+ DEBUG_NAMED_VALUE_END
+};
+
+SfnLog sfn_log;
+
+std::streamsize stderr_streambuf::xsputn ( const char *s, std::streamsize n )
+{
+ std::streamsize i = n;
+ while (i--)
+ fputc(*s++, stderr);
+ return n;
+}
+
+SfnLog::SfnLog():
+ m_active_log_flags(0),
+ m_log_mask(0),
+ m_output(new stderr_streambuf())
+{
+ m_log_mask = debug_get_flags_option("R600_NIR_DEBUG", sfn_debug_options, 0);
+ m_log_mask ^= err;
+}
+
+SfnLog& SfnLog::operator << (SfnLog::LogFlag const l)
+{
+ m_active_log_flags = l;
+ return *this;
+}
+
+SfnLog& SfnLog::operator << (UNUSED std::ostream & (*f)(std::ostream&))
+{
+ if (m_active_log_flags & m_log_mask)
+ m_output << f;
+ return *this;
+}
+
+SfnLog& SfnLog::operator << (nir_shader& sh)
+{
+ if (m_active_log_flags & m_log_mask)
+ nir_print_shader(&sh, stderr);
+ return *this;
+}
+
+SfnLog& SfnLog::operator << (nir_instr &instr)
+{
+ if (m_active_log_flags & m_log_mask)
+ nir_print_instr(&instr, stderr);
+ return *this;
+}
+
+SfnTrace::SfnTrace(SfnLog::LogFlag flag, const char *msg):
+ m_flag(flag),
+ m_msg(msg)
+{
+   sfn_log << m_flag << std::string(2 * m_indention++, ' ')
+ << "BEGIN: " << m_msg << "\n";
+}
+
+SfnTrace::~SfnTrace()
+{
+   sfn_log << m_flag << std::string(2 * --m_indention, ' ')
+ << "END: " << m_msg << "\n";
+}
+
+int SfnTrace::m_indention = 0;
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h
new file mode 100644
index 000000000..372379c66
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h
@@ -0,0 +1,121 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_STDERR_STREAMLOG_H
+#define SFN_STDERR_STREAMLOG_H
+
+
+#include <streambuf>
+#include <ostream>
+#include <fstream>
+#include "compiler/nir/nir.h"
+
+namespace r600 {
+/* Implements the logging for shader-from-nir that is controlled by the
+ * R600_NIR_DEBUG environment variable. */
+
+class SfnLog {
+public:
+ enum LogFlag {
+ instr = 1 << 0,
+ r600ir = 1 << 1,
+ cc = 1 << 2,
+ err = 1 << 3,
+ shader_info = 1 << 4,
+ test_shader = 1 << 5,
+ reg = 1 << 6,
+ io = 1 << 7,
+ assembly = 1 << 8,
+ flow = 1 << 9,
+ merge = 1 << 10,
+ tex = 1 << 11,
+ trans = 1 << 12,
+ all = (1 << 13) - 1,
+ nomerge = 1 << 16,
+ };
+
+ SfnLog();
+
+   /** Special handling to set the message class "inline" in a stream
+       expression
+     \param l the class of the following messages
+   */
+ SfnLog& operator << (LogFlag const l);
+
+   /* General output routine; output is only produced if the set message
+    * class and the currently active log mask overlap
+     \returns a reference to this object
+   */
+ template <class T>
+ SfnLog& operator << (const T& text)
+ {
+ if (m_active_log_flags & m_log_mask)
+ m_output << text;
+
+ return *this;
+ }
+
+   /* A construct to make std::endl work on this stream, based on an idea
+      by Dave Brondsema:
+      http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8567
+   */
+ SfnLog& operator << (std::ostream & (*f)(std::ostream&));
+
+ SfnLog& operator << (nir_shader &sh);
+
+ SfnLog& operator << (nir_instr& instr);
+
+ int has_debug_flag(uint64_t flag) {
+ return (m_log_mask & flag) == flag;
+ }
+
+private:
+ uint64_t m_active_log_flags;
+ uint64_t m_log_mask;
+ std::ostream m_output;
+};
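+/* Typical usage is to select the message class first and then stream the
+ * payload, e.g.
+ *
+ *    sfn_log << SfnLog::instr << "emit " << *ir << "\n";
+ *
+ * The message is only written if the class is enabled in R600_NIR_DEBUG. */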
+
+class SfnTrace {
+public:
+ SfnTrace(SfnLog::LogFlag flag, const char *msg);
+ ~SfnTrace();
+private:
+ SfnLog::LogFlag m_flag;
+ const char *m_msg;
+ static int m_indention;
+};
+
+
+#ifndef NDEBUG
+#define SFN_TRACE_FUNC(LEVEL, MSG) SfnTrace __trace(LEVEL, MSG)
+#else
+#define SFN_TRACE_FUNC(LEVEL, MSG)
+#endif
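+/* SFN_TRACE_FUNC puts an SfnTrace instance on the stack, so entering and
+ * leaving the enclosing scope is logged with matching indentation, e.g.
+ *
+ *    SFN_TRACE_FUNC(SfnLog::flow, "emit_if");
+ */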
+
+extern SfnLog sfn_log;
+
+}
+#endif // SFN_STDERR_STREAMLOG_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h
new file mode 100644
index 000000000..31a10ae2f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h
@@ -0,0 +1,318 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_defines_h
+#define sfn_defines_h
+
+#include "../r600_isa.h"
+#include "amd_family.h"
+namespace r600 {
+
+
+enum EGWSOpCode {
+ cf_sema_v = 0,
+ cf_sema_p = 1,
+ cf_gws_barrier = 2,
+ cf_gws_init = 3,
+};
+
+/* CF ALU instructions [29:26], highest bit always set. */
+enum ECFAluOpCode {
+ cf_alu_undefined = 0,
+ cf_alu = CF_OP_ALU,
+ cf_alu_push_before = CF_OP_ALU_PUSH_BEFORE,
+ cf_alu_pop_after = CF_OP_ALU_POP_AFTER,
+ cf_alu_pop2_after = CF_OP_ALU_POP2_AFTER,
+ cf_alu_extended = CF_OP_ALU_EXT,
+ cf_alu_continue = CF_OP_ALU_CONTINUE,
+ cf_alu_break = CF_OP_ALU_BREAK,
+ cf_alu_else_after = CF_OP_ALU_ELSE_AFTER,
+};
+
+enum ECFAluOpCodeEG {
+ eg_cf_alu_undefined = 0,
+ eg_cf_alu = 8,
+ eg_cf_alu_push_before = 9,
+ eg_cf_alu_pop_after = 10,
+ eg_cf_alu_pop2_after = 11,
+ eg_cf_alu_extended = 12,
+ eg_cf_alu_continue = 13,
+ eg_cf_alu_break = 14,
+ eg_cf_alu_else_after = 15,
+};
+
+
+enum ECFOpCode {
+ cf_nop = CF_OP_NOP,
+ cf_tc = CF_OP_TEX,
+ cf_vc = CF_OP_VTX,
+ cf_gds = CF_OP_GDS,
+ cf_loop_start = CF_OP_LOOP_START,
+ cf_loop_end = CF_OP_LOOP_END,
+ cf_loop_start_dx10 = CF_OP_LOOP_START_DX10,
+ cf_loop_start_no_al = CF_OP_LOOP_START_NO_AL,
+ cf_loop_continue = CF_OP_LOOP_CONTINUE,
+ cf_loop_break = CF_OP_LOOP_BREAK,
+ cf_jump = CF_OP_JUMP,
+ cf_push = CF_OP_PUSH,
+ cf_else = CF_OP_ELSE,
+ cf_pop = CF_OP_POP,
+ /* 15 - 17 reserved */
+ cf_call = CF_OP_CALL,
+ cf_call_fs = CF_OP_CALL_FS,
+ cf_return = CF_OP_RET,
+ cf_emit_vertex = CF_OP_EMIT_VERTEX,
+ cf_emit_cut_vertex = CF_OP_EMIT_CUT_VERTEX,
+ cf_cut_vertex = CF_OP_CUT_VERTEX,
+ cf_kill = CF_OP_KILL,
+ /* 25 reserved */
+ cf_wait_ack = CF_OP_WAIT_ACK,
+ cf_tc_ack = CF_OP_TEX_ACK,
+ cf_vc_ack = CF_OP_VTX_ACK,
+ cf_jump_table = CF_OP_JUMPTABLE,
+ cf_global_wave_sync = CF_OP_WAVE_SYNC,
+ cf_halt = CF_OP_HALT,
+ /* gap 32-63*/
+ cf_mem_stream0_buf0 = CF_OP_MEM_STREAM0_BUF0,
+ cf_mem_stream0_buf1 = CF_OP_MEM_STREAM0_BUF1,
+ cf_mem_stream0_buf2 = CF_OP_MEM_STREAM0_BUF2,
+ cf_mem_stream0_buf3 = CF_OP_MEM_STREAM0_BUF3,
+
+ cf_mem_stream1_buf0 = CF_OP_MEM_STREAM1_BUF0,
+ cf_mem_stream1_buf1 = CF_OP_MEM_STREAM1_BUF1,
+ cf_mem_stream1_buf2 = CF_OP_MEM_STREAM1_BUF2,
+ cf_mem_stream1_buf3 = CF_OP_MEM_STREAM1_BUF3,
+
+ cf_mem_stream2_buf0 = CF_OP_MEM_STREAM2_BUF0,
+ cf_mem_stream2_buf1 = CF_OP_MEM_STREAM2_BUF1,
+ cf_mem_stream2_buf2 = CF_OP_MEM_STREAM2_BUF2,
+ cf_mem_stream2_buf3 = CF_OP_MEM_STREAM2_BUF3,
+
+ cf_mem_stream3_buf0 = CF_OP_MEM_STREAM3_BUF0,
+ cf_mem_stream3_buf1 = CF_OP_MEM_STREAM3_BUF1,
+ cf_mem_stream3_buf2 = CF_OP_MEM_STREAM3_BUF2,
+ cf_mem_stream3_buf3 = CF_OP_MEM_STREAM3_BUF3,
+
+  cf_mem_write_scratch = CF_OP_MEM_SCRATCH,
+ /* reserved 81 */
+ cf_mem_ring = CF_OP_MEM_RING,
+ cf_export = CF_OP_EXPORT,
+ cf_export_done = CF_OP_EXPORT_DONE,
+ cf_mem_export = CF_OP_MEM_EXPORT,
+ cf_mem_rat = CF_OP_MEM_RAT,
+ cf_mem_rat_cacheless = CF_OP_MEM_RAT_NOCACHE,
+
+ cf_mem_ring1 = CF_OP_MEM_RING1,
+ cf_mem_ring2 = CF_OP_MEM_RING2,
+ cf_mem_ring3 = CF_OP_MEM_RING3,
+ cf_mem_export_combined = CF_OP_MEM_MEM_COMBINED,
+ cf_mem_rat_combined_cacheless = CF_OP_MEM_RAT_COMBINED_NOCACHE
+
+};
+
+enum ECFOpCodeEG {
+ eg_cf_nop = 0,
+ eg_cf_tc = 1,
+ eg_cf_vc = 2,
+ eg_cf_gds = 3,
+ eg_cf_loop_start = 4,
+ eg_cf_loop_end = 5,
+ eg_cf_loop_start_dx10 = 6,
+ eg_cf_loop_start_no_al = 7,
+ eg_cf_loop_continue = 8,
+ eg_cf_loop_break = 9,
+ eg_cf_jump = 10,
+ eg_cf_push = 11,
+ eg_cf_else = 13,
+ eg_cf_pop = 14,
+ /* 15 - 17 reserved */
+ eg_cf_call = 18,
+ eg_cf_call_fs,
+ eg_cf_return,
+ eg_cf_emit_vertex,
+ eg_cf_emit_cut_vertex,
+ eg_cf_cut_vertex,
+ eg_cf_kill,
+ /* 25 reserved */
+ eg_cf_wait_ack = 26,
+ eg_cf_tc_ack,
+ eg_cf_vc_ack,
+ eg_cf_jump_table,
+ eg_cf_global_wave_sync,
+ eg_cf_halt,
+ /* gap 32-63*/
+ eg_cf_mem_stream0_buf0 = 64,
+ eg_cf_mem_stream0_buf1,
+ eg_cf_mem_stream0_buf2,
+ eg_cf_mem_stream0_buf3,
+
+ eg_cf_mem_stream1_buf0,
+ eg_cf_mem_stream1_buf1,
+ eg_cf_mem_stream1_buf2,
+ eg_cf_mem_stream1_buf3,
+
+ eg_cf_mem_stream2_buf0,
+ eg_cf_mem_stream2_buf1,
+ eg_cf_mem_stream2_buf2,
+ eg_cf_mem_stream2_buf3,
+
+ eg_cf_mem_stream3_buf0,
+ eg_cf_mem_stream3_buf1,
+ eg_cf_mem_stream3_buf2,
+ eg_cf_mem_stream3_buf3,
+
+ eg_cf_mem_write_scratch,
+ /* reserved 81 */
+ eg_cf_mem_ring = 82,
+ eg_cf_export,
+ eg_cf_export_done,
+ eg_cf_mem_export,
+ eg_cf_mem_rat,
+ eg_cf_mem_rat_cacheless,
+
+ eg_cf_mem_ring1,
+ eg_cf_mem_ring2,
+ eg_cf_mem_ring3,
+ eg_cf_mem_export_combined,
+ eg_cf_mem_rat_combined_cacheless
+};
+
+
+enum EVFetchInstr {
+ vc_fetch = FETCH_OP_VFETCH,
+ vc_semantic = FETCH_OP_SEMFETCH,
+ vc_get_buf_resinfo = FETCH_OP_GET_BUFFER_RESINFO,
+ vc_read_scratch = FETCH_OP_READ_SCRATCH,
+ vc_unknown
+};
+
+enum EVFetchType {
+ vertex_data = 0,
+ instance_data = 1,
+ no_index_offset = 2
+};
+
+enum EVTXDataFormat {
+ fmt_invalid = 0,
+ fmt_8 = 1,
+ fmt_4_4 = 2,
+ fmt_3_3_2 = 3,
+ fmt_reserved_4 = 4,
+ fmt_16 = 5,
+ fmt_16_float = 6,
+ fmt_8_8 = 7,
+ fmt_5_6_5 = 8,
+ fmt_6_5_5 = 9,
+ fmt_1_5_5_5 = 10,
+ fmt_4_4_4_4 = 11,
+ fmt_5_5_5_1 = 12,
+ fmt_32 = 13,
+ fmt_32_float = 14,
+ fmt_16_16 = 15,
+ fmt_16_16_float = 16,
+ fmt_8_24 = 17,
+ fmt_8_24_float = 18,
+ fmt_24_8 = 19,
+ fmt_24_8_float = 20,
+ fmt_10_11_11 = 21,
+ fmt_10_11_11_float = 22,
+ fmt_11_11_10 = 23,
+ fmt_11_11_10_float = 24,
+ fmt_2_10_10_10 = 25,
+ fmt_8_8_8_8 = 26,
+ fmt_10_10_10_2 = 27,
+ fmt_x24_8_32_float = 28,
+ fmt_32_32 = 29,
+ fmt_32_32_float = 30,
+ fmt_16_16_16_16 = 31,
+ fmt_16_16_16_16_float = 32,
+ fmt_reserved_33 = 33,
+ fmt_32_32_32_32 = 34,
+ fmt_32_32_32_32_float = 35,
+ fmt_reserved_36 = 36,
+ fmt_1 = 37,
+ fmt_1_reversed = 38,
+ fmt_gb_gr = 39,
+ fmt_bg_rg = 40,
+ fmt_32_as_8 = 41,
+ fmt_32_as_8_8 = 42,
+ fmt_5_9_9_9_sharedexp = 43,
+ fmt_8_8_8 = 44,
+ fmt_16_16_16 = 45,
+ fmt_16_16_16_float = 46,
+ fmt_32_32_32 = 47,
+ fmt_32_32_32_float = 48,
+ fmt_bc1 = 49,
+ fmt_bc2 = 50,
+ fmt_bc3 = 51,
+ fmt_bc4 = 52,
+ fmt_bc5 = 53,
+ fmt_apc0 = 54,
+ fmt_apc1 = 55,
+ fmt_apc2 = 56,
+ fmt_apc3 = 57,
+ fmt_apc4 = 58,
+ fmt_apc5 = 59,
+ fmt_apc6 = 60,
+ fmt_apc7 = 61,
+ fmt_ctx1 = 62,
+ fmt_reserved_63 = 63
+};
+
+enum EVFetchNumFormat {
+ vtx_nf_norm = 0,
+ vtx_nf_int = 1,
+ vtx_nf_scaled = 2
+};
+
+enum EVFetchEndianSwap {
+ vtx_es_none = 0,
+ vtx_es_8in16 = 1,
+ vtx_es_8in32 = 2
+};
+
+enum EVFetchFlagShift {
+ vtx_fetch_whole_quad,
+ vtx_use_const_field,
+ vtx_format_comp_signed,
+ vtx_srf_mode,
+ vtx_buf_no_stride,
+ vtx_alt_const,
+ vtx_use_tc,
+ vtx_vpm,
+ vtx_unknown
+};
+
+enum EBufferIndexMode {
+ bim_none,
+ bim_zero,
+ bim_one,
+ bim_invalid
+};
+
+}
+
+#endif // sfn_defines_h
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt
new file mode 100644
index 000000000..97a9c3658
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt
@@ -0,0 +1,45 @@
+# R600 shader from NIR
+
+This code is an attempt to implement a NIR backend for r600.
+
+## State
+
+Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
+
+Thanks to soft fp64, the OpenGL version is now 4.5.
+
+sb has been enabled for NIR to be able to run some more demanding workloads.
+The aim is still to get rid of it.
+
+piglit's gpu profile passes mostly like with TGSI; there are some fixes but
+also a few regressions.
+
+CTS GLES:
+ - GLES 2: passes like with TGSI
+ - GLES 3: no regressions, a few fixes compared to TGSI
+ - GLES 3.1:
+   * a few fixes with interpolation specifiers
+   * synchronization has some unstable tests; this might be because global
+     synchronization is missing (in both)
+
+GL CTS:
+ * a few regressions and a hang with KHR-GL43.compute_shader.shared-max
+
+piglit:
+ * spilling arrays is broken on Barts (but it works on Cedar)
+ * a few tests fail because the register limit is exhausted, and needlessly
+   so: with a better register allocator they would pass
+
+## Needed optimizations
+
+ - Register allocator and scheduler (Could the sb allocator and scheduler
+ be ported?)
+
+ - peepholes:
+ - compare + set predicate
+
+  - copy propagation:
+    - Moves from inputs are usually not required; they could be forwarded
+    - texture operations often move additional parameters in extra registers,
+      but they are actually needed in the same registers they come from and
+      could just be swizzled into the right place
+      (lower in NIR like it is done e.g. in ETNAVIV)
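+
+## Debugging
+
+The translation can be traced by setting the R600_NIR_DEBUG environment
+variable to a comma separated list of the flags defined in sfn_debug.cpp,
+for example:
+
+    R600_NIR_DEBUG=instr,ir <application>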
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
new file mode 100644
index 000000000..44e43c1b5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
@@ -0,0 +1,985 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_emitaluinstruction.h"
+#include "sfn_debug.h"
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+namespace r600 {
+
+using std::vector;
+
+EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
+ EmitInstruction (processor)
+{
+
+}
+
+bool EmitAluInstruction::do_emit(nir_instr* ir)
+{
+ const nir_alu_instr& instr = *nir_instr_as_alu(ir);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *ir
+ << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
+ << "' (" << __func__ << ")\n";
+
+ preload_src(instr);
+
+ switch (instr.op) {
+ case nir_op_b2b1: return emit_mov(instr);
+ case nir_op_b2b32: return emit_mov(instr);
+ case nir_op_b2f32: return emit_alu_b2f(instr);
+ case nir_op_b2i32: return emit_b2i32(instr);
+ case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+ case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+ case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+ case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+ case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+ case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+ case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+ case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+ case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
+ case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
+
+ case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
+ case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
+ case nir_op_cube_r600: return emit_cube(instr);
+ case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
+ case nir_op_f2b32: return emit_alu_f2b32(instr);
+ case nir_op_f2i32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_int);
+ case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint);
+ case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
+ case nir_op_fadd: return emit_alu_op2(instr, op2_add);
+ case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
+ case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
+ case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1});
+ case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2});
+ case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2});
+
+ /* These are in the ALU instruction list, but they should be texture instructions */
+ case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
+ case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
+ case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
+   case nir_op_fddy: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, false);
+   case nir_op_fddy_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, false);
+   case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true);
+ case nir_op_fdot2: return emit_dot(instr, 2);
+ case nir_op_fdot3: return emit_dot(instr, 3);
+ case nir_op_fdot4: return emit_dot(instr, 4);
+ case nir_op_fdph: return emit_fdph(instr);
+ case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10);
+ case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10);
+ case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee);
+ case nir_op_ffloor: return emit_alu_op1(instr, op1_floor);
+ case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee);
+ case nir_op_ffract: return emit_alu_op1(instr, op1_fract);
+ case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10);
+ case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10);
+ case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int);
+ case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped);
+ case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
+ case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse);
+ case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10);
+ case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10);
+ case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee);
+ case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg});
+ case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10);
+ case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10);
+ case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee);
+ case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
+ case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
+ case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
+ case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
+ case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
+ case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
+ case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
+ case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int);
+ case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int);
+ case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt);
+ case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int);
+ case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int);
+ case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int);
+ case nir_op_i32csel_ge: return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2});
+ case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2});
+ case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int);
+ case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int);
+ case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int);
+ case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int);
+ case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int);
+ case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
+ case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse);
+ case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int);
+ case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int);
+ case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int);
+ case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int);
+ case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int);
+ case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int);
+ case nir_op_ineg: return emit_alu_ineg(instr);
+ case nir_op_inot: return emit_alu_op1(instr, op1_not_int);
+ case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int);
+ case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int);
+ case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int);
+ case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int);
+ case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int);
+   case nir_op_mov: return emit_mov(instr);
+ case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr);
+ case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr);
+ case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse);
+ case nir_op_sge: return emit_alu_op2(instr, op2_setge);
+ case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt);
+ case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint);
+ case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint);
+ case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint);
+ case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint);
+ case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
+ case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse);
+ case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2});
+ case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint);
+ case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint);
+ case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24);
+ case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint);
+ case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0);
+ case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1);
+ case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr);
+ case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr);
+ case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int);
+ case nir_op_vec2: return emit_create_vec(instr, 2);
+ case nir_op_vec3: return emit_create_vec(instr, 3);
+ case nir_op_vec4: return emit_create_vec(instr, 4);
+ default:
+ return false;
+ }
+}
+
+void EmitAluInstruction::preload_src(const nir_alu_instr& instr)
+{
+ const nir_op_info *op_info = &nir_op_infos[instr.op];
+ assert(op_info->num_inputs <= 4);
+
+ unsigned nsrc_comp = num_src_comp(instr);
+ sfn_log << SfnLog::reg << "Preload:\n";
+ for (unsigned i = 0; i < op_info->num_inputs; ++i) {
+ for (unsigned c = 0; c < nsrc_comp; ++c) {
+ m_src[i][c] = from_nir(instr.src[i], c);
+         sfn_log << SfnLog::reg << " " << *m_src[i][c];
+      }
+ sfn_log << SfnLog::reg << "\n";
+ }
+ if (instr.op == nir_op_fdph) {
+ m_src[1][3] = from_nir(instr.src[1], 3);
+ sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n";
+ }
+
+ split_constants(instr, nsrc_comp);
+}
+
+unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr)
+{
+ switch (instr.op) {
+ case nir_op_fdot2:
+ case nir_op_bany_inequal2:
+ case nir_op_ball_iequal2:
+ case nir_op_bany_fnequal2:
+ case nir_op_ball_fequal2:
+ case nir_op_b32any_inequal2:
+ case nir_op_b32all_iequal2:
+ case nir_op_b32any_fnequal2:
+ case nir_op_b32all_fequal2:
+ case nir_op_unpack_64_2x32_split_y:
+ return 2;
+
+ case nir_op_fdot3:
+ case nir_op_bany_inequal3:
+ case nir_op_ball_iequal3:
+ case nir_op_bany_fnequal3:
+ case nir_op_ball_fequal3:
+ case nir_op_b32any_inequal3:
+ case nir_op_b32all_iequal3:
+ case nir_op_b32any_fnequal3:
+ case nir_op_b32all_fequal3:
+ case nir_op_cube_r600:
+ return 3;
+
+ case nir_op_fdot4:
+ case nir_op_fdph:
+ case nir_op_bany_inequal4:
+ case nir_op_ball_iequal4:
+ case nir_op_bany_fnequal4:
+ case nir_op_ball_fequal4:
+ case nir_op_b32any_inequal4:
+ case nir_op_b32all_iequal4:
+ case nir_op_b32any_fnequal4:
+ case nir_op_b32all_fequal4:
+ return 4;
+
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ return 1;
+
+ default:
+ return nir_dest_num_components(instr.dest.dest);
+
+ }
+}
+
+bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ const uint16_t src0_chan[4] = {2, 2, 0, 1};
+ const uint16_t src1_chan[4] = {1, 0, 2, 2};
+
+ for (int i = 0; i < 4; ++i) {
+ ir = new AluInstruction(op2_cube, from_nir(instr.dest, i),
+ from_nir(instr.src[0], src0_chan[i]),
+ from_nir(instr.src[0], src1_chan[i]), {alu_write});
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
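+/* If an instruction reads more than one constant and they do not come from
+ * the same constant buffer location (sel and kcache bank), all but the
+ * first are copied into temporary registers, which keeps the kcache
+ * requirements of the resulting ALU group simple. */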
+void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp)
+{
+ const nir_op_info *op_info = &nir_op_infos[instr.op];
+ if (op_info->num_inputs < 2)
+ return;
+
+ int nconst = 0;
+ std::array<const UniformValue *,4> c;
+ std::array<int,4> idx;
+ for (unsigned i = 0; i < op_info->num_inputs; ++i) {
+ PValue& src = m_src[i][0];
+ assert(src);
+ sfn_log << SfnLog::reg << "Split test " << *src;
+
+ if (src->type() == Value::kconst) {
+ c[nconst] = static_cast<const UniformValue *>(src.get());
+ idx[nconst++] = i;
+ sfn_log << SfnLog::reg << " is constant " << i;
+ }
+ sfn_log << SfnLog::reg << "\n";
+ }
+
+ if (nconst < 2)
+ return;
+
+ unsigned sel = c[0]->sel();
+ unsigned kcache = c[0]->kcache_bank();
+ sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ;
+
+ for (int i = 1; i < nconst; ++i) {
+ sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n";
+ if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) {
+ AluInstruction *ir = nullptr;
+ auto v = get_temp_vec4();
+ for (unsigned k = 0; k < nsrc_comp; ++k) {
+ ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write});
+ emit_instruction(ir);
+ m_src[idx[i]][k] = v[k];
+ }
+ make_last(ir);
+ }
+ }
+}
+
+bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr)
+{
+ if (instr.src[0].negate || instr.src[0].abs) {
+ std::cerr << "source modifiers not supported with int ops\n";
+ return false;
+ }
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op1_not_int, from_nir(instr.dest, i),
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
+ const AluOpFlags& flags)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][i], write);
+
+ if (flags.test(alu_src0_abs) || instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+
+ if (instr.src[0].negate ^ flags.test(alu_src0_neg))
+ ir->set_flag(alu_src0_neg);
+
+ if (flags.test(alu_dst_clamp) || instr.dest.saturate)
+ ir->set_flag(alu_dst_clamp);
+
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
+{
+   /* If the op is a plain move between SSA values we can just forward
+    * the register reference to the original register */
+ if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
+ !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
+ bool result = true;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ result &= inject_register(instr.dest.dest.ssa.index, i,
+ m_src[0][i], true);
+ }
+ }
+ return result;
+ } else {
+ return emit_alu_op1(instr, op1_mov);
+ }
+}
+
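+/* Transcendental ops: on Cayman there is no separate trans unit, so the
+ * operation is issued to all slots of a group and only the masked
+ * components are written; on the other chips each component is emitted as
+ * its own instruction group. */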
+bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
+ bool absolute)
+{
+ AluInstruction *ir = nullptr;
+
+ if (get_chip_class() == CAYMAN) {
+ int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
+ for (int i = 0; i < last_slot; ++i) {
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][0], instr.dest.write_mask & (1 << i) ? write : empty);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+
+ if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
+
+ emit_instruction(ir);
+ }
+ } else {
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][i], last_write);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
+{
+ AluInstruction *ir = nullptr;
+ std::array<PValue, 4> v;
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ v[i] = from_nir(instr.dest, i);
+ ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
+ if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op, v[i], v[i], {alu_write});
+ emit_instruction(ir);
+ if (op == op1_flt_to_uint)
+ make_last(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
+ m_src[0][i], literal(0.0f), write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+
+ ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
+ m_src[0][i], Value::one_i, write);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op1_mov, from_nir(instr.dest, i),
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp)
+{
+ emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0),
+ m_src[0][comp], last_write));
+ return true;
+}
+
+bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc)
+{
+ AluInstruction *ir = nullptr;
+ std::set<int> src_slot;
+ for(unsigned i = 0; i < nc; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ auto src = m_src[i][0];
+ ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+
+         // FIXME: This is a rather crude approach to fix the problem that
+         // r600 can't read from four different slots of the same component;
+         // here we check only the register index
+ if (src->type() == Value::gpr)
+ src_slot.insert(src->sel());
+ if (src_slot.size() >= 3) {
+ src_slot.clear();
+ ir->set_flag(alu_last_instr);
+ }
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n)
+{
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < n ; ++i) {
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
+ m_src[0][i], m_src[1][i],
+ instr.dest.write_mask & (1 << i) ? write : empty);
+
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ for (int i = n; i < 4 ; ++i) {
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
+ Value::zero, Value::zero,
+ instr.dest.write_mask & (1 << i) ? write : empty);
+ emit_instruction(ir);
+ }
+
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr)
+{
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 3 ; ++i) {
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i),
+ m_src[0][i], m_src[1][i],
+ instr.dest.write_mask & (1 << i) ? write : empty);
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+
+ ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f,
+ m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? write : empty);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ emit_instruction(ir);
+
+ ir->set_flag(alu_last_instr);
+ return true;
+
+}
+
+bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)) {
+ ir = new AluInstruction(op, from_nir(instr.dest, i),
+ m_src[0][i], Value::zero,
+ write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i),
+ m_src[0][i], Value::one_f, write);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
+{
+
+ AluInstruction *ir = nullptr;
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
+ EAluOp combine = all ? op2_and_int : op2_or_int;
+
+   /* For integers we cannot use the source modifiers, so this needs some
+    * emulation; it should actually be lowered in NIR */
+ if (instr.src[0].negate == instr.src[1].negate &&
+ instr.src[0].abs == instr.src[1].abs) {
+
+ for (unsigned i = 0; i < nc ; ++i) {
+ ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ } else {
+ std::cerr << "Negate in iequal/inequal not (yet) supported\n";
+ return false;
+ }
+
+ for (unsigned i = 0; i < nc/2 ; ++i) {
+ ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write);
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ if (nc > 2) {
+ ir = new AluInstruction(combine, v[0], v[0], v[2], last_write);
+ emit_instruction(ir);
+ }
+
+ return true;
+}
+
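+/* any/all reduction for float compares: the component-wise compares produce
+ * 1.0f or 0.0f, MAX4 then reduces the vector; for "all" the sources are
+ * negated so the reduction effectively computes the minimum, and the final
+ * compare against +/-1.0f turns the result back into a boolean. */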
+bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all)
+{
+ AluInstruction *ir = nullptr;
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
+ for (unsigned i = 0; i < nc ; ++i) {
+ ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
+
+ if (instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate)
+ ir->set_flag(alu_src0_neg);
+
+ if (instr.src[1].abs)
+ ir->set_flag(alu_src1_abs);
+ if (instr.src[1].negate)
+ ir->set_flag(alu_src1_neg);
+
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ for (unsigned i = 0; i < nc ; ++i) {
+ ir = new AluInstruction(op1_max4, v[i], v[i], write);
+ if (all) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+
+ for (unsigned i = nc; i < 4 ; ++i) {
+ ir = new AluInstruction(op1_max4, v[i],
+ all ? Value::one_f : Value::zero, write);
+ if (all)
+ ir->set_flag(alu_src0_neg);
+
+ emit_instruction(ir);
+ }
+
+ ir->set_flag(alu_last_instr);
+
+ if (all)
+ op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
+ else
+ op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
+
+ ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write);
+ if (all)
+ ir->set_flag(alu_src1_neg);
+ emit_instruction(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all)
+{
+ AluInstruction *ir = nullptr;
+ PValue v[4]; // this might need some additional temp register creation
+ for (unsigned i = 0; i < 4 ; ++i)
+ v[i] = from_nir(instr.dest, i);
+
+ for (unsigned i = 0; i < 2 ; ++i) {
+ ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write);
+ if (instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate)
+ ir->set_flag(alu_src0_neg);
+
+ if (instr.src[1].abs)
+ ir->set_flag(alu_src1_abs);
+ if (instr.src[1].negate)
+ ir->set_flag(alu_src1_neg);
+
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int;
+ ir = new AluInstruction(op, v[0], v[0], v[1], last_write);
+ emit_instruction(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode)
+{
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ AluInstruction *ir = nullptr;
+
+ if (get_chip_class() == CAYMAN) {
+ int lasti = util_last_bit(instr.dest.write_mask);
+ for (int k = 0; k < lasti ; ++k) {
+ if (instr.dest.write_mask & (1 << k)) {
+
+ for (int i = 0; i < 4; i++) {
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[0][k], (i == k) ? write : empty);
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ if (i == 3) ir->set_flag(alu_last_instr);
+ emit_instruction(ir);
+ }
+ }
+ }
+ } else {
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write);
+ if (src0.negate) ir->set_flag(alu_src0_neg);
+ if (src0.abs) ir->set_flag(alu_src0_abs);
+ if (src1.negate) ir->set_flag(alu_src1_neg);
+ if (src1.abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts)
+{
+
+ const nir_alu_src& src0 = instr.src[0];
+ const nir_alu_src& src1 = instr.src[1];
+
+ if (src0.negate || src1.negate ||
+ src0.abs || src1.abs) {
+ std::cerr << "R600: don't support modifiers with integer operations";
+ return false;
+ }
+ return emit_alu_op2(instr, opcode, opts);
+}
+
+bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops)
+{
+ const nir_alu_src *src0 = &instr.src[0];
+ const nir_alu_src *src1 = &instr.src[1];
+
+ int idx0 = 0;
+ int idx1 = 1;
+ if (ops & op2_opt_reverse) {
+ std::swap(src0, src1);
+ std::swap(idx0, idx1);
+ }
+
+ bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate;
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[idx0][i], m_src[idx1][i], write);
+
+ if (src0->negate) ir->set_flag(alu_src0_neg);
+ if (src0->abs) ir->set_flag(alu_src0_abs);
+ if (src1_negate) ir->set_flag(alu_src1_neg);
+ if (src1->abs) ir->set_flag(alu_src1_abs);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
+ std::array<uint8_t, 3> reorder)
+{
+ const nir_alu_src *src[3];
+ src[0] = &instr.src[reorder[0]];
+ src[1] = &instr.src[reorder[1]];
+ src[2] = &instr.src[reorder[2]];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[reorder[0]][i],
+ m_src[reorder[1]][i],
+ m_src[reorder[2]][i],
+ write);
+
+ if (src[0]->negate) ir->set_flag(alu_src0_neg);
+ if (src[1]->negate) ir->set_flag(alu_src1_neg);
+ if (src[2]->negate) ir->set_flag(alu_src2_neg);
+
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ ir->set_flag(alu_write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ return true;
+}
+
+static const char swz[] = "xyzw01?_";
+
+void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
+ const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
+{
+
+ AluInstruction *alu = nullptr;
+ for (int i = 0; i < ncomp; ++i) {
+ alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write});
+ if (src.abs)
+ alu->set_flag(alu_src0_abs);
+ if (src.negate)
+ alu->set_flag(alu_src0_neg);
+ emit_instruction(alu);
+ }
+ make_last(alu);
+}
+
+bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
+ bool fine)
+{
+
+ GPRVector::Values v;
+ std::array<int, 4> writemask = {0,1,2,3};
+
+ int ncomp = nir_dest_num_components(instr.dest.dest);
+ GPRVector::Swizzle src_swz = {7,7,7,7};
+ for (auto i = 0; i < ncomp; ++i)
+ src_swz[i] = instr.src[0].swizzle[i];
+
+ auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
+
+ if (instr.src[0].abs || instr.src[0].negate) {
+ GPRVector tmp = get_temp_vec4();
+ split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
+ src = tmp;
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
+ v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
+ }
+
+   /* This is querying the derivatives of the output fb, so we would either need
+    * access to the neighboring pixels or to the framebuffer. Neither is currently
+    * implemented */
+ GPRVector dst(v);
+
+ auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue());
+ tex->set_dest_swizzle(writemask);
+
+ if (fine)
+ tex->set_flag(TexInstruction::grad_fine);
+
+ emit_instruction(tex);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
+{
+ auto tmp = get_temp_register();
+ emit_instruction(op2_lshr_int, tmp,
+ {m_src[0][0], PValue(new LiteralValue(16))},
+ {alu_write, alu_last_instr});
+
+ emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
+ {tmp}, {alu_write, alu_last_instr});
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr)
+{
+ emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0),
+                    {m_src[0][0]}, {alu_write, alu_last_instr});
+ return true;
+}
+
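+/* pack_half_2x16_split: convert both sources to f16, shift the second one
+ * into the high half and OR the two together. */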
+bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr)
+{
+ PValue x = get_temp_register();
+ PValue y = get_temp_register();
+
+   emit_instruction(op1_flt32_to_flt16, x, {m_src[0][0]}, {alu_write});
+   emit_instruction(op1_flt32_to_flt16, y, {m_src[1][0]}, {alu_write, alu_last_instr});
+
+   emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))}, {alu_write, alu_last_instr});
+
+   emit_instruction(op2_or_int, {from_nir(instr.dest, 0)}, {x, y}, {alu_write, alu_last_instr});
+
+ return true;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
new file mode 100644
index 000000000..f56352f4e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h
@@ -0,0 +1,115 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_EMITALUINSTRUCTION_H
+#define SFN_EMITALUINSTRUCTION_H
+
+#include "sfn_emitinstruction.h"
+
+#include "sfn_alu_defines.h"
+#include "sfn_instruction_alu.h"
+#include "sfn_instruction_tex.h"
+
+namespace r600 {
+
+
+class EmitAluInstruction : public EmitInstruction
+{
+public:
+ EmitAluInstruction(ShaderFromNirProcessor& processor);
+
+private:
+
+ enum AluOp2Opts {
+ op2_opt_none = 0,
+ op2_opt_reverse = 1,
+ op2_opt_neg_src1 = 1 << 1
+ };
+
+ bool do_emit(nir_instr* instr) override;
+
+ void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp);
+
+ bool emit_mov(const nir_alu_instr& instr);
+ bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0);
+ bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
+
+ bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode);
+
+ bool emit_alu_inot(const nir_alu_instr& instr);
+ bool emit_alu_ineg(const nir_alu_instr& instr);
+ bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none);
+
+ bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
+ bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
+
+ bool emit_alu_b2f(const nir_alu_instr& instr);
+ bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
+ bool emit_dot(const nir_alu_instr& instr, int n);
+ bool emit_create_vec(const nir_alu_instr& instr, unsigned nc);
+ bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
+
+ bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all);
+ bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all);
+
+ bool emit_fdph(const nir_alu_instr &instr);
+
+ bool emit_alu_f2b32(const nir_alu_instr& instr);
+ bool emit_b2i32(const nir_alu_instr& instr);
+ bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op);
+ bool emit_pack_64_2x32_split(const nir_alu_instr& instr);
+ bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
+
+ bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
+ bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
+ bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
+ bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
+
+ bool emit_cube(const nir_alu_instr& instr);
+private:
+ void make_last(AluInstruction *ir) const;
+ void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v,
+ GPRVector::Values& out, int ncomp);
+
+ void preload_src(const nir_alu_instr& instr);
+ unsigned num_src_comp(const nir_alu_instr& instr);
+
+   using vreg = std::array<PValue, 4>;
+
+   vreg m_src[4];
+};
+
+inline void EmitAluInstruction::make_last(AluInstruction *ir) const
+{
+ if (ir)
+ ir->set_flag(alu_last_instr);
+}
+
+}
+
+#endif // SFN_EMITALUINSTRUCTION_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
new file mode 100644
index 000000000..9a75cd18b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp
@@ -0,0 +1,164 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_emitinstruction.h"
+
+#include "sfn_shader_base.h"
+
+namespace r600 {
+
+EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor):
+ m_proc(processor)
+{
+
+}
+
+EmitInstruction::~EmitInstruction()
+{
+}
+
+bool EmitInstruction::emit(nir_instr* instr)
+{
+ return do_emit(instr);
+}
+
+PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
+{
+ return m_proc.from_nir(v, component, swizzled);
+}
+
+PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+PValue EmitInstruction::from_nir(const nir_src& v, unsigned component)
+{
+ return m_proc.from_nir(v, component);
+}
+
+void EmitInstruction::emit_instruction(Instruction *ir)
+{
+ return m_proc.emit_instruction(ir);
+}
+
+void EmitInstruction::emit_instruction(AluInstruction *ir)
+{
+ return m_proc.emit_instruction(ir);
+}
+
+bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src0,
+ const std::set<AluModifiers>& m_flags)
+{
+   return m_proc.emit_instruction(opcode, dest, src0, m_flags);
+}
+
+const nir_variable *
+EmitInstruction::get_deref_location(const nir_src& v) const
+{
+ return m_proc.get_deref_location(v);
+}
+
+PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
+{
+ return m_proc.from_nir_with_fetch_constant(src, component, channel);
+}
+
+GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
+ const GPRVector::Swizzle& swizzle, bool match)
+{
+ return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match);
+}
+
+PGPRValue EmitInstruction::get_temp_register(int channel)
+{
+ return m_proc.get_temp_register(channel);
+}
+
+GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle)
+{
+ return m_proc.get_temp_vec4(swizzle);
+}
+
+PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle)
+{
+ return m_proc.create_register_from_nir_src(src, swizzle);
+}
+
+enum chip_class EmitInstruction::get_chip_class(void) const
+{
+ return m_proc.get_chip_class();
+}
+
+PValue EmitInstruction::literal(uint32_t value)
+{
+ return m_proc.literal(value);
+}
+
+GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components)
+{
+ return m_proc.vec_from_nir(dst, num_components);
+}
+
+bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle,
+ const PValue& reg, bool map)
+{
+ return m_proc.inject_register(sel, swizzle, reg, map);
+}
+
+int EmitInstruction::remap_atomic_base(int base)
+{
+ return m_proc.remap_atomic_base(base);
+}
+
+void EmitInstruction::set_has_txs_cube_array_comp()
+{
+ m_proc.sh_info().has_txq_cube_array_z_comp = 1;
+}
+
+const std::set<AluModifiers> EmitInstruction::empty = {};
+const std::set<AluModifiers> EmitInstruction::write = {alu_write};
+const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr};
+const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr};
+
+}
+
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
new file mode 100644
index 000000000..09a6489b0
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h
@@ -0,0 +1,101 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef EMITINSTRUCTION_H
+#define EMITINSTRUCTION_H
+
+#include "compiler/nir/nir.h"
+#include "sfn_defines.h"
+#include "sfn_value.h"
+#include "sfn_instruction_alu.h"
+
+namespace r600 {
+
+class ShaderFromNirProcessor;
+
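+/* Base class for the per-instruction-type emitters. It mostly forwards the
+ * value-pool and instruction-emission helpers of ShaderFromNirProcessor, so
+ * that the specialized emitters don't need direct access to the processor.
+ */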
+class EmitInstruction
+{
+public:
+ EmitInstruction(ShaderFromNirProcessor& processor);
+ virtual ~EmitInstruction();
+ bool emit(nir_instr* instr);
+
+ static const std::set<AluModifiers> empty;
+ static const std::set<AluModifiers> write;
+ static const std::set<AluModifiers> last_write;
+ static const std::set<AluModifiers> last;
+
+protected:
+ virtual bool do_emit(nir_instr* instr) = 0;
+
+ // forwards from ValuePool
+ PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
+ PValue from_nir(const nir_src& v, unsigned component);
+ PValue from_nir(const nir_alu_src& v, unsigned component);
+ PValue from_nir(const nir_tex_src& v, unsigned component);
+ PValue from_nir(const nir_alu_dest& v, unsigned component);
+ PValue from_nir(const nir_dest& v, unsigned component);
+
+ PValue create_register_from_nir_src(const nir_src& src, unsigned comp);
+
+ PGPRValue get_temp_register(int channel = -1);
+ GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3});
+
+ // forwards from ShaderFromNirProcessor
+ void emit_instruction(Instruction *ir);
+ void emit_instruction(AluInstruction *ir);
+ bool emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src0,
+ const std::set<AluModifiers>& m_flags);
+
+ PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
+ GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
+ const GPRVector::Swizzle& swizzle, bool match = false);
+
+ const nir_variable *get_deref_location(const nir_src& v) const;
+
+ enum chip_class get_chip_class(void) const;
+
+ PValue literal(uint32_t value);
+
+ GPRVector vec_from_nir(const nir_dest& dst, int num_components);
+
+ bool inject_register(unsigned sel, unsigned swizzle,
+ const PValue& reg, bool map);
+
+ int remap_atomic_base(int base);
+
+ void set_has_txs_cube_array_comp();
+private:
+
+ ShaderFromNirProcessor& m_proc;
+};
+
+}
+
+
+
+#endif // EMITINSTRUCTION_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
new file mode 100644
index 000000000..9f0d0b605
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
@@ -0,0 +1,644 @@
+#include "sfn_emitssboinstruction.h"
+
+#include "sfn_instruction_fetch.h"
+#include "sfn_instruction_gds.h"
+#include "sfn_instruction_misc.h"
+#include "sfn_instruction_tex.h"
+#include "../r600_pipe.h"
+#include "../r600_asm.h"
+
+namespace r600 {
+
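+/* Constant-buffer selector for the per-buffer info the driver uploads
+ * (an inferred reading; the layout presumably matches the r600 state code). */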
+#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
+
+EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor):
+ EmitInstruction(processor),
+ m_require_rat_return_address(false),
+ m_ssbo_image_offset(0)
+{
+}
+
+void EmitSSBOInstruction::set_ssbo_offset(int offset)
+{
+ m_ssbo_image_offset = offset;
+}
+
+
+void EmitSSBOInstruction::set_require_rat_return_address()
+{
+ m_require_rat_return_address = true;
+}
+
+bool
+EmitSSBOInstruction::load_rat_return_address()
+{
+ if (m_require_rat_return_address) {
+ m_rat_return_address = get_temp_vec4();
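+      /* Compute a unique per-thread index for the RAT return buffer:
+       * mbcnt yields the lane index within the wave, and the muladds fold
+       * in SE id and HW wave id to form a wave slot (an inferred reading
+       * of the sequence below). */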
+ emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write}));
+ emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, m_rat_return_address.reg_i(1), literal(-1), {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)),
+ literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr}));
+ emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1),
+ m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0),
+ {alu_write, alu_last_instr}));
+ m_require_rat_return_address = false;
+ }
+ return true;
+}
+
+
+bool EmitSSBOInstruction::do_emit(nir_instr* instr)
+{
+ const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_atomic_counter_add:
+ case nir_intrinsic_atomic_counter_and:
+ case nir_intrinsic_atomic_counter_exchange:
+ case nir_intrinsic_atomic_counter_max:
+ case nir_intrinsic_atomic_counter_min:
+ case nir_intrinsic_atomic_counter_or:
+ case nir_intrinsic_atomic_counter_xor:
+ case nir_intrinsic_atomic_counter_comp_swap:
+ return emit_atomic(intr);
+ case nir_intrinsic_atomic_counter_read:
+ case nir_intrinsic_atomic_counter_post_dec:
+ return emit_unary_atomic(intr);
+ case nir_intrinsic_atomic_counter_inc:
+ return emit_atomic_inc(intr);
+ case nir_intrinsic_atomic_counter_pre_dec:
+ return emit_atomic_pre_dec(intr);
+ case nir_intrinsic_load_ssbo:
+ return emit_load_ssbo(intr);
+ case nir_intrinsic_store_ssbo:
+ return emit_store_ssbo(intr);
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ return emit_ssbo_atomic_op(intr);
+ case nir_intrinsic_image_store:
+ return emit_image_store(intr);
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_image_atomic_umin:
+ case nir_intrinsic_image_atomic_umax:
+ case nir_intrinsic_image_atomic_imin:
+ case nir_intrinsic_image_atomic_imax:
+ return emit_image_load(intr);
+ case nir_intrinsic_image_size:
+ return emit_image_size(intr);
+ case nir_intrinsic_get_ssbo_size:
+ return emit_buffer_size(intr);
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_group_memory_barrier:
+ return make_stores_ack_and_waitack();
+ default:
+ return false;
+ }
+}
+
+bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr)
+{
+ ESDOp op = get_opcode(instr->intrinsic);
+
+ if (DS_OP_INVALID == op)
+ return false;
+
+ GPRVector dest = make_dest(instr);
+
+ int base = remap_atomic_base(nir_intrinsic_base(instr));
+
+ PValue uav_id = from_nir(instr->src[0], 0);
+
+ PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
+
+ GDSInstr *ir = nullptr;
+ if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) {
+ PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0);
+ ir = new GDSInstr(op, dest, value, value2, uav_id, base);
+ } else {
+ ir = new GDSInstr(op, dest, value, uav_id, base);
+ }
+
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr)
+{
+ ESDOp op = get_opcode(instr->intrinsic);
+
+ if (DS_OP_INVALID == op)
+ return false;
+
+ GPRVector dest = make_dest(instr);
+
+ PValue uav_id = from_nir(instr->src[0], 0);
+
+ auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr)));
+
+ emit_instruction(ir);
+ return true;
+}
+
+ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode)
+{
+ switch (opcode) {
+ case nir_intrinsic_atomic_counter_add:
+ return DS_OP_ADD_RET;
+ case nir_intrinsic_atomic_counter_and:
+ return DS_OP_AND_RET;
+ case nir_intrinsic_atomic_counter_exchange:
+ return DS_OP_XCHG_RET;
+ case nir_intrinsic_atomic_counter_inc:
+ return DS_OP_INC_RET;
+ case nir_intrinsic_atomic_counter_max:
+ return DS_OP_MAX_UINT_RET;
+ case nir_intrinsic_atomic_counter_min:
+ return DS_OP_MIN_UINT_RET;
+ case nir_intrinsic_atomic_counter_or:
+ return DS_OP_OR_RET;
+ case nir_intrinsic_atomic_counter_read:
+ return DS_OP_READ_RET;
+ case nir_intrinsic_atomic_counter_xor:
+ return DS_OP_XOR_RET;
+ case nir_intrinsic_atomic_counter_post_dec:
+ return DS_OP_DEC_RET;
+ case nir_intrinsic_atomic_counter_comp_swap:
+ return DS_OP_CMP_XCHG_RET;
+ case nir_intrinsic_atomic_counter_pre_dec:
+ default:
+ return DS_OP_INVALID;
+ }
+}
+
+RatInstruction::ERatOp
+EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const
+{
+ switch (opcode) {
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_image_atomic_add:
+ return RatInstruction::ADD_RTN;
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_image_atomic_and:
+ return RatInstruction::AND_RTN;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_image_atomic_exchange:
+ return RatInstruction::XCHG_RTN;
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_image_atomic_or:
+ return RatInstruction::OR_RTN;
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_image_atomic_imin:
+ return RatInstruction::MIN_INT_RTN;
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_image_atomic_imax:
+ return RatInstruction::MAX_INT_RTN;
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_image_atomic_umin:
+ return RatInstruction::MIN_UINT_RTN;
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_image_atomic_umax:
+ return RatInstruction::MAX_UINT_RTN;
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_image_atomic_xor:
+ return RatInstruction::XOR_RTN;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_image_atomic_comp_swap:
+ if (util_format_is_float(format))
+ return RatInstruction::CMPXCHG_FLT_RTN;
+ else
+ return RatInstruction::CMPXCHG_INT_RTN;
+ case nir_intrinsic_image_load:
+ return RatInstruction::NOP_RTN;
+ default:
+ unreachable("Unsupported RAT instruction");
+ }
+}
+
+
+bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr)
+{
+ GPRVector dest = make_dest(instr);
+
+ PValue value = from_nir_with_fetch_constant(instr->src[1], 0);
+
+ PValue uav_id = from_nir(instr->src[0], 0);
+
+ auto ir = new GDSInstr(DS_OP_ADD_RET, dest, value, uav_id,
+ remap_atomic_base(nir_intrinsic_base(instr)));
+
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitSSBOInstruction::load_atomic_inc_limits()
+{
+ m_atomic_update = get_temp_register();
+ m_atomic_update->set_keep_alive();
+ emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1),
+ {alu_write, alu_last_instr}));
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_atomic_inc(const nir_intrinsic_instr* instr)
+{
+ PValue uav_id = from_nir(instr->src[0], 0);
+ GPRVector dest = make_dest(instr);
+ auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id,
+ remap_atomic_base(nir_intrinsic_base(instr)));
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr)
+{
+ GPRVector dest = make_dest(instr);
+
+ PValue uav_id = from_nir(instr->src[0], 0);
+
+ auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id,
+ remap_atomic_base(nir_intrinsic_base(instr)));
+ emit_instruction(ir);
+
+ emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write));
+
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr)
+{
+ GPRVector dest = make_dest(instr);
+
+   /* src[0] is the buffer index (used below for indirect buffer selection);
+    * src[1] is the byte offset */
+ auto addr = from_nir(instr->src[1], 0);
+ PValue addr_temp = create_register_from_nir_src(instr->src[1], 1);
+
+   /* Convert the byte offset to a dword index; ideally this shift would be
+    * lowered in NIR */
+ emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))},
+ {alu_write, alu_last_instr}));
+
+ const EVTXDataFormat formats[4] = {
+ fmt_32,
+ fmt_32_32,
+ fmt_32_32_32,
+ fmt_32_32_32_32
+ };
+
+ const std::array<int,4> dest_swt[4] = {
+ {0,7,7,7},
+ {0,1,7,7},
+ {0,1,2,7},
+ {0,1,2,3}
+ };
+
+ /* TODO fix resource index */
+ auto ir = new FetchInstruction(dest, addr_temp,
+                  R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset,
+                  from_nir(instr->src[0], 0),
+ formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int);
+ ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]);
+ ir->set_flag(vtx_use_tc);
+
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr)
+{
+ GPRVector::Swizzle swz = {7,7,7,7};
+ for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i)
+ swz[i] = i;
+
+ auto orig_addr = from_nir(instr->src[2], 0);
+
+ GPRVector addr_vec = get_temp_vec4({0,1,2,7});
+
+ auto temp2 = get_temp_vec4();
+
+ auto rat_id = from_nir(instr->src[1], 0);
+
+ emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr,
+ PValue(new LiteralValue(2)), write));
+ emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write));
+ emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write));
+
+ auto values = vec_from_nir_with_fetch_constant(instr->src[0],
+ (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
+
+ auto cf_op = cf_mem_rat;
+ //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
+ auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
+ values, addr_vec, m_ssbo_image_offset, rat_id, 1,
+ 1, 0, false);
+ emit_instruction(store);
+ m_store_ops.push_back(store);
+
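+   /* Store the remaining components one dword at a time: stage component i
+    * in temp2 and advance the dword address by one for each store. */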
+ for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
+ emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write));
+ emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
+ {addr_vec.reg_i(0), Value::one_i}, last_write));
+ store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED,
+ temp2, addr_vec, m_ssbo_image_offset, rat_id, 1,
+ 1, 0, false);
+ emit_instruction(store);
+ if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT))
+ m_store_ops.push_back(store);
+ }
+
+ return true;
+}
+
+bool
+EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin)
+{
+ int imageid = 0;
+ PValue image_offset;
+
+ if (nir_src_is_const(intrin->src[0]))
+ imageid = nir_src_as_int(intrin->src[0]);
+ else
+ image_offset = from_nir(intrin->src[0], 0);
+
+ auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3});
+ auto undef = from_nir(intrin->src[2], 0);
+ auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3});
+ auto unknown = from_nir(intrin->src[4], 0);
+
+ if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+ nir_intrinsic_image_array(intrin)) {
+ emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
+ emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
+ }
+
+ auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
+ auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid,
+ image_offset, 1, 0xf, 0, false);
+
+ //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT))
+ m_store_ops.push_back(store);
+
+ emit_instruction(store);
+ return true;
+}
+
+bool
+EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin)
+{
+ int imageid = 0;
+ PValue image_offset;
+
+ if (nir_src_is_const(intrin->src[0]))
+ imageid = nir_src_as_int(intrin->src[0]);
+ else
+ image_offset = from_nir(intrin->src[0], 0);
+
+ auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT);
+
+ auto coord_orig = from_nir(intrin->src[1], 0, 0);
+ auto coord = get_temp_register(0);
+
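+   /* The SSBO byte offset is converted to a dword index for the RAT op. */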
+ emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write));
+
+ if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
+ from_nir(intrin->src[3], 0), {alu_write}));
+ // TODO: cayman wants channel 2 here
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
+ from_nir(intrin->src[2], 0), {alu_last_instr, alu_write}));
+ } else {
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
+ from_nir(intrin->src[2], 0), {alu_write}));
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write));
+ }
+
+ GPRVector out_vec({coord, coord, coord, coord});
+
+ auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset,
+ image_offset, 1, 0xf, 0, true);
+ emit_instruction(atomic);
+ emit_instruction(new WaitAck(0));
+
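+   /* The RAT atomic returns its result through the return buffer addressed
+    * above; read it back into the NIR destination with a vertex fetch. */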
+ GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components);
+ auto fetch = new FetchInstruction(vc_fetch,
+ no_index_offset,
+ fmt_32,
+ vtx_nf_int,
+ vtx_es_none,
+ m_rat_return_address.reg_i(1),
+ dest,
+ 0,
+ false,
+ 0xf,
+ R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
+ 0,
+ bim_none,
+ false,
+ false,
+ 0,
+ 0,
+ 0,
+ image_offset,
+ {0,7,7,7});
+ fetch->set_flag(vtx_srf_mode);
+ fetch->set_flag(vtx_use_tc);
+ emit_instruction(fetch);
+ return true;
+}
+
+bool
+EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin)
+{
+ int imageid = 0;
+ PValue image_offset;
+
+ if (nir_src_is_const(intrin->src[0]))
+ imageid = nir_src_as_int(intrin->src[0]);
+ else
+ image_offset = from_nir(intrin->src[0], 0);
+
+ auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin));
+
+ GPRVector::Swizzle swz = {0,1,2,3};
+ auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz);
+
+ if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D &&
+ nir_intrinsic_image_array(intrin)) {
+ emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write}));
+ emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write}));
+ }
+
+ if (intrin->intrinsic != nir_intrinsic_image_load) {
+ if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
+ from_nir(intrin->src[4], 0), {alu_write}));
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3),
+ from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
+ } else {
+ emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0),
+ from_nir(intrin->src[3], 0), {alu_last_instr, alu_write}));
+ }
+ }
+ auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat;
+
+ auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid,
+ image_offset, 1, 0xf, 0, true);
+ emit_instruction(store);
+ return fetch_return_value(intrin);
+}
+
+bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin)
+{
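+   /* Wait for the RAT operation to be acknowledged before fetching the
+    * value it returned. */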
+ emit_instruction(new WaitAck(0));
+
+ pipe_format format = nir_intrinsic_format(intrin);
+ unsigned fmt = fmt_32;
+ unsigned num_format = 0;
+ unsigned format_comp = 0;
+ unsigned endian = 0;
+
+ int imageid = 0;
+ PValue image_offset;
+
+ if (nir_src_is_const(intrin->src[0]))
+ imageid = nir_src_as_int(intrin->src[0]);
+ else
+ image_offset = from_nir(intrin->src[0], 0);
+
+ r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian);
+
+ GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
+
+ auto fetch = new FetchInstruction(vc_fetch,
+ no_index_offset,
+ (EVTXDataFormat)fmt,
+ (EVFetchNumFormat)num_format,
+ (EVFetchEndianSwap)endian,
+ m_rat_return_address.reg_i(1),
+ dest,
+ 0,
+ false,
+ 0x3,
+ R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid,
+ 0,
+ bim_none,
+ false,
+ false,
+ 0,
+ 0,
+ 0,
+ image_offset, {0,1,2,3});
+ fetch->set_flag(vtx_srf_mode);
+ fetch->set_flag(vtx_use_tc);
+ if (format_comp)
+ fetch->set_flag(vtx_format_comp_signed);
+
+ emit_instruction(fetch);
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin)
+{
+ GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest));
+ GPRVector src{0,{4,4,4,4}};
+
+ assert(nir_src_as_uint(intrin->src[1]) == 0);
+
+ auto const_offset = nir_src_as_const_value(intrin->src[0]);
+ auto dyn_offset = PValue();
+ int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
+ if (const_offset)
+ res_id += const_offset[0].u32;
+ else
+ dyn_offset = from_nir(intrin->src[0], 0);
+
+ if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) {
+ emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)),
+ res_id,
+ bim_none));
+ return true;
+ } else {
+ emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src,
+ 0/* ?? */,
+ res_id, dyn_offset));
+ if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
+ nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) {
+ /* Need to load the layers from a const buffer */
+
+ unsigned lookup_resid = const_offset[0].u32;
+ emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2),
+ PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4,
+ R600_BUFFER_INFO_CONST_BUFFER)),
+ EmitInstruction::last_write));
+ }
+ }
+ return true;
+}
+
+bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr)
+{
+ std::array<PValue,4> dst_elms;
+
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? i : 7);
+ }
+
+ GPRVector dst(dst_elms);
+ GPRVector src(0,{4,4,4,4});
+
+ auto const_offset = nir_src_as_const_value(intr->src[0]);
+ auto dyn_offset = PValue();
+ int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET;
+ if (const_offset)
+ res_id += const_offset[0].u32;
+ else
+ assert(0 && "dynamic buffer offset not supported in buffer_size");
+
+ emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
+ res_id, bim_none));
+
+ return true;
+}
+
+bool EmitSSBOInstruction::make_stores_ack_and_waitack()
+{
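+   /* Memory barriers are implemented by requesting an ack for every pending
+    * store and then waiting once for all of them. */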
+ for (auto&& store: m_store_ops)
+ store->set_ack();
+
+ if (!m_store_ops.empty())
+ emit_instruction(new WaitAck(0));
+
+ m_store_ops.clear();
+
+ return true;
+}
+
+GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
+{
+ GPRVector::Values v;
+   for (int i = 0; i < 4; ++i)
+ v[i] = from_nir(ir->dest, i);
+ return GPRVector(v);
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
new file mode 100644
index 000000000..56e0e31f1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
@@ -0,0 +1,57 @@
+#ifndef SFN_EMITSSBOINSTRUCTION_H
+#define SFN_EMITSSBOINSTRUCTION_H
+
+#include "sfn_emitinstruction.h"
+#include "sfn_instruction_gds.h"
+#include "sfn_value_gpr.h"
+
+namespace r600 {
+
+class EmitSSBOInstruction: public EmitInstruction {
+public:
+ EmitSSBOInstruction(ShaderFromNirProcessor& processor);
+
+ void set_ssbo_offset(int offset);
+
+ void set_require_rat_return_address();
+ bool load_rat_return_address();
+ bool load_atomic_inc_limits();
+
+private:
+ bool do_emit(nir_instr *instr);
+
+ bool emit_atomic(const nir_intrinsic_instr* instr);
+ bool emit_unary_atomic(const nir_intrinsic_instr* instr);
+ bool emit_atomic_add(const nir_intrinsic_instr* instr);
+ bool emit_atomic_inc(const nir_intrinsic_instr* instr);
+ bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr);
+
+ bool emit_load_ssbo(const nir_intrinsic_instr* instr);
+ bool emit_store_ssbo(const nir_intrinsic_instr* instr);
+
+ bool emit_image_size(const nir_intrinsic_instr *intrin);
+ bool emit_image_load(const nir_intrinsic_instr *intrin);
+ bool emit_image_store(const nir_intrinsic_instr *intrin);
+ bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin);
+ bool emit_buffer_size(const nir_intrinsic_instr *intrin);
+
+ bool fetch_return_value(const nir_intrinsic_instr *intrin);
+
+ bool make_stores_ack_and_waitack();
+
+ ESDOp get_opcode(nir_intrinsic_op opcode);
+ RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
+
+ GPRVector make_dest(const nir_intrinsic_instr* instr);
+
+ PGPRValue m_atomic_update;
+
+ bool m_require_rat_return_address;
+ GPRVector m_rat_return_address;
+ int m_ssbo_image_offset;
+ std::vector<RatInstruction *> m_store_ops;
+};
+
+}
+
+#endif // SFN_EMITSSBOINSTRUCTION_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
new file mode 100644
index 000000000..c31bee43d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp
@@ -0,0 +1,671 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_emittexinstruction.h"
+#include "sfn_shader_base.h"
+#include "sfn_instruction_fetch.h"
+
+namespace r600 {
+
+EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor):
+ EmitInstruction (processor)
+{
+}
+
+bool EmitTexInstruction::do_emit(nir_instr* instr)
+{
+ nir_tex_instr* ir = nir_instr_as_tex(instr);
+
+ TexInputs src;
+ if (!get_inputs(*ir, src))
+ return false;
+
+ if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ switch (ir->op) {
+ case nir_texop_txf:
+ return emit_buf_txf(ir, src);
+ case nir_texop_txs:
+ return emit_tex_txs(ir, src, {0,1,2,3});
+ default:
+ return false;
+ }
+ } else {
+ switch (ir->op) {
+ case nir_texop_tex:
+ return emit_tex_tex(ir, src);
+ case nir_texop_txf:
+ return emit_tex_txf(ir, src);
+ case nir_texop_txb:
+ return emit_tex_txb(ir, src);
+ case nir_texop_txl:
+ return emit_tex_txl(ir, src);
+ case nir_texop_txd:
+ return emit_tex_txd(ir, src);
+ case nir_texop_txs:
+ return emit_tex_txs(ir, src, {0,1,2,3});
+ case nir_texop_lod:
+ return emit_tex_lod(ir, src);
+ case nir_texop_tg4:
+ return emit_tex_tg4(ir, src);
+ case nir_texop_txf_ms:
+ return emit_tex_txf_ms(ir, src);
+ case nir_texop_query_levels:
+ return emit_tex_txs(ir, src, {3,7,7,7});
+ case nir_texop_texture_samples:
+ return emit_tex_texture_samples(ir, src, {3,7,7,7});
+ default:
+ return false;
+ }
+ }
+}
+
+bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src)
+{
+ auto dst = make_dest(*instr);
+
+ auto ir = new FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0,
+ instr->texture_index + R600_MAX_CONST_BUFFERS,
+ src.texture_offset, bim_none);
+ ir->set_flag(vtx_use_const_field);
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src)
+{
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample;
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect);
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c;
+ }
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, irt);
+
+ set_rect_coordinate_flags(instr, irt);
+ set_offsets(irt, src.offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample_g;
+ auto dst = make_dest(*instr);
+
+ GPRVector empty_dst(0,{7,7,7,7});
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::sample_c_g;
+ }
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ irgh->set_dest_swizzle({7,7,7,7});
+
+ TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy,
+ sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ irgv->set_dest_swizzle({7,7,7,7});
+
+ TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, ir);
+
+ set_rect_coordinate_flags(instr, ir);
+ set_offsets(ir, src.offset);
+
+ emit_instruction(irgh);
+ emit_instruction(irgv);
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto dst = make_dest(*instr);
+
+ if (*src.coord.reg_i(3) != *src.lod) {
+ if (src.coord.sel() != src.lod->sel())
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr}));
+ else
+ src.coord.set_reg_i(3, src.lod);
+ }
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect);
+
+ /* txf doesn't need rounding for the array index, but 1D has the array index
+ * in the z component */
+ if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
+ src.coord.set_reg_i(2, src.coord.reg_i(1));
+
+ auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ if (src.offset) {
+ assert(src.offset->is_ssa);
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
+ ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
+ {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ if (instr->is_array)
+ tex_ir->set_flag(TexInstruction::z_unnormalized);
+
+ emit_instruction(tex_ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src)
+{
+ auto tex_op = TexInstruction::get_tex_lod;
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ irt->set_dest_swizzle({1,0,7,7});
+ emit_instruction(irt);
+
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto tex_op = TexInstruction::sample_l;
+ if (instr->is_shadow) {
+ if (src.coord.sel() != src.comperator->sel())
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
+ else
+ src.coord.set_reg_i(2, src.comperator);
+ tex_op = TexInstruction::sample_c_l;
+ }
+
+ if (src.coord.sel() != src.lod->sel())
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write}));
+ else
+ src.coord.set_reg_i(3, src.lod);
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, irt);
+
+ set_rect_coordinate_flags(instr, irt);
+ set_offsets(irt, src.offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src)
+{
+ auto tex_op = TexInstruction::sample_lb;
+
+ std::array<uint8_t, 4> in_swizzle = {0,1,2,3};
+
+ if (instr->is_shadow) {
+ if (src.coord.sel() != src.comperator->sel())
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write}));
+ else
+ src.coord.set_reg_i(2, src.comperator);
+ tex_op = TexInstruction::sample_c_lb;
+ }
+
+ if (src.coord.sel() != src.bias->sel())
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write}));
+ else
+ src.coord.set_reg_i(3, src.bias);
+
+ GPRVector tex_src(src.coord, in_swizzle);
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ if (instr->is_array)
+ handle_array_index(*instr, tex_src, irt);
+
+ set_rect_coordinate_flags(instr, irt);
+ set_offsets(irt, src.offset);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src,
+ const std::array<int,4>& dest_swz)
+{
+ std::array<PValue,4> dst_elms;
+ std::array<PValue,4> src_elms;
+
+ for (uint16_t i = 0; i < 4; ++i) {
+ dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? i : 7);
+ }
+
+ GPRVector dst(dst_elms);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)),
+ instr->sampler_index + R600_MAX_CONST_BUFFERS,
+ bim_none));
+ } else {
+ for (uint16_t i = 0; i < 4; ++i)
+ src_elms[i] = tex_src.lod;
+ GPRVector src(src_elms);
+
+ auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset);
+ ir->set_dest_swizzle(dest_swz);
+ emit_instruction(ir);
+
+ if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ PValue src(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2),
+ sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER));
+
+ auto alu = new AluInstruction(op1_mov, dst[2], src, {last_write});
+ emit_instruction(alu);
+ set_has_txs_cube_array_comp();
+ }
+ }
+
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
+ const std::array<int, 4> &dest_swz)
+{
+ GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
+ GPRVector help{0,{4,4,4,4}};
+
+ auto dyn_offset = PValue();
+ int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index;
+
+ auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help,
+ 0, res_id, src.sampler_offset);
+ ir->set_dest_swizzle(dest_swz);
+ emit_instruction(ir);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ TexInstruction *set_ofs = nullptr;
+
+ auto tex_op = TexInstruction::gather4;
+
+ if (instr->is_shadow) {
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator,
+ {alu_last_instr, alu_write}));
+ tex_op = TexInstruction::gather4_c;
+ }
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
+ bool literal_offset = false;
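+   /* Literal offsets can be encoded in the gather instruction itself;
+    * varying offsets have to be loaded first with a set_offsets TEX op. */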
+ if (src.offset) {
+ literal_offset = nir_src_as_const_value(*src.offset) != 0;
+ r600::sfn_log << SfnLog::tex << " really have offsets and they are " <<
+ (literal_offset ? "literal" : "varying") <<
+ "\n";
+
+ if (!literal_offset) {
+ GPRVector::Swizzle swizzle = {4,4,4,4};
+ for (unsigned i = 0; i < instr->coord_components; ++i)
+ swizzle[i] = i;
+
+ int noffsets = instr->coord_components;
+ if (instr->is_array)
+ --noffsets;
+
+ auto ofs = vec_from_nir_with_fetch_constant(*src.offset,
+ ( 1 << noffsets) - 1,
+ swizzle);
+ GPRVector dummy(0, {7,7,7,7});
+ tex_op = (tex_op == TexInstruction::gather4_c) ?
+ TexInstruction::gather4_c_o : TexInstruction::gather4_o;
+
+ set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy,
+ ofs, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ set_ofs->set_dest_swizzle({7,7,7,7});
+ }
+ }
+
+   /* pre-Cayman needs this dest swizzle */
+ auto dst = make_dest(*instr);
+ auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ irt->set_dest_swizzle({1,2,0,3});
+ irt->set_gather_comp(instr->component);
+
+ if (instr->is_array)
+ handle_array_index(*instr, src.coord, irt);
+
+ if (literal_offset) {
+ r600::sfn_log << SfnLog::tex << "emit literal offsets\n";
+ set_offsets(irt, src.offset);
+ }
+
+ set_rect_coordinate_flags(instr, irt);
+
+ if (set_ofs)
+ emit_instruction(set_ofs);
+
+ emit_instruction(irt);
+ return true;
+}
+
+bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src)
+{
+ assert(instr->src[0].src.is_ssa);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref);
+ assert(!sampler.indirect && "Indirect sampler selection not yet supported");
+
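+   /* First fetch the per-pixel sample lookup table; the 4-bit entry selected
+    * via ms_index below becomes the sample slot for the actual texel fetch
+    * (an inferred reading of the ld with inst_mode 1). */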
+ PGPRValue sample_id_dest_reg = get_temp_register();
+ GPRVector sample_id_dest(sample_id_dest_reg->sel(), {7,7,7,7});
+ sample_id_dest.set_reg_i(sample_id_dest_reg->chan(), sample_id_dest_reg);
+ std::array<int,4> dest_swz = {7,7,7,7};
+ dest_swz[sample_id_dest_reg->chan()] = 0;
+
+ emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3),
+ src.ms_index,
+ {alu_write, alu_last_instr}));
+
+ auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+ tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized);
+ tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized);
+ tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized);
+ tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized);
+ tex_sample_id_ir->set_inst_mode(1);
+
+ tex_sample_id_ir->set_dest_swizzle(dest_swz);
+
+ emit_instruction(tex_sample_id_ir);
+
+ if (src.ms_index->type() != Value::literal ||
+ static_cast<const LiteralValue&>(*src.ms_index).value() != 0) {
+ PValue help = get_temp_register();
+
+ emit_instruction(new AluInstruction(op2_lshl_int, help,
+ src.ms_index, literal(2),
+ {alu_write, alu_last_instr}));
+
+ emit_instruction(new AluInstruction(op2_lshr_int, sample_id_dest_reg,
+ {sample_id_dest_reg, help},
+ {alu_write, alu_last_instr}));
+ }
+
+ emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3),
+ {sample_id_dest_reg, PValue(new LiteralValue(15))},
+ {alu_write, alu_last_instr}));
+
+ auto dst = make_dest(*instr);
+
+ /* txf doesn't need rounding for the array index, but 1D has the array index
+ * in the z component */
+ if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D)
+ src.coord.set_reg_i(2, src.coord.reg_i(1));
+
+ auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord,
+ sampler.id,
+ sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset);
+
+ if (src.offset) {
+ assert(src.offset->is_ssa);
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) {
+ ir = new AluInstruction(op2_add_int, src.coord.reg_i(i),
+ {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ emit_instruction(tex_ir);
+ return true;
+}
+
+bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src)
+{
+ sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n";
+
+ unsigned grad_components = instr.coord_components;
+ if (instr.is_array && !instr.array_is_lowered_cube)
+ --grad_components;
+
+
+ src.offset = nullptr;
+ bool retval = true;
+ for (unsigned i = 0; i < instr.num_srcs; ++i) {
+ switch (instr.src[i].src_type) {
+ case nir_tex_src_bias:
+ src.bias = from_nir(instr.src[i], 0);
+ break;
+
+ case nir_tex_src_coord: {
+ src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src,
+ (1 << instr.coord_components) - 1,
+ {0,1,2,3});
+ } break;
+ case nir_tex_src_comparator:
+ src.comperator = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_ddx: {
+ sfn_log << SfnLog::tex << "Get DDX ";
+ src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src,
+ (1 << grad_components) - 1,
+ swizzle_from_comps(grad_components));
+ sfn_log << SfnLog::tex << src.ddx << "\n";
+ } break;
+ case nir_tex_src_ddy:{
+ sfn_log << SfnLog::tex << "Get DDY ";
+ src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src,
+ (1 << grad_components) - 1,
+ swizzle_from_comps(grad_components));
+ sfn_log << SfnLog::tex << src.ddy << "\n";
+ } break;
+ case nir_tex_src_lod:
+ src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0);
+ break;
+ case nir_tex_src_offset:
+ sfn_log << SfnLog::tex << " -- Find offset\n";
+ src.offset = &instr.src[i].src;
+ break;
+ case nir_tex_src_sampler_deref:
+ src.sampler_deref = get_deref_location(instr.src[i].src);
+ break;
+ case nir_tex_src_texture_deref:
+ src.texture_deref = get_deref_location(instr.src[i].src);
+ break;
+ case nir_tex_src_ms_index:
+ src.ms_index = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_texture_offset:
+ src.texture_offset = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_sampler_offset:
+ src.sampler_offset = from_nir(instr.src[i], 0);
+ break;
+ case nir_tex_src_plane:
+ case nir_tex_src_projector:
+ case nir_tex_src_min_lod:
+ case nir_tex_src_ms_mcs:
+ default:
+ sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n";
+ retval = false;
+ }
+ }
+ return retval;
+}
+
+GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr)
+{
+ int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
+ instr.dest.reg.reg->num_components;
+ std::array<PValue,4> dst_elms;
+ for (uint16_t i = 0; i < 4; ++i)
+ dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7);
+ return GPRVector(dst_elms);
+}
+
+
+GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr,
+ const std::array<int, 4>& swizzle)
+{
+ int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components :
+ instr.dest.reg.reg->num_components;
+ std::array<PValue,4> dst_elms;
+ for (uint16_t i = 0; i < 4; ++i) {
+ int k = swizzle[i];
+ dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? k : 7);
+ }
+ return GPRVector(dst_elms);
+}
+
+void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr,
+ TexInstruction* ir) const
+{
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ ir->set_flag(TexInstruction::x_unnormalized);
+ ir->set_flag(TexInstruction::y_unnormalized);
+ }
+}
+
+void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset)
+{
+ if (!offset)
+ return;
+
+ assert(offset->is_ssa);
+ auto literal = nir_src_as_const_value(*offset);
+ assert(literal);
+
+ for (int i = 0; i < offset->ssa->num_components; ++i) {
+ ir->set_offset(i, literal[i].i32);
+ }
+}
+
+void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir)
+{
+ int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2;
+ emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx),
+ {alu_last_instr, alu_write}));
+ ir->set_flag(TexInstruction::z_unnormalized);
+}
+
+EmitTexInstruction::SamplerId
+EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref)
+{
+ EmitTexInstruction::SamplerId result = {sampler_id, false};
+
+ if (deref) {
+ assert(glsl_type_is_sampler(deref->type));
+ result.id = deref->data.binding;
+ }
+ return result;
+}
+
+EmitTexInstruction::TexInputs::TexInputs():
+ sampler_deref(nullptr),
+ texture_deref(nullptr),
+ offset(nullptr)
+{
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
new file mode 100644
index 000000000..e11ebda1c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h
@@ -0,0 +1,96 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_EMITTEXINSTRUCTION_H
+#define SFN_EMITTEXINSTRUCTION_H
+
+#include "sfn_emitinstruction.h"
+#include "sfn_instruction_tex.h"
+
+namespace r600 {
+
+class EmitTexInstruction : public EmitInstruction
+{
+public:
+ EmitTexInstruction(ShaderFromNirProcessor& processor);
+
+private:
+ struct TexInputs {
+ TexInputs();
+ const nir_variable *sampler_deref;
+ const nir_variable *texture_deref;
+ GPRVector coord;
+ PValue bias;
+ PValue comperator;
+ PValue lod;
+ GPRVector ddx;
+ GPRVector ddy;
+ nir_src *offset;
+ PValue gather_comp;
+ PValue ms_index;
+ PValue sampler_offset;
+ PValue texture_offset;
+ };
+
+ bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src);
+
+ bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src);
+ bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src,
+ const std::array<int, 4> &dest_swz);
+ bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src,
+ const std::array<int, 4> &dest_swz);
+ bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src);
+ bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src);
+ bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src);
+
+ bool get_inputs(const nir_tex_instr& instr, TexInputs &src);
+
+ void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const;
+
+ bool do_emit(nir_instr* instr) override;
+
+ GPRVector make_dest(nir_tex_instr& instr);
+ GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle);
+
+ void set_offsets(TexInstruction* ir, nir_src *offset);
+ void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir);
+
+ struct SamplerId {
+ int id;
+ bool indirect;
+ };
+
+ SamplerId get_sampler_id(int sampler_id, const nir_variable *deref);
+
+};
+
+}
+
+#endif // SFN_EMITTEXINSTRUCTION_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
new file mode 100644
index 000000000..72cf23172
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp
@@ -0,0 +1,183 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_alu.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+const AluModifiers AluInstruction::src_abs_flags[2] =
+ {alu_src0_abs, alu_src1_abs};
+const AluModifiers AluInstruction::src_neg_flags[3] =
+ {alu_src0_neg, alu_src1_neg, alu_src2_neg};
+const AluModifiers AluInstruction::src_rel_flags[3] =
+ {alu_src0_rel, alu_src1_rel, alu_src2_rel};
+
+AluInstruction::AluInstruction(EAluOp opcode):
+ Instruction (Instruction::alu),
+ m_opcode(opcode),
+ m_src(alu_ops.at(opcode).nsrc),
+ m_bank_swizzle(alu_vec_unknown),
+ m_cf_type(cf_alu)
+{
+ if (alu_ops.at(opcode).nsrc == 3)
+ m_flags.set(alu_op3);
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src,
+ const std::set<AluModifiers>& flags):
+ Instruction (Instruction::alu),
+ m_opcode(opcode),
+ m_dest(dest),
+ m_bank_swizzle(alu_vec_unknown),
+ m_cf_type(cf_alu)
+{
+ assert(dest);
+ m_src.swap(src);
+ for (auto f : flags)
+ m_flags.set(f);
+
+ if (alu_ops.at(opcode).nsrc == 3)
+ m_flags.set(alu_op3);
+
+ for (auto &s: m_src)
+ add_remappable_src_value(&s);
+
+ add_remappable_dst_value(&m_dest);
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
+ const std::set<AluModifiers>& flags):
+ AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags)
+{
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest,
+ PValue src0, PValue src1,
+ const std::set<AluModifiers> &m_flags):
+ AluInstruction(opcode, dest, {src0, src1}, m_flags)
+{
+}
+
+AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0,
+ PValue src1, PValue src2,
+ const std::set<AluModifiers> &flags):
+ AluInstruction(opcode, dest, {src0, src1, src2}, flags)
+{
+}
+
+bool AluInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == alu);
+ const auto& oth = static_cast<const AluInstruction&>(lhs);
+
+ if (m_opcode != oth.m_opcode) {
+ return false;
+ }
+
+ if (*m_dest != *oth.m_dest)
+ return false;
+
+ if (m_src.size() != oth.m_src.size())
+ return false;
+
+ for (unsigned i = 0; i < m_src.size(); ++i)
+ if (*m_src[i] != *oth.m_src[i]) {
+ return false;
+ }
+ return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type);
+}
+
+void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ for (auto c: candidates) {
+ if (*c == *m_dest)
+ m_dest = new_value;
+
+ for (auto& s: m_src) {
+ if (*c == *s)
+ s = new_value;
+ }
+ }
+}
+
+PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
+ ValueMap &values)
+{
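+   /* If the rename map has a valid entry for this register's sel, redirect
+    * to the renamed register; the (possibly new) register is marked used. */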
+ auto new_index = map[reg->sel()];
+ if (new_index.valid)
+ reg = values.get_or_inject(new_index.new_reg, reg->chan());
+ map[reg->sel()].used = true;
+ return reg;
+}
+
+
+void AluInstruction::set_flag(AluModifiers flag)
+{
+ m_flags.set(flag);
+}
+
+void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz)
+{
+ m_bank_swizzle = bswz;
+}
+
+unsigned AluInstruction::n_sources() const
+{
+ return m_src.size();
+}
+
+void AluInstruction::do_print(std::ostream& os) const
+{
+ os << "ALU " << alu_ops.at(m_opcode).name;
+ if (m_flags.test(alu_dst_clamp))
+ os << "_CLAMP";
+ if (m_dest)
+      os << ' ' << *m_dest << " : ";
+
+ for (unsigned i = 0; i < m_src.size(); ++i) {
+ int pflags = 0;
+ if (i)
+ os << ' ';
+ if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg;
+ if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel;
+ if (i < 2)
+ if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs;
+ m_src[i]->print(os, Value::PrintFlags(0, pflags));
+ }
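+   /* Flag summary: W = writes dest, L = last in ALU group,
+    * E = updates exec mask, P = updates predicate. */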
+ os << " {";
+ os << (m_flags.test(alu_write) ? 'W' : ' ');
+ os << (m_flags.test(alu_last_instr) ? 'L' : ' ');
+ os << (m_flags.test(alu_update_exec) ? 'E' : ' ');
+ os << (m_flags.test(alu_update_pred) ? 'P' : ' ');
+ os << "}";
+
+ os << " BS:" << m_bank_swizzle;
+ os << " CF:" << m_cf_type;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
new file mode 100644
index 000000000..383fa3baf
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h
@@ -0,0 +1,144 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_r600_instruction_alu_h
+#define sfn_r600_instruction_alu_h
+
+#include "sfn_instruction_base.h"
+#include "sfn_alu_defines.h"
+
+namespace r600 {
+
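+/* Per-instruction modifier and scheduling flags: negate and relative
+ * addressing for sources 0-2; only src0 and src1 support the abs
+ * modifier in the hardware encoding. */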
+enum AluModifiers {
+ alu_src0_neg,
+ alu_src0_abs,
+ alu_src0_rel,
+ alu_src1_neg,
+ alu_src1_abs,
+ alu_src1_rel,
+ alu_src2_neg,
+ alu_src2_rel,
+ alu_dst_clamp,
+ alu_dst_rel,
+ alu_last_instr,
+ alu_update_exec,
+ alu_update_pred,
+ alu_write,
+ alu_op3
+};
+
+enum AluDstModifiers {
+ omod_off = 0,
+ omod_mul2 = 1,
+ omod_mul4 = 2,
+ omod_divl2 = 3
+};
+
+enum AluPredSel {
+ pred_off = 0,
+ pred_zero = 2,
+ pred_one = 3
+};
+
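+/* ALU bank swizzle codes; the vector (alu_vec_*) and scalar (sq_alu_scl_*)
+ * names alias the same numeric encodings. */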
+enum AluBankSwizzle {
+ alu_vec_012 = 0,
+ sq_alu_scl_201 = 0,
+ alu_vec_021 = 1,
+ sq_alu_scl_122 = 1,
+ alu_vec_120 = 2,
+ sq_alu_scl_212 = 2,
+ alu_vec_102 = 3,
+ sq_alu_scl_221 = 3,
+ alu_vec_201 = 4,
+ alu_vec_210 = 5,
+ alu_vec_unknown = 6
+};
+
+class AluInstruction : public Instruction {
+public:
+
+ static const AluModifiers src_abs_flags[2];
+ static const AluModifiers src_neg_flags[3];
+ static const AluModifiers src_rel_flags[3];
+
+   AluInstruction(EAluOp opcode);
+   AluInstruction(EAluOp opcode, PValue dest,
+                  std::vector<PValue> src,
+                  const std::set<AluModifiers>& flags);
+
+   AluInstruction(EAluOp opcode, PValue dest, PValue src0,
+                  const std::set<AluModifiers>& flags);
+
+   AluInstruction(EAluOp opcode, PValue dest,
+                  PValue src0, PValue src1,
+                  const std::set<AluModifiers>& flags);
+
+   AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1,
+                  PValue src2,
+                  const std::set<AluModifiers>& flags);
+
+ void set_flag(AluModifiers flag);
+ unsigned n_sources() const;
+
+ PValue dest() {return m_dest;}
+ EAluOp opcode() const {return m_opcode;}
+ const Value *dest() const {return m_dest.get();}
+ Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];}
+ PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];}
+ bool is_last() const {return m_flags.test(alu_last_instr);}
+ bool write() const {return m_flags.test(alu_write);}
+ bool flag(AluModifiers f) const {return m_flags.test(f);}
+ void set_bank_swizzle(AluBankSwizzle swz);
+ int bank_swizzle() const {return m_bank_swizzle;}
+ ECFAluOpCode cf_type() const {return m_cf_type;}
+ void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; }
+
+ void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map,
+ ValueMap &values);
+
+
+ EAluOp m_opcode;
+ PValue m_dest;
+ std::vector<PValue> m_src;
+ AluOpFlags m_flags;
+ AluDstModifiers m_omod;
+ AluPredSel m_pred_sel;
+ AluBankSwizzle m_bank_swizzle;
+ ECFAluOpCode m_cf_type;
+};
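+
+/* Hypothetical usage sketch (dst, a and b stand for previously created
+ * PValues; the opcode is just an example):
+ *
+ *    AluInstruction add(op2_add, dst, a, b, {alu_write, alu_last_instr});
+ *    add.set_bank_swizzle(alu_vec_012);
+ */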
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
new file mode 100644
index 000000000..116bfaca5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp
@@ -0,0 +1,187 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <algorithm>
+#include <cassert>
+
+#include "sfn_instruction_base.h"
+#include "sfn_liverange.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m,
+ ValueMap& values):
+ m_map(m),
+ m_values(values)
+{
+}
+
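+/* Remap a single value: plain GPRs are renamed directly; GPR array values
+ * remap both the element value and a GPR-based indirect address and mark
+ * the whole array range as used; kcache constants only remap a GPR-based
+ * address register. */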
+void ValueRemapper::remap(PValue& v)
+{
+ if (!v)
+ return;
+ if (v->type() == Value::gpr) {
+ v = remap_one_registers(v);
+ } else if (v->type() == Value::gpr_array_value) {
+ GPRArrayValue& val = static_cast<GPRArrayValue&>(*v);
+ auto value = val.value();
+ auto addr = val.indirect();
+ val.reset_value(remap_one_registers(value));
+ if (addr) {
+ if (addr->type() == Value::gpr)
+ val.reset_addr(remap_one_registers(addr));
+ }
+ size_t range_start = val.sel();
+ size_t range_end = range_start + val.array_size();
+ while (range_start < range_end)
+ m_map[range_start++].used = true;
+ } else if (v->type() == Value::kconst) {
+ auto& val = static_cast<UniformValue&>(*v);
+ auto addr = val.addr();
+ if (addr && addr->type() == Value::gpr)
+ val.reset_addr(remap_one_registers(addr));
+ }
+
+}
+
+void ValueRemapper::remap(GPRVector& v)
+{
+ for (int i = 0; i < 4; ++i) {
+ if (v.reg_i(i)) {
+ auto& ns_idx = m_map[v.reg_i(i)->sel()];
+ if (ns_idx.valid)
+ v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan()));
+ m_map[v.reg_i(i)->sel()].used = true;
+ }
+ }
+}
+
+PValue ValueRemapper::remap_one_registers(PValue& reg)
+{
+ auto new_index = m_map[reg->sel()];
+ if (new_index.valid)
+ reg = m_values.get_or_inject(new_index.new_reg, reg->chan());
+ m_map[reg->sel()].used = true;
+ return reg;
+}
+
+
+Instruction::Instruction(instr_type t):
+ m_type(t)
+{
+}
+
+Instruction::~Instruction()
+{
+}
+
+void Instruction::print(std::ostream& os) const
+{
+ os << "OP:";
+ do_print(os);
+}
+
+
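+/* Rename every register this instruction registered as remappable; the
+ * actual renaming is delegated to the ValueRemapper. */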
+void Instruction::remap_registers(ValueRemapper& map)
+{
+ sfn_log << SfnLog::merge << "REMAP " << *this << "\n";
+ for (auto& v: m_mappable_src_registers)
+ map.remap(*v);
+
+ for (auto& v: m_mappable_src_vectors)
+ map.remap(*v);
+
+ for (auto& v: m_mappable_dst_registers)
+ map.remap(*v);
+
+ for (auto& v: m_mappable_dst_vectors)
+ map.remap(*v);
+ sfn_log << SfnLog::merge << "TO " << *this << "\n\n";
+}
+
+void Instruction::add_remappable_src_value(PValue *v)
+{
+ if (*v)
+ m_mappable_src_registers.push_back(v);
+}
+
+void Instruction::add_remappable_src_value(GPRVector *v)
+{
+ m_mappable_src_vectors.push_back(v);
+}
+
+void Instruction::add_remappable_dst_value(PValue *v)
+{
+ if (v)
+ m_mappable_dst_registers.push_back(v);
+}
+
+void Instruction::add_remappable_dst_value(GPRVector *v)
+{
+ m_mappable_dst_vectors.push_back(v);
+}
+
+void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value)
+{
+
+}
+
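+/* Record reads of all mappable sources and writes of all mappable
+ * destinations for liverange evaluation; subclasses contribute scope
+ * events via do_evalue_liveness. */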
+void Instruction::evalue_liveness(LiverangeEvaluator& eval) const
+{
+ sfn_log << SfnLog::merge << "Scan " << *this << "\n";
+ for (const auto& s: m_mappable_src_registers)
+ if (*s)
+ eval.record_read(**s);
+
+ for (const auto& s: m_mappable_src_vectors)
+ eval.record_read(*s);
+
+ for (const auto& s: m_mappable_dst_registers)
+ if (*s)
+ eval.record_write(**s);
+
+ for (const auto& s: m_mappable_dst_vectors)
+ eval.record_write(*s);
+
+ do_evalue_liveness(eval);
+}
+
+void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const
+{
+
+}
+
+bool operator == (const Instruction& lhs, const Instruction& rhs)
+{
+ if (rhs.m_type != lhs.m_type)
+ return false;
+
+ return lhs.is_equal_to(rhs);
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
new file mode 100644
index 000000000..0689a473a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h
@@ -0,0 +1,155 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_r600_instr_h
+#define sfn_r600_instr_h
+
+#include "sfn_instructionvisitor.h"
+#include "sfn_value_gpr.h"
+#include "sfn_defines.h"
+
+#include "gallium/drivers/r600/r600_isa.h"
+#include <iostream>
+#include <memory>
+#include <vector>
+#include <set>
+
+namespace r600 {
+
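+/* Entry of the register rename map: new_reg is only meaningful when valid
+ * is set; used records that the (possibly renamed) register was
+ * referenced. */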
+struct rename_reg_pair {
+ bool valid;
+ bool used;
+ int new_reg;
+};
+
+class LiverangeEvaluator;
+class ValueMap;
+
+
+class ValueRemapper {
+public:
+ ValueRemapper(std::vector<rename_reg_pair>& m,
+ ValueMap& values);
+
+ void remap(PValue& v);
+ void remap(GPRVector& v);
+private:
+ PValue remap_one_registers(PValue& reg);
+
+ std::vector<rename_reg_pair>& m_map;
+ ValueMap& m_values;
+};
+
+
+using OutputRegisterMap = std::map<unsigned, const GPRVector *>;
+
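+/* Base class of the shader IR: keeps the lists of remappable source and
+ * destination values and defines the visitor, equality and printing
+ * interfaces. */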
+class Instruction {
+public:
+ enum instr_type {
+ alu,
+ exprt,
+ tex,
+ vtx,
+ wait_ack,
+ cond_if,
+ cond_else,
+ cond_endif,
+ lds_atomic,
+ lds_read,
+ lds_write,
+ loop_begin,
+ loop_end,
+ loop_break,
+ loop_continue,
+ phi,
+ streamout,
+ ring,
+ emit_vtx,
+ mem_wr_scratch,
+ gds,
+ rat,
+ tf_write,
+ block,
+ unknown
+ };
+
+ typedef std::shared_ptr<Instruction> Pointer;
+
+ friend bool operator == (const Instruction& lhs, const Instruction& rhs);
+
+ Instruction(instr_type t);
+
+ virtual ~Instruction();
+
+ instr_type type() const { return m_type;}
+
+ void print(std::ostream& os) const;
+
+ virtual void replace_values(const ValueSet& candidates, PValue new_value);
+
+ void evalue_liveness(LiverangeEvaluator& eval) const;
+
+ void remap_registers(ValueRemapper& map);
+
+ virtual bool accept(InstructionVisitor& visitor) = 0;
+ virtual bool accept(ConstInstructionVisitor& visitor) const = 0;
+
+protected:
+
+ void add_remappable_src_value(PValue *v);
+ void add_remappable_src_value(GPRVector *v);
+ void add_remappable_dst_value(PValue *v);
+ void add_remappable_dst_value(GPRVector *v);
+
+private:
+
+ virtual void do_evalue_liveness(LiverangeEvaluator& eval) const;
+
+ virtual bool is_equal_to(const Instruction& lhs) const = 0;
+
+ instr_type m_type;
+
+ virtual void do_print(std::ostream& os) const = 0;
+
+ std::vector<PValue*> m_mappable_src_registers;
+ std::vector<GPRVector*> m_mappable_src_vectors;
+ std::vector<PValue*> m_mappable_dst_registers;
+ std::vector<GPRVector*> m_mappable_dst_vectors;
+};
+
+using PInstruction=Instruction::Pointer;
+
+inline std::ostream& operator << (std::ostream& os, const Instruction& instr)
+{
+ instr.print(os);
+ return os;
+}
+
+bool operator == (const Instruction& lhs, const Instruction& rhs);
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
new file mode 100644
index 000000000..212499faf
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp
@@ -0,0 +1,57 @@
+#include "sfn_instruction_block.h"
+
+namespace r600 {
+
+
+InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number):
+ Instruction(block),
+ m_block_number(block_number),
+ m_nesting_depth(nesting_depth)
+{
+}
+
+void InstructionBlock::emit(PInstruction instr)
+{
+ m_block.push_back(instr);
+}
+
+void InstructionBlock::remap_registers(ValueRemapper& map)
+{
+ for(auto& i: m_block)
+ i->remap_registers(map);
+}
+
+void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ for(auto& i: m_block)
+ i->evalue_liveness(eval);
+}
+
+bool InstructionBlock::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == block);
+ auto& l = static_cast<const InstructionBlock&>(lhs);
+
+ if (m_block.size() != l.m_block.size())
+ return false;
+
+ if (m_block_number != l.m_block_number)
+ return false;
+
+ return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(),
+ [](PInstruction ri, PInstruction li) {return *ri == *li;});
+}
+
+PInstruction InstructionBlock::last_instruction()
+{
+ return m_block.size() ? *m_block.rbegin() : nullptr;
+}
+
+void InstructionBlock::do_print(std::ostream& os) const
+{
+   std::string space(2 * m_nesting_depth, ' ');
+ for(auto& i: m_block)
+ os << space << *i << "\n";
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
new file mode 100644
index 000000000..fe40cc10c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
@@ -0,0 +1,82 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef sfn_instruction_block_h
+#define sfn_instruction_block_h
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
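+/* A numbered, linear sequence of instructions at a fixed nesting depth. */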
+class InstructionBlock : public Instruction
+{
+public:
+ InstructionBlock(unsigned nesting_depth, unsigned block_number);
+
+ void emit(PInstruction instr);
+
+
+ std::vector<PInstruction>::const_iterator begin() const {
+ return m_block.begin();
+ }
+ std::vector<PInstruction>::const_iterator end() const {
+ return m_block.end();
+ }
+
+ void remap_registers(ValueRemapper& map);
+
+ size_t size() const {
+ return m_block.size();
+ }
+
+ const PInstruction& operator [] (int i) const {
+ return m_block[i];
+ }
+
+ unsigned number() const {
+ return m_block_number;
+ }
+
+ PInstruction last_instruction();
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ std::vector<PInstruction> m_block;
+
+ unsigned m_block_number;
+ unsigned m_nesting_depth;
+};
+
+}
+
+#endif // sfn_instruction_block_h
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
new file mode 100644
index 000000000..455d6d630
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp
@@ -0,0 +1,195 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_cf.h"
+#include "sfn_liverange.h"
+
+namespace r600 {
+
+CFInstruction::CFInstruction(instr_type type):Instruction(type)
+{
+
+}
+
+IfElseInstruction::IfElseInstruction(instr_type type):
+ CFInstruction (type)
+{
+
+}
+
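+/* The IF takes ownership of its predicate ALU instruction, registers the
+ * predicate source for remapping, and marks the predicate as an ALU
+ * clause that pushes the execution mask. */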
+IfInstruction::IfInstruction(AluInstruction *pred):
+ IfElseInstruction(cond_if),
+ m_pred(pred)
+{
+ PValue *v = m_pred->psrc(0);
+ add_remappable_src_value(v);
+ pred->set_cf_type(cf_alu_push_before);
+}
+
+void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ eval.scope_if();
+}
+
+bool IfInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == cond_if);
+ const IfInstruction& l = static_cast<const IfInstruction&>(lhs);
+ return *l.m_pred == *m_pred;
+}
+
+void IfInstruction::do_print(std::ostream& os) const
+{
+ os << "PRED = " << *m_pred << "\n";
+ os << "IF (PRED)";
+}
+
+ElseInstruction::ElseInstruction(IfInstruction *jump_src):
+ IfElseInstruction(cond_else),
+ m_jump_src(jump_src)
+{
+}
+
+void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ eval.scope_else();
+}
+
+
+bool ElseInstruction::is_equal_to(const Instruction& lhs) const
+{
+ if (lhs.type() != cond_else)
+ return false;
+ auto& l = static_cast<const ElseInstruction&>(lhs);
+ return (*m_jump_src == *l.m_jump_src);
+}
+
+void ElseInstruction::do_print(std::ostream& os) const
+{
+ os << "ELSE";
+}
+
+IfElseEndInstruction::IfElseEndInstruction():
+ IfElseInstruction(cond_endif)
+{
+}
+
+void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ eval.scope_endif();
+}
+
+bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const
+{
+ if (lhs.type() != cond_endif)
+ return false;
+ return true;
+}
+
+void IfElseEndInstruction::do_print(std::ostream& os) const
+{
+ os << "ENDIF";
+}
+
+LoopBeginInstruction::LoopBeginInstruction():
+ CFInstruction(loop_begin)
+{
+}
+
+void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ eval.scope_loop_begin();
+}
+
+bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == loop_begin);
+ return true;
+}
+
+void LoopBeginInstruction::do_print(std::ostream& os) const
+{
+ os << "BGNLOOP";
+}
+
+LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start):
+ CFInstruction (loop_end),
+ m_start(start)
+{
+}
+
+void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ eval.scope_loop_end();
+}
+
+bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == loop_end);
+ const auto& other = static_cast<const LoopEndInstruction&>(lhs);
+ return *m_start == *other.m_start;
+}
+
+void LoopEndInstruction::do_print(std::ostream& os) const
+{
+ os << "ENDLOOP";
+}
+
+LoopBreakInstruction::LoopBreakInstruction():
+ CFInstruction (loop_break)
+{
+}
+
+void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const
+{
+ eval.scope_loop_break();
+}
+
+bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const
+{
+ return true;
+}
+
+void LoopBreakInstruction::do_print(std::ostream& os) const
+{
+ os << "BREAK";
+}
+
+LoopContInstruction::LoopContInstruction():
+ CFInstruction (loop_continue)
+{
+}
+
+bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const
+{
+ return true;
+}
+void LoopContInstruction::do_print(std::ostream& os) const
+{
+ os << "CONTINUE";
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
new file mode 100644
index 000000000..a13794803
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h
@@ -0,0 +1,142 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_IFELSEINSTRUCTION_H
+#define SFN_IFELSEINSTRUCTION_H
+
+#include "sfn_instruction_alu.h"
+
+namespace r600 {
+
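+/* Control flow: if/else/endif and loop begin/end/break/continue. Apart
+ * from the IF predicate these carry no values; their main job is scope
+ * tracking for liverange evaluation. */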
+class CFInstruction : public Instruction {
+protected:
+ CFInstruction(instr_type type);
+};
+
+class IfElseInstruction : public CFInstruction {
+public:
+ IfElseInstruction(instr_type type);
+
+};
+
+class IfInstruction : public IfElseInstruction {
+public:
+ IfInstruction(AluInstruction *pred);
+ const AluInstruction& pred() const {return *m_pred;}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ std::shared_ptr<AluInstruction> m_pred;
+};
+
+class ElseInstruction : public IfElseInstruction {
+public:
+ ElseInstruction(IfInstruction *jump_src);
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ IfElseInstruction *m_jump_src;
+};
+
+class IfElseEndInstruction : public IfElseInstruction {
+public:
+ IfElseEndInstruction();
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+class LoopBeginInstruction: public CFInstruction {
+public:
+ LoopBeginInstruction();
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+class LoopEndInstruction: public CFInstruction {
+public:
+ LoopEndInstruction(LoopBeginInstruction *start);
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ LoopBeginInstruction *m_start;
+};
+
+class LoopBreakInstruction: public CFInstruction {
+public:
+ LoopBreakInstruction();
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_evalue_liveness(LiverangeEvaluator& eval) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+class LoopContInstruction: public CFInstruction {
+public:
+ LoopContInstruction();
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+};
+
+}
+
+#endif // SFN_IFELSEINSTRUCTION_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
new file mode 100644
index 000000000..7d1d948a1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp
@@ -0,0 +1,341 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_instruction_export.h"
+#include "sfn_liverange.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
+ Instruction(t),
+ m_value(value)
+{
+ add_remappable_src_value(&m_value);
+}
+
+void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ // I wonder whether we can actually end up here ...
+ for (auto c: candidates) {
+ if (*c == *m_value.reg_i(c->chan()))
+ m_value.set_reg_i(c->chan(), new_value);
+ }
+
+ replace_values_child(candidates, new_value);
+}
+
+void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
+ UNUSED PValue new_value)
+{
+}
+
+void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
+ UNUSED ValueMap& values)
+{
+}
+
+ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
+ WriteoutInstruction(Instruction::exprt, value),
+ m_type(type),
+ m_loc(loc),
+ m_is_last(false)
+{
+}
+
+
+bool ExportInstruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == exprt);
+ const auto& oth = static_cast<const ExportInstruction&>(lhs);
+
+ return (gpr() == oth.gpr()) &&
+ (m_type == oth.m_type) &&
+ (m_loc == oth.m_loc) &&
+ (m_is_last == oth.m_is_last);
+}
+
+void ExportInstruction::do_print(std::ostream& os) const
+{
+ os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
+ switch (m_type) {
+ case et_pixel: os << "PIXEL "; break;
+ case et_pos: os << "POS "; break;
+ case et_param: os << "PARAM "; break;
+ }
+ os << m_loc << " " << gpr();
+}
+
+void ExportInstruction::update_output_map(OutputRegisterMap& map) const
+{
+ map[m_loc] = gpr_ptr();
+}
+
+void ExportInstruction::set_last()
+{
+ m_is_last = true;
+}
+
+WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
+ int align, int align_offset, int writemask):
+ WriteoutInstruction (Instruction::mem_wr_scratch, value),
+ m_loc(loc),
+ m_align(align),
+ m_align_offset(align_offset),
+ m_writemask(writemask),
+ m_array_size(0)
+{
+}
+
+WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
+ int align, int align_offset, int writemask, int array_size):
+ WriteoutInstruction (Instruction::mem_wr_scratch, value),
+ m_loc(0),
+ m_address(address),
+ m_align(align),
+ m_align_offset(align_offset),
+ m_writemask(writemask),
+ m_array_size(array_size - 1)
+{
+ add_remappable_src_value(&m_address);
+}
+
+bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
+{
+ if (lhs.type() != Instruction::mem_wr_scratch)
+ return false;
+ const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
+
+ if (m_address) {
+ if (!other.m_address)
+ return false;
+ if (*m_address != *other.m_address)
+ return false;
+ } else {
+ if (other.m_address)
+ return false;
+ }
+
+ return gpr() == other.gpr() &&
+ m_loc == other.m_loc &&
+ m_align == other.m_align &&
+ m_align_offset == other.m_align_offset &&
+ m_writemask == other.m_writemask;
+}
+
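+/* Render a 4-bit writemask as an "xyzw"/'_' swizzle string. */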
+static char *writemask_to_swizzle(int writemask, char *buf)
+{
+   const char *swz = "xyzw";
+   for (int i = 0; i < 4; ++i) {
+      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
+   }
+   buf[4] = 0; /* callers pass a 5-byte buffer; terminate before streaming */
+   return buf;
+}
+
+void WriteScratchInstruction::do_print(std::ostream& os) const
+{
+ char buf[5];
+
+ os << "MEM_SCRATCH_WRITE ";
+ if (m_address)
+ os << "@" << *m_address << "+";
+
+ os << m_loc << "." << writemask_to_swizzle(m_writemask, buf)
+ << " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
+{
+ if (!m_address)
+ return;
+
+ for (auto c: candidates) {
+ if (*c == *m_address)
+ m_address = new_value;
+ }
+}
+
+void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
+ ValueMap& values)
+{
+ if (!m_address)
+ return;
+ sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n";
+ assert(m_address->type() == Value::gpr);
+ auto new_index = map[m_address->sel()];
+ if (new_index.valid)
+ m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
+ map[m_address->sel()].used = true;
+}
+
+StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
+ int array_base, int comp_mask, int out_buffer,
+ int stream):
+ WriteoutInstruction(Instruction::streamout, value),
+ m_element_size(num_components == 3 ? 3 : num_components - 1),
+ m_burst_count(1),
+ m_array_base(array_base),
+ m_array_size(0xfff),
+ m_writemask(comp_mask),
+ m_output_buffer(out_buffer),
+ m_stream(stream)
+{
+}
+
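+/* Select the CF opcode for the target buffer and stream; this assumes the
+ * CF_OP_MEM_STREAM* opcodes are laid out consecutively, four buffer
+ * opcodes per stream. */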
+unsigned StreamOutIntruction::op() const
+{
+ int op = 0;
+ switch (m_output_buffer) {
+ case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
+ case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
+ case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
+ case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
+ }
+ return 4 * m_stream + op;
+}
+
+bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+ assert(lhs.type() == streamout);
+ const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
+
+ return gpr() == oth.gpr() &&
+ m_element_size == oth.m_element_size &&
+ m_burst_count == oth.m_burst_count &&
+ m_array_base == oth.m_array_base &&
+ m_array_size == oth.m_array_size &&
+ m_writemask == oth.m_writemask &&
+ m_output_buffer == oth.m_output_buffer &&
+ m_stream == oth.m_stream;
+}
+
+void StreamOutIntruction::do_print(std::ostream& os) const
+{
+ os << "WRITE STREAM(" << m_stream << ") " << gpr()
+ << " ES:" << m_element_size
+ << " BC:" << m_burst_count
+ << " BUF:" << m_output_buffer
+ << " ARRAY:" << m_array_base;
+ if (m_array_size != 0xfff)
+ os << "+" << m_array_size;
+}
+
+MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
+ const GPRVector& value,
+ unsigned base_addr, unsigned ncomp,
+ PValue index):
+ WriteoutInstruction(Instruction::ring, value),
+ m_ring_op(ring),
+ m_type(type),
+ m_base_address(base_addr),
+ m_num_comp(ncomp),
+ m_index(index)
+{
+ add_remappable_src_value(&m_index);
+
+ assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1||
+ m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
+ assert(m_num_comp <= 4);
+}
+
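+/* Encode the component count for the ring write: count - 1, with three
+ * components promoted to the four-component encoding. */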
+unsigned MemRingOutIntruction::ncomp() const
+{
+ switch (m_num_comp) {
+ case 1: return 0;
+ case 2: return 1;
+ case 3:
+ case 4: return 3;
+ default:
+ assert(0);
+ }
+ return 3;
+}
+
+bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == ring);
+   const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
+
+   bool equal = gpr() == oth.gpr() &&
+                m_ring_op == oth.m_ring_op &&
+                m_type == oth.m_type &&
+                m_num_comp == oth.m_num_comp &&
+                m_base_address == oth.m_base_address;
+
+   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
+      equal &= (*m_index == *oth.m_index);
+   return equal;
+}
+
+static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK", "WRITE_IDX_ACK" };
+void MemRingOutIntruction::do_print(std::ostream& os) const
+{
+ os << "MEM_RING " << m_ring_op;
+ os << " " << write_type_str[m_type] << " " << m_base_address;
+ os << " " << gpr();
+ if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
+ os << " @" << *m_index;
+ os << " ES:" << m_num_comp;
+}
+
+
+void MemRingOutIntruction::replace_values_child(const ValueSet& candidates,
+ PValue new_value)
+{
+ if (!m_index)
+ return;
+
+ for (auto c: candidates) {
+ if (*c == *m_index)
+ m_index = new_value;
+ }
+}
+
+void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map,
+ ValueMap& values)
+{
+ if (!m_index)
+ return;
+
+ assert(m_index->type() == Value::gpr);
+ auto new_index = map[m_index->sel()];
+ if (new_index.valid)
+ m_index = values.get_or_inject(new_index.new_reg, m_index->chan());
+ map[m_index->sel()].used = true;
+}
+
+void MemRingOutIntruction::patch_ring(int stream, PValue index)
+{
+ const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3};
+
+ assert(stream < 4);
+ m_ring_op = ring_op[stream];
+ m_index = index;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h
new file mode 100644
index 000000000..6d014082d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h
@@ -0,0 +1,185 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_EXPORTINSTRUCTION_H
+#define SFN_EXPORTINSTRUCTION_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
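+/* Common base for instructions that write a GPR vector out of the shader:
+ * exports and scratch, stream-out and ring writes. */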
+class WriteoutInstruction: public Instruction {
+public:
+ void replace_values(const ValueSet& candidates, PValue new_value) override;
+ const GPRVector& gpr() const {return m_value;}
+ const GPRVector *gpr_ptr() const {return &m_value;}
+protected:
+ WriteoutInstruction(instr_type t, const GPRVector& value);
+private:
+ virtual void replace_values_child(const ValueSet& candidates, PValue new_value);
+ virtual void remap_registers_child(std::vector<rename_reg_pair>& map,
+ ValueMap& values);
+
+ GPRVector m_value;
+};
+
+class ExportInstruction : public WriteoutInstruction {
+public:
+ enum ExportType {
+ et_pixel,
+ et_pos,
+ et_param
+ };
+
+ ExportInstruction(unsigned loc, const GPRVector& value, ExportType type);
+ void set_last();
+
+ ExportType export_type() const {return m_type;}
+
+ unsigned location() const {return m_loc;}
+ bool is_last_export() const {return m_is_last;}
+
+ void update_output_map(OutputRegisterMap& map) const;
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ ExportType m_type;
+ unsigned m_loc;
+ bool m_is_last;
+};
+
+class WriteScratchInstruction : public WriteoutInstruction {
+public:
+
+ WriteScratchInstruction(unsigned loc, const GPRVector& value, int align,
+ int align_offset, int writemask);
+ WriteScratchInstruction(const PValue& address, const GPRVector& value,
+ int align, int align_offset, int writemask, int array_size);
+ unsigned location() const {return m_loc;}
+
+ int write_mask() const { return m_writemask;}
+ int address() const { assert(m_address); return m_address->sel();}
+ bool indirect() const { return !!m_address;}
+ int array_size() const { return m_array_size;}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ void replace_values_child(const ValueSet& candidates, PValue new_value) override;
+ void remap_registers_child(std::vector<rename_reg_pair>& map,
+                              ValueMap& values) override;
+
+ unsigned m_loc;
+ PValue m_address;
+ unsigned m_align;
+ unsigned m_align_offset;
+ unsigned m_writemask;
+ int m_array_size;
+};
+
+
+class StreamOutIntruction: public WriteoutInstruction {
+public:
+ StreamOutIntruction(const GPRVector& value, int num_components,
+ int array_base, int comp_mask, int out_buffer,
+ int stream);
+ int element_size() const { return m_element_size;}
+ int burst_count() const { return m_burst_count;}
+ int array_base() const { return m_array_base;}
+ int array_size() const { return m_array_size;}
+ int comp_mask() const { return m_writemask;}
+ unsigned op() const;
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ int m_element_size;
+ int m_burst_count;
+ int m_array_base;
+ int m_array_size;
+ int m_writemask;
+ int m_output_buffer;
+ int m_stream;
+};
+
+enum EMemWriteType {
+ mem_write = 0,
+ mem_write_ind = 1,
+ mem_write_ack = 2,
+ mem_write_ind_ack = 3,
+};
+
+class MemRingOutIntruction: public WriteoutInstruction {
+public:
+
+ MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
+ const GPRVector& value, unsigned base_addr,
+                        unsigned ncomp, PValue index);
+
+ unsigned op() const{return m_ring_op;}
+ unsigned ncomp() const;
+ unsigned addr() const {return m_base_address;}
+ EMemWriteType type() const {return m_type;}
+ unsigned index_reg() const {return m_index->sel();}
+ unsigned array_base() const {return m_base_address; }
+ void replace_values_child(const ValueSet& candidates, PValue new_value) override;
+ void remap_registers_child(std::vector<rename_reg_pair>& map,
+ ValueMap& values) override;
+ void patch_ring(int stream, PValue index);
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ ECFOpCode m_ring_op;
+ EMemWriteType m_type;
+ unsigned m_base_address;
+ unsigned m_num_comp;
+ PValue m_index;
+
+};
+
+}
+
+
+#endif // SFN_EXPORTINSTRUCTION_H
\ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
new file mode 100644
index 000000000..ec1a48887
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp
@@ -0,0 +1,480 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_fetch.h"
+
+#include "gallium/drivers/r600/r600_pipe.h"
+
+namespace r600 {
+
+/* refactor this to add status create methods for specific tasks */
+FetchInstruction::FetchInstruction(EVFetchInstr op,
+ EVFetchType type,
+ GPRVector dst,
+ PValue src, int offset,
+ int buffer_id, PValue buffer_offset,
+ EBufferIndexMode cp_rel,
+ bool use_const_field):
+ Instruction(vtx),
+ m_vc_opcode(op),
+ m_fetch_type(type),
+ m_endian_swap(vtx_es_none),
+ m_src(src),
+ m_dst(dst),
+ m_offset(offset),
+ m_is_mega_fetch(1),
+ m_mega_fetch_count(16),
+ m_buffer_id(buffer_id),
+ m_semantic_id(0),
+ m_buffer_index_mode(cp_rel),
+ m_flags(0),
+ m_uncached(false),
+ m_indexed(false),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(0),
+ m_buffer_offset(buffer_offset),
+ m_dest_swizzle({0,1,2,3})
+{
+ if (use_const_field) {
+ m_flags.set(vtx_use_const_field);
+ m_data_format = fmt_invalid;
+ m_num_format = vtx_nf_norm;
+ } else {
+ m_flags.set(vtx_format_comp_signed);
+ m_data_format = fmt_32_32_32_32_float;
+ m_num_format = vtx_nf_scaled;
+ }
+
+ add_remappable_src_value(&m_src);
+ add_remappable_src_value(&m_buffer_offset);
+
+ add_remappable_dst_value(&m_dst);
+}
+
+/* Resource query */
+FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode,
+ EVFetchType fetch_type,
+ EVTXDataFormat data_format,
+ EVFetchNumFormat num_format,
+ EVFetchEndianSwap endian_swap,
+ const PValue src,
+ const GPRVector dst,
+ uint32_t offset,
+ bool is_mega_fetch,
+ uint32_t mega_fetch_count,
+ uint32_t buffer_id,
+ uint32_t semantic_id,
+
+ EBufferIndexMode buffer_index_mode,
+ bool uncached,
+ bool indexed,
+ int array_base,
+ int array_size,
+ int elm_size,
+ PValue buffer_offset,
+ const std::array<int, 4>& dest_swizzle):
+ Instruction(vtx),
+ m_vc_opcode(vc_opcode),
+ m_fetch_type(fetch_type),
+ m_data_format(data_format),
+ m_num_format(num_format),
+ m_endian_swap(endian_swap),
+ m_src(src),
+ m_dst(dst),
+ m_offset(offset),
+ m_is_mega_fetch(is_mega_fetch),
+ m_mega_fetch_count(mega_fetch_count),
+ m_buffer_id(buffer_id),
+ m_semantic_id(semantic_id),
+ m_buffer_index_mode(buffer_index_mode),
+ m_uncached(uncached),
+ m_indexed(indexed),
+ m_array_base(array_base),
+ m_array_size(array_size),
+ m_elm_size(elm_size),
+ m_buffer_offset(buffer_offset),
+ m_dest_swizzle(dest_swizzle)
+{
+ add_remappable_src_value(&m_src);
+ add_remappable_dst_value(&m_dst);
+ add_remappable_src_value(&m_buffer_offset);
+}
+
+FetchInstruction::FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id, PValue buffer_offset,
+ EVTXDataFormat format,
+ EVFetchNumFormat num_format):
+ Instruction(vtx),
+ m_vc_opcode(vc_fetch),
+ m_fetch_type(no_index_offset),
+ m_data_format(format),
+ m_num_format(num_format),
+ m_endian_swap(vtx_es_none),
+ m_src(src),
+ m_dst(dst),
+ m_offset(0),
+ m_is_mega_fetch(0),
+ m_mega_fetch_count(0),
+ m_buffer_id(buffer_id),
+ m_semantic_id(0),
+ m_buffer_index_mode(bim_none),
+ m_flags(0),
+ m_uncached(false),
+ m_indexed(false),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(1),
+ m_buffer_offset(buffer_offset),
+ m_dest_swizzle({0,1,2,3})
+{
+ m_flags.set(vtx_format_comp_signed);
+
+ add_remappable_src_value(&m_src);
+ add_remappable_dst_value(&m_dst);
+ add_remappable_src_value(&m_buffer_offset);
+}
+
+
+/* Resource query */
+FetchInstruction::FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id,
+ EBufferIndexMode cp_rel):
+ Instruction(vtx),
+ m_vc_opcode(vc_get_buf_resinfo),
+ m_fetch_type(no_index_offset),
+ m_data_format(fmt_32_32_32_32),
+ m_num_format(vtx_nf_norm),
+ m_endian_swap(vtx_es_none),
+ m_src(src),
+ m_dst(dst),
+ m_offset(0),
+ m_is_mega_fetch(0),
+ m_mega_fetch_count(16),
+ m_buffer_id(buffer_id),
+ m_semantic_id(0),
+ m_buffer_index_mode(cp_rel),
+ m_flags(0),
+ m_uncached(false),
+ m_indexed(false),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(0),
+ m_dest_swizzle({0,1,2,3})
+{
+ m_flags.set(vtx_format_comp_signed);
+ add_remappable_src_value(&m_src);
+ add_remappable_dst_value(&m_dst);
+ add_remappable_src_value(&m_buffer_offset);
+}
+
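+/* Read from scratch memory: a literal source collapses to an absolute
+ * array base, any other source becomes an index register for relative
+ * addressing within the scratch area. */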
+FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size):
+ Instruction(vtx),
+ m_vc_opcode(vc_read_scratch),
+ m_fetch_type(vertex_data),
+ m_data_format(fmt_32_32_32_32),
+ m_num_format(vtx_nf_int),
+ m_endian_swap(vtx_es_none),
+ m_dst(dst),
+ m_offset(0),
+ m_is_mega_fetch(0),
+ m_mega_fetch_count(16),
+ m_buffer_id(0),
+ m_semantic_id(0),
+ m_buffer_index_mode(bim_none),
+ m_flags(0),
+ m_uncached(true),
+ m_array_base(0),
+ m_array_size(0),
+ m_elm_size(3),
+ m_dest_swizzle({0,1,2,3})
+{
+ if (src->type() == Value::literal) {
+ const auto& lv = static_cast<const LiteralValue&>(*src);
+ m_array_base = lv.value();
+ m_indexed = false;
+ m_src.reset(new GPRValue(0,0));
+ m_array_size = 0;
+ } else {
+ m_array_base = 0;
+ m_src = src;
+ m_indexed = true;
+ m_array_size = scratch_size - 1;
+ }
+ add_remappable_src_value(&m_src);
+ add_remappable_dst_value(&m_dst);
+ add_remappable_src_value(&m_buffer_offset);
+}
+
+void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ if (!m_src)
+ return;
+ for (auto c: candidates) {
+ for (int i = 0; i < 4; ++i) {
+ if (*c == *m_dst.reg_i(i))
+ m_dst.set_reg_i(i, new_value);
+ }
+ if (*m_src == *c)
+ m_src = new_value;
+ }
+}
+
+
+bool FetchInstruction::is_equal_to(const Instruction& lhs) const
+{
+ auto& l = static_cast<const FetchInstruction&>(lhs);
+ if (m_src) {
+ if (!l.m_src)
+ return false;
+ if (*m_src != *l.m_src)
+ return false;
+ } else {
+ if (l.m_src)
+ return false;
+ }
+
+ return m_vc_opcode == l.m_vc_opcode &&
+ m_fetch_type == l.m_fetch_type &&
+ m_data_format == l.m_data_format &&
+ m_num_format == l.m_num_format &&
+ m_endian_swap == l.m_endian_swap &&
+ m_dst == l.m_dst &&
+ m_offset == l.m_offset &&
+ m_buffer_id == l.m_buffer_id &&
+ m_semantic_id == l.m_semantic_id &&
+ m_buffer_index_mode == l.m_buffer_index_mode &&
+ m_flags == l.m_flags &&
+ m_indexed == l.m_indexed &&
+ m_uncached == l.m_uncached;
+}
+
+void FetchInstruction::set_format(EVTXDataFormat fmt)
+{
+ m_data_format = fmt;
+}
+
+
+void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz)
+{
+ m_dest_swizzle = swz;
+}
+
+void FetchInstruction::prelude_append(Instruction *instr)
+{
+ assert(instr);
+ m_prelude.push_back(PInstruction(instr));
+}
+
+const std::vector<PInstruction>& FetchInstruction::prelude() const
+{
+ return m_prelude;
+}
+
+LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size):
+ FetchInstruction(dst, src, scratch_size)
+{
+}
+
+FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src):
+ FetchInstruction(vc_fetch,
+ no_index_offset,
+ fmt_32,
+ vtx_nf_int,
+ vtx_es_none,
+ src,
+ dst,
+ 0,
+ false,
+ 0xf,
+ R600_IMAGE_IMMED_RESOURCE_OFFSET,
+ 0,
+ bim_none,
+ false,
+ false,
+ 0,
+ 0,
+ 0,
+ PValue(),
+ {0,7,7,7})
+{
+ set_flag(vtx_srf_mode);
+ set_flag(vtx_vpm);
+}
+
+FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset):
+ FetchInstruction(vc_fetch,
+ no_index_offset,
+ fmt_32_32_32_32,
+ vtx_nf_scaled,
+ vtx_es_none,
+ src,
+ dst,
+ offset,
+ false,
+ 16,
+ R600_LDS_INFO_CONST_BUFFER,
+ 0,
+ bim_none,
+ false,
+ false,
+ 0,
+ 0,
+ 0,
+ PValue(),
+ {0,1,2,3})
+{
+ set_flag(vtx_srf_mode);
+ set_flag(vtx_format_comp_signed);
+}
+
+
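+/* Debug names for the EVTXDataFormat codes, indexed by format value. */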
+static const char *fmt_descr[64] = {
+ "INVALID",
+ "8",
+ "4_4",
+ "3_3_2",
+ "RESERVED_4",
+ "16",
+ "16F",
+ "8_8",
+ "5_6_5",
+ "6_5_5",
+ "1_5_5_5",
+ "4_4_4_4",
+ "5_5_5_1",
+ "32",
+ "32F",
+ "16_16",
+ "16_16F",
+ "8_24",
+ "8_24F",
+ "24_8",
+ "24_8F",
+ "10_11_11",
+ "10_11_11F",
+ "11_11_10",
+ "11_11_10F",
+ "2_10_10_10",
+ "8_8_8_8",
+ "10_10_10_2",
+ "X24_8_32F",
+ "32_32",
+ "32_32F",
+ "16_16_16_16",
+ "16_16_16_16F",
+ "RESERVED_33",
+ "32_32_32_32",
+ "32_32_32_32F",
+ "RESERVED_36",
+ "1",
+ "1_REVERSED",
+ "GB_GR",
+ "BG_RG",
+ "32_AS_8",
+ "32_AS_8_8",
+ "5_9_9_9_SHAREDEXP",
+ "8_8_8",
+ "16_16_16",
+ "16_16_16F",
+ "32_32_32",
+ "32_32_32F",
+ "BC1",
+ "BC2",
+ "BC3",
+ "BC4",
+ "BC5",
+ "APC0",
+ "APC1",
+ "APC2",
+ "APC3",
+ "APC4",
+ "APC5",
+ "APC6",
+ "APC7",
+ "CTX1",
+ "RESERVED_63"
+};
+
+
+void FetchInstruction::do_print(std::ostream& os) const
+{
+ static const std::string num_format_char[] = {"norm", "int", "scaled"};
+ static const std::string endian_swap_code[] = {
+ "noswap", "8in16", "8in32"
+ };
+ static const char buffer_index_mode_char[] = "_01E";
+ static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero",
+ "nostride", "AC", "TC", "VPM"};
+ switch (m_vc_opcode) {
+ case vc_fetch:
+ os << "Fetch " << m_dst;
+ break;
+ case vc_semantic:
+ os << "Fetch Semantic ID:" << m_semantic_id;
+ break;
+ case vc_get_buf_resinfo:
+ os << "Fetch BufResinfo:" << m_dst;
+ break;
+ case vc_read_scratch:
+ os << "MEM_READ_SCRATCH:" << m_dst;
+ break;
+ default:
+ os << "Fetch ERROR";
+ return;
+ }
+
+ os << ", " << *m_src;
+
+ if (m_offset)
+ os << "+" << m_offset;
+
+ os << " BUFID:" << m_buffer_id
+ << " FMT:(" << fmt_descr[m_data_format]
+ << " " << num_format_char[m_num_format]
+ << " " << endian_swap_code[m_endian_swap]
+ << ")";
+ if (m_buffer_index_mode > 0)
+ os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode];
+
+
+ if (m_is_mega_fetch)
+ os << " MFC:" << m_mega_fetch_count;
+ else
+ os << " mfc*:" << m_mega_fetch_count;
+
+ if (m_flags.any()) {
+ os << " Flags:";
+ for( int i = 0; i < vtx_unknown; ++i) {
+ if (m_flags.test(i))
+ os << ' ' << flag_string[i];
+ }
+ }
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
new file mode 100644
index 000000000..71a3f69f3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h
@@ -0,0 +1,187 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_INSTRUCTION_FETCH_H
+#define SFN_INSTRUCTION_FETCH_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
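+/* Vertex/buffer fetch instruction; also used for scratch reads, buffer
+ * resource queries and the GDS/TCS helper loads defined at the end of
+ * this file. */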
+class FetchInstruction : public Instruction {
+public:
+
+ FetchInstruction(EVFetchInstr vc_opcode,
+ EVFetchType fetch_type,
+ EVTXDataFormat data_format,
+ EVFetchNumFormat num_format,
+ EVFetchEndianSwap endian_swap,
+ const PValue src,
+ const GPRVector dst,
+ uint32_t offset,
+ bool is_mega_fetch,
+ uint32_t mega_fetch_count,
+ uint32_t buffer_id,
+ uint32_t semantic_id,
+
+ EBufferIndexMode buffer_index_mode,
+ bool uncached,
+ bool indexed,
+ int array_base,
+ int array_size,
+ int elm_size,
+ PValue buffer_offset,
+ const std::array<int, 4>& dest_swizzle);
+
+ FetchInstruction(EVFetchInstr op,
+ EVFetchType type,
+ GPRVector dst,
+ PValue src, int offset,
+ int buffer_id, PValue buffer_offset,
+ EBufferIndexMode cp_rel,
+ bool use_const_field = false);
+
+ FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id,
+ PValue buffer_offset,
+ EVTXDataFormat format,
+ EVFetchNumFormat num_format);
+
+ FetchInstruction(GPRVector dst,
+ PValue src,
+ int buffer_id,
+ EBufferIndexMode cp_rel);
+
+ FetchInstruction(GPRVector dst, PValue src, int scratch_size);
+
+ void replace_values(const ValueSet& candidates, PValue new_value) override;
+ EVFetchInstr vc_opcode() const { return m_vc_opcode;}
+ EVFetchType fetch_type() const { return m_fetch_type;}
+
+ EVTXDataFormat data_format() const { return m_data_format;}
+ EVFetchNumFormat num_format() const { return m_num_format;}
+ EVFetchEndianSwap endian_swap() const { return m_endian_swap;}
+
+ const Value& src() const { return *m_src;}
+ const GPRVector& dst() const { return m_dst;}
+ uint32_t offset() const { return m_offset;}
+
+ bool is_mega_fetch() const { return m_is_mega_fetch;}
+ uint32_t mega_fetch_count() const { return m_mega_fetch_count;}
+
+ uint32_t buffer_id() const { return m_buffer_id;}
+ uint32_t semantic_id() const { return m_semantic_id;}
+ EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;}
+
+ bool is_signed() const { return m_flags.test(vtx_format_comp_signed);}
+ bool use_const_fields() const { return m_flags.test(vtx_use_const_field);}
+
+ bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);}
+
+ void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);}
+
+ bool uncached() const {return m_uncached; }
+ bool indexed() const {return m_indexed; }
+ int array_base()const {return m_array_base; }
+ int array_size() const {return m_array_size; }
+ int elm_size() const {return m_elm_size; }
+
+ void set_buffer_offset(PValue buffer_offset) {
+ m_buffer_offset = buffer_offset;
+ add_remappable_src_value(&m_buffer_offset);
+ }
+ PValue buffer_offset() const { return m_buffer_offset; }
+
+ void set_dest_swizzle(const std::array<int,4>& swz);
+ void set_format(EVTXDataFormat fmt);
+
+ int swz(int idx) const { return m_dest_swizzle[idx];}
+
+ bool use_tc() const {return m_flags.test(vtx_use_tc);}
+
+ bool use_vpm() const {return m_flags.test(vtx_vpm);}
+
+ void prelude_append(Instruction *instr);
+
+ const std::vector<PInstruction>& prelude() const;
+
+ bool has_prelude() const {return !m_prelude.empty();}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ EVFetchInstr m_vc_opcode;
+ EVFetchType m_fetch_type;
+
+ EVTXDataFormat m_data_format;
+ EVFetchNumFormat m_num_format;
+ EVFetchEndianSwap m_endian_swap;
+
+ PValue m_src;
+ GPRVector m_dst;
+ uint32_t m_offset;
+
+ bool m_is_mega_fetch;
+ uint32_t m_mega_fetch_count;
+
+ uint32_t m_buffer_id;
+ uint32_t m_semantic_id;
+
+ EBufferIndexMode m_buffer_index_mode;
+ std::bitset<16> m_flags;
+ bool m_uncached;
+ bool m_indexed;
+ int m_array_base;
+ int m_array_size;
+ int m_elm_size;
+ PValue m_buffer_offset;
+ std::array<int, 4> m_dest_swizzle;
+ std::vector<PInstruction> m_prelude;
+};
+
+class LoadFromScratch: public FetchInstruction {
+public:
+ LoadFromScratch(GPRVector dst, PValue src, int scratch_size);
+};
+
+class FetchGDSOpResult : public FetchInstruction {
+public:
+ FetchGDSOpResult(const GPRVector dst, const PValue src);
+};
+
+class FetchTCSIOParam : public FetchInstruction {
+public:
+ FetchTCSIOParam(GPRVector dst, PValue src, int offset);
+};
+
+}
+
+#endif // SFN_INSTRUCTION_FETCH_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
new file mode 100644
index 000000000..095cd40d6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp
@@ -0,0 +1,180 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_gds.h"
+#include "sfn_liverange.h"
+
+namespace r600 {
+
+GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+ const PValue& value2, const PValue& uav_id, int uav_base):
+ Instruction(gds),
+ m_op(op),
+ m_src(value),
+ m_src2(value2),
+ m_dest(dest),
+ m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}),
+ m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}),
+ m_buffer_index_mode(bim_none),
+ m_uav_id(uav_id),
+ m_uav_base(uav_base),
+ m_flags(0)
+{
+ add_remappable_src_value(&m_src);
+ add_remappable_src_value(&m_src2);
+ add_remappable_src_value(&m_uav_id);
+ add_remappable_dst_value(&m_dest);
+ m_dest_swizzle[0] = m_dest.chan_i(0);
+}
+
+GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+ const PValue& uav_id, int uav_base):
+ GDSInstr(op, dest, value, PValue(), uav_id, uav_base)
+{
+ assert(value);
+ m_src_swizzle[1] = value->chan();
+ m_src_swizzle[2] = PIPE_SWIZZLE_0;
+}
+
+GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest,
+ const PValue& uav_id, int uav_base):
+ GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base)
+{
+ m_src_swizzle[1] = PIPE_SWIZZLE_0;
+}
+
+bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const
+{
+ return false;
+}
+
+void GDSInstr::do_print(std::ostream& os) const
+{
+ const char *swz = "xyzw01?_";
+ os << lds_ops.at(m_op).name << " R" << m_dest.sel() << ".";
+ for (int i = 0; i < 4; ++i) {
+ os << swz[m_dest_swizzle[i]];
+ }
+ if (m_src)
+ os << " " << *m_src;
+
+ os << " UAV:" << *m_uav_id;
+}
+
+RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
+ const GPRVector& data, const GPRVector& index,
+ int rat_id, const PValue& rat_id_offset,
+ int burst_count, int comp_mask, int element_size, bool ack):
+ Instruction(rat),
+ m_cf_opcode(cf_opcode),
+ m_rat_op(rat_op),
+ m_data(data),
+ m_index(index),
+ m_rat_id(rat_id),
+ m_rat_id_offset(rat_id_offset),
+ m_burst_count(burst_count),
+ m_comp_mask(comp_mask),
+ m_element_size(element_size),
+ m_need_ack(ack)
+{
+ add_remappable_src_value(&m_data);
+ add_remappable_src_value(&m_rat_id_offset);
+ add_remappable_src_value(&m_index);
+}
+
+bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const
+{
+ return false;
+}
+
+void RatInstruction::do_print(std::ostream& os) const
+{
+ os << "MEM_RAT RAT(" << m_rat_id;
+ if (m_rat_id_offset)
+ os << "+" << *m_rat_id_offset;
+ os << ") @" << m_index;
+ os << " OP:" << m_rat_op << " " << m_data;
+ os << " BC:" << m_burst_count
+ << " MASK:" << m_comp_mask
+ << " ES:" << m_element_size;
+ if (m_need_ack)
+ os << " ACK";
+}
+
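+/* Map the NIR SSBO atomics onto RAT opcodes. The _RTN variants return the
+ * value the memory location held before the operation was applied, which
+ * is what the NIR atomics expect as their result. */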
+RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode)
+{
+ switch (opcode) {
+ case nir_intrinsic_ssbo_atomic_add:
+ return ADD_RTN;
+ case nir_intrinsic_ssbo_atomic_and:
+ return AND_RTN;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ return XCHG_RTN;
+ case nir_intrinsic_ssbo_atomic_umax:
+ return MAX_UINT_RTN;
+ case nir_intrinsic_ssbo_atomic_umin:
+ return MIN_UINT_RTN;
+ case nir_intrinsic_ssbo_atomic_imax:
+ return MAX_INT_RTN;
+ case nir_intrinsic_ssbo_atomic_imin:
+ return MIN_INT_RTN;
+ case nir_intrinsic_ssbo_atomic_xor:
+ return XOR_RTN;
+ default:
+ return UNSUPPORTED;
+ }
+}
+
+GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value):
+ Instruction(tf_write),
+ m_value(value)
+{
+ add_remappable_src_value(&m_value);
+}
+
+void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ for (auto& c: candidates) {
+ for (int i = 0; i < 4; ++i) {
+ if (*c == *m_value[i])
+ m_value[i] = new_value;
+ }
+ }
+}
+
+
+bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const
+{
+ auto& other = static_cast<const GDSStoreTessFactor&>(lhs);
+ return m_value == other.m_value;
+}
+
+void GDSStoreTessFactor::do_print(std::ostream& os) const
+{
+ os << "TF_WRITE " << m_value;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
new file mode 100644
index 000000000..6f8e0f200
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
@@ -0,0 +1,225 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_GDSINSTR_H
+#define SFN_GDSINSTR_H
+
+#include "sfn_instruction_base.h"
+
+#include <bitset>
+
+namespace r600 {
+
+class GDSInstr : public Instruction
+{
+public:
+ GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+ const PValue &uav_id, int uav_base);
+ GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+ const PValue& value2, const PValue &uav_id, int uav_base);
+ GDSInstr(ESDOp op, const GPRVector& dest, const PValue &uav_id, int uav_base);
+
+ ESDOp op() const {return m_op;}
+
+ int src_sel() const {
+ if (!m_src)
+ return 0;
+
+ assert(m_src->type() == Value::gpr);
+ return m_src->sel();
+ }
+
+ int src2_chan() const {
+ if (!m_src2)
+ return 0;
+
+ assert(m_src2->type() == Value::gpr);
+ return m_src2->chan();
+ }
+
+ int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
+
+ int dest_sel() const {
+ return m_dest.sel();
+ }
+
+ int dest_swizzle(int i) const {
+ if (i < 4)
+ return m_dest_swizzle[i];
+ return 7;
+ }
+
+ void set_dest_swizzle(const std::array<int,4>& swz) {
+ m_dest_swizzle = swz;
+ }
+
+ PValue uav_id() const {return m_uav_id;}
+ int uav_base() const {return m_uav_base;}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ ESDOp m_op;
+
+ PValue m_src;
+ PValue m_src2;
+ GPRVector m_dest;
+ std::array <int, 4> m_dest_swizzle;
+ std::array <int, 3> m_src_swizzle;
+
+ EBufferIndexMode m_buffer_index_mode;
+ PValue m_uav_id;
+ int m_uav_base;
+ std::bitset<8> m_flags;
+
+};
+
+class RatInstruction : public Instruction {
+
+public:
+ enum ERatOp {
+ NOP,
+ STORE_TYPED,
+ STORE_RAW,
+ STORE_RAW_FDENORM,
+ CMPXCHG_INT,
+ CMPXCHG_FLT,
+ CMPXCHG_FDENORM,
+ ADD,
+ SUB,
+ RSUB,
+ MIN_INT,
+ MIN_UINT,
+ MAX_INT,
+ MAX_UINT,
+ AND,
+ OR,
+ XOR,
+ MSKOR,
+ INC_UINT,
+ DEC_UINT,
+ NOP_RTN = 32,
+ XCHG_RTN = 34,
+ XCHG_FDENORM_RTN,
+ CMPXCHG_INT_RTN,
+ CMPXCHG_FLT_RTN,
+ CMPXCHG_FDENORM_RTN,
+ ADD_RTN,
+ SUB_RTN,
+ RSUB_RTN,
+ MIN_INT_RTN,
+ MIN_UINT_RTN,
+ MAX_INT_RTN,
+ MAX_UINT_RTN,
+ AND_RTN,
+ OR_RTN,
+ XOR_RTN,
+ MSKOR_RTN,
+ UINT_RTN,
+ UNSUPPORTED
+ };
+
+ RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
+ const GPRVector& data, const GPRVector& index,
+ int rat_id, const PValue& rat_id_offset,
+ int burst_count, int comp_mask, int element_size,
+ bool ack);
+
+ PValue rat_id_offset() const { return m_rat_id_offset;}
+ int rat_id() const { return m_rat_id;}
+
+ ERatOp rat_op() const {return m_rat_op;}
+
+ int data_gpr() const {return m_data.sel();}
+ int index_gpr() const {return m_index.sel();}
+ int elm_size() const {return m_element_size;}
+
+ int comp_mask() const {return m_comp_mask;}
+
+ bool need_ack() const {return m_need_ack;}
+ int burst_count() const {return m_burst_count;}
+
+ static ERatOp opcode(nir_intrinsic_op opcode);
+
+ int data_swz(int chan) const {return m_data.chan_i(chan);}
+
+ ECFOpCode cf_opcode() const { return m_cf_opcode;}
+
+ void set_ack() {m_need_ack = true; }
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+
+private:
+
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ ECFOpCode m_cf_opcode;
+ ERatOp m_rat_op;
+
+ GPRVector m_data;
+ GPRVector m_index;
+
+ int m_rat_id;
+ PValue m_rat_id_offset;
+ int m_burst_count;
+ int m_comp_mask;
+ int m_element_size;
+
+ std::bitset<8> m_flags;
+
+ bool m_need_ack;
+
+};
+
+class GDSStoreTessFactor : public Instruction {
+public:
+ GDSStoreTessFactor(GPRVector& value);
+ int sel() const {return m_value.sel();}
+ int chan(int i ) const {return m_value.chan_i(i);}
+
+ void replace_values(const ValueSet& candiates, PValue new_value) override;
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ GPRVector m_value;
+};
+
+}
+
+#endif // SFN_GDSINSTR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
new file mode 100644
index 000000000..b77461abc
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
@@ -0,0 +1,151 @@
+#include "sfn_instruction_lds.h"
+
+namespace r600 {
+
+void LDSReadInstruction::do_print(std::ostream& os) const
+{
+ os << "LDS Read [";
+ for (auto& v : m_dest_value)
+ os << *v << " ";
+ os << "], ";
+ for (auto& a : m_address)
+ os << *a << " ";
+}
+
+LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
+ Instruction(lds_read),
+ m_address(address),
+ m_dest_value(value)
+{
+ assert(address.size() == value.size());
+
+ for (unsigned i = 0; i < address.size(); ++i) {
+ add_remappable_src_value(&m_address[i]);
+ add_remappable_dst_value(&m_dest_value[i]);
+ }
+}
+
+void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ for (auto& c : candidates) {
+ for (auto& d: m_dest_value) {
+ if (*c == *d)
+ d = new_value;
+ }
+
+ for (auto& a: m_address) {
+ if (*c == *a)
+ a = new_value;
+ }
+ }
+}
+
+bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
+{
+ auto& other = static_cast<const LDSReadInstruction&>(lhs);
+ return m_address == other.m_address &&
+ m_dest_value == other.m_dest_value;
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
+ Instruction(lds_atomic),
+ m_address(address),
+ m_dest_value(dest),
+ m_src0_value(src0),
+ m_src1_value(src1),
+ m_opcode(op)
+{
+ add_remappable_src_value(&m_src0_value);
+ add_remappable_src_value(&m_src1_value);
+ add_remappable_src_value(&m_address);
+ add_remappable_dst_value(&m_dest_value);
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
+ LDSAtomicInstruction(dest, src0, PValue(), address, op)
+{
+
+}
+
+
+void LDSAtomicInstruction::do_print(std::ostream& os) const
+{
+ os << "LDS " << m_opcode << " " << *m_dest_value << " ";
+ os << "[" << *m_address << "] " << *m_src0_value;
+ if (m_src1_value)
+ os << ", " << *m_src1_value;
+}
+
+bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
+{
+ auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
+
+ return m_opcode == other.m_opcode &&
+ *m_dest_value == *other.m_dest_value &&
+ *m_src0_value == *other.m_src0_value &&
+ *m_address == *other.m_address &&
+ ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
+ (!m_src1_value && !other.m_src1_value));
+}
+
+LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
+ LDSWriteInstruction(address, idx_offset, value0, PValue())
+{
+}
+
+LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1):
+ Instruction(lds_write),
+ m_address(address),
+ m_value0(value0),
+ m_value1(value1),
+ m_idx_offset(idx_offset)
+{
+ add_remappable_src_value(&m_address);
+ add_remappable_src_value(&m_value0);
+ if (m_value1)
+ add_remappable_src_value(&m_value1);
+}
+
+
+void LDSWriteInstruction::do_print(std::ostream& os) const
+{
+ os << "LDS Write" << num_components()
+ << " " << address() << ", " << value0();
+ if (num_components() > 1)
+ os << ", " << value1();
+}
+
+void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ for (auto c: candidates) {
+ if (*c == *m_address)
+ m_address = new_value;
+
+ if (*c == *m_value0)
+ m_value0 = new_value;
+
+ if (*c == *m_value1)
+ m_value1 = new_value;
+ }
+}
+
+bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
+{
+ auto& other = static_cast<const LDSWriteInstruction&>(lhs);
+
+ if (m_value1) {
+ if (!other.m_value1)
+ return false;
+ if (*m_value1 != *other.m_value1)
+ return false;
+ } else {
+ if (other.m_value1)
+ return false;
+ }
+
+ return (*m_value0 == *other.m_value0 &&
+ *m_address == *other.m_address);
+}
+
+} // namespace r600
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
new file mode 100644
index 000000000..96439a7c3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
@@ -0,0 +1,82 @@
+#ifndef LDSINSTRUCTION_H
+#define LDSINSTRUCTION_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class LDSReadInstruction : public Instruction {
+public:
+ LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value);
+ void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+ unsigned num_values() const { return m_dest_value.size();}
+ const Value& address(unsigned i) const { return *m_address[i];}
+ const Value& dest(unsigned i) const { return *m_dest_value[i];}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+
+ std::vector<PValue> m_address;
+ std::vector<PValue> m_dest_value;
+};
+
+class LDSAtomicInstruction : public Instruction {
+public:
+ LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
+ LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
+
+ const Value& address() const { return *m_address;}
+ const Value& dest() const { return *m_dest_value;}
+ const Value& src0() const { return *m_src0_value;}
+ const PValue& src1() const { return m_src1_value;}
+ unsigned op() const {return m_opcode;}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+
+ PValue m_address;
+ PValue m_dest_value;
+ PValue m_src0_value;
+ PValue m_src1_value;
+ unsigned m_opcode;
+};
+
+class LDSWriteInstruction : public Instruction {
+public:
+ LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0);
+ LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1);
+
+ const Value& address() const {return *m_address;};
+ const Value& value0() const { return *m_value0;}
+ const Value& value1() const { return *m_value1;}
+ unsigned num_components() const { return m_value1 ? 2 : 1;}
+ unsigned idx_offset() const {return m_idx_offset;};
+
+ void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Instruction& lhs) const override;
+
+ PValue m_address;
+ PValue m_value0;
+ PValue m_value1;
+ unsigned m_idx_offset;
+
+};
+
+}
+
+#endif // LDSINSTRUCTION_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
new file mode 100644
index 000000000..1c1a98c40
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp
@@ -0,0 +1,68 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_misc.h"
+
+namespace r600 {
+EmitVertex::EmitVertex(int stream, bool cut):
+ Instruction (emit_vtx),
+ m_stream(stream),
+ m_cut(cut)
+{
+
+}
+
+bool EmitVertex::is_equal_to(const Instruction& lhs) const
+{
+ auto& oth = static_cast<const EmitVertex&>(lhs);
+ return oth.m_stream == m_stream &&
+ oth.m_cut == m_cut;
+}
+
+void EmitVertex::do_print(std::ostream& os) const
+{
+ os << (m_cut ? "EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream;
+}
+
+WaitAck::WaitAck(int nack):
+ Instruction (wait_ack),
+ m_nack(nack)
+{
+
+}
+
+bool WaitAck::is_equal_to(const Instruction& lhs) const
+{
+ const auto& l = static_cast<const WaitAck&>(lhs);
+ return m_nack == l.m_nack;
+}
+
+void WaitAck::do_print(std::ostream& os) const
+{
+ os << "WAIT_ACK @" << m_nack;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
new file mode 100644
index 000000000..d322b4aa8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h
@@ -0,0 +1,69 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_INSTRUCTION_MISC_H
+#define SFN_INSTRUCTION_MISC_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class EmitVertex : public Instruction {
+public:
+ EmitVertex(int stream, bool cut);
+ ECFOpCode op() const {return m_cut ? cf_cut_vertex: cf_emit_vertex;}
+ int stream() const { return m_stream;}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ int m_stream;
+ bool m_cut;
+};
+
+class WaitAck : public Instruction {
+public:
+ WaitAck(int nack);
+ ECFOpCode op() const {return cf_wait_ack;}
+ int n_ack() const {return m_nack;}
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+ int m_nack;
+};
+
+}
+
+#endif // SFN_INSTRUCTION_MISC_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp
new file mode 100644
index 000000000..8fc5469f3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp
@@ -0,0 +1,414 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_instruction_tex.h"
+#include "nir_builder.h"
+#include "nir_builtin_builder.h"
+
+namespace r600 {
+
+TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src,
+ unsigned sid, unsigned rid, PValue sampler_offset):
+ Instruction(tex),
+ m_opcode(op),
+ m_dst(dest),
+ m_src(src),
+ m_sampler_id(sid),
+ m_resource_id(rid),
+ m_flags(0),
+ m_inst_mode(0),
+ m_dest_swizzle{0,1,2,3},
+ m_sampler_offset(sampler_offset)
+
+{
+ memset(m_offset, 0, sizeof (m_offset));
+
+ add_remappable_src_value(&m_src);
+ add_remappable_src_value(&m_sampler_offset);
+ add_remappable_dst_value(&m_dst);
+}
+
+void TexInstruction::set_gather_comp(int cmp)
+{
+ m_inst_mode = cmp;
+}
+
+void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+ // I wonder whether we can actually end up here ...
+ for (auto c: candidates) {
+ if (*c == *m_src.reg_i(c->chan()))
+ m_src.set_reg_i(c->chan(), new_value);
+ if (*c == *m_dst.reg_i(c->chan()))
+ m_dst.set_reg_i(c->chan(), new_value);
+ }
+}
+
+void TexInstruction::set_offset(unsigned index, int32_t val)
+{
+ assert(index < 3);
+ m_offset[index] = val;
+}
+
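+/* The hardware texel-offset fields appear to be 5 bits wide and given in
+ * half-texel steps, hence the shift by one and the 0x1f mask below (an
+ * assumption based on how the TGSI backend encodes these offsets). */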
+int TexInstruction::get_offset(unsigned index) const
+{
+ assert(index < 3);
+ return ((m_offset[index] << 1) & 0x1f);
+}
+
+bool TexInstruction::is_equal_to(const Instruction& rhs) const
+{
+ assert(rhs.type() == tex);
+ const auto& r = static_cast<const TexInstruction&>(rhs);
+ return (m_opcode == r.m_opcode &&
+ m_dst == r.m_dst &&
+ m_src == r.m_src &&
+ m_sampler_id == r.m_sampler_id &&
+ m_resource_id == r.m_resource_id);
+}
+
+void TexInstruction::do_print(std::ostream& os) const
+{
+ const char *map_swz = "xyzw01?_";
+ os << opname(m_opcode) << " R" << m_dst.sel() << ".";
+ for (int i = 0; i < 4; ++i)
+ os << map_swz[m_dest_swizzle[i]];
+
+ os << " " << m_src
+ << " RESID:" << m_resource_id << " SAMPLER:"
+ << m_sampler_id;
+}
+
+const char *TexInstruction::opname(Opcode op)
+{
+ switch (op) {
+ case ld: return "LD";
+ case get_resinfo: return "GET_TEXTURE_RESINFO";
+ case get_nsampled: return "GET_NUMBER_OF_SAMPLES";
+ case get_tex_lod: return "GET_LOD";
+ case get_gradient_h: return "GET_GRADIENTS_H";
+ case get_gradient_v: return "GET_GRADIENTS_V";
+ case set_offsets: return "SET_TEXTURE_OFFSETS";
+ case keep_gradients: return "KEEP_GRADIENTS";
+ case set_gradient_h: return "SET_GRADIENTS_H";
+ case set_gradient_v: return "SET_GRADIENTS_V";
+ case sample: return "SAMPLE";
+ case sample_l: return "SAMPLE_L";
+ case sample_lb: return "SAMPLE_LB";
+ case sample_lz: return "SAMPLE_LZ";
+ case sample_g: return "SAMPLE_G";
+ case sample_g_lb: return "SAMPLE_G_L";
+ case gather4: return "GATHER4";
+ case gather4_o: return "GATHER4_O";
+ case sample_c: return "SAMPLE_C";
+ case sample_c_l: return "SAMPLE_C_L";
+ case sample_c_lb: return "SAMPLE_C_LB";
+ case sample_c_lz: return "SAMPLE_C_LZ";
+ case sample_c_g: return "SAMPLE_C_G";
+ case sample_c_g_lb: return "SAMPLE_C_G_L";
+ case gather4_c: return "GATHER4_C";
+ case gather4_c_o: return "OP_GATHER4_C_O";
+ }
+ return "ERROR";
+}
+
+
+
+static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
+{
+ b->cursor = nir_before_instr(&tex->instr);
+
+ nir_ssa_def * size = nir_i2f32(b, nir_get_texture_size(b, tex));
+ nir_ssa_def *scale = nir_frcp(b, size);
+
+ int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ nir_ssa_def *corr = nullptr;
+ if (unlikely(tex->array_is_lowered_cube)) {
+ auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
+ nir_fmul(b, nir_imm_float(b, -0.5f), scale));
+ corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
+ nir_channel(
+ b, tex->src[coord_index].src.ssa, 2));
+ } else {
+ corr = nir_fadd(b,
+ nir_fmul(b, nir_imm_float(b, -0.5f), scale),
+ tex->src[coord_index].src.ssa);
+ }
+
+ nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
+ nir_src_for_ssa(corr));
+ return true;
+}
+
+static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex)
+{
+ b->cursor = nir_before_instr(&tex->instr);
+ int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ nir_ssa_def *corr = nullptr;
+ if (unlikely(tex->array_is_lowered_cube)) {
+ auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
+ nir_imm_float(b, -0.5f));
+ corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
+ nir_channel(b, tex->src[coord_index].src.ssa, 2));
+ } else {
+ corr = nir_fadd(b, tex->src[coord_index].src.ssa,
+ nir_imm_float(b, -0.5f));
+ }
+ nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
+ nir_src_for_ssa(corr));
+ return true;
+}
+
+static bool
+r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ bool progress = false;
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (tex->op == nir_texop_tg4 &&
+ tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
+ if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
+ lower_coord_shift_normalized(&b, tex);
+ else
+ lower_coord_shift_unnormalized(&b, tex);
+ progress = true;
+ }
+ }
+ }
+ }
+ }
+ return progress;
+}
+
+/*
+ * This lowering pass works around a bug in r600 when doing TG4 from
+ * integral valued samplers.
+ *
+ * Gather4 should follow the same rules as bilinear filtering, but the
+ * hardware incorrectly forces nearest filtering if the texture format is
+ * integer. The only effect this has on Gather4, which always returns the
+ * four texels a bilinear fetch would use, is that the final coordinates
+ * are off by 0.5 of the texel size.
+ */
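+/* In effect the two helpers above subtract half a texel from the
+ * coordinates: for a normalized coordinate u on a texture of width w the
+ * correction is u' = u - 0.5 / w, and for unnormalized RECT coordinates
+ * simply u' = u - 0.5. */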
+
+bool r600_nir_lower_int_tg4(nir_shader *shader)
+{
+ bool progress = false;
+ bool need_lowering = false;
+
+ nir_foreach_uniform_variable(var, shader) {
+ if (var->type->is_sampler()) {
+ if (glsl_base_type_is_integer(var->type->sampled_type)) {
+ need_lowering = true;
+ }
+ }
+ }
+
+ if (need_lowering) {
+ nir_foreach_function(function, shader) {
+ if (function->impl && r600_nir_lower_int_tg4_impl(function->impl))
+ progress = true;
+ }
+ }
+
+ return progress;
+}
+
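+/* Sketch of the lowering below: the LOD selected for a gradient g on a
+ * texture of size s is roughly log2(g * s), so feeding
+ * ddx = ddy = exp2(lod) / s into TXD makes the hardware reproduce the
+ * requested level for ops that only support explicit gradients. */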
+static
+bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
+{
+ assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
+ assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
+ assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
+
+ b->cursor = nir_before_instr(&tex->instr);
+
+ int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+ int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
+ int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
+ assert (lod_idx >= 0 || bias_idx >= 0);
+
+ nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
+ nir_ssa_def *lod = (lod_idx >= 0) ?
+ nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
+ nir_get_texture_lod(b, tex);
+
+ if (bias_idx >= 0)
+ lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
+
+ if (min_lod_idx >= 0)
+ lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
+
+ /* max lod? */
+
+ nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
+ nir_ssa_def *scale = NULL;
+
+ if (tex->is_array) {
+ int cmp_mask = (1 << (size->num_components - 1)) - 1;
+ scale = nir_frcp(b, nir_channels(b, size,
+ (nir_component_mask_t)cmp_mask));
+ } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0};
+ scale = nir_frcp(b, nir_channels(b, size, 1));
+ scale = nir_swizzle(b, scale, swizzle, 3);
+ }
+
+ nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);
+
+ if (lod_idx >= 0)
+ nir_tex_instr_remove_src(tex, lod_idx);
+ if (bias_idx >= 0)
+ nir_tex_instr_remove_src(tex, bias_idx);
+ if (min_lod_idx >= 0)
+ nir_tex_instr_remove_src(tex, min_lod_idx);
+ nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
+ nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));
+
+ tex->op = nir_texop_txd;
+ return true;
+}
+
+
+static bool
+r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ bool progress = false;
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+ if (tex->is_shadow &&
+ (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
+ (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
+ progress |= lower_txl_txf_array_or_cube(&b, tex);
+ }
+ }
+ }
+ return progress;
+}
+
+bool
+r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
+{
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
+ progress = true;
+ }
+ return progress;
+}
+
+static bool
+r600_nir_lower_cube_to_2darray_filter(const nir_instr *instr, const void *_options)
+{
+ if (instr->type != nir_instr_type_tex)
+ return false;
+
+ auto tex = nir_instr_as_tex(instr);
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+ return false;
+
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txf:
+ case nir_texop_txl:
+ case nir_texop_lod:
+ case nir_texop_tg4:
+ case nir_texop_txd:
+ return true;
+ default:
+ return false;
+ }
+}
+
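+/* Rewrite cube (array) texture ops as 2D-array accesses: nir_cube_r600
+ * yields the face coordinates and face index, the coordinates are scaled
+ * by 1/|major axis| and biased by 1.5 (the offset the r600 sampler
+ * convention appears to expect), and the face index, plus slice * 8 for
+ * cube arrays, becomes the array layer. */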
+static nir_ssa_def *
+r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options)
+{
+ b->cursor = nir_before_instr(instr);
+
+ auto tex = nir_instr_as_tex(instr);
+ int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
+ assert(coord_idx >= 0);
+
+ auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7));
+ auto xy = nir_fmad(b,
+ nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)),
+ nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),
+ nir_imm_float(b, 1.5));
+
+ nir_ssa_def *z = nir_channel(b, cubed, 3);
+ if (tex->is_array) {
+ auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3));
+ z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0),
+ z);
+ }
+
+ if (tex->op == nir_texop_txd) {
+ int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
+ auto zero_dot_5 = nir_imm_float(b, 0.5);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src,
+ nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3), zero_dot_5)));
+
+ int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src,
+ nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3), zero_dot_5)));
+ }
+
+ auto new_coord = nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z);
+ nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src,
+ nir_src_for_ssa(new_coord));
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->is_array = true;
+ tex->array_is_lowered_cube = true;
+
+ tex->coord_components = 3;
+
+ return NIR_LOWER_INSTR_PROGRESS;
+}
+
+bool
+r600_nir_lower_cube_to_2darray(nir_shader *shader)
+{
+ return nir_shader_lower_instructions(shader,
+ r600_nir_lower_cube_to_2darray_filter,
+ r600_nir_lower_cube_to_2darray_impl, nullptr);
+}
+
+
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
new file mode 100644
index 000000000..2fe7cbad7
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h
@@ -0,0 +1,143 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef INSTRUCTION_TEX_H
+#define INSTRUCTION_TEX_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class TexInstruction : public Instruction {
+public:
+ enum Opcode {
+ ld = FETCH_OP_LD,
+ get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO,
+ get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES,
+ get_tex_lod = FETCH_OP_GET_LOD,
+ get_gradient_h = FETCH_OP_GET_GRADIENTS_H,
+ get_gradient_v = FETCH_OP_GET_GRADIENTS_V,
+ set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS,
+ keep_gradients = FETCH_OP_KEEP_GRADIENTS,
+ set_gradient_h = FETCH_OP_SET_GRADIENTS_H,
+ set_gradient_v = FETCH_OP_SET_GRADIENTS_V,
+ sample = FETCH_OP_SAMPLE,
+ sample_l = FETCH_OP_SAMPLE_L,
+ sample_lb = FETCH_OP_SAMPLE_LB,
+ sample_lz = FETCH_OP_SAMPLE_LZ,
+ sample_g = FETCH_OP_SAMPLE_G,
+ sample_g_lb = FETCH_OP_SAMPLE_G_L,
+ gather4 = FETCH_OP_GATHER4,
+ gather4_o = FETCH_OP_GATHER4_O,
+
+ sample_c = FETCH_OP_SAMPLE_C,
+ sample_c_l = FETCH_OP_SAMPLE_C_L,
+ sample_c_lb = FETCH_OP_SAMPLE_C_LB,
+ sample_c_lz = FETCH_OP_SAMPLE_C_LZ,
+ sample_c_g = FETCH_OP_SAMPLE_C_G,
+ sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L,
+ gather4_c = FETCH_OP_GATHER4_C,
+ gather4_c_o = FETCH_OP_GATHER4_C_O,
+
+ };
+
+ enum Flags {
+ x_unnormalized,
+ y_unnormalized,
+ z_unnormalized,
+ w_unnormalized,
+ grad_fine
+ };
+
+ TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid,
+ unsigned rid, PValue sampler_offset);
+
+ const GPRVector& src() const {return m_src;}
+ const GPRVector& dst() const {return m_dst;}
+ unsigned opcode() const {return m_opcode;}
+ unsigned sampler_id() const {return m_sampler_id;}
+ unsigned resource_id() const {return m_resource_id;}
+
+ void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+ void set_offset(unsigned index, int32_t val);
+ int get_offset(unsigned index) const;
+
+ void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;}
+
+ int inst_mode() const { return m_inst_mode;}
+
+ void set_flag(Flags flag) {
+ m_flags.set(flag);
+ }
+
+ PValue sampler_offset() const {
+ return m_sampler_offset;
+ }
+
+ bool has_flag(Flags flag) const {
+ return m_flags.test(flag);
+ }
+
+ int dest_swizzle(int i) const {
+ assert(i < 4);
+ return m_dest_swizzle[i];
+ }
+
+ void set_dest_swizzle(const std::array<int,4>& swz) {
+ m_dest_swizzle = swz;
+ }
+
+ void set_gather_comp(int cmp);
+
+ bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+ bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+ bool is_equal_to(const Instruction& lhs) const override;
+ void do_print(std::ostream& os) const override;
+
+ static const char *opname(Opcode code);
+
+ Opcode m_opcode;
+ GPRVector m_dst;
+ GPRVector m_src;
+ unsigned m_sampler_id;
+ unsigned m_resource_id;
+ std::bitset<8> m_flags;
+ int m_offset[3];
+ int m_inst_mode;
+ std::array<int,4> m_dest_swizzle;
+ PValue m_sampler_offset;
+};
+
+bool r600_nir_lower_int_tg4(nir_shader *nir);
+bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader);
+bool r600_nir_lower_cube_to_2darray(nir_shader *shader);
+
+}
+
+#endif // INSTRUCTION_TEX_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
new file mode 100644
index 000000000..e47a46b88
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp
@@ -0,0 +1,1450 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_ir_to_assembly.h"
+#include "sfn_conditionaljumptracker.h"
+#include "sfn_callstack.h"
+#include "sfn_instruction_gds.h"
+#include "sfn_instruction_misc.h"
+#include "sfn_instruction_fetch.h"
+#include "sfn_instruction_lds.h"
+
+#include "../r600_shader.h"
+#include "../r600_sq.h"
+
+namespace r600 {
+
+using std::vector;
+
+
+
+struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor {
+
+ AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
+
+
+ bool emit(const Instruction::Pointer i);
+ void reset_addr_register() {m_last_addr.reset();}
+
+public:
+ bool visit(const AluInstruction& i) override;
+ bool visit(const ExportInstruction& i) override;
+ bool visit(const TexInstruction& i) override;
+ bool visit(const FetchInstruction& i) override;
+ bool visit(const IfInstruction& i) override;
+ bool visit(const ElseInstruction& i) override;
+ bool visit(const IfElseEndInstruction& i) override;
+ bool visit(const LoopBeginInstruction& i) override;
+ bool visit(const LoopEndInstruction& i) override;
+ bool visit(const LoopBreakInstruction& i) override;
+ bool visit(const LoopContInstruction& i) override;
+ bool visit(const StreamOutIntruction& i) override;
+ bool visit(const MemRingOutIntruction& i) override;
+ bool visit(const EmitVertex& i) override;
+ bool visit(const WaitAck& i) override;
+ bool visit(const WriteScratchInstruction& i) override;
+ bool visit(const GDSInstr& i) override;
+ bool visit(const RatInstruction& i) override;
+ bool visit(const LDSWriteInstruction& i) override;
+ bool visit(const LDSReadInstruction& i) override;
+ bool visit(const LDSAtomicInstruction& i) override;
+ bool visit(const GDSStoreTessFactor& i) override;
+ bool visit(const InstructionBlock& i) override;
+
+ bool emit_load_addr(PValue addr);
+ bool emit_fs_pixel_export(const ExportInstruction & exi);
+ bool emit_vs_pos_export(const ExportInstruction & exi);
+ bool emit_vs_param_export(const ExportInstruction & exi);
+ bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
+ bool copy_src(r600_bytecode_alu_src& src, const Value& s);
+
+ EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);
+
+ ConditionalJumpTracker m_jump_tracker;
+ CallStack m_callstack;
+
+public:
+ r600_bytecode *m_bc;
+ r600_shader *m_shader;
+ r600_shader_key *m_key;
+ r600_bytecode_output m_output;
+ unsigned m_max_color_exports;
+ bool has_pos_output;
+ bool has_param_output;
+ PValue m_last_addr;
+ int m_loop_nesting;
+ int m_nliterals_in_group;
+ std::set<int> vtx_fetch_results;
+ bool m_last_op_was_barrier;
+};
+
+
+AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
+ r600_shader_key *key)
+{
+ impl = new AssemblyFromShaderLegacyImpl(sh, key);
+}
+
+AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
+{
+ delete impl;
+}
+
+bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
+{
+ if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
+ impl->m_shader->ninput > 0)
+ r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);
+
+
+ std::vector<Instruction::Pointer> exports;
+
+ for (const auto& block : ir) {
+ if (!impl->visit(block))
+ return false;
+ }
+
+ /*
+ for (const auto& i : exports) {
+ if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
+ return false;
+ }
+ */
+
+ const struct cf_op_info *last = nullptr;
+ if (impl->m_bc->cf_last)
+ last = r600_isa_cf(impl->m_bc->cf_last->op);
+
+ /* ALU clause instructions don't have an EOP bit, so add a NOP */
+ if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
+ || impl->m_bc->cf_last->op == CF_OP_POP)
+ r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);
+
+ /* A CALL_FS as the last CF instruction can't carry the EOP bit (that
+ * results in a hang), but we can replace it with a NOP */
+ else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
+ impl->m_bc->cf_last->op = CF_OP_NOP;
+
+ if (impl->m_shader->bc.chip_class != CAYMAN)
+ impl->m_bc->cf_last->end_of_program = 1;
+ else
+ cm_bytecode_add_cf_end(impl->m_bc);
+
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block)
+{
+ for (const auto& i : block) {
+
+ if (i->type() != Instruction::vtx)
+ vtx_fetch_results.clear();
+
+ m_last_op_was_barrier &= i->type() == Instruction::alu;
+
+ sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n";
+
+ if (!i->accept(*this))
+ return false;
+
+ if (i->type() != Instruction::alu)
+ reset_addr_register();
+ }
+
+ return true;
+}
+
+AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
+ r600_shader_key *key):
+ m_callstack(sh->bc),
+ m_bc(&sh->bc),
+ m_shader(sh),
+ m_key(key),
+ has_pos_output(false),
+ has_param_output(false),
+ m_loop_nesting(0),
+ m_nliterals_in_group(0),
+ m_last_op_was_barrier(false)
+{
+ m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
+
+}
+
+extern const std::map<EAluOp, int> opcode_map;
+
+bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
+{
+ m_bc->ar_reg = addr->sel();
+ m_bc->ar_chan = addr->chan();
+ m_bc->ar_loaded = 0;
+ m_last_addr = addr;
+
+ sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n";
+
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai)
+{
+
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(alu));
+ PValue addr_in_use;
+
+ if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
+ std::cerr << "Opcode not handled for " << ai <<"\n";
+ return false;
+ }
+
+ if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
+ return true;
+
+ m_last_op_was_barrier = ai.opcode() == op0_group_barrier;
+
+ unsigned old_nliterals_in_group = m_nliterals_in_group;
+ for (unsigned i = 0; i < ai.n_sources(); ++i) {
+ auto& s = ai.src(i);
+ if (s.type() == Value::literal)
+ ++m_nliterals_in_group;
+ }
+
+ /* This instruction group would exceed the limit of four literals
+ * per ALU group, so force a new group by adding a NOP as the last
+ * instruction. This will no longer be needed with a real
+ * scheduler */
+ if (m_nliterals_in_group > 4) {
+ sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " literals, inject a NOP to start a new group\n";
+ alu.op = ALU_OP0_NOP;
+ alu.last = 1;
+ alu.dst.chan = 3;
+ int retval = r600_bytecode_add_alu(m_bc, &alu);
+ if (retval)
+ return false;
+ memset(&alu, 0, sizeof(alu));
+ m_nliterals_in_group -= old_nliterals_in_group;
+ }
+
+ alu.op = opcode_map.at(ai.opcode());
+
+ /* Missing test whether ai actually has a dest */
+ auto dst = ai.dest();
+
+ if (dst) {
+ if (!copy_dst(alu.dst, *dst))
+ return false;
+
+ alu.dst.write = ai.flag(alu_write);
+ alu.dst.clamp = ai.flag(alu_dst_clamp);
+
+ if (dst->type() == Value::gpr_array_value) {
+ auto& v = static_cast<const GPRArrayValue&>(*dst);
+ PValue addr = v.indirect();
+ if (addr) {
+ if (!m_last_addr || *addr != *m_last_addr) {
+ emit_load_addr(addr);
+ addr_in_use = addr;
+ }
+ alu.dst.rel = addr ? 1 : 0;
+ }
+ }
+ }
+
+ alu.is_op3 = ai.n_sources() == 3;
+
+ for (unsigned i = 0; i < ai.n_sources(); ++i) {
+ auto& s = ai.src(i);
+
+ if (!copy_src(alu.src[i], s))
+ return false;
+ alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
+
+ if (s.type() == Value::gpr_array_value) {
+ auto& v = static_cast<const GPRArrayValue&>(s);
+ PValue addr = v.indirect();
+ if (addr) {
+ assert(!addr_in_use || (*addr_in_use == *addr));
+ if (!m_last_addr || *addr != *m_last_addr) {
+ emit_load_addr(addr);
+ addr_in_use = addr;
+ }
+ alu.src[i].rel = addr ? 1 : 0;
+ }
+ }
+ if (!alu.is_op3)
+ alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
+ }
+
+ if (ai.bank_swizzle() != alu_vec_unknown)
+ alu.bank_swizzle_force = ai.bank_swizzle();
+
+ alu.last = ai.flag(alu_last_instr);
+ alu.update_pred = ai.flag(alu_update_pred);
+ alu.execute_mask = ai.flag(alu_update_exec);
+
+ /* If the destination register is equal to the last loaded address register
+ * then clear the latter one, because the values will no longer be identical */
+ if (m_last_addr)
+ sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";
+
+ if (dst)
+ sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
+
+ if (dst && m_last_addr && *dst == *m_last_addr) {
+ sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << ")\n";
+ m_last_addr.reset();
+ }
+
+ auto cf_op = ai.cf_type();
+
+ unsigned type = 0;
+ switch (cf_op) {
+ case cf_alu: type = CF_OP_ALU; break;
+ case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
+ case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
+ case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
+ case cf_alu_break: type = CF_OP_ALU_BREAK; break;
+ case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
+ case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
+ case cf_alu_extended: type = CF_OP_ALU_EXT; break;
+ default:
+ assert(0 && "cf_alu_undefined should have been replaced");
+ }
+
+ if (alu.last)
+ m_nliterals_in_group = 0;
+
+ bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
+
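+ /* Track the hardware address and index registers: MOVA invalidates the
+ * currently loaded AR, while the SET_CF_IDX ops mark the corresponding
+ * CF index register as loaded. */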
+ if (ai.opcode() == op1_mova_int)
+ m_bc->ar_loaded = 0;
+
+ if (ai.opcode() == op1_set_cf_idx0)
+ m_bc->index_loaded[0] = 1;
+
+ if (ai.opcode() == op1_set_cf_idx1)
+ m_bc->index_loaded[1] = 1;
+
+
+ m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
+ ai.opcode() == op2_killne_int ||
+ ai.opcode() == op1_set_cf_idx0 ||
+ ai.opcode() == op1_set_cf_idx1);
+ return retval;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
+{
+ r600_bytecode_output output;
+ memset(&output, 0, sizeof(output));
+ assert(exi.gpr().type() == Value::gpr_vector);
+ const auto& gpr = exi.gpr();
+ output.gpr = gpr.sel();
+ output.elem_size = 3;
+ output.swizzle_x = gpr.chan_i(0);
+ output.swizzle_y = gpr.chan_i(1);
+ output.swizzle_z = gpr.chan_i(2);
+ output.swizzle_w = gpr.chan_i(3);
+ output.burst_count = 1;
+ output.array_base = 60 + exi.location();
+ output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
+ output.type = exi.export_type();
+
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("Error adding pixel export at location %d\n", exi.location());
+ return false;
+ }
+
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
+{
+ r600_bytecode_output output;
+ assert(exi.gpr().type() == Value::gpr_vector);
+ const auto& gpr = exi.gpr();
+
+ memset(&output, 0, sizeof(output));
+ output.gpr = gpr.sel();
+ output.elem_size = 3;
+ output.swizzle_x = gpr.chan_i(0);
+ output.swizzle_y = gpr.chan_i(1);
+ output.swizzle_z = gpr.chan_i(2);
+ output.swizzle_w = gpr.chan_i(3);
+ output.burst_count = 1;
+ output.array_base = exi.location();
+ output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
+ output.type = exi.export_type();
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+      R600_ERR("Error adding parameter export at location %d\n", exi.location());
+ return false;
+ }
+
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
+{
+ if (exi.location() >= m_max_color_exports && exi.location() < 60) {
+      R600_ERR("shader_from_nir: ignoring pixel export %u, because the supported maximum is %u\n",
+ exi.location(), m_max_color_exports);
+ return true;
+ }
+
+ assert(exi.gpr().type() == Value::gpr_vector);
+ const auto& gpr = exi.gpr();
+
+ r600_bytecode_output output;
+ memset(&output, 0, sizeof(output));
+
+ output.gpr = gpr.sel();
+ output.elem_size = 3;
+ output.swizzle_x = gpr.chan_i(0);
+ output.swizzle_y = gpr.chan_i(1);
+ output.swizzle_z = gpr.chan_i(2);
+   output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3);
+ output.burst_count = 1;
+ output.array_base = exi.location();
+ output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT;
+ output.type = exi.export_type();
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("Error adding pixel export at location %d\n", exi.location());
+ return false;
+ }
+
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi)
+{
+ switch (exi.export_type()) {
+ case ExportInstruction::et_pixel:
+ return emit_fs_pixel_export(exi);
+ case ExportInstruction::et_pos:
+ return emit_vs_pos_export(exi);
+ case ExportInstruction::et_param:
+ return emit_vs_param_export(exi);
+ default:
+      R600_ERR("shader_from_nir: export type %d not yet supported\n", exi.export_type());
+ return false;
+ }
+}
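+
+/* Summary of the emitters above: they differ mainly in the array_base
+ * they program; position exports land at 60 + location, while parameter
+ * and pixel exports use the location directly. */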
+
+bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr)
+{
+ int elems = m_callstack.push(FC_PUSH_VPM);
+ bool needs_workaround = false;
+
+ if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
+ needs_workaround = true;
+
+ if (m_bc->family != CHIP_HEMLOCK &&
+ m_bc->family != CHIP_CYPRESS &&
+ m_bc->family != CHIP_JUNIPER) {
+ unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
+ unsigned dmod2 = (elems) % m_bc->stack.entry_size;
+
+ if (elems && (!dmod1 || !dmod2))
+ needs_workaround = true;
+ }
+
+ auto& pred = if_instr.pred();
+
+ if (needs_workaround) {
+ r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
+ m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
+ auto new_pred = pred;
+ new_pred.set_cf_type(cf_alu);
+ visit(new_pred);
+ } else
+ visit(pred);
+
+ r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
+
+ m_jump_tracker.push(m_bc->cf_last, jt_if);
+ return true;
+}
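+
+/* Illustrative sketch of the workaround condition above, assuming a
+ * hypothetical stack.entry_size of 4:
+ *
+ *   elems = 4: dmod1 = 3 % 4 = 3, dmod2 = 4 % 4 = 0 -> extra PUSH
+ *   elems = 5: dmod1 = 4 % 4 = 0, dmod2 = 5 % 4 = 1 -> extra PUSH
+ *   elems = 6: dmod1 = 5 % 4 = 1, dmod2 = 6 % 4 = 2 -> plain path
+ *
+ * i.e. an explicit PUSH (with the predicate demoted to plain cf_alu) is
+ * emitted whenever the push would land on or right after a stack entry
+ * boundary, and on Cayman whenever more than one loop is on the stack. */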
+
+bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
+ m_bc->cf_last->pop_count = 1;
+ return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr)
+{
+ m_callstack.pop(FC_PUSH_VPM);
+
+ unsigned force_pop = m_bc->force_add_cf;
+ if (!force_pop) {
+ int alu_pop = 3;
+ if (m_bc->cf_last) {
+ if (m_bc->cf_last->op == CF_OP_ALU)
+ alu_pop = 0;
+ else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
+ alu_pop = 1;
+ }
+ alu_pop += 1;
+ if (alu_pop == 1) {
+ m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
+ m_bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
+ m_bc->force_add_cf = 1;
+ } else {
+ force_pop = 1;
+ }
+ }
+
+ if (force_pop) {
+ r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
+ m_bc->cf_last->pop_count = 1;
+ m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
+ }
+
+ return m_jump_tracker.pop(m_bc->cf_last, jt_if);
+}
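+
+/* Sketch of the pop folding above: the pop is merged into the preceding
+ * ALU clause when possible instead of emitting a separate CF instruction:
+ *
+ *   last CF op            alu_pop + 1   result
+ *   CF_OP_ALU             1             rewrite to CF_OP_ALU_POP_AFTER
+ *   CF_OP_ALU_POP_AFTER   2             rewrite to CF_OP_ALU_POP2_AFTER
+ *   anything else         4             emit an explicit CF_OP_POP */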
+
+bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
+ m_jump_tracker.push(m_bc->cf_last, jt_loop);
+ m_callstack.push(FC_LOOP);
+ ++m_loop_nesting;
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
+ m_callstack.pop(FC_LOOP);
+ assert(m_loop_nesting);
+ --m_loop_nesting;
+ return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
+ return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr)
+{
+ r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
+ return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr)
+{
+ struct r600_bytecode_output output;
+ memset(&output, 0, sizeof(struct r600_bytecode_output));
+
+ output.gpr = so_instr.gpr().sel();
+ output.elem_size = so_instr.element_size();
+ output.array_base = so_instr.array_base();
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
+ output.burst_count = so_instr.burst_count();
+ output.array_size = so_instr.array_size();
+ output.comp_mask = so_instr.comp_mask();
+ output.op = so_instr.op();
+
+ assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("shader_from_nir: Error creating stream output instruction\n");
+ return false;
+ }
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr)
+{
+ struct r600_bytecode_output output;
+ memset(&output, 0, sizeof(struct r600_bytecode_output));
+
+ output.gpr = instr.gpr().sel();
+ output.type = instr.type();
+ output.elem_size = 3;
+ output.comp_mask = 0xf;
+ output.burst_count = 1;
+ output.op = instr.op();
+ if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
+ output.index_gpr = instr.index_reg();
+ output.array_size = 0xfff;
+ }
+ output.array_base = instr.array_base();
+
+ if (r600_bytecode_add_output(m_bc, &output)) {
+ R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
+ return false;
+ }
+ return true;
+}
+
+
+bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr)
+{
+ auto addr = tex_instr.sampler_offset();
+ if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
+ || m_bc->index_reg[1] != addr->sel()
+ || m_bc->index_reg_chan[1] != addr->chan())) {
+ struct r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(alu));
+ alu.op = opcode_map.at(op1_mova_int);
+ alu.dst.chan = 0;
+ alu.src[0].sel = addr->sel();
+ alu.src[0].chan = addr->chan();
+ alu.last = 1;
+ int r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return false;
+
+ m_bc->ar_loaded = 0;
+
+ alu.op = opcode_map.at(op1_set_cf_idx1);
+ alu.dst.chan = 0;
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return false;
+
+ m_bc->index_reg[1] = addr->sel();
+ m_bc->index_reg_chan[1] = addr->chan();
+ m_bc->index_loaded[1] = true;
+ }
+
+ r600_bytecode_tex tex;
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.op = tex_instr.opcode();
+ tex.sampler_id = tex_instr.sampler_id();
+ tex.sampler_index_mode = 0;
+   tex.resource_id = tex_instr.resource_id();
+ tex.resource_index_mode = 0;
+ tex.src_gpr = tex_instr.src().sel();
+ tex.dst_gpr = tex_instr.dst().sel();
+ tex.dst_sel_x = tex_instr.dest_swizzle(0);
+ tex.dst_sel_y = tex_instr.dest_swizzle(1);
+ tex.dst_sel_z = tex_instr.dest_swizzle(2);
+ tex.dst_sel_w = tex_instr.dest_swizzle(3);
+ tex.src_sel_x = tex_instr.src().chan_i(0);
+ tex.src_sel_y = tex_instr.src().chan_i(1);
+ tex.src_sel_z = tex_instr.src().chan_i(2);
+ tex.src_sel_w = tex_instr.src().chan_i(3);
+ tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
+ tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
+ tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
+ tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
+ tex.offset_x = tex_instr.get_offset(0);
+ tex.offset_y = tex_instr.get_offset(1);
+ tex.offset_z = tex_instr.get_offset(2);
+ tex.resource_index_mode = (!!addr) ? 2 : 0;
+ tex.sampler_index_mode = tex.resource_index_mode;
+
+ if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
+ tex_instr.opcode() == TexInstruction::get_gradient_v)
+ tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
+ else
+ tex.inst_mod = tex_instr.inst_mode();
+ if (r600_bytecode_add_tex(m_bc, &tex)) {
+ R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
+ return false;
+ }
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr)
+{
+ int buffer_offset = 0;
+ auto addr = fetch_instr.buffer_offset();
+ auto index_mode = fetch_instr.buffer_index_mode();
+
+ if (addr) {
+ if (addr->type() == Value::literal) {
+ const auto& boffs = static_cast<const LiteralValue&>(*addr);
+ buffer_offset = boffs.value();
+ } else {
+ index_mode = emit_index_reg(*addr, 0);
+ }
+ }
+
+ if (fetch_instr.has_prelude()) {
+ for(auto &i : fetch_instr.prelude()) {
+ if (!i->accept(*this))
+ return false;
+ }
+ }
+
+ if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
+ vtx_fetch_results.end()) {
+ m_bc->force_add_cf = 1;
+ vtx_fetch_results.clear();
+ }
+ vtx_fetch_results.insert(fetch_instr.dst().sel());
+
+ struct r600_bytecode_vtx vtx;
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.op = fetch_instr.vc_opcode();
+ vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
+ vtx.fetch_type = fetch_instr.fetch_type();
+ vtx.src_gpr = fetch_instr.src().sel();
+ vtx.src_sel_x = fetch_instr.src().chan();
+ vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
+ vtx.dst_gpr = fetch_instr.dst().sel();
+ vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */
+ vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */
+ vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */
+ vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */
+ vtx.use_const_fields = fetch_instr.use_const_fields();
+ vtx.data_format = fetch_instr.data_format();
+ vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */
+ vtx.endian = fetch_instr.endian_swap();
+ vtx.buffer_index_mode = index_mode;
+ vtx.offset = fetch_instr.offset();
+ vtx.indexed = fetch_instr.indexed();
+ vtx.uncached = fetch_instr.uncached();
+ vtx.elem_size = fetch_instr.elm_size();
+ vtx.array_base = fetch_instr.array_base();
+ vtx.array_size = fetch_instr.array_size();
+ vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();
+
+ if (fetch_instr.use_tc()) {
+ if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
+         R600_ERR("shader_from_nir: Error creating vertex fetch assembly instruction\n");
+ return false;
+ }
+
+ } else {
+ if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
+         R600_ERR("shader_from_nir: Error creating vertex fetch assembly instruction\n");
+ return false;
+ }
+ }
+
+ m_bc->cf_last->vpm = fetch_instr.use_vpm();
+ m_bc->cf_last->barrier = 1;
+
+ return true;
+}
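+
+/* Note: vtx_fetch_results tracks the GPRs written by fetches in the
+ * current clause; when a later fetch sources one of them, force_add_cf
+ * starts a new clause so the earlier result is actually available. */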
+
+bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr)
+{
+ int r = r600_bytecode_add_cfinst(m_bc, instr.op());
+ if (!r)
+ m_bc->cf_last->count = instr.stream();
+ assert(m_bc->cf_last->count < 4);
+
+ return r == 0;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr)
+{
+ int r = r600_bytecode_add_cfinst(m_bc, instr.op());
+ if (!r)
+ m_bc->cf_last->cf_addr = instr.n_ack();
+
+ return r == 0;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr)
+{
+ struct r600_bytecode_output cf;
+
+ memset(&cf, 0, sizeof(struct r600_bytecode_output));
+
+ cf.op = CF_OP_MEM_SCRATCH;
+ cf.elem_size = 3;
+ cf.gpr = instr.gpr().sel();
+ cf.mark = 1;
+ cf.comp_mask = instr.write_mask();
+ cf.swizzle_x = 0;
+ cf.swizzle_y = 1;
+ cf.swizzle_z = 2;
+ cf.swizzle_w = 3;
+ cf.burst_count = 1;
+
+ if (instr.indirect()) {
+ cf.type = 3;
+ cf.index_gpr = instr.address();
+
+      /* The documentation seems to be wrong here: in indirect addressing
+       * the address_base field seems to hold the array_size */
+ cf.array_size = instr.array_size();
+ } else {
+ cf.type = 2;
+ cf.array_base = instr.location();
+ }
+ /* This should be 0, but the address calculation is apparently wrong */
+
+ if (r600_bytecode_add_output(m_bc, &cf)){
+ R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
+ return false;
+ }
+
+ return true;
+}
+
+extern const std::map<ESDOp, int> ds_opcode_map;
+
+bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr)
+{
+ struct r600_bytecode_gds gds;
+
+ int uav_idx = -1;
+ auto addr = instr.uav_id();
+ if (addr->type() != Value::literal) {
+ emit_index_reg(*addr, 1);
+ } else {
+ const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
+ uav_idx = addr_reg.value();
+ }
+
+ memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+
+ gds.op = ds_opcode_map.at(instr.op());
+ gds.dst_gpr = instr.dest_sel();
+ gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
+ gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
+ gds.src_gpr = instr.src_sel();
+
+ gds.src_sel_x = instr.src_swizzle(0);
+ gds.src_sel_y = instr.src_swizzle(1);
+ gds.src_sel_z = instr.src_swizzle(2);
+
+ gds.dst_sel_x = instr.dest_swizzle(0);
+ gds.dst_sel_y = 7;
+ gds.dst_sel_z = 7;
+ gds.dst_sel_w = 7;
+ gds.src_gpr2 = 0;
+ gds.alloc_consume = 1; // Not Cayman
+
+ int r = r600_bytecode_add_gds(m_bc, &gds);
+ if (r)
+ return false;
+ m_bc->cf_last->vpm = 1;
+ m_bc->cf_last->barrier = 1;
+ return true;
+}
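+
+/* Note: a literal UAV id is folded directly into gds.uav_id (bim_none),
+ * while a register-based id is loaded through emit_index_reg(*addr, 1)
+ * and applied via CF_IDX1 (bim_one). */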
+
+bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr)
+{
+ struct r600_bytecode_gds gds;
+
+ memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+ gds.src_gpr = instr.sel();
+ gds.src_sel_x = instr.chan(0);
+ gds.src_sel_y = instr.chan(1);
+ gds.src_sel_z = 4;
+ gds.dst_sel_x = 7;
+ gds.dst_sel_y = 7;
+ gds.dst_sel_z = 7;
+ gds.dst_sel_w = 7;
+ gds.op = FETCH_OP_TF_WRITE;
+
+ if (r600_bytecode_add_gds(m_bc, &gds) != 0)
+ return false;
+
+ if (instr.chan(2) != 7) {
+ memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+ gds.src_gpr = instr.sel();
+ gds.src_sel_x = instr.chan(2);
+ gds.src_sel_y = instr.chan(3);
+ gds.src_sel_z = 4;
+ gds.dst_sel_x = 7;
+ gds.dst_sel_y = 7;
+ gds.dst_sel_z = 7;
+ gds.dst_sel_w = 7;
+ gds.op = FETCH_OP_TF_WRITE;
+
+ if (r600_bytecode_add_gds(m_bc, &gds))
+ return false;
+ }
+ return true;
+}
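+
+/* Note: TF_WRITE moves two tessellation factors per GDS instruction
+ * (src_sel_x/y), so up to two instructions are emitted; chan(2) == 7
+ * marks the second pair as absent. */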
+
+bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr)
+{
+ r600_bytecode_alu alu;
+ memset(&alu, 0, sizeof(r600_bytecode_alu));
+
+ alu.last = true;
+ alu.is_lds_idx_op = true;
+ copy_src(alu.src[0], instr.address());
+ copy_src(alu.src[1], instr.value0());
+
+ if (instr.num_components() == 1) {
+ alu.op = LDS_OP2_LDS_WRITE;
+ } else {
+ alu.op = LDS_OP3_LDS_WRITE_REL;
+ alu.lds_idx = 1;
+ copy_src(alu.src[2], instr.value1());
+ }
+
+ return r600_bytecode_add_alu(m_bc, &alu) == 0;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr)
+{
+ int r;
+ unsigned nread = 0;
+ unsigned nfetch = 0;
+ unsigned n_values = instr.num_values();
+
+ r600_bytecode_alu alu_fetch;
+ r600_bytecode_alu alu_read;
+
+   /* We must start a new ALU clause if the fetch and read ops would otherwise
+    * be split across clauses; r600_asm limits a clause to 120 slots = 240 dwords */
+ if (m_bc->cf_last->ndw > 240 - 4 * n_values)
+ m_bc->force_add_cf = 1;
+
+ while (nread < n_values) {
+ if (nfetch < n_values) {
+ memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
+ alu_fetch.is_lds_idx_op = true;
+ alu_fetch.op = LDS_OP1_LDS_READ_RET;
+
+ copy_src(alu_fetch.src[0], instr.address(nfetch));
+ alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
+ alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
+ alu_fetch.last = 1;
+ r = r600_bytecode_add_alu(m_bc, &alu_fetch);
+ m_bc->cf_last->nlds_read++;
+ if (r)
+ return false;
+ }
+
+ if (nfetch >= n_values) {
+ memset(&alu_read, 0, sizeof(r600_bytecode_alu));
+ copy_dst(alu_read.dst, instr.dest(nread));
+ alu_read.op = ALU_OP1_MOV;
+ alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+ alu_read.last = 1;
+ alu_read.dst.write = 1;
+ r = r600_bytecode_add_alu(m_bc, &alu_read);
+ m_bc->cf_last->nqueue_read++;
+ if (r)
+ return false;
+ ++nread;
+ }
+ ++nfetch;
+ }
+ assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);
+
+ return true;
+}
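+
+/* Illustrative trace of the loop above for n_values == 2; the LDS return
+ * queue is filled completely before it is drained:
+ *
+ *   nfetch=0: LDS_READ_RET addr0       (push onto LDS_OQ_A)
+ *   nfetch=1: LDS_READ_RET addr1       (push onto LDS_OQ_A)
+ *   nfetch=2: MOV dst0, LDS_OQ_A_POP   (nread 0 -> 1)
+ *   nfetch=3: MOV dst1, LDS_OQ_A_POP   (nread 1 -> 2)
+ *
+ * which keeps nlds_read == nqueue_read, as the final assert checks. */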
+
+bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr)
+{
+ if (m_bc->cf_last->ndw > 240 - 4)
+ m_bc->force_add_cf = 1;
+
+ r600_bytecode_alu alu_fetch;
+ r600_bytecode_alu alu_read;
+
+ memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
+ alu_fetch.is_lds_idx_op = true;
+ alu_fetch.op = instr.op();
+
+ copy_src(alu_fetch.src[0], instr.address());
+ copy_src(alu_fetch.src[1], instr.src0());
+
+ if (instr.src1())
+ copy_src(alu_fetch.src[2], *instr.src1());
+ alu_fetch.last = 1;
+ int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
+ if (r)
+ return false;
+
+ memset(&alu_read, 0, sizeof(r600_bytecode_alu));
+ copy_dst(alu_read.dst, instr.dest());
+ alu_read.op = ALU_OP1_MOV;
+ alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
+ alu_read.last = 1;
+ alu_read.dst.write = 1;
+ r = r600_bytecode_add_alu(m_bc, &alu_read);
+ if (r)
+ return false;
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr)
+{
+ struct r600_bytecode_gds gds;
+
+ int rat_idx = instr.rat_id();
+ EBufferIndexMode rat_index_mode = bim_none;
+ auto addr = instr.rat_id_offset();
+
+ if (addr) {
+ if (addr->type() != Value::literal) {
+ rat_index_mode = emit_index_reg(*addr, 1);
+ } else {
+ const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
+ rat_idx += addr_reg.value();
+ }
+ }
+ memset(&gds, 0, sizeof(struct r600_bytecode_gds));
+
+ r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
+ auto cf = m_bc->cf_last;
+ cf->rat.id = rat_idx + m_shader->rat_base;
+ cf->rat.inst = instr.rat_op();
+ cf->rat.index_mode = rat_index_mode;
+ cf->output.type = instr.need_ack() ? 3 : 1;
+ cf->output.gpr = instr.data_gpr();
+ cf->output.index_gpr = instr.index_gpr();
+ cf->output.comp_mask = instr.comp_mask();
+ cf->output.burst_count = instr.burst_count();
+ assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
+ if (cf->rat.inst != RatInstruction::STORE_TYPED) {
+      assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
+             instr.data_swz(1) == PIPE_SWIZZLE_MAX);
+      assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
+             instr.data_swz(2) == PIPE_SWIZZLE_MAX);
+ }
+
+ cf->vpm = 1;
+ cf->barrier = 1;
+ cf->mark = instr.need_ack();
+ cf->output.elem_size = instr.elm_size();
+ return true;
+}
+
+EBufferIndexMode
+AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
+{
+ assert(idx < 2);
+
+ EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
+
+ if (!m_bc->index_loaded[idx] || m_loop_nesting ||
+ m_bc->index_reg[idx] != addr.sel()
+ || m_bc->index_reg_chan[idx] != addr.chan()) {
+ struct r600_bytecode_alu alu;
+
+ // Make sure MOVA is not last instr in clause
+ if ((m_bc->cf_last->ndw>>1) >= 110)
+ m_bc->force_add_cf = 1;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.op = opcode_map.at(op1_mova_int);
+ alu.dst.chan = 0;
+ alu.src[0].sel = addr.sel();
+ alu.src[0].chan = addr.chan();
+ alu.last = 1;
+ sfn_log << SfnLog::assembly << " mova_int, ";
+ int r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return bim_invalid;
+
+ m_bc->ar_loaded = 0;
+
+ alu.op = opcode_map.at(idxop);
+ alu.dst.chan = 0;
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 0;
+ alu.last = 1;
+ sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
+ r = r600_bytecode_add_alu(m_bc, &alu);
+ if (r)
+ return bim_invalid;
+
+ m_bc->index_reg[idx] = addr.sel();
+ m_bc->index_reg_chan[idx] = addr.chan();
+ m_bc->index_loaded[idx] = true;
+ sfn_log << SfnLog::assembly << "\n";
+ }
+ return idx == 0 ? bim_zero : bim_one;
+}
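+
+/* Illustrative sketch: loading an index register costs two ALU
+ * instructions, e.g. for idx == 0:
+ *
+ *   MOVA_INT     AR <- addr.sel[addr.chan]
+ *   SET_CF_IDX0  CF_IDX0 <- AR
+ *
+ * The sel/chan cached in m_bc->index_reg[idx] avoids re-emitting this
+ * pair while the same address value is still loaded; a write to that GPR
+ * invalidates the cache in copy_dst() below. */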
+
+bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
+ const Value& d)
+{
+ assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
+
+ if (d.sel() > 124) {
+      R600_ERR("shader_from_nir: No support for more than 124 GPRs, but tried to use %d\n", d.sel());
+ return false;
+ }
+
+ dst.sel = d.sel();
+ dst.chan = d.chan();
+
+ if (m_bc->index_reg[1] == dst.sel &&
+ m_bc->index_reg_chan[1] == dst.chan)
+ m_bc->index_loaded[1] = false;
+
+ if (m_bc->index_reg[0] == dst.sel &&
+ m_bc->index_reg_chan[0] == dst.chan)
+ m_bc->index_loaded[0] = false;
+
+ return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
+{
+
+ if (s.type() == Value::gpr && s.sel() > 124) {
+      R600_ERR("shader_from_nir: No support for more than 124 GPRs, but tried to use %d\n", s.sel());
+ return false;
+ }
+
+ if (s.type() == Value::lds_direct) {
+ R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
+ return false;
+ }
+
+ if (s.type() == Value::kconst && s.sel() < 512) {
+      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d\n", s.sel());
+ return false;
+ }
+
+ if (s.type() == Value::literal) {
+ auto& v = static_cast<const LiteralValue&>(s);
+ if (v.value() == 0) {
+ src.sel = ALU_SRC_0;
+ src.chan = 0;
+ --m_nliterals_in_group;
+ return true;
+ }
+ if (v.value() == 1) {
+ src.sel = ALU_SRC_1_INT;
+ src.chan = 0;
+ --m_nliterals_in_group;
+ return true;
+ }
+ if (v.value_float() == 1.0f) {
+ src.sel = ALU_SRC_1;
+ src.chan = 0;
+ --m_nliterals_in_group;
+ return true;
+ }
+ if (v.value_float() == 0.5f) {
+ src.sel = ALU_SRC_0_5;
+ src.chan = 0;
+ --m_nliterals_in_group;
+ return true;
+ }
+ if (v.value() == 0xffffffff) {
+ src.sel = ALU_SRC_M_1_INT;
+ src.chan = 0;
+ --m_nliterals_in_group;
+ return true;
+ }
+ src.value = v.value();
+ }
+
+ src.sel = s.sel();
+ src.chan = s.chan();
+ if (s.type() == Value::kconst) {
+ const UniformValue& cv = static_cast<const UniformValue&>(s);
+ src.kc_bank = cv.kcache_bank();
+ auto addr = cv.addr();
+ if (addr) {
+ src.kc_rel = 1;
+ emit_index_reg(*addr, 0);
+ auto type = m_bc->cf_last->op;
+ if (r600_bytecode_add_cf(m_bc)) {
+ return false;
+ }
+ m_bc->cf_last->op = type;
+ }
+ }
+
+ return true;
+}
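+
+/* Summary: copy_src() folds well-known literals into hardware inline
+ * constants so they don't occupy literal slots in the ALU group (hence
+ * the --m_nliterals_in_group):
+ *
+ *   0          -> ALU_SRC_0
+ *   1 (int)    -> ALU_SRC_1_INT
+ *   1.0f       -> ALU_SRC_1
+ *   0.5f       -> ALU_SRC_0_5
+ *   0xffffffff -> ALU_SRC_M_1_INT */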
+
+const std::map<EAluOp, int> opcode_map = {
+
+ {op2_add, ALU_OP2_ADD},
+ {op2_mul, ALU_OP2_MUL},
+ {op2_mul_ieee, ALU_OP2_MUL_IEEE},
+ {op2_max, ALU_OP2_MAX},
+ {op2_min, ALU_OP2_MIN},
+ {op2_max_dx10, ALU_OP2_MAX_DX10},
+ {op2_min_dx10, ALU_OP2_MIN_DX10},
+ {op2_sete, ALU_OP2_SETE},
+ {op2_setgt, ALU_OP2_SETGT},
+ {op2_setge, ALU_OP2_SETGE},
+ {op2_setne, ALU_OP2_SETNE},
+ {op2_sete_dx10, ALU_OP2_SETE_DX10},
+ {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
+ {op2_setge_dx10, ALU_OP2_SETGE_DX10},
+ {op2_setne_dx10, ALU_OP2_SETNE_DX10},
+ {op1_fract, ALU_OP1_FRACT},
+ {op1_trunc, ALU_OP1_TRUNC},
+ {op1_ceil, ALU_OP1_CEIL},
+ {op1_rndne, ALU_OP1_RNDNE},
+ {op1_floor, ALU_OP1_FLOOR},
+ {op2_ashr_int, ALU_OP2_ASHR_INT},
+ {op2_lshr_int, ALU_OP2_LSHR_INT},
+ {op2_lshl_int, ALU_OP2_LSHL_INT},
+ {op1_mov, ALU_OP1_MOV},
+ {op0_nop, ALU_OP0_NOP},
+ {op2_mul_64, ALU_OP2_MUL_64},
+ {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
+ {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
+ {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
+ {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
+ {op2_pred_sete, ALU_OP2_PRED_SETE},
+ {op2_pred_setgt, ALU_OP2_PRED_SETGT},
+ {op2_pred_setge, ALU_OP2_PRED_SETGE},
+ {op2_pred_setne, ALU_OP2_PRED_SETNE},
+ //{op2_pred_set_inv, ALU_OP2_PRED_SET},
+ //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
+ //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
+ {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
+ {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
+ {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
+ {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
+ {op2_kille, ALU_OP2_KILLE},
+ {op2_killgt, ALU_OP2_KILLGT},
+ {op2_killge, ALU_OP2_KILLGE},
+ {op2_killne, ALU_OP2_KILLNE},
+ {op2_and_int, ALU_OP2_AND_INT},
+ {op2_or_int, ALU_OP2_OR_INT},
+ {op2_xor_int, ALU_OP2_XOR_INT},
+ {op1_not_int, ALU_OP1_NOT_INT},
+ {op2_add_int, ALU_OP2_ADD_INT},
+ {op2_sub_int, ALU_OP2_SUB_INT},
+ {op2_max_int, ALU_OP2_MAX_INT},
+ {op2_min_int, ALU_OP2_MIN_INT},
+ {op2_max_uint, ALU_OP2_MAX_UINT},
+ {op2_min_uint, ALU_OP2_MIN_UINT},
+ {op2_sete_int, ALU_OP2_SETE_INT},
+ {op2_setgt_int, ALU_OP2_SETGT_INT},
+ {op2_setge_int, ALU_OP2_SETGE_INT},
+ {op2_setne_int, ALU_OP2_SETNE_INT},
+ {op2_setgt_uint, ALU_OP2_SETGT_UINT},
+ {op2_setge_uint, ALU_OP2_SETGE_UINT},
+ {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
+ {op2_killge_uint, ALU_OP2_KILLGE_UINT},
+ //p2_prede_int, ALU_OP2_PREDE_INT},
+ {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
+ {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
+ {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
+ {op2_kille_int, ALU_OP2_KILLE_INT},
+ {op2_killgt_int, ALU_OP2_KILLGT_INT},
+ {op2_killge_int, ALU_OP2_KILLGE_INT},
+ {op2_killne_int, ALU_OP2_KILLNE_INT},
+ {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
+ {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
+ {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
+ {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
+ {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
+ {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
+ {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
+ {op1_bfrev_int, ALU_OP1_BFREV_INT},
+ {op2_addc_uint, ALU_OP2_ADDC_UINT},
+ {op2_subb_uint, ALU_OP2_SUBB_UINT},
+ {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
+ {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
+ {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
+ {op2_set_mode, ALU_OP2_SET_MODE},
+ {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
+ {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
+ {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
+ {op1_exp_ieee, ALU_OP1_EXP_IEEE},
+ {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
+ {op1_log_ieee, ALU_OP1_LOG_IEEE},
+ {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
+ {op1_recip_ff, ALU_OP1_RECIP_FF},
+ {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
+ {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
+ {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
+ {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
+ {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
+ {op1_sin, ALU_OP1_SIN},
+ {op1_cos, ALU_OP1_COS},
+ {op2_mullo_int, ALU_OP2_MULLO_INT},
+ {op2_mulhi_int, ALU_OP2_MULHI_INT},
+ {op2_mullo_uint, ALU_OP2_MULLO_UINT},
+ {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
+ {op1_recip_int, ALU_OP1_RECIP_INT},
+ {op1_recip_uint, ALU_OP1_RECIP_UINT},
+ {op1_recip_64, ALU_OP2_RECIP_64},
+ {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
+ {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
+ {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
+ {op1_sqrt_64, ALU_OP2_SQRT_64},
+ {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
+ {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
+ {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
+ {op2_bfm_int, ALU_OP2_BFM_INT},
+ {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
+ {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
+ {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
+ {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
+ {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
+ {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
+ {op1_bcnt_int, ALU_OP1_BCNT_INT},
+ {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
+ {op1_ffbl_int, ALU_OP1_FFBL_INT},
+ {op1_ffbh_int, ALU_OP1_FFBH_INT},
+ {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
+ {op2_dot_ieee, ALU_OP2_DOT_IEEE},
+ {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
+ {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
+ {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
+ {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
+ {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
+ {op2_mul_uint24, ALU_OP2_MUL_UINT24},
+ {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
+ {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
+ {op2_sete_64, ALU_OP2_SETE_64},
+ {op2_setne_64, ALU_OP2_SETNE_64},
+ {op2_setgt_64, ALU_OP2_SETGT_64},
+ {op2_setge_64, ALU_OP2_SETGE_64},
+ {op2_min_64, ALU_OP2_MIN_64},
+ {op2_max_64, ALU_OP2_MAX_64},
+ {op2_dot4, ALU_OP2_DOT4},
+ {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
+ {op2_cube, ALU_OP2_CUBE},
+ {op1_max4, ALU_OP1_MAX4},
+ {op1_frexp_64, ALU_OP1_FREXP_64},
+ {op1_ldexp_64, ALU_OP2_LDEXP_64},
+ {op1_fract_64, ALU_OP1_FRACT_64},
+ {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
+ {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
+ {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
+ {op2_add_64, ALU_OP2_ADD_64},
+ {op1_mova_int, ALU_OP1_MOVA_INT},
+ {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
+ {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
+ {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
+ {op2_dot, ALU_OP2_DOT},
+ //p2_mul_prev, ALU_OP2_MUL_PREV},
+ //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
+ //p2_add_prev, ALU_OP2_ADD_PREV},
+ {op2_muladd_prev, ALU_OP2_MULADD_PREV},
+ {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
+ {op2_interp_xy, ALU_OP2_INTERP_XY},
+ {op2_interp_zw, ALU_OP2_INTERP_ZW},
+ {op2_interp_x, ALU_OP2_INTERP_X},
+ {op2_interp_z, ALU_OP2_INTERP_Z},
+ {op0_store_flags, ALU_OP1_STORE_FLAGS},
+ {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
+ {op0_lds_1a, ALU_OP2_LDS_1A},
+ {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
+ {op0_lds_2a, ALU_OP2_LDS_2A},
+ {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
+ {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
+ {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
+   // op3 opcodes (all left-shifted by 6)
+ {op3_bfe_uint, ALU_OP3_BFE_UINT},
+ {op3_bfe_int, ALU_OP3_BFE_INT},
+ {op3_bfi_int, ALU_OP3_BFI_INT},
+ {op3_fma, ALU_OP3_FMA},
+ {op3_cndne_64, ALU_OP3_CNDNE_64},
+ {op3_fma_64, ALU_OP3_FMA_64},
+ {op3_lerp_uint, ALU_OP3_LERP_UINT},
+ {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
+ {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
+ {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
+ {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
+ {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
+ {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
+ {op3_muladd, ALU_OP3_MULADD},
+ {op3_muladd_m2, ALU_OP3_MULADD_M2},
+ {op3_muladd_m4, ALU_OP3_MULADD_M4},
+ {op3_muladd_d2, ALU_OP3_MULADD_D2},
+ {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
+ {op3_cnde, ALU_OP3_CNDE},
+ {op3_cndgt, ALU_OP3_CNDGT},
+ {op3_cndge, ALU_OP3_CNDGE},
+ {op3_cnde_int, ALU_OP3_CNDE_INT},
+ {op3_cndgt_int, ALU_OP3_CNDGT_INT},
+ {op3_cndge_int, ALU_OP3_CNDGE_INT},
+ {op3_mul_lit, ALU_OP3_MUL_LIT},
+};
+
+const std::map<ESDOp, int> ds_opcode_map = {
+ {DS_OP_ADD, FETCH_OP_GDS_ADD},
+ {DS_OP_SUB, FETCH_OP_GDS_SUB},
+ {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
+ {DS_OP_INC, FETCH_OP_GDS_INC},
+ {DS_OP_DEC, FETCH_OP_GDS_DEC},
+ {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
+ {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
+ {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
+ {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
+ {DS_OP_AND, FETCH_OP_GDS_AND},
+ {DS_OP_OR, FETCH_OP_GDS_OR},
+ {DS_OP_XOR, FETCH_OP_GDS_XOR},
+ {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
+ {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
+ {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
+ {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
+ {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
+ {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
+ {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
+ {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
+ {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
+ {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
+ {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
+ {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
+ {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
+ {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
+ {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
+ {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
+ {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
+ {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
+ {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
+ {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
+ {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
+ {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
+ {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
+ {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
+ {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
+ {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
+ {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
+ {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
+ {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
+ {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
+ {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
+ {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
+ {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
+ {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
+ {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
+ {DS_OP_INVALID, 0},
+};
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
new file mode 100644
index 000000000..0c82032e6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h
@@ -0,0 +1,45 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_nir.h"
+
+struct r600_shader;
+union r600_shader_key;
+
+namespace r600 {
+
+class AssemblyFromShaderLegacy : public AssemblyFromShader {
+public:
+ AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key);
+ ~AssemblyFromShaderLegacy() override;
+private:
+   bool do_lower(const std::vector<InstructionBlock> &ir) override;
+
+ struct AssemblyFromShaderLegacyImpl *impl;
+};
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp
new file mode 100644
index 000000000..28eef0593
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp
@@ -0,0 +1,1006 @@
+/*
+ * Copyright (c) 2017-2019 Gert Wollny
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_liverange.h"
+#include "sfn_debug.h"
+#include "sfn_value.h"
+#include "sfn_value_gpr.h"
+
+#include "program/prog_instruction.h"
+#include "util/bitscan.h"
+#include "util/u_math.h"
+
+#include <limits>
+#include <cstdlib>
+#include <iomanip>
+
+/* std::sort is significantly faster than qsort */
+#include <algorithm>
+
+/* If <windows.h> is included this is defined and clashes with
+ * std::numeric_limits<>::max()
+ */
+#ifdef max
+#undef max
+#endif
+
+
+namespace r600 {
+
+using std::numeric_limits;
+using std::unique_ptr;
+using std::setw;
+
+prog_scope_storage::prog_scope_storage(int n):
+ current_slot(0),
+ storage(n)
+{
+}
+
+prog_scope_storage::~prog_scope_storage()
+{
+}
+
+prog_scope*
+prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id,
+ int lvl, int s_begin)
+{
+ storage[current_slot] = prog_scope(p, type, id, lvl, s_begin);
+ return &storage[current_slot++];
+}
+
+prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id,
+ int depth, int scope_begin):
+ scope_type(type),
+ scope_id(id),
+ scope_nesting_depth(depth),
+ scope_begin(scope_begin),
+ scope_end(-1),
+ break_loop_line(numeric_limits<int>::max()),
+ parent_scope(parent)
+{
+}
+
+prog_scope::prog_scope():
+ prog_scope(nullptr, undefined_scope, -1, -1, -1)
+{
+}
+
+prog_scope_type prog_scope::type() const
+{
+ return scope_type;
+}
+
+prog_scope *prog_scope::parent() const
+{
+ return parent_scope;
+}
+
+int prog_scope::nesting_depth() const
+{
+ return scope_nesting_depth;
+}
+
+bool prog_scope::is_loop() const
+{
+ return (scope_type == loop_body);
+}
+
+bool prog_scope::is_in_loop() const
+{
+ if (scope_type == loop_body)
+ return true;
+
+ if (parent_scope)
+ return parent_scope->is_in_loop();
+
+ return false;
+}
+
+const prog_scope *prog_scope::innermost_loop() const
+{
+ if (scope_type == loop_body)
+ return this;
+
+ if (parent_scope)
+ return parent_scope->innermost_loop();
+
+ return nullptr;
+}
+
+const prog_scope *prog_scope::outermost_loop() const
+{
+ const prog_scope *loop = nullptr;
+ const prog_scope *p = this;
+
+ do {
+ if (p->type() == loop_body)
+ loop = p;
+ p = p->parent();
+ } while (p);
+
+ return loop;
+}
+
+bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const
+{
+ const prog_scope *my_parent = in_parent_ifelse_scope();
+ while (my_parent) {
+ /* is a direct child? */
+ if (my_parent == scope)
+ return false;
+ /* is a child of the conditions sibling? */
+ if (my_parent->id() == scope->id())
+ return true;
+ my_parent = my_parent->in_parent_ifelse_scope();
+ }
+ return false;
+}
+
+bool prog_scope::is_child_of(const prog_scope *scope) const
+{
+ const prog_scope *my_parent = parent();
+ while (my_parent) {
+ if (my_parent == scope)
+ return true;
+ my_parent = my_parent->parent();
+ }
+ return false;
+}
+
+const prog_scope *prog_scope::enclosing_conditional() const
+{
+ if (is_conditional())
+ return this;
+
+ if (parent_scope)
+ return parent_scope->enclosing_conditional();
+
+ return nullptr;
+}
+
+bool prog_scope::contains_range_of(const prog_scope& other) const
+{
+ return (begin() <= other.begin()) && (end() >= other.end());
+}
+
+bool prog_scope::is_conditional() const
+{
+ return scope_type == if_branch ||
+ scope_type == else_branch ||
+ scope_type == switch_case_branch ||
+ scope_type == switch_default_branch;
+}
+
+const prog_scope *prog_scope::in_else_scope() const
+{
+ if (scope_type == else_branch)
+ return this;
+
+ if (parent_scope)
+ return parent_scope->in_else_scope();
+
+ return nullptr;
+}
+
+const prog_scope *prog_scope::in_parent_ifelse_scope() const
+{
+ if (parent_scope)
+ return parent_scope->in_ifelse_scope();
+ else
+ return nullptr;
+}
+
+const prog_scope *prog_scope::in_ifelse_scope() const
+{
+ if (scope_type == if_branch ||
+ scope_type == else_branch)
+ return this;
+
+ if (parent_scope)
+ return parent_scope->in_ifelse_scope();
+
+ return nullptr;
+}
+
+bool prog_scope::is_switchcase_scope_in_loop() const
+{
+ return (scope_type == switch_case_branch ||
+ scope_type == switch_default_branch) &&
+ is_in_loop();
+}
+
+bool prog_scope::break_is_for_switchcase() const
+{
+ if (scope_type == loop_body)
+ return false;
+
+ if (scope_type == switch_case_branch ||
+ scope_type == switch_default_branch ||
+ scope_type == switch_body)
+ return true;
+
+ if (parent_scope)
+ return parent_scope->break_is_for_switchcase();
+
+ return false;
+}
+
+int prog_scope::id() const
+{
+ return scope_id;
+}
+
+int prog_scope::begin() const
+{
+ return scope_begin;
+}
+
+int prog_scope::end() const
+{
+ return scope_end;
+}
+
+void prog_scope::set_end(int end)
+{
+ if (scope_end == -1)
+ scope_end = end;
+}
+
+void prog_scope::set_loop_break_line(int line)
+{
+ if (scope_type == loop_body) {
+ break_loop_line = MIN2(break_loop_line, line);
+ } else {
+ if (parent_scope)
+ parent()->set_loop_break_line(line);
+ }
+}
+
+int prog_scope::loop_break_line() const
+{
+ return break_loop_line;
+}
+
+temp_access::temp_access():
+ access_mask(0),
+ needs_component_tracking(false),
+ is_array_element(false)
+{
+}
+
+void temp_access::update_access_mask(int mask)
+{
+ if (access_mask && access_mask != mask)
+ needs_component_tracking = true;
+ access_mask |= mask;
+}
+
+void temp_access::record_write(int line, prog_scope *scope, int writemask, bool is_array_elm)
+{
+ update_access_mask(writemask);
+ is_array_element |= is_array_elm;
+
+ if (writemask & WRITEMASK_X)
+ comp[0].record_write(line, scope);
+ if (writemask & WRITEMASK_Y)
+ comp[1].record_write(line, scope);
+ if (writemask & WRITEMASK_Z)
+ comp[2].record_write(line, scope);
+ if (writemask & WRITEMASK_W)
+ comp[3].record_write(line, scope);
+}
+
+void temp_access::record_read(int line, prog_scope *scope, int readmask, bool is_array_elm)
+{
+ update_access_mask(readmask);
+ is_array_element |= is_array_elm;
+
+ if (readmask & WRITEMASK_X)
+ comp[0].record_read(line, scope);
+ if (readmask & WRITEMASK_Y)
+ comp[1].record_read(line, scope);
+ if (readmask & WRITEMASK_Z)
+ comp[2].record_read(line, scope);
+ if (readmask & WRITEMASK_W)
+ comp[3].record_read(line, scope);
+}
+
+inline static register_live_range make_live_range(int b, int e)
+{
+ register_live_range lt;
+ lt.begin = b;
+ lt.end = e;
+ lt.is_array_elm = false;
+ return lt;
+}
+
+register_live_range temp_access::get_required_live_range()
+{
+ register_live_range result = make_live_range(-1, -1);
+
+ unsigned mask = access_mask;
+ while (mask) {
+ unsigned chan = u_bit_scan(&mask);
+ register_live_range lt = comp[chan].get_required_live_range();
+
+ if (lt.begin >= 0) {
+ if ((result.begin < 0) || (result.begin > lt.begin))
+ result.begin = lt.begin;
+ }
+
+ if (lt.end > result.end)
+ result.end = lt.end;
+
+ if (!needs_component_tracking)
+ break;
+ }
+ result.is_array_elm = is_array_element;
+
+ return result;
+}
+
+const int
+temp_comp_access::conditionality_untouched = std::numeric_limits<int>::max();
+
+const int
+temp_comp_access::write_is_unconditional = std::numeric_limits<int>::max() - 1;
+
+
+temp_comp_access::temp_comp_access():
+ last_read_scope(nullptr),
+ first_read_scope(nullptr),
+ first_write_scope(nullptr),
+ first_write(-1),
+ last_read(-1),
+ last_write(-1),
+ first_read(numeric_limits<int>::max()),
+ conditionality_in_loop_id(conditionality_untouched),
+ if_scope_write_flags(0),
+ next_ifelse_nesting_depth(0),
+ current_unpaired_if_write_scope(nullptr),
+ was_written_in_current_else_scope(false)
+{
+}
+
+void temp_comp_access::record_read(int line, prog_scope *scope)
+{
+ last_read_scope = scope;
+ if (last_read < line)
+ last_read = line;
+
+ if (first_read > line) {
+ first_read = line;
+ first_read_scope = scope;
+ }
+
+ /* If the conditionality of the first write is already resolved then
+ * no further checks are required.
+ */
+ if (conditionality_in_loop_id == write_is_unconditional ||
+ conditionality_in_loop_id == write_is_conditional)
+ return;
+
+ /* Check whether we are in a condition within a loop */
+ const prog_scope *ifelse_scope = scope->in_ifelse_scope();
+ const prog_scope *enclosing_loop;
+ if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
+
+      /* If we have not yet written to this register, or the writes are not
+       * resolved as unconditional in the enclosing loop, then check whether
+       * we read before write in an IF/ELSE branch.
+       */
+ if ((conditionality_in_loop_id != write_is_conditional) &&
+ (conditionality_in_loop_id != enclosing_loop->id())) {
+
+ if (current_unpaired_if_write_scope) {
+
+            /* Has it been written in this or a parent scope? That makes the
+             * temporary unconditionally set at this point.
+             */
+ if (scope->is_child_of(current_unpaired_if_write_scope))
+ return;
+
+            /* Has it been written in the same scope before it was read? */
+ if (ifelse_scope->type() == if_branch) {
+ if (current_unpaired_if_write_scope->id() == scope->id())
+ return;
+ } else {
+ if (was_written_in_current_else_scope)
+ return;
+ }
+ }
+
+         /* The temporary was read (conditionally) before it is written, hence
+          * it should survive a loop. This can be signaled as if it were
+          * conditionally written.
+          */
+ conditionality_in_loop_id = write_is_conditional;
+ }
+ }
+}
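+
+/* Illustrative example (hypothetical shader pseudo code) for the
+ * read-before-write case handled above:
+ *
+ *   loop {
+ *      if (c)
+ *         x = t;   // t is read conditionally ...
+ *      t = f();    // ... before this write in the same loop
+ *   }
+ *
+ * t must keep its value across iterations, so it is treated as if it
+ * were conditionally written. */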
+
+void temp_comp_access::record_write(int line, prog_scope *scope)
+{
+ last_write = line;
+
+ if (first_write < 0) {
+ first_write = line;
+ first_write_scope = scope;
+
+ /* If the first write we encounter is not in a conditional branch, or
+ * the conditional write is not within a loop, then this is to be
+ * considered an unconditional dominant write.
+ */
+ const prog_scope *conditional = scope->enclosing_conditional();
+ if (!conditional || !conditional->innermost_loop()) {
+ conditionality_in_loop_id = write_is_unconditional;
+ }
+ }
+
+ /* The conditionality of the first write is already resolved. */
+ if (conditionality_in_loop_id == write_is_unconditional ||
+ conditionality_in_loop_id == write_is_conditional)
+ return;
+
+ /* If the nesting depth is larger than the supported level,
+ * then we assume conditional writes.
+ */
+ if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
+ conditionality_in_loop_id = write_is_conditional;
+ return;
+ }
+
+ /* If we are in an IF/ELSE scope within a loop and the loop has not
+ * been resolved already, then record this write.
+ */
+ const prog_scope *ifelse_scope = scope->in_ifelse_scope();
+ if (ifelse_scope && ifelse_scope->innermost_loop() &&
+ ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id)
+ record_ifelse_write(*ifelse_scope);
+}
+
+void temp_comp_access::record_ifelse_write(const prog_scope& scope)
+{
+ if (scope.type() == if_branch) {
+ /* The first write in an IF branch within a loop implies unresolved
+ * conditionality (if it was untouched or unconditional before).
+ */
+ conditionality_in_loop_id = conditionality_unresolved;
+ was_written_in_current_else_scope = false;
+ record_if_write(scope);
+ } else {
+ was_written_in_current_else_scope = true;
+ record_else_write(scope);
+ }
+}
+
+void temp_comp_access::record_if_write(const prog_scope& scope)
+{
+   /* Don't record the write in this IF scope if it ...
+    * - is not the first write in this IF scope, or
+    * - has already been written in a parent IF scope.
+    * In both cases this write is a secondary write that doesn't contribute
+    * to resolving the conditionality.
+    *
+    * Record the write if it
+    * - is the first one (obviously), or
+    * - happens in an IF branch that is a child of the ELSE branch of the
+    *   last active IF/ELSE pair. In this case recording this write is used to
+    *   establish whether the write is (un-)conditional in the scope enclosing
+    *   this outer IF/ELSE pair.
+    */
+ if (!current_unpaired_if_write_scope ||
+ (current_unpaired_if_write_scope->id() != scope.id() &&
+ scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) {
+ if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
+ current_unpaired_if_write_scope = &scope;
+ next_ifelse_nesting_depth++;
+ }
+}
+
+void temp_comp_access::record_else_write(const prog_scope& scope)
+{
+ int mask = 1 << (next_ifelse_nesting_depth - 1);
+
+ /* If the temporary was written in an IF branch on the same scope level
+ * and this branch is the sibling of this ELSE branch, then we have a
+ * pair of writes that makes write access to this temporary unconditional
+ * in the enclosing scope.
+ */
+
+ if ((if_scope_write_flags & mask) &&
+ (scope.id() == current_unpaired_if_write_scope->id())) {
+ --next_ifelse_nesting_depth;
+ if_scope_write_flags &= ~mask;
+
+ /* The following code deals with propagating unconditionality from
+ * inner levels of nested IF/ELSE to the outer levels like in
+ *
+ * 1: var t;
+ * 2: if (a) { <- start scope A
+ * 3: if (b)
+ * 4: t = ...
+ * 5: else
+ * 6: t = ...
+ * 7: } else { <- start scope B
+ * 8: if (c)
+ * 9: t = ...
+ * A: else <- start scope C
+ * B: t = ...
+ * C: }
+ *
+ */
+
+ const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope();
+
+ if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) {
+ /* We are at the end of scope C and already recorded a write
+ * within an IF scope (A), the sibling of the parent ELSE scope B,
+ * and it is not yet resolved. Mark that as the last relevant
+ * IF scope. Below the write will be resolved for the A/B
+ * scope pair.
+ */
+ current_unpaired_if_write_scope = parent_ifelse;
+ } else {
+ current_unpaired_if_write_scope = nullptr;
+ }
+ /* Promote the first write scope to the enclosing scope because
+ * the current IF/ELSE pair is now irrelevant for the analysis.
+ * This is also required to evaluate the minimum life time for t in
+ * {
+ * var t;
+ * if (a)
+ * t = ...
+ * else
+ * t = ...
+ * x = t;
+ * ...
+ * }
+ */
+ first_write_scope = scope.parent();
+
+ /* If some parent is IF/ELSE and in a loop then propagate the
+ * write to that scope. Otherwise the write is unconditional
+ * because it happens in both corresponding IF/ELSE branches
+ * in this loop, and hence, record the loop id to signal the
+ * resolution.
+ */
+ if (parent_ifelse && parent_ifelse->is_in_loop()) {
+ record_ifelse_write(*parent_ifelse);
+ } else {
+ conditionality_in_loop_id = scope.innermost_loop()->id();
+ }
+ } else {
+ /* The temporary was not written in the IF branch corresponding
+ * to this ELSE branch, hence the write is conditional.
+ */
+ conditionality_in_loop_id = write_is_conditional;
+ }
+}
+
+bool temp_comp_access::conditional_ifelse_write_in_loop() const
+{
+ return conditionality_in_loop_id <= conditionality_unresolved;
+}
+
+void temp_comp_access::propagate_live_range_to_dominant_write_scope()
+{
+ first_write = first_write_scope->begin();
+ int lr = first_write_scope->end();
+
+ if (last_read < lr)
+ last_read = lr;
+}
+
+register_live_range temp_comp_access::get_required_live_range()
+{
+ bool keep_for_full_loop = false;
+
+   /* If this register component is not used at all, or is only read, mark
+    * it as unused and ignore it when renaming.
+    * glsl_to_tgsi_visitor::renumber_registers will take care of
+    * eliminating registers that are not written to.
+    */
+ if (last_write < 0)
+ return make_live_range(-1, -1);
+
+ assert(first_write_scope);
+
+   /* If the component is only written to, just make sure it is not
+    * reused within the range of those writes.
+    */
+ if (!last_read_scope)
+ return make_live_range(first_write, last_write + 1);
+
+ const prog_scope *enclosing_scope_first_read = first_read_scope;
+ const prog_scope *enclosing_scope_first_write = first_write_scope;
+
+   /* If we read before writing in a loop,
+    * the value must survive the whole loop.
+    */
+ if ((first_read <= first_write) &&
+ first_read_scope->is_in_loop()) {
+ keep_for_full_loop = true;
+ enclosing_scope_first_read = first_read_scope->outermost_loop();
+ }
+
+ /* A conditional write within a (nested) loop must survive the outermost
+ * loop if the last read was not within the same scope.
+ */
+ const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional();
+ if (conditional && !conditional->contains_range_of(*last_read_scope) &&
+ (conditional->is_switchcase_scope_in_loop() ||
+ conditional_ifelse_write_in_loop())) {
+ keep_for_full_loop = true;
+ enclosing_scope_first_write = conditional->outermost_loop();
+ }
+
+ /* Evaluate the scope that is shared by all: required first write scope,
+ * required first read before write scope, and last read scope.
+ */
+ const prog_scope *enclosing_scope = enclosing_scope_first_read;
+ if (enclosing_scope_first_write->contains_range_of(*enclosing_scope))
+ enclosing_scope = enclosing_scope_first_write;
+
+ if (last_read_scope->contains_range_of(*enclosing_scope))
+ enclosing_scope = last_read_scope;
+
+ while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) ||
+ !enclosing_scope->contains_range_of(*last_read_scope)) {
+ enclosing_scope = enclosing_scope->parent();
+ assert(enclosing_scope);
+ }
+
+ /* Propagate the last read scope to the target scope */
+ while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) {
+      /* If the read is in a loop and we have to move up the scope, we need to
+       * extend the live range to the end of this current loop because at this
+       * point we don't know whether the component was written unconditionally
+       * earlier in the same loop.
+       */
+ if (last_read_scope->is_loop())
+ last_read = last_read_scope->end();
+
+ last_read_scope = last_read_scope->parent();
+ }
+
+ /* If the variable has to be kept for the whole loop, and we
+ * are currently in a loop, then propagate the live range.
+ */
+ if (keep_for_full_loop && first_write_scope->is_loop())
+ propagate_live_range_to_dominant_write_scope();
+
+ /* Propagate the first_dominant_write scope to the target scope */
+ while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) {
+ /* Propagate live_range if there was a break in a loop and the write was
+ * after the break inside that loop. Note, that this is only needed if
+ * we move up in the scopes.
+ */
+ if (first_write_scope->loop_break_line() < first_write) {
+ keep_for_full_loop = true;
+ propagate_live_range_to_dominant_write_scope();
+ }
+
+ first_write_scope = first_write_scope->parent();
+
+ /* Propagate live_range if we are now in a loop */
+ if (keep_for_full_loop && first_write_scope->is_loop())
+ propagate_live_range_to_dominant_write_scope();
+ }
+
+ /* The last write past the last read is dead code, but we have to
+ * ensure that the component is not reused too early, hence extend the
+ * live_range past the last write.
+ */
+ if (last_write >= last_read)
+ last_read = last_write + 1;
+
+ /* Here we are at the same scope, all is resolved */
+ return make_live_range(first_write, last_read);
+}
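+
+/* Illustrative example (hypothetical line numbers): given
+ *
+ *   1: loop {
+ *   2:    x = t;
+ *   3:    t = f();
+ *   4: }
+ *
+ * first_read (2) precedes first_write (3) inside the loop, so
+ * keep_for_full_loop is set and the live range is widened from [3, 4]
+ * to the whole loop body [1, 4]. */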
+
+/* Helper class for sorting and searching the registers based
+ * on live ranges. */
+class register_merge_record {
+public:
+ int begin;
+ int end;
+ int reg;
+ bool erase;
+ bool is_array_elm;
+
+ bool operator < (const register_merge_record& rhs) const {
+ return begin < rhs.begin;
+ }
+};
+
+LiverangeEvaluator::LiverangeEvaluator():
+ line(0),
+ loop_id(1),
+ if_id(1),
+ switch_id(0),
+ is_at_end(false),
+ n_scopes(1),
+ cur_scope(nullptr)
+{
+}
+
+void LiverangeEvaluator::run(const Shader& shader,
+ std::vector<register_live_range>& register_live_ranges)
+{
+ temp_acc.resize(register_live_ranges.size());
+ fill(temp_acc.begin(), temp_acc.end(), temp_access());
+
+ sfn_log << SfnLog::merge << "have " << temp_acc.size() << " temps\n";
+
+ for (const auto& block: shader.m_ir) {
+ for (const auto& ir: block) {
+ switch (ir->type()) {
+ case Instruction::cond_if:
+ case Instruction::cond_else:
+ case Instruction::loop_begin:
+ ++n_scopes;
+ default:
+ ;
+ }
+ }
+ }
+
+ scopes.reset(new prog_scope_storage(n_scopes));
+
+ cur_scope = scopes->create(nullptr, outer_scope, 0, 0, line);
+
+ line = 0;
+
+ for (auto& v: shader.m_temp) {
+ if (v.second->type() == Value::gpr) {
+ sfn_log << SfnLog::merge << "Record " << *v.second << "\n";
+ const auto& g = static_cast<const GPRValue&>(*v.second);
+ if (g.is_input()) {
+ sfn_log << SfnLog::merge << "Record INPUT write for "
+ << g << " in " << temp_acc.size() << " temps\n";
+ temp_acc[g.sel()].record_write(line, cur_scope, 1 << g.chan(), false);
+ temp_acc[g.sel()].record_read(line, cur_scope, 1 << g.chan(), false);
+ }
+ if (g.keep_alive()) {
+ sfn_log << SfnLog::merge << "Record KEEP ALIVE for "
+ << g << " in " << temp_acc.size() << " temps\n";
+ temp_acc[g.sel()].record_read(0x7fffff, cur_scope, 1 << g.chan(), false);
+ }
+ }
+ }
+
+ for (const auto& block: shader.m_ir)
+ for (const auto& ir: block) {
+ ir->evalue_liveness(*this);
+ if (ir->type() != Instruction::alu ||
+ static_cast<const AluInstruction&>(*ir).flag(alu_last_instr))
+ ++line;
+ }
+
+ assert(cur_scope->type() == outer_scope);
+ cur_scope->set_end(line);
+ is_at_end = true;
+
+ get_required_live_ranges(register_live_ranges);
+}
+
+
+void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm)
+{
+ sfn_log << SfnLog::merge << "Record read l:" << line << " reg:" << src << "\n";
+ if (src.type() == Value::gpr) {
+ const GPRValue& v = static_cast<const GPRValue&>(src);
+ if (v.chan() < 4)
+ temp_acc[v.sel()].record_read(v.keep_alive() ? 0x7fffff: line, cur_scope, 1 << v.chan(), is_array_elm);
+ return;
+ } else if (src.type() == Value::gpr_array_value) {
+ const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
+ v.record_read(*this);
+ } else if (src.type() == Value::kconst) {
+ const UniformValue& v = static_cast<const UniformValue&>(src);
+ if (v.addr())
+ record_read(*v.addr(),is_array_elm);
+ }
+}
+
+void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm)
+{
+ sfn_log << SfnLog::merge << "Record write for "
+ << src << " in " << temp_acc.size() << " temps\n";
+
+ if (src.type() == Value::gpr) {
+ const GPRValue& v = static_cast<const GPRValue&>(src);
+ assert(v.sel() < temp_acc.size());
+ if (v.chan() < 4)
+ temp_acc[v.sel()].record_write(line, cur_scope, 1 << v.chan(), is_array_elm);
+ return;
+ } else if (src.type() == Value::gpr_array_value) {
+ const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
+ v.record_write(*this);
+ } else if (src.type() == Value::kconst) {
+ const UniformValue& v = static_cast<const UniformValue&>(src);
+ if (v.addr())
+ record_write(*v.addr(),is_array_elm);
+ }
+}
+
+void LiverangeEvaluator::record_read(const GPRVector& src)
+{
+ for (int i = 0; i < 4; ++i)
+ if (src.reg_i(i))
+ record_read(*src.reg_i(i));
+}
+
+void LiverangeEvaluator::record_write(const GPRVector& dst)
+{
+ for (int i = 0; i < 4; ++i)
+ if (dst.reg_i(i))
+ record_write(*dst.reg_i(i));
+}
+
+void LiverangeEvaluator::get_required_live_ranges(std::vector<register_live_range>& register_live_ranges)
+{
+ sfn_log << SfnLog::merge << "== register live ranges ==========\n";
+ for(unsigned i = 0; i < register_live_ranges.size(); ++i) {
+ sfn_log << SfnLog::merge << setw(4) << i;
+ register_live_ranges[i] = temp_acc[i].get_required_live_range();
+ sfn_log << SfnLog::merge << ": [" << register_live_ranges[i].begin << ", "
+ << register_live_ranges[i].end << "]\n";
+ }
+ sfn_log << SfnLog::merge << "==================================\n\n";
+}
+
+void LiverangeEvaluator::scope_if()
+{
+ cur_scope = scopes->create(cur_scope, if_branch, if_id++,
+ cur_scope->nesting_depth() + 1, line + 1);
+}
+
+void LiverangeEvaluator::scope_else()
+{
+ assert(cur_scope->type() == if_branch);
+ cur_scope->set_end(line - 1);
+ cur_scope = scopes->create(cur_scope->parent(), else_branch,
+ cur_scope->id(), cur_scope->nesting_depth(),
+ line + 1);
+}
+
+void LiverangeEvaluator::scope_endif()
+{
+ cur_scope->set_end(line - 1);
+ cur_scope = cur_scope->parent();
+ assert(cur_scope);
+}
+
+void LiverangeEvaluator::scope_loop_begin()
+{
+ cur_scope = scopes->create(cur_scope, loop_body, loop_id++,
+ cur_scope->nesting_depth() + 1, line);
+}
+
+void LiverangeEvaluator::scope_loop_end()
+{
+ assert(cur_scope->type() == loop_body);
+ cur_scope->set_end(line);
+ cur_scope = cur_scope->parent();
+ assert(cur_scope);
+}
+
+void LiverangeEvaluator::scope_loop_break()
+{
+ cur_scope->set_loop_break_line(line);
+}
+
+/* This function evaluates the register merges by using a binary
+ * search to find suitable merge candidates. */
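+
+/* Illustrative example (hypothetical live ranges): given
+ * r0:[0,10], r1:[4,8], r2:[12,20]
+ * sorted by begin, r0 is the first target; the search skips r1 (it
+ * overlaps r0) and finds r2 with begin(12) > end(10), so r2 is renamed
+ * to r0 and the merged range becomes [0,20]. r1 then becomes the next
+ * target. */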
+
+std::vector<rename_reg_pair>
+get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges)
+{
+
+ std::vector<rename_reg_pair> result(live_ranges.size(), rename_reg_pair{false, false, 0});
+ std::vector<register_merge_record> reg_access;
+
+ for (unsigned i = 0; i < live_ranges.size(); ++i) {
+ if (live_ranges[i].begin >= 0) {
+ register_merge_record r;
+ r.begin = live_ranges[i].begin;
+ r.end = live_ranges[i].end;
+ r.is_array_elm = live_ranges[i].is_array_elm;
+ r.reg = i;
+ r.erase = false;
+ reg_access.push_back(r);
+ }
+ }
+
+ std::sort(reg_access.begin(), reg_access.end());
+
+ for (auto& r : reg_access)
+ sfn_log << SfnLog::merge << "Use Range " << r.reg << " ["
+ << r.begin << ", " << r.end << "]\n";
+
+ auto trgt = reg_access.begin();
+ auto reg_access_end = reg_access.end();
+ auto first_erase = reg_access_end;
+ auto search_start = trgt + 1;
+
+ while (trgt != reg_access_end) {
+ /* Find the next register that has a live range starting past the
+ * search start and that is not an array element. Array elements can't
+ * be moved (moving the whole array could be implemented later). */
+
+ sfn_log << SfnLog::merge << "Next target is "
+ << trgt->reg << "[" << trgt->begin << ", " << trgt->end << "]\n";
+
+
+ auto src = upper_bound(search_start, reg_access_end, trgt->end,
+ [](int bound, const register_merge_record& m){
+ return bound < m.begin && !m.is_array_elm;}
+ );
+
+ if (src != reg_access_end) {
+ result[src->reg].new_reg = trgt->reg;
+ result[src->reg].valid = true;
+
+ sfn_log << SfnLog::merge << "Map "
+ << src->reg << "[" << src->begin << ", " << src->end << "] to "
+ << trgt->reg << "[" << trgt->begin << ", " << trgt->end << ":";
+ trgt->end = src->end;
+ sfn_log << SfnLog::merge << trgt->end << "]\n";
+
+ /* Since we only search forward, don't remove the renamed
+ * register just now, only mark it. */
+ src->erase = true;
+
+ if (first_erase == reg_access_end)
+ first_erase = src;
+
+ search_start = src + 1;
+ } else {
+ /* Before moving to the next target register, remove the
+ * already merged registers from the search range */
+ if (first_erase != reg_access_end) {
+ auto outp = first_erase;
+ auto inp = first_erase + 1;
+
+ while (inp != reg_access_end) {
+ if (!inp->erase)
+ *outp++ = *inp;
+ ++inp;
+ }
+
+ reg_access_end = outp;
+ first_erase = reg_access_end;
+ }
+ ++trgt;
+ search_start = trgt + 1;
+ }
+ }
+ return result;
+}
+
+} // end ns r600
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h
new file mode 100644
index 000000000..8b9ed2ef2
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h
@@ -0,0 +1,314 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_LIVERANGE_H
+#define SFN_LIVERANGE_H
+
+#include <cstdint>
+#include <ostream>
+#include <vector>
+#include <limits>
+
+#include "sfn_instruction_base.h"
+#include "sfn_nir.h"
+
+namespace r600 {
+
+/** Storage to record the required live range of a temporary register.
+ * begin == end == -1 indicates that the register can be reused without
+ * limitations. Otherwise, "begin" indicates the first instruction in which
+ * a write operation may target this temporary, and end indicates the
+ * last instruction in which a value can be read from this temporary.
+ * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin.
+ */
+struct register_live_range {
+ int begin;
+ int end;
+ bool is_array_elm;
+};
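+
+/* Example (illustrative values): R1 = {begin: 2, end: 7} and
+ * R2 = {begin: 7, end: 12} may share a hardware register because
+ * R1.end <= R2.begin, whereas R3 = {begin: 5, end: 9} could not be
+ * merged with R1. */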
+
+enum prog_scope_type {
+ outer_scope, /* Outer program scope */
+ loop_body, /* Inside a loop */
+ if_branch, /* Inside if branch */
+ else_branch, /* Inside else branch */
+ switch_body, /* Inside switch statement */
+ switch_case_branch, /* Inside switch case statement */
+ switch_default_branch, /* Inside switch default statement */
+ undefined_scope
+};
+
+class prog_scope {
+public:
+ prog_scope();
+ prog_scope(prog_scope *parent, prog_scope_type type, int id,
+ int depth, int begin);
+
+ prog_scope_type type() const;
+ prog_scope *parent() const;
+ int nesting_depth() const;
+ int id() const;
+ int end() const;
+ int begin() const;
+ int loop_break_line() const;
+
+ const prog_scope *in_else_scope() const;
+ const prog_scope *in_ifelse_scope() const;
+ const prog_scope *in_parent_ifelse_scope() const;
+ const prog_scope *innermost_loop() const;
+ const prog_scope *outermost_loop() const;
+ const prog_scope *enclosing_conditional() const;
+
+ bool is_loop() const;
+ bool is_in_loop() const;
+ bool is_switchcase_scope_in_loop() const;
+ bool is_conditional() const;
+ bool is_child_of(const prog_scope *scope) const;
+ bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const;
+
+ bool break_is_for_switchcase() const;
+ bool contains_range_of(const prog_scope& other) const;
+
+ void set_end(int end);
+ void set_loop_break_line(int line);
+
+private:
+ prog_scope_type scope_type;
+ int scope_id;
+ int scope_nesting_depth;
+ int scope_begin;
+ int scope_end;
+ int break_loop_line;
+ prog_scope *parent_scope;
+};
+
+/* Some storage class to encapsulate the prog_scope (de-)allocations */
+class prog_scope_storage {
+public:
+ prog_scope_storage(int n);
+ ~prog_scope_storage();
+ prog_scope * create(prog_scope *p, prog_scope_type type, int id,
+ int lvl, int s_begin);
+private:
+ int current_slot;
+ std::vector<prog_scope> storage;
+};
+
+/* Class to track the access to a component of a temporary register. */
+
+class temp_comp_access {
+public:
+ temp_comp_access();
+
+ void record_read(int line, prog_scope *scope);
+ void record_write(int line, prog_scope *scope);
+ register_live_range get_required_live_range();
+private:
+ void propagate_live_range_to_dominant_write_scope();
+ bool conditional_ifelse_write_in_loop() const;
+
+ void record_ifelse_write(const prog_scope& scope);
+ void record_if_write(const prog_scope& scope);
+ void record_else_write(const prog_scope& scope);
+
+ prog_scope *last_read_scope;
+ prog_scope *first_read_scope;
+ prog_scope *first_write_scope;
+
+ int first_write;
+ int last_read;
+ int last_write;
+ int first_read;
+
+ /* This member variable tracks the current resolution of conditional writing
+ * to this temporary in IF/ELSE clauses.
+ *
+ * The initial value "conditionality_untouched" indicates that this
+ * temporary has not yet been written to within an if clause.
+ *
+ * A positive (other than "conditionality_untouched") number refers to the
+ * last loop id for which the write was resolved as unconditional. With each
+ * new loop this value will be overwritten by "conditionality_unresolved"
+ * on entering the first IF clause writing this temporary.
+ *
+ * The value "conditionality_unresolved" indicates that no resolution has
+ * been achieved so far. If the variable is set to this value at the end of
+ * the processing of the whole shader it also indicates a conditional write.
+ *
+ * The value "write_is_conditional" marks that the variable is written
+ * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
+ * least one loop.
+ */
+ int conditionality_in_loop_id;
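+
+ /* Hypothetical trace (inferred from the comment above): an IF-only write
+ * inside loop 3 first sets this to conditionality_unresolved; a matching
+ * ELSE write then resolves it to the loop id 3; an IF write without an
+ * ELSE counterpart in a later loop demotes it to write_is_conditional. */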
+
+ /* Helper constants to make the tracking code more readable. */
+ static const int write_is_conditional = -1;
+ static const int conditionality_unresolved = 0;
+ static const int conditionality_untouched;
+ static const int write_is_unconditional;
+
+ /* A bit field tracking the nesting levels of if-else clauses where the
+ * temporary has (so far) been written to in the if branch, but not in the
+ * else branch.
+ */
+ unsigned int if_scope_write_flags;
+
+ int next_ifelse_nesting_depth;
+ static const int supported_ifelse_nesting_depth = 32;
+
+ /* Tracks the last if scope in which the temporary was written to
+ * without a write in the corresponding else branch. It is also used
+ * to track read-before-write in that scope.
+ */
+ const prog_scope *current_unpaired_if_write_scope;
+
+ /* Flag to resolve read-before-write in the else scope. */
+ bool was_written_in_current_else_scope;
+};
+
+/* Class to track the access to all components of a temporary register. */
+class temp_access {
+public:
+ temp_access();
+ void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
+ void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
+ register_live_range get_required_live_range();
+private:
+ void update_access_mask(int mask);
+
+ temp_comp_access comp[4];
+ int access_mask;
+ bool needs_component_tracking;
+ bool is_array_element;
+};
+
+/* Helper class to merge the live ranges of arrays.
+ *
+ * For arrays the array length, live range, and component access need to
+ * be kept, because when live ranges are merged or arrays are interleaved
+ * one can only merge or interleave an array into another with equal or more
+ * elements. For interleaving it is also required that the sum of used swizzles
+ * is at most four.
+ */
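+
+/* Illustrative example: an array A (length 4, using components xy) can be
+ * interleaved into an array B (length 6, using components zw), since B has
+ * at least as many elements and 2 + 2 <= 4 used components; merging A into
+ * a length-2 array would not be allowed. */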
+
+class array_live_range {
+public:
+ array_live_range();
+ array_live_range(unsigned aid, unsigned alength);
+ array_live_range(unsigned aid, unsigned alength, int first_access,
+ int last_access, int mask);
+
+ void set_live_range(int first_access, int last_access);
+ void set_begin(int _begin){first_access = _begin;}
+ void set_end(int _end){last_access = _end;}
+ void set_access_mask(int s);
+
+ static void merge(array_live_range *a, array_live_range *b);
+ static void interleave(array_live_range *a, array_live_range *b);
+
+ int array_id() const {return id;}
+ int target_array_id() const {return target_array ? target_array->id : 0;}
+ const array_live_range *final_target() const {return target_array ?
+ target_array->final_target() : this;}
+ unsigned array_length() const { return length;}
+ int begin() const { return first_access;}
+ int end() const { return last_access;}
+ int access_mask() const { return component_access_mask;}
+ int used_components() const {return used_component_count;}
+
+ bool time_doesnt_overlap(const array_live_range& other) const;
+
+ void print(std::ostream& os) const;
+
+ bool is_mapped() const { return target_array != nullptr;}
+
+ int8_t remap_one_swizzle(int8_t idx) const;
+
+private:
+ void init_swizzles();
+ void set_target(array_live_range *target);
+ void merge_live_range_from(array_live_range *other);
+ void interleave_into(array_live_range *other);
+
+ unsigned id;
+ unsigned length;
+ int first_access;
+ int last_access;
+ uint8_t component_access_mask;
+ uint8_t used_component_count;
+ array_live_range *target_array;
+ int8_t swizzle_map[4];
+};
+
+
+
+class LiverangeEvaluator {
+public:
+ LiverangeEvaluator();
+
+ void run(const Shader& shader,
+ std::vector<register_live_range> &register_live_ranges);
+
+ void scope_if();
+ void scope_else();
+ void scope_endif();
+ void scope_loop_begin();
+ void scope_loop_end();
+ void scope_loop_break();
+
+ void record_read(const Value& src, bool is_array_elm = false);
+ void record_write(const Value& dst, bool is_array_elm = false);
+
+ void record_read(const GPRVector& src);
+ void record_write(const GPRVector& dst);
+
+private:
+
+ prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id,
+ int lvl, int s_begin);
+
+
+ void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges);
+
+ int line;
+ int loop_id;
+ int if_id;
+ int switch_id;
+ bool is_at_end;
+ int n_scopes;
+ std::unique_ptr<prog_scope_storage> scopes;
+ prog_scope *cur_scope;
+
+ std::vector<temp_access> temp_acc;
+
+};
+
+std::vector<rename_reg_pair>
+get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges);
+
+} // end namespace r600
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp
new file mode 100644
index 000000000..b421f838c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -0,0 +1,1076 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_nir.h"
+#include "nir_builder.h"
+
+#include "../r600_pipe.h"
+#include "../r600_shader.h"
+
+#include "sfn_instruction_tex.h"
+
+#include "sfn_shader_vertex.h"
+#include "sfn_shader_fragment.h"
+#include "sfn_shader_geometry.h"
+#include "sfn_shader_compute.h"
+#include "sfn_shader_tcs.h"
+#include "sfn_shader_tess_eval.h"
+#include "sfn_nir_lower_fs_out_to_vector.h"
+#include "sfn_ir_to_assembly.h"
+
+#include <vector>
+
+namespace r600 {
+
+using std::vector;
+
+
+NirLowerInstruction::NirLowerInstruction():
+ b(nullptr)
+{
+
+}
+
+bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data)
+{
+ auto me = reinterpret_cast<const NirLowerInstruction*>(data);
+ return me->filter(instr);
+}
+
+nir_ssa_def *NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+ auto me = reinterpret_cast<NirLowerInstruction*>(data);
+ me->set_builder(b);
+ return me->lower(instr);
+}
+
+bool NirLowerInstruction::run(nir_shader *shader)
+{
+ return nir_shader_lower_instructions(shader,
+ filter_instr,
+ lower_instr,
+ (void *)this);
+}
+
+
+ShaderFromNir::ShaderFromNir():sh(nullptr),
+ chip_class(CLASS_UNKNOWN),
+ m_current_if_id(0),
+ m_current_loop_id(0),
+ scratch_size(0)
+{
+}
+
+bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader,
+ r600_pipe_shader_selector *sel, r600_shader_key& key,
+ struct r600_shader* gs_shader, enum chip_class _chip_class)
+{
+ sh = shader;
+ chip_class = _chip_class;
+ assert(sh);
+
+ switch (shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ sfn_log << SfnLog::trans << "Start TCS\n";
+ impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class));
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ sfn_log << SfnLog::trans << "Start TESS_EVAL\n";
+ impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class));
+ break;
+ case MESA_SHADER_GEOMETRY:
+ sfn_log << SfnLog::trans << "Start GS\n";
+ impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class));
+ break;
+ case MESA_SHADER_FRAGMENT:
+ sfn_log << SfnLog::trans << "Start FS\n";
+ impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class));
+ break;
+ case MESA_SHADER_COMPUTE:
+ sfn_log << SfnLog::trans << "Start CS\n";
+ impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class));
+ break;
+ default:
+ return false;
+ }
+
+ sfn_log << SfnLog::trans << "Process declarations\n";
+ if (!process_declaration())
+ return false;
+
+ // at this point all functions should be inlined
+ const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions));
+
+ sfn_log << SfnLog::trans << "Scan shader\n";
+
+ if (sfn_log.has_debug_flag(SfnLog::instr))
+ nir_print_shader(const_cast<nir_shader *>(shader), stderr);
+
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block) {
+ if (!impl->scan_instruction(instr)) {
+ fprintf(stderr, "Unhandled sysvalue access ");
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "\n");
+ return false;
+ }
+ }
+ }
+
+ sfn_log << SfnLog::trans << "Reserve registers\n";
+ if (!impl->allocate_reserved_registers()) {
+ return false;
+ }
+
+ ValuePool::array_list arrays;
+ sfn_log << SfnLog::trans << "Allocate local registers\n";
+ foreach_list_typed(nir_register, reg, node, &func->impl->registers) {
+ impl->allocate_local_register(*reg, arrays);
+ }
+
+ sfn_log << SfnLog::trans << "Emit shader start\n";
+ impl->allocate_arrays(arrays);
+
+ impl->emit_shader_start();
+
+ sfn_log << SfnLog::trans << "Process shader \n";
+ foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
+ if (!process_cf_node(node))
+ return false;
+ }
+
+ // Add optimizations here
+ sfn_log << SfnLog::trans << "Finalize\n";
+ impl->finalize();
+
+ impl->get_array_info(pipe_shader->shader);
+
+ if (!sfn_log.has_debug_flag(SfnLog::nomerge)) {
+ sfn_log << SfnLog::trans << "Merge registers\n";
+ impl->remap_registers();
+ }
+
+ sfn_log << SfnLog::trans << "Finished translating to R600 IR\n";
+ return true;
+}
+
+Shader ShaderFromNir::shader() const
+{
+ return Shader{impl->m_output, impl->get_temp_registers()};
+}
+
+
+bool ShaderFromNir::process_cf_node(nir_cf_node *node)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "CF");
+ switch (node->type) {
+ case nir_cf_node_block:
+ return process_block(nir_cf_node_as_block(node));
+ case nir_cf_node_if:
+ return process_if(nir_cf_node_as_if(node));
+ case nir_cf_node_loop:
+ return process_loop(nir_cf_node_as_loop(node));
+ default:
+ return false;
+ }
+}
+
+bool ShaderFromNir::process_if(nir_if *if_stmt)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "IF");
+
+ if (!impl->emit_if_start(m_current_if_id, if_stmt))
+ return false;
+
+ int if_id = m_current_if_id++;
+ m_if_stack.push(if_id);
+
+ foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
+ if (!process_cf_node(n)) return false;
+
+ if (!if_stmt->then_list.is_empty()) {
+ if (!impl->emit_else_start(if_id))
+ return false;
+
+ foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
+ if (!process_cf_node(n)) return false;
+ }
+
+ if (!impl->emit_ifelse_end(if_id))
+ return false;
+
+ m_if_stack.pop();
+ return true;
+}
+
+bool ShaderFromNir::process_loop(nir_loop *node)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
+ int loop_id = m_current_loop_id++;
+
+ if (!impl->emit_loop_start(loop_id))
+ return false;
+
+ foreach_list_typed(nir_cf_node, n, node, &node->body)
+ if (!process_cf_node(n)) return false;
+
+ if (!impl->emit_loop_end(loop_id))
+ return false;
+
+ return true;
+}
+
+bool ShaderFromNir::process_block(nir_block *block)
+{
+ SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
+ nir_foreach_instr(instr, block) {
+ int r = emit_instruction(instr);
+ if (!r) {
+ sfn_log << SfnLog::err << "R600: Unsupported instruction: "
+ << *instr << "\n";
+ return false;
+ }
+ }
+ return true;
+}
+
+
+ShaderFromNir::~ShaderFromNir()
+{
+}
+
+pipe_shader_type ShaderFromNir::processor_type() const
+{
+ return impl->m_processor_type;
+}
+
+
+bool ShaderFromNir::emit_instruction(nir_instr *instr)
+{
+ assert(impl);
+
+ sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n";
+
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return impl->emit_alu_instruction(instr);
+ case nir_instr_type_deref:
+ return impl->emit_deref_instruction(nir_instr_as_deref(instr));
+ case nir_instr_type_intrinsic:
+ return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr));
+ case nir_instr_type_load_const: /* const values are loaded when needed */
+ return true;
+ case nir_instr_type_tex:
+ return impl->emit_tex_instruction(instr);
+ case nir_instr_type_jump:
+ return impl->emit_jump_instruction(nir_instr_as_jump(instr));
+ default:
+ fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type);
+ nir_print_instr(instr, stderr);
+ fprintf(stderr, "'\n");
+ return false;
+ case nir_instr_type_ssa_undef:
+ return impl->create_undef(nir_instr_as_ssa_undef(instr));
+ }
+}
+
+bool ShaderFromNir::process_declaration()
+{
+
+ if (!impl->scan_inputs_read(sh))
+ return false;
+
+ // scan declarations
+ nir_foreach_variable_with_modes(variable, sh, nir_var_uniform |
+ nir_var_mem_ubo |
+ nir_var_mem_ssbo) {
+ if (!impl->process_uniforms(variable)) {
+ fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const
+{
+ assert(impl);
+ return impl->m_output;
+}
+
+
+AssemblyFromShader::~AssemblyFromShader()
+{
+}
+
+bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir)
+{
+ return do_lower(ir);
+}
+
+static nir_ssa_def *
+r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options)
+{
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ switch (alu->op) {
+ case nir_op_unpack_half_2x16: {
+ nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0);
+ return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed),
+ nir_unpack_half_2x16_split_y(b, packed));
+
+ }
+ case nir_op_pack_half_2x16: {
+ nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0);
+ return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0),
+ nir_channel(b, src_vec2, 1));
+ }
+ default:
+ return nullptr;
+ }
+}
+
+bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options)
+{
+ return instr->type == nir_instr_type_alu;
+}
+
+bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader)
+{
+ return nir_shader_lower_instructions(shader,
+ r600_nir_lower_pack_unpack_2x16_filter,
+ r600_nir_lower_pack_unpack_2x16_impl,
+ nullptr);
+}
+
+static void
+r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr)
+{
+ b->cursor = nir_before_instr(&instr->instr);
+
+ int address_index = 0;
+ int align;
+
+ if (instr->intrinsic == nir_intrinsic_store_scratch) {
+ align = instr->src[0].ssa->num_components;
+ address_index = 1;
+ } else {
+ align = instr->dest.ssa.num_components;
+ }
+
+ nir_ssa_def *address = instr->src[address_index].ssa;
+ nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align));
+
+ nir_instr_rewrite_src(&instr->instr, &instr->src[address_index],
+ nir_src_for_ssa(new_address));
+}
+
+bool r600_lower_scratch_addresses(nir_shader *shader)
+{
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ nir_builder build;
+ nir_builder_init(&build, function->impl);
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+ if (op->intrinsic != nir_intrinsic_load_scratch &&
+ op->intrinsic != nir_intrinsic_store_scratch)
+ continue;
+ r600_nir_lower_scratch_address_impl(&build, op);
+ progress = true;
+ }
+ }
+ }
+ return progress;
+}
+
+static void
+insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var)
+{
+ nir_foreach_variable_in_list(var, var_list) {
+ if (var->data.binding > new_var->data.binding ||
+ (var->data.binding == new_var->data.binding &&
+ var->data.offset > new_var->data.offset)) {
+ exec_node_insert_node_before(&var->node, &new_var->node);
+ return;
+ }
+ }
+ exec_list_push_tail(var_list, &new_var->node);
+}
+
+void sort_uniforms(nir_shader *shader)
+{
+ struct exec_list new_list;
+ exec_list_make_empty(&new_list);
+
+ nir_foreach_uniform_variable_safe(var, shader) {
+ exec_node_remove(&var->node);
+ insert_uniform_sorted(&new_list, var);
+ }
+ exec_list_append(&shader->variables, &new_list);
+}
+
+static void
+insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var)
+{
+
+ nir_foreach_variable_in_list(var, var_list) {
+ if (var->data.location > new_var->data.location ||
+ (var->data.location == new_var->data.location &&
+ var->data.index > new_var->data.index)) {
+ exec_node_insert_node_before(&var->node, &new_var->node);
+ return;
+ }
+ }
+
+ exec_list_push_tail(var_list, &new_var->node);
+}
+
+void sort_fsoutput(nir_shader *shader)
+{
+ struct exec_list new_list;
+ exec_list_make_empty(&new_list);
+
+ nir_foreach_shader_out_variable_safe(var, shader) {
+ exec_node_remove(&var->node);
+ insert_fsoutput_sorted(&new_list, var);
+ }
+
+ unsigned driver_location = 0;
+ nir_foreach_variable_in_list(var, &new_list)
+ var->data.driver_location = driver_location++;
+
+ exec_list_append(&shader->variables, &new_list);
+}
+
+}
+
+static nir_intrinsic_op
+r600_map_atomic(nir_intrinsic_op op)
+{
+ switch (op) {
+ case nir_intrinsic_atomic_counter_read_deref:
+ return nir_intrinsic_atomic_counter_read;
+ case nir_intrinsic_atomic_counter_inc_deref:
+ return nir_intrinsic_atomic_counter_inc;
+ case nir_intrinsic_atomic_counter_pre_dec_deref:
+ return nir_intrinsic_atomic_counter_pre_dec;
+ case nir_intrinsic_atomic_counter_post_dec_deref:
+ return nir_intrinsic_atomic_counter_post_dec;
+ case nir_intrinsic_atomic_counter_add_deref:
+ return nir_intrinsic_atomic_counter_add;
+ case nir_intrinsic_atomic_counter_min_deref:
+ return nir_intrinsic_atomic_counter_min;
+ case nir_intrinsic_atomic_counter_max_deref:
+ return nir_intrinsic_atomic_counter_max;
+ case nir_intrinsic_atomic_counter_and_deref:
+ return nir_intrinsic_atomic_counter_and;
+ case nir_intrinsic_atomic_counter_or_deref:
+ return nir_intrinsic_atomic_counter_or;
+ case nir_intrinsic_atomic_counter_xor_deref:
+ return nir_intrinsic_atomic_counter_xor;
+ case nir_intrinsic_atomic_counter_exchange_deref:
+ return nir_intrinsic_atomic_counter_exchange;
+ case nir_intrinsic_atomic_counter_comp_swap_deref:
+ return nir_intrinsic_atomic_counter_comp_swap;
+ default:
+ return nir_num_intrinsics;
+ }
+}
+
+static bool
+r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
+ nir_shader *shader)
+{
+ nir_intrinsic_op op = r600_map_atomic(instr->intrinsic);
+ if (nir_num_intrinsics == op)
+ return false;
+
+ nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ if (var->data.mode != nir_var_uniform &&
+ var->data.mode != nir_var_mem_ssbo &&
+ var->data.mode != nir_var_mem_shared)
+ return false; /* atomics passed as function arguments can't be lowered */
+
+ const unsigned idx = var->data.binding;
+
+ b->cursor = nir_before_instr(&instr->instr);
+
+ nir_ssa_def *offset = nir_imm_int(b, var->data.index);
+ for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
+ d = nir_deref_instr_parent(d)) {
+ assert(d->deref_type == nir_deref_type_array);
+ assert(d->arr.index.is_ssa);
+
+ unsigned array_stride = 1;
+ if (glsl_type_is_array(d->type))
+ array_stride *= glsl_get_aoa_size(d->type);
+
+ offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa,
+ nir_imm_int(b, array_stride)));
+ }
+
+ /* Since the first source is a deref and the first source in the lowered
+ * instruction is the offset, we can just swap it out and change the
+ * opcode.
+ */
+ instr->intrinsic = op;
+ nir_instr_rewrite_src(&instr->instr, &instr->src[0],
+ nir_src_for_ssa(offset));
+ nir_intrinsic_set_base(instr, idx);
+
+ nir_deref_instr_remove_if_unused(deref);
+
+ return true;
+}
+
+static bool
+r600_nir_lower_atomics(nir_shader *shader)
+{
+ bool progress = false;
+
+ /* First redo the offsets: in hardware we start at zero for each new
+ * binding, and we use an offset of one per counter. */
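+ /* E.g. (hypothetical layout): two counters on binding 0 followed by one
+ * counter on binding 1 get var->data.index 0, 1, and 0, respectively. */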
+ int current_binding = -1;
+ int current_offset = 0;
+ nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
+ if (!var->type->contains_atomic())
+ continue;
+
+ if (current_binding == (int)var->data.binding) {
+ var->data.index = current_offset;
+ current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
+ } else {
+ current_binding = var->data.binding;
+ var->data.index = 0;
+ current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE;
+ }
+ }
+
+ nir_foreach_function(function, shader) {
+ if (!function->impl)
+ continue;
+
+ bool impl_progress = false;
+
+ nir_builder build;
+ nir_builder_init(&build, function->impl);
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ impl_progress |= r600_lower_deref_instr(&build,
+ nir_instr_as_intrinsic(instr), shader);
+ }
+ }
+
+ if (impl_progress) {
+ nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
+ progress = true;
+ }
+ }
+
+ return progress;
+}
+
+using r600::r600_nir_lower_int_tg4;
+using r600::r600_nir_lower_pack_unpack_2x16;
+using r600::r600_lower_scratch_addresses;
+using r600::r600_lower_fs_out_to_vector;
+using r600::r600_lower_ubo_to_align16;
+
+int
+r600_glsl_type_size(const struct glsl_type *type, bool is_bindless)
+{
+ return glsl_count_vec4_slots(type, false, is_bindless);
+}
+
+void
+r600_get_natural_size_align_bytes(const struct glsl_type *type,
+ unsigned *size, unsigned *align)
+{
+ if (type->base_type != GLSL_TYPE_ARRAY) {
+ *align = 1;
+ *size = 1;
+ } else {
+ unsigned elem_size, elem_align;
+ glsl_get_natural_size_align_bytes(type->fields.array,
+ &elem_size, &elem_align);
+ *align = 1;
+ *size = type->length;
+ }
+}
+
+static bool
+r600_lower_shared_io_impl(nir_function *func)
+{
+ nir_builder b;
+ nir_builder_init(&b, func->impl);
+
+ bool progress = false;
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr_safe(instr, block) {
+
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+ if (op->intrinsic != nir_intrinsic_load_shared &&
+ op->intrinsic != nir_intrinsic_store_shared)
+ continue;
+
+ b.cursor = nir_before_instr(instr);
+
+ if (op->intrinsic == nir_intrinsic_load_shared) {
+ nir_ssa_def *addr = op->src[0].ssa;
+
+ switch (nir_dest_num_components(op->dest)) {
+ case 2: {
+ auto addr2 = nir_iadd_imm(&b, addr, 4);
+ addr = nir_vec2(&b, addr, addr2);
+ break;
+ }
+ case 3: {
+ auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
+ addr = nir_vec3(&b, addr,
+ nir_channel(&b, addr2, 0),
+ nir_channel(&b, addr2, 1));
+ break;
+ }
+ case 4: {
+ addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
+ break;
+ }
+ }
+
+ auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
+ load->num_components = nir_dest_num_components(op->dest);
+ load->src[0] = nir_src_for_ssa(addr);
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ load->num_components, 32, NULL);
+ nir_ssa_def_rewrite_uses(&op->dest.ssa, &load->dest.ssa);
+ nir_builder_instr_insert(&b, &load->instr);
+ } else {
+ nir_ssa_def *addr = op->src[1].ssa;
+ for (int i = 0; i < 2; ++i) {
+ unsigned test_mask = (0x3 << 2 * i);
+ if (!(nir_intrinsic_write_mask(op) & test_mask))
+ continue;
+
+ auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
+ unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
+ nir_intrinsic_set_write_mask(store, writemask);
+ store->src[0] = nir_src_for_ssa(op->src[0].ssa);
+ store->num_components = store->src[0].ssa->num_components;
+ bool start_even = (writemask & (1u << (2 * i)));
+
+ auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
+ store->src[1] = nir_src_for_ssa(addr2);
+
+ nir_builder_instr_insert(&b, &store->instr);
+ }
+ }
+ nir_instr_remove(instr);
+ progress = true;
+ }
+ }
+ return progress;
+}
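+
+/* Illustrative effect of the pass above (hypothetical shader): a
+ * 4-component load_shared at byte address A becomes a
+ * load_local_shared_r600 whose address operand is the vector
+ * (A, A+4, A+8, A+12); a store_shared with writemask 0xc is emitted as a
+ * single store_local_shared_r600 covering the upper component pair at
+ * address A+8. */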
+
+static bool
+r600_lower_shared_io(nir_shader *nir)
+{
+ bool progress = false;
+ nir_foreach_function(function, nir) {
+ if (function->impl &&
+ r600_lower_shared_io_impl(function))
+ progress = true;
+ }
+ return progress;
+}
+
+
+static nir_ssa_def *
+r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options)
+{
+ auto old_ir = nir_instr_as_intrinsic(instr);
+ auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+ nir_ssa_dest_init(&load->instr, &load->dest,
+ old_ir->dest.ssa.num_components, old_ir->dest.ssa.bit_size, NULL);
+ nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir));
+
+ nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir));
+ nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir));
+ nir_intrinsic_set_dest_type(load, nir_type_float32);
+ load->num_components = old_ir->num_components;
+ load->src[0] = old_ir->src[1];
+ nir_builder_instr_insert(b, &load->instr);
+ return &load->dest.ssa;
+}
+
+bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ auto ir = nir_instr_as_intrinsic(instr);
+ if (ir->intrinsic != nir_intrinsic_load_interpolated_input)
+ return false;
+
+ return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS;
+}
+
+/* Strip the interpolator specification; it is not needed and only gets in
+ * the way. */
+bool r600_lower_fs_pos_input(nir_shader *shader)
+{
+ return nir_shader_lower_instructions(shader,
+ r600_lower_fs_pos_input_filter,
+ r600_lower_fs_pos_input_impl,
+ nullptr);
+};
+
+static bool
+optimize_once(nir_shader *shader, bool vectorize)
+{
+ bool progress = false;
+ NIR_PASS(progress, shader, nir_lower_vars_to_ssa);
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ NIR_PASS(progress, shader, nir_opt_algebraic);
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
+ if (vectorize)
+ NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
+
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+
+ if (nir_opt_trivial_continues(shader)) {
+ progress = true;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ }
+
+ NIR_PASS(progress, shader, nir_opt_if, false);
+ NIR_PASS(progress, shader, nir_opt_dead_cf);
+ NIR_PASS(progress, shader, nir_opt_cse);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true);
+
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ NIR_PASS(progress, shader, nir_opt_undef);
+ return progress;
+}
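+
+/* optimize_once only reports whether any pass made progress, so it is run
+ * to a fixed point, as in the loops below:
+ *
+ * while (optimize_once(sh, false));
+ */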
+
+bool has_saturate(const nir_function *func)
+{
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type == nir_instr_type_alu) {
+ auto alu = nir_instr_as_alu(instr);
+ if (alu->dest.saturate)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *)
+{
+ if (instr->type != nir_instr_type_alu)
+ return true;
+
+ auto alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_bany_fnequal3:
+ case nir_op_bany_fnequal4:
+ case nir_op_ball_fequal3:
+ case nir_op_ball_fequal4:
+ case nir_op_bany_inequal3:
+ case nir_op_bany_inequal4:
+ case nir_op_ball_iequal3:
+ case nir_op_ball_iequal4:
+ case nir_op_fdot2:
+ case nir_op_fdot3:
+ case nir_op_fdot4:
+ case nir_op_cube_r600:
+ return false;
+ case nir_op_bany_fnequal2:
+ case nir_op_ball_fequal2:
+ case nir_op_bany_inequal2:
+ case nir_op_ball_iequal2:
+ return nir_src_bit_size(alu->src[0].src) != 64;
+ default:
+ return true;
+ }
+}
+
+int r600_shader_from_nir(struct r600_context *rctx,
+ struct r600_pipe_shader *pipeshader,
+ r600_shader_key *key)
+{
+ char filename[4000];
+ struct r600_pipe_shader_selector *sel = pipeshader->selector;
+
+ bool lower_64bit = ((sel->nir->options->lower_int64_options ||
+ sel->nir->options->lower_doubles_options) &&
+ (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64);
+
+ r600::ShaderFromNir convert;
+
+ if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) {
+ fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n");
+ nir_print_shader(sel->nir, stderr);
+ fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n");
+ }
+
+ r600::sort_uniforms(sel->nir);
+
+ NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
+ NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
+ nir_lower_idiv_options idiv_options = {
+ .imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE,
+ .allow_fp16 = true,
+ };
+ NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options);
+ NIR_PASS_V(sel->nir, r600_lower_alu);
+ NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+
+ if (lower_64bit)
+ NIR_PASS_V(sel->nir, nir_lower_int64);
+ while (optimize_once(sel->nir, false));
+
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+
+ NIR_PASS_V(sel->nir, r600_lower_shared_io);
+ NIR_PASS_V(sel->nir, r600_nir_lower_atomics);
+
+ static const struct nir_lower_tex_options lower_tex_options = {
+ .lower_txp = ~0u,
+ };
+ NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
+ NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube);
+ NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray);
+
+ NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16);
+
+ if (sel->nir->info.stage == MESA_SHADER_VERTEX)
+ NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs);
+
+ if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector);
+ }
+
+ nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in;
+
+ //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
+ io_modes |= nir_var_shader_out;
+
+ if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+
+ /* Lower IO to temporaries late, because otherwise we get into trouble
+ * with the GLSL 4.40 interpolateAt swizzle tests. There seems to be a bug
+ * somewhere that results in the input always reading from the same temp
+ * regardless of interpolation when the lowering is done early. */
+ NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
+ true, true);
+
+ /* Since we're doing nir_lower_io_to_temporaries late, we need
+ * to lower all the copy_deref's introduced by
+ * lower_io_to_temporaries before calling nir_lower_io.
+ */
+ NIR_PASS_V(sel->nir, nir_split_var_copies);
+ NIR_PASS_V(sel->nir, nir_lower_var_copies);
+ NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
+ }
+
+ NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
+ nir_lower_io_lower_64bit_to_32);
+
+ if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
+ NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
+
+ if (lower_64bit)
+ NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);
+
+ NIR_PASS_V(sel->nir, nir_opt_constant_folding);
+ NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);
+
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+ NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+ if (lower_64bit)
+ NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+ NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+ NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+ NIR_PASS_V(sel->nir, nir_copy_prop);
+ NIR_PASS_V(sel->nir, nir_opt_dce);
+
+ auto sh = nir_shader_clone(sel->nir, sel->nir);
+
+ if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
+ sh->info.stage == MESA_SHADER_TESS_EVAL ||
+ (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
+ auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL ?
+ sh->info.tess.primitive_mode: key->tcs.prim_mode;
+ NIR_PASS_V(sh, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type));
+ }
+
+ if (sh->info.stage == MESA_SHADER_TESS_CTRL)
+ NIR_PASS_V(sh, r600_append_tcs_TF_emission,
+ (pipe_prim_type)key->tcs.prim_mode);
+
+ if (sh->info.stage == MESA_SHADER_TESS_EVAL)
+ NIR_PASS_V(sh, r600_lower_tess_coord,
+ static_cast<pipe_prim_type>(sh->info.tess.primitive_mode));
+
+ NIR_PASS_V(sh, nir_lower_ubo_vec4);
+ if (lower_64bit)
+ NIR_PASS_V(sh, r600::r600_nir_64_to_vec2);
+
+ /* Lower to scalar to let some optimizations work better */
+ while (optimize_once(sh, false));
+
+ NIR_PASS_V(sh, r600::r600_merge_vec2_stores);
+
+ NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL);
+ NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL);
+
+
+ NIR_PASS_V(sh, nir_lower_vars_to_scratch,
+ nir_var_function_temp,
+ 40,
+ r600_get_natural_size_align_bytes);
+
+ while (optimize_once(sh, true));
+
+ NIR_PASS_V(sh, nir_lower_bool_to_int32);
+ NIR_PASS_V(sh, r600_nir_lower_int_tg4);
+ NIR_PASS_V(sh, nir_opt_algebraic_late);
+
+ if (sh->info.stage == MESA_SHADER_FRAGMENT)
+ r600::sort_fsoutput(sh);
+
+ NIR_PASS_V(sh, nir_lower_locals_to_regs);
+
+ //NIR_PASS_V(sh, nir_opt_algebraic);
+ //NIR_PASS_V(sh, nir_copy_prop);
+ NIR_PASS_V(sh, nir_lower_to_source_mods,
+ (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods |
+ nir_lower_64bit_source_mods));
+ NIR_PASS_V(sh, nir_convert_from_ssa, true);
+ NIR_PASS_V(sh, nir_opt_dce);
+
+ if ((rctx->screen->b.debug_flags & DBG_NIR_PREFERRED) &&
+ (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) {
+ fprintf(stderr, "-- NIR --------------------------------------------------------\n");
+ struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions);
+ nir_index_ssa_defs(func->impl);
+ nir_print_shader(sh, stderr);
+ fprintf(stderr, "-- END --------------------------------------------------------\n");
+ }
+
+ memset(&pipeshader->shader, 0, sizeof(r600_shader));
+ pipeshader->scratch_space_needed = sh->scratch_size;
+
+ if (sh->info.stage == MESA_SHADER_TESS_EVAL ||
+ sh->info.stage == MESA_SHADER_VERTEX ||
+ sh->info.stage == MESA_SHADER_GEOMETRY) {
+ pipeshader->shader.clip_dist_write |= ((1 << sh->info.clip_distance_array_size) - 1);
+ pipeshader->shader.cull_dist_write = ((1 << sh->info.cull_distance_array_size) - 1)
+ << sh->info.clip_distance_array_size;
+ pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size +
+ sh->info.clip_distance_array_size)) - 1;
+ }
+
+ struct r600_shader* gs_shader = nullptr;
+ if (rctx->gs_shader)
+ gs_shader = &rctx->gs_shader->current->shader;
+ r600_screen *rscreen = rctx->screen;
+
+ bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class);
+ if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) {
+ static int shnr = 0;
+
+ snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++);
+
+ if (access(filename, F_OK) == -1) {
+ FILE *f = fopen(filename, "w");
+
+ if (f) {
+ fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name);
+ nir_print_shader(sh, f);
+ fprintf(f, ")\";\n");
+ fclose(f);
+ }
+ }
+ if (!r)
+ return -2;
+ }
+
+ auto shader = convert.shader();
+
+ r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family,
+ rscreen->has_compressed_msaa_texturing);
+
+ r600::sfn_log << r600::SfnLog::shader_info
+ << "pipeshader->shader.processor_type = "
+ << pipeshader->shader.processor_type << "\n";
+
+ pipeshader->shader.bc.type = pipeshader->shader.processor_type;
+ pipeshader->shader.bc.isa = rctx->isa;
+
+ r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key);
+ if (!afs.lower(shader.m_ir)) {
+ R600_ERR("%s: Lowering to assembly failed\n", __func__);
+ return -1;
+ }
+
+ if (sh->info.stage == MESA_SHADER_GEOMETRY) {
+ r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n";
+ generate_gs_copy_shader(rctx, pipeshader, &sel->so);
+ assert(pipeshader->gs_copy_shader);
+ } else {
+ r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n";
+ }
+ if (pipeshader->shader.bc.ngpr < 6)
+ pipeshader->shader.bc.ngpr = 6;
+
+ return 0;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h
new file mode 100644
index 000000000..d13accb3b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h
@@ -0,0 +1,161 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_NIR_H
+#define SFN_NIR_H
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#ifdef __cplusplus
+#include "sfn_shader_base.h"
+#include <vector>
+
+namespace r600 {
+
+class NirLowerInstruction {
+public:
+ NirLowerInstruction();
+
+ bool run(nir_shader *shader);
+
+private:
+ static bool filter_instr(const nir_instr *instr, const void *data);
+ static nir_ssa_def *lower_instr(nir_builder *b, nir_instr *instr, void *data);
+
+ void set_builder(nir_builder *_b) { b = _b;}
+
+ virtual bool filter(const nir_instr *instr) const = 0;
+ virtual nir_ssa_def *lower(nir_instr *instr) = 0;
+protected:
+ nir_builder *b;
+};
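+
+/* Usage sketch (hypothetical subclass, not part of this change): override
+ * filter() to select instructions and lower() to build the replacement:
+ *
+ * class LowerAluToZero : public NirLowerInstruction {
+ *    bool filter(const nir_instr *instr) const override {
+ *       return instr->type == nir_instr_type_alu;
+ *    }
+ *    nir_ssa_def *lower(nir_instr *instr) override {
+ *       return nir_imm_int(b, 0);
+ *    }
+ * };
+ *
+ * LowerAluToZero().run(shader) then rewrites every matching instruction. */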
+
+bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader);
+
+bool r600_lower_scratch_addresses(nir_shader *shader);
+
+bool r600_lower_ubo_to_align16(nir_shader *shader);
+
+bool r600_nir_split_64bit_io(nir_shader *sh);
+
+bool r600_nir_64_to_vec2(nir_shader *sh);
+
+bool r600_merge_vec2_stores(nir_shader *shader);
+
+class Shader {
+public:
+ std::vector<InstructionBlock>& m_ir;
+ ValueMap m_temp;
+};
+
+class ShaderFromNir {
+public:
+ ShaderFromNir();
+ ~ShaderFromNir();
+
+ unsigned ninputs() const;
+
+ bool lower(const nir_shader *shader, r600_pipe_shader *sh,
+ r600_pipe_shader_selector *sel, r600_shader_key &key,
+ r600_shader *gs_shader, enum chip_class chip_class);
+
+ bool process_declaration();
+
+ pipe_shader_type processor_type() const;
+
+ bool emit_instruction(nir_instr *instr);
+
+ const std::vector<InstructionBlock> &shader_ir() const;
+
+ Shader shader() const;
+private:
+
+ bool process_block();
+ bool process_cf_node(nir_cf_node *node);
+ bool process_if(nir_if *node);
+ bool process_loop(nir_loop *node);
+ bool process_block(nir_block *node);
+
+ std::unique_ptr<ShaderFromNirProcessor> impl;
+ const nir_shader *sh;
+
+ enum chip_class chip_class;
+ int m_current_if_id;
+ int m_current_loop_id;
+ std::stack<int> m_if_stack;
+ int scratch_size;
+};
+
+class AssemblyFromShader {
+public:
+ virtual ~AssemblyFromShader();
+ bool lower(const std::vector<InstructionBlock> &ir);
+private:
+ virtual bool do_lower(const std::vector<InstructionBlock>& ir) = 0;
+};
+
+}
+
+static inline nir_ssa_def *
+r600_imm_ivec3(nir_builder *build, int x, int y, int z)
+{
+ nir_const_value v[3] = {
+ nir_const_value_for_int(x, 32),
+ nir_const_value_for_int(y, 32),
+ nir_const_value_for_int(z, 32),
+ };
+
+ return nir_build_imm(build, 3, 32, v);
+}
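+
+/* Usage sketch (hypothetical values): r600_imm_ivec3(b, 0, 4, 8) builds an
+ * ivec3 immediate with 32-bit components 0, 4 and 8. */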
+
+bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type);
+bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type);
+bool r600_lower_tess_coord(nir_shader *sh, enum pipe_prim_type prim_type);
+
+#else
+#include "gallium/drivers/r600/r600_shader.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+bool r600_vectorize_vs_inputs(nir_shader *shader);
+
+
+int r600_shader_from_nir(struct r600_context *rctx,
+ struct r600_pipe_shader *pipeshader,
+ union r600_shader_key *key);
+
+bool r600_lower_alu(nir_shader *sh);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif // SFN_NIR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp
new file mode 100644
index 000000000..4a177d15d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp
@@ -0,0 +1,462 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_nir_lower_fs_out_to_vector.h"
+
+#include "nir_builder.h"
+#include "nir_deref.h"
+#include "util/u_math.h"
+
+#include <set>
+#include <vector>
+#include <array>
+#include <algorithm>
+
+namespace r600 {
+
+using std::multiset;
+using std::vector;
+using std::array;
+
+struct nir_intrinsic_instr_less {
+ bool operator () (const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) const
+ {
+ nir_variable *vlhs = nir_deref_instr_get_variable(nir_src_as_deref(lhs->src[0]));
+ nir_variable *vrhs = nir_deref_instr_get_variable(nir_src_as_deref(rhs->src[0]));
+
+ auto ltype = glsl_get_base_type(vlhs->type);
+ auto rtype = glsl_get_base_type(vrhs->type);
+
+ if (ltype != rtype)
+ return ltype < rtype;
+ return vlhs->data.location < vrhs->data.location;
+ }
+};
+
+class NirLowerIOToVector {
+public:
+ NirLowerIOToVector(int base_slot);
+ bool run(nir_function_impl *shader);
+
+protected:
+ bool var_can_merge(const nir_variable *lhs, const nir_variable *rhs);
+ bool var_can_rewrite(nir_variable *var) const;
+ void create_new_io_vars(nir_shader *shader);
+ void create_new_io_var(nir_shader *shader, unsigned location, unsigned comps);
+
+ nir_deref_instr *clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
+ const nir_deref_instr *src_head);
+
+ bool vectorize_block(nir_builder *b, nir_block *block);
+ bool instr_can_rewrite(nir_instr *instr);
+ bool vec_instr_set_remove(nir_builder *b, nir_instr *instr);
+
+ using InstrSet = multiset<nir_intrinsic_instr *, nir_intrinsic_instr_less>;
+ using InstrSubSet = std::pair<InstrSet::iterator, InstrSet::iterator>;
+
+ bool vec_instr_stack_pop(nir_builder *b, InstrSubSet& ir_set,
+ nir_intrinsic_instr *instr);
+
+ array<array<nir_variable *, 4>, 16> m_vars;
+ InstrSet m_block_io;
+ int m_next_index;
+private:
+ virtual nir_variable_mode get_io_mode(nir_shader *shader) const = 0;
+ virtual bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const = 0;
+ virtual bool var_can_rewrite_slot(nir_variable *var) const = 0;
+ virtual void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+ nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) = 0;
+
+ int m_base_slot;
+};
+
+class NirLowerFSOutToVector : public NirLowerIOToVector {
+public:
+ NirLowerFSOutToVector();
+
+private:
+ nir_variable_mode get_io_mode(nir_shader *shader) const override;
+ bool var_can_rewrite_slot(nir_variable *var) const override;
+ void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+ nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) override;
+ bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const override;
+
+ nir_ssa_def *create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
+ int first_comp, int num_comp);
+};
+
+bool r600_lower_fs_out_to_vector(nir_shader *shader)
+{
+ NirLowerFSOutToVector processor;
+
+ assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+ bool progress = false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= processor.run(function->impl);
+ }
+ return progress;
+}
+
+NirLowerIOToVector::NirLowerIOToVector(int base_slot):
+ m_next_index(0),
+ m_base_slot(base_slot)
+{
+ for(auto& a : m_vars)
+ for(auto& aa : a)
+ aa = nullptr;
+}
+
+bool NirLowerIOToVector::run(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_metadata_require(impl, nir_metadata_dominance);
+ create_new_io_vars(impl->function->shader);
+
+ bool progress = vectorize_block(&b, nir_start_block(impl));
+ if (progress) {
+ nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
+ }
+ return progress;
+}
+
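+/* Record every rewritable output variable in m_vars[location][component]
+ * and compute a per-location component mask of what can be merged. For
+ * example, two float outputs with location_frac 0 and 1 yield
+ * comps == 0b0011 and are combined into a single vec2 variable.
+ */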
+void NirLowerIOToVector::create_new_io_vars(nir_shader *shader)
+{
+ nir_variable_mode mode = get_io_mode(shader);
+
+ bool can_rewrite_vars = false;
+ nir_foreach_variable_with_modes(var, shader, mode) {
+ if (var_can_rewrite(var)) {
+ can_rewrite_vars = true;
+ unsigned loc = var->data.location - m_base_slot;
+ m_vars[loc][var->data.location_frac] = var;
+ }
+ }
+
+ if (!can_rewrite_vars)
+ return;
+
+ /* We don't handle combining vars of different type e.g. different array
+ * lengths.
+ */
+ for (unsigned i = 0; i < 16; i++) {
+ unsigned comps = 0;
+
+ for (unsigned j = 0; j < 3; j++) {
+ if (!m_vars[i][j])
+ continue;
+
+ for (unsigned k = j + 1; k < 4; k++) {
+ if (!m_vars[i][k])
+ continue;
+
+ if (!var_can_merge(m_vars[i][j], m_vars[i][k]))
+ continue;
+
+ /* Set comps */
+ for (unsigned n = 0; n < glsl_get_components(m_vars[i][j]->type); ++n)
+ comps |= 1 << (m_vars[i][j]->data.location_frac + n);
+
+ for (unsigned n = 0; n < glsl_get_components(m_vars[i][k]->type); ++n)
+ comps |= 1 << (m_vars[i][k]->data.location_frac + n);
+
+ }
+ }
+ if (comps)
+ create_new_io_var(shader, i, comps);
+ }
+}
+
+bool
+NirLowerIOToVector::var_can_merge(const nir_variable *lhs,
+ const nir_variable *rhs)
+{
+ return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
+}
+
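+/* Clone the variable of the first merged component, widen its type to
+ * the combined vector size, and alias all other merged component slots
+ * to the new variable so later lookups find the replacement.
+ */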
+void
+NirLowerIOToVector::create_new_io_var(nir_shader *shader,
+ unsigned location, unsigned comps)
+{
+ unsigned num_comps = util_bitcount(comps);
+ assert(num_comps > 1);
+
+ /* Note: u_bit_scan() strips a component of the comps bitfield here */
+ unsigned first_comp = u_bit_scan(&comps);
+
+ nir_variable *var = nir_variable_clone(m_vars[location][first_comp], shader);
+ var->data.location_frac = first_comp;
+ var->type = glsl_replace_vector_type(var->type, num_comps);
+
+ nir_shader_add_variable(shader, var);
+
+ m_vars[location][first_comp] = var;
+
+ while (comps) {
+ const int comp = u_bit_scan(&comps);
+ if (m_vars[location][comp]) {
+ m_vars[location][comp] = var;
+ }
+ }
+}
+
+bool NirLowerIOToVector::var_can_rewrite(nir_variable *var) const
+{
+ /* Skip complex types we don't split in the first place */
+ if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
+ return false;
+
+ if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
+ return false;
+
+ return var_can_rewrite_slot(var);
+}
+
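+/* Walk the dominance tree: index and collect the rewritable stores of
+ * this block, recurse into the dominated blocks, then merge the collected
+ * stores in reverse instruction order so the combined store is emitted at
+ * the position of the last scalar store. This mirrors the structure of
+ * nir_opt_vectorize_io.
+ */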
+bool
+NirLowerIOToVector::vectorize_block(nir_builder *b, nir_block *block)
+{
+ bool progress = false;
+
+ nir_foreach_instr_safe(instr, block) {
+ if (instr_can_rewrite(instr)) {
+ instr->index = m_next_index++;
+ nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
+ m_block_io.insert(ir);
+ }
+ }
+
+ for (unsigned i = 0; i < block->num_dom_children; i++) {
+ nir_block *child = block->dom_children[i];
+ progress |= vectorize_block(b, child);
+ }
+
+ nir_foreach_instr_reverse_safe(instr, block) {
+ progress |= vec_instr_set_remove(b, instr);
+ }
+ m_block_io.clear();
+
+ return progress;
+}
+
+bool NirLowerIOToVector::instr_can_rewrite(nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ if (intr->num_components > 3)
+ return false;
+
+ return instr_can_rewrite_type(intr);
+}
+
+bool NirLowerIOToVector::vec_instr_set_remove(nir_builder *b, nir_instr *instr)
+{
+ if (!instr_can_rewrite(instr))
+ return false;
+
+ nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr);
+ auto entry = m_block_io.equal_range(ir);
+ if (entry.first != m_block_io.end()) {
+ vec_instr_stack_pop(b, entry, ir);
+ }
+ return true;
+}
+
+nir_deref_instr *
+NirLowerIOToVector::clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
+ const nir_deref_instr *src_head)
+{
+ const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
+
+ if (!parent)
+ return dst_tail;
+
+ assert(src_head->deref_type == nir_deref_type_array);
+
+ dst_tail = clone_deref_array(b, dst_tail, parent);
+
+ return nir_build_deref_array(b, dst_tail,
+ nir_ssa_for_src(b, src_head->arr.index, 1));
+}
+
+NirLowerFSOutToVector::NirLowerFSOutToVector():
+ NirLowerIOToVector(FRAG_RESULT_COLOR)
+{
+}
+
+bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable *var) const
+{
+ return ((var->data.mode == nir_var_shader_out) &&
+ ((var->data.location == FRAG_RESULT_COLOR) ||
+ ((var->data.location >= FRAG_RESULT_DATA0) &&
+ (var->data.location <= FRAG_RESULT_DATA7))));
+}
+
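+/* Merge one subset of equal-ranged stores into a single vector store:
+ * sort by descending instruction index, gather the scalar sources into
+ * srcs[] (components that are never written stay undef), drop the now
+ * redundant scalar stores, and emit the combined store via
+ * create_new_io().
+ */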
+bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder *b, InstrSubSet &ir_set,
+ nir_intrinsic_instr *instr)
+{
+ vector< nir_intrinsic_instr *> ir_sorted_set(ir_set.first, ir_set.second);
+ std::sort(ir_sorted_set.begin(), ir_sorted_set.end(),
+ [](const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) {
+ return lhs->instr.index > rhs->instr.index;
+ }
+ );
+
+ nir_intrinsic_instr *intr = *ir_sorted_set.begin();
+ nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+
+ unsigned loc = var->data.location - m_base_slot;
+
+ nir_variable *new_var = m_vars[loc][var->data.location_frac];
+ unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type));
+ unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type));
+
+ /* Don't bother walking the stack if this component can't be vectorised. */
+ if (old_num_comps > 3) {
+ return false;
+ }
+
+ if (new_var == var) {
+ return false;
+ }
+
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_ssa_undef_instr *instr_undef =
+ nir_ssa_undef_instr_create(b->shader, 1, 32);
+ nir_builder_instr_insert(b, &instr_undef->instr);
+
+ nir_ssa_def *srcs[4];
+ for (int i = 0; i < 4; i++) {
+ srcs[i] = &instr_undef->def;
+ }
+ srcs[var->data.location_frac] = intr->src[1].ssa;
+
+ for (auto k = ir_sorted_set.begin() + 1; k != ir_sorted_set.end(); ++k) {
+ nir_intrinsic_instr *intr2 = *k;
+ nir_variable *var2 =
+ nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
+ unsigned loc2 = var2->data.location - m_base_slot;
+
+ if (m_vars[loc][var->data.location_frac] !=
+ m_vars[loc2][var2->data.location_frac]) {
+ continue;
+ }
+
+ assert(glsl_get_vector_elements(glsl_without_array(var2->type)) < 4);
+
+ if (srcs[var2->data.location_frac] == &instr_undef->def) {
+ assert(intr2->src[1].is_ssa);
+ assert(intr2->src[1].ssa);
+ srcs[var2->data.location_frac] = intr2->src[1].ssa;
+ }
+ nir_instr_remove(&intr2->instr);
+ }
+
+ create_new_io(b, intr, new_var, srcs, new_var->data.location_frac,
+ num_comps);
+ return true;
+}
+
+nir_variable_mode NirLowerFSOutToVector::get_io_mode(nir_shader *shader) const
+{
+ return nir_var_shader_out;
+}
+
+void
+NirLowerFSOutToVector::create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+ nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps)
+{
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_intrinsic_instr *new_intr =
+ nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+ new_intr->num_components = num_comps;
+
+ nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
+
+ nir_deref_instr *deref = nir_build_deref_var(b, var);
+ deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
+
+ new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+ new_intr->src[1] = nir_src_for_ssa(create_combined_vector(b, srcs, first_comp, num_comps));
+
+ nir_builder_instr_insert(b, &new_intr->instr);
+
+ /* Remove the old store intrinsic */
+ nir_instr_remove(&intr->instr);
+}
+
+bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr *intr) const
+{
+ if (intr->intrinsic != nir_intrinsic_store_deref)
+ return false;
+
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ if (!nir_deref_mode_is(deref, nir_var_shader_out))
+ return false;
+
+ return var_can_rewrite(nir_deref_instr_get_variable(deref));
+}
+
+nir_ssa_def *NirLowerFSOutToVector::create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
+ int first_comp, int num_comp)
+{
+ nir_op op;
+ switch (num_comp) {
+ case 2: op = nir_op_vec2; break;
+ case 3: op = nir_op_vec3; break;
+ case 4: op = nir_op_vec4; break;
+ default:
+ unreachable("combined vector must have 2 to 4 components");
+ }
+ nir_alu_instr * instr = nir_alu_instr_create(b->shader, op);
+ instr->exact = b->exact;
+
+ int i = 0;
+ unsigned k = 0;
+ while (i < num_comp) {
+ nir_ssa_def *s = srcs[first_comp + k];
+ for(uint8_t kk = 0; kk < s->num_components && i < num_comp; ++kk) {
+ instr->src[i].src = nir_src_for_ssa(s);
+ instr->src[i].swizzle[0] = kk;
+ ++i;
+ }
+ k += s->num_components;
+ }
+
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_comp, 32, NULL);
+ instr->dest.write_mask = (1 << num_comp) - 1;
+ nir_builder_instr_insert(b, &instr->instr);
+ return &instr->dest.dest.ssa;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h
new file mode 100644
index 000000000..016b7a222
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h
@@ -0,0 +1,38 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
+#define SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
+
+#include "nir.h"
+
+namespace r600 {
+
+bool r600_lower_fs_out_to_vector(nir_shader *sh);
+
+}
+
+#endif // SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
\ No newline at end of file
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
new file mode 100644
index 000000000..a830d0753
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp
@@ -0,0 +1,575 @@
+#include "sfn_nir.h"
+
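+/* Select the IO intrinsics that have to be rewritten to LDS accesses for
+ * the given stage: TCS and TES inputs, all per-vertex and patch outputs,
+ * tess levels, and the outputs written by a VS that feeds the
+ * tessellator.
+ */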
+bool r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+ switch (op->intrinsic) {
+ case nir_intrinsic_load_input:
+ return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_store_per_vertex_output:
+ case nir_intrinsic_load_patch_vertices_in:
+ case nir_intrinsic_load_tess_level_outer:
+ case nir_intrinsic_load_tess_level_inner:
+ return true;
+ case nir_intrinsic_store_output:
+ return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX;
+ default:
+ ;
+ }
+ return false;
+}
+
+static nir_ssa_def *
+emit_load_param_base(nir_builder *b, nir_intrinsic_op op)
+{
+ nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op);
+ nir_ssa_dest_init(&result->instr, &result->dest,
+ 4, 32, NULL);
+ nir_builder_instr_insert(b, &result->instr);
+ return &result->dest.ssa;
+}
+
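+/* Byte offset of a varying within the LDS patch layout. Each slot
+ * occupies 16 bytes (one vec4): POS/PSIZ/CLIP_DIST* come first, generic
+ * varyings start at 0x40, and per-patch varyings at 0x20 relative to the
+ * patch-constant base. E.g. VARYING_SLOT_VAR3 maps to
+ * 0x40 + 3 * 0x10 = 0x70.
+ */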
+static int get_tcs_varying_offset(nir_intrinsic_instr *op)
+{
+ unsigned location = nir_intrinsic_io_semantics(op).location;
+
+ switch (location) {
+ case VARYING_SLOT_POS:
+ return 0;
+ case VARYING_SLOT_PSIZ:
+ return 0x10;
+ case VARYING_SLOT_CLIP_DIST0:
+ return 0x20;
+ case VARYING_SLOT_CLIP_DIST1:
+ return 0x30;
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ return 0;
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ return 0x10;
+ default:
+ if (location >= VARYING_SLOT_VAR0 &&
+ location <= VARYING_SLOT_VAR31)
+ return 0x10 * (location - VARYING_SLOT_VAR0) + 0x40;
+
+ if (location >= VARYING_SLOT_PATCH0) {
+ return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20;
+ }
+ }
+ return 0;
+}
+
+static inline nir_ssa_def *
+r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3)
+{
+ return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL);
+}
+
+static inline nir_ssa_def *
+r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id)
+{
+ return r600_umad_24(b, nir_channel(b, param_base, 0),
+ rel_patch_id,
+ nir_channel(b, param_base, 3));
+}
+
+
+static nir_ssa_def *
+emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op)
+{
+ nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24,
+ nir_channel(b, base, 0),
+ patch_id, NULL, NULL);
+
+ auto idx1 = nir_src_as_const_value(op->src[0]);
+ if (!idx1 || idx1->u32 != 0)
+ addr = r600_umad_24(b, nir_channel(b, base, 1),
+ op->src[0].ssa, addr);
+
+ auto offset = nir_imm_int(b, get_tcs_varying_offset(op));
+
+ auto idx2 = nir_src_as_const_value(op->src[1]);
+ if (!idx2 || idx2->u32 != 0)
+ offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4)));
+
+ return nir_iadd(b, addr, offset);
+}
+
+static nir_ssa_def *
+emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset)
+{
+
+ nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0),
+ patch_id,
+ nir_channel(b, base, 2));
+ nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, base, 1),
+ op->src[src_offset].ssa, addr1);
+ int offset = get_tcs_varying_offset(op);
+ return nir_iadd(b, nir_iadd(b, addr2,
+ nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))),
+ nir_imm_int(b, offset));
+}
+
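+/* LDS byte offsets of the tess factor components. The component count
+ * doubles as a selector: 1-4 address the outer factors starting at byte
+ * 0, while 5 and 6 (i.e. 4 + inner count) address the inner factors
+ * starting at byte 16.
+ */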
+static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents)
+{
+ switch (ncomponents) {
+ /* tess outer offsets */
+ case 1: return nir_imm_int(b, 0);
+ case 2: return nir_imm_ivec2(b, 0, 4);
+ case 3: return r600_imm_ivec3(b, 0, 4, 8);
+ case 4: return nir_imm_ivec4(b, 0, 4, 8, 12);
+ /* tess inner offsets */
+ case 5: return nir_imm_int(b, 16);
+ case 6: return nir_imm_ivec2(b, 16, 20);
+ default:
+ debug_printf("Got %d components\n", ncomponents);
+ unreachable("Unsupported component count");
+ }
+}
+
+static nir_ssa_def *load_offset_group_from_mask(nir_builder *b, uint32_t mask)
+{
+ auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12);
+ return nir_channels(b, full_mask, mask);
+}
+
+struct MaskQuery {
+ uint32_t mask;
+ uint32_t ssa_index;
+ nir_alu_instr *alu;
+ int index;
+ uint32_t full_mask;
+};
+
+static bool update_alu_mask(nir_src *src, void *data)
+{
+ auto mq = reinterpret_cast<MaskQuery *>(data);
+
+ if (mq->ssa_index == src->ssa->index) {
+ mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index);
+ }
+ ++mq->index;
+
+ return mq->mask != mq->full_mask;
+}
+
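+/* Determine which components of a load result are actually consumed by
+ * inspecting its uses: ALU users contribute their source read masks,
+ * stores contribute their write masks, and any other use conservatively
+ * marks all four components as read.
+ */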
+static uint32_t get_dest_use_mask(nir_intrinsic_instr *op)
+{
+ assert(op->dest.is_ssa);
+
+ MaskQuery mq = {0};
+ mq.full_mask = (1 << nir_dest_num_components(op->dest)) - 1;
+
+ nir_foreach_use(use_src, &op->dest.ssa) {
+ auto use_instr = use_src->parent_instr;
+ mq.ssa_index = use_src->ssa->index;
+
+ switch (use_instr->type) {
+ case nir_instr_type_alu: {
+ mq.alu = nir_instr_as_alu(use_instr);
+ mq.index = 0;
+ if (!nir_foreach_src(use_instr, update_alu_mask, &mq))
+ return 0xf;
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ auto intr = nir_instr_as_intrinsic(use_instr);
+ switch (intr->intrinsic) {
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr);
+ break;
+ case nir_intrinsic_store_scratch:
+ case nir_intrinsic_store_local_shared_r600:
+ mq.mask |= nir_intrinsic_write_mask(intr);
+ break;
+ default:
+ return 0xf;
+ }
+ break;
+ }
+ default:
+ return 0xf;
+ }
+
+ }
+ return mq.mask;
+}
+
+static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
+{
+ uint32_t mask = get_dest_use_mask(op);
+ if (mask) {
+ nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask));
+ if (nir_intrinsic_component(op))
+ addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op)));
+
+ auto new_load = nir_load_local_shared_r600(b, 32, addr_outer);
+
+ auto undef = nir_ssa_undef(b, 1, 32);
+ int comps = nir_dest_num_components(op->dest);
+ nir_ssa_def *remix[4] = {undef, undef, undef, undef};
+
+ int chan = 0;
+ for (int i = 0; i < comps; ++i) {
+ if (mask & (1 << i)) {
+ remix[i] = nir_channel(b, new_load, chan++);
+ }
+ }
+ auto new_load_remixed = nir_vec(b, remix, comps);
+ nir_ssa_def_rewrite_uses(&op->dest.ssa, new_load_remixed);
+ }
+ nir_instr_remove(&op->instr);
+}
+
+static nir_ssa_def *
+r600_load_rel_patch_id(nir_builder *b)
+{
+ auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600);
+ nir_ssa_dest_init(&patch_id->instr, &patch_id->dest,
+ 1, 32, NULL);
+ nir_builder_instr_insert(b, &patch_id->instr);
+ return &patch_id->dest.ssa;
+}
+
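+/* Split a store into LDS writes of at most two dwords each: the xy and
+ * zw halves of the write mask are emitted as separate
+ * store_local_shared_r600 intrinsics, with the second half placed eight
+ * bytes above the base address (plus four when a half starts at its odd
+ * component).
+ */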
+static void
+emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr)
+{
+ uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op);
+
+ for (int i = 0; i < 2; ++i) {
+ unsigned test_mask = (0x3 << 2 * i);
+ if (!(orig_writemask & test_mask))
+ continue;
+
+ uint32_t writemask = test_mask >> nir_intrinsic_component(op);
+
+ auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600);
+ nir_intrinsic_set_write_mask(store_tcs_out, writemask);
+ store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa);
+ store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components;
+ bool start_even = (orig_writemask & (1u << (2 * i)));
+
+ auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4)));
+ store_tcs_out->src[1] = nir_src_for_ssa(addr2);
+
+ nir_builder_instr_insert(b, &store_tcs_out->instr);
+ }
+}
+
+static nir_ssa_def *
+emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, int src_offset)
+{
+ int offset = get_tcs_varying_offset(op);
+ return nir_iadd(b, nir_iadd(b, addr,
+ nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))),
+ nir_imm_int(b, offset));
+}
+
+
+inline unsigned
+outer_tf_components(pipe_prim_type prim_type)
+{
+ switch (prim_type) {
+ case PIPE_PRIM_LINES: return 2;
+ case PIPE_PRIM_TRIANGLES: return 3;
+ case PIPE_PRIM_QUADS: return 4;
+ default:
+ return 0;
+ }
+}
+
+
+
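+/* Rewrite one IO intrinsic into LDS address arithmetic plus a
+ * load/store_local_shared_r600 access. The param-base vectors appear to
+ * hold the per-stage layout constants (channel 2, for instance, is the
+ * number of vertices per input patch) that the address helpers above
+ * combine with the relative patch id.
+ */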
+static bool
+r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type)
+{
+ static nir_ssa_def *load_in_param_base = nullptr;
+ static nir_ssa_def *load_out_param_base = nullptr;
+
+ b->cursor = nir_before_instr(instr);
+ nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
+
+ if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) {
+ load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
+ load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
+ } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
+ load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
+ } else if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+ load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
+ }
+
+ auto rel_patch_id = r600_load_rel_patch_id(b);
+
+ unsigned tf_inner_address_offset = 0;
+ unsigned ncomps_correct = 0;
+
+ switch (op->intrinsic) {
+ case nir_intrinsic_load_patch_vertices_in: {
+ nir_ssa_def *vertices_in;
+ if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
+ vertices_in = nir_channel(b, load_in_param_base, 2);
+ else {
+ auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600);
+ vertices_in = nir_channel(b, base, 2);
+ }
+ nir_ssa_def_rewrite_uses(&op->dest.ssa, vertices_in);
+ nir_instr_remove(&op->instr);
+ return true;
+ }
+ case nir_intrinsic_load_per_vertex_input: {
+ nir_ssa_def *addr =
+ b->shader->info.stage == MESA_SHADER_TESS_CTRL ?
+ emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) :
+ emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0);
+ replace_load_instr(b, op, addr);
+ return true;
+ }
+ case nir_intrinsic_store_per_vertex_output: {
+ nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1);
+ emit_store_lds(b, op, addr);
+ nir_instr_remove(instr);
+ return true;
+ }
+ case nir_intrinsic_load_per_vertex_output: {
+ nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0);
+ replace_load_instr(b, op, addr);
+ return true;
+ }
+ case nir_intrinsic_store_output: {
+ nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ?
+ r600_tcs_base_address(b, load_out_param_base, rel_patch_id):
+ nir_build_alu(b, nir_op_umul24,
+ nir_channel(b, load_out_param_base, 1),
+ rel_patch_id, NULL, NULL);
+ addr = emil_tcs_io_offset(b, addr, op, 1);
+ emit_store_lds(b, op, addr);
+ nir_instr_remove(instr);
+ return true;
+ }
+ case nir_intrinsic_load_output: {
+ nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id);
+ addr = emil_tcs_io_offset(b, addr, op, 0);
+ replace_load_instr(b, op, addr);
+ return true;
+ }
+ case nir_intrinsic_load_input: {
+ nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id);
+ addr = emil_tcs_io_offset(b, addr, op, 0);
+ replace_load_instr(b, op, addr);
+ return true;
+ }
+ case nir_intrinsic_load_tess_level_inner:
+ tf_inner_address_offset = 4;
+ ncomps_correct = 2;
+ FALLTHROUGH;
+ case nir_intrinsic_load_tess_level_outer: {
+ auto ncomps = outer_tf_components(prim_type);
+ if (!ncomps)
+ return false;
+ ncomps -= ncomps_correct;
+ auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
+ auto rel_patch_id = r600_load_rel_patch_id(b);
+ nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
+ nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps));
+
+ auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
+ tf->num_components = ncomps;
+ tf->src[0] = nir_src_for_ssa(addr_outer);
+ nir_ssa_dest_init(&tf->instr, &tf->dest,
+ tf->num_components, 32, NULL);
+ nir_builder_instr_insert(b, &tf->instr);
+
+ nir_ssa_def_rewrite_uses(&op->dest.ssa, &tf->dest.ssa);
+ nir_instr_remove(instr);
+ return true;
+ }
+ default:
+ ;
+ }
+
+ return false;
+}
+
+bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type)
+{
+ bool progress = false;
+ nir_foreach_function(function, shader) {
+ if (function->impl) {
+ nir_builder b;
+ nir_builder_init(&b, function->impl);
+
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ if (r600_lower_tess_io_filter(instr, shader->info.stage))
+ progress |= r600_lower_tess_io_impl(&b, instr, prim_type);
+ }
+ }
+ }
+ }
+ return progress;
+}
+
+bool r600_emit_tf(nir_builder *b, nir_ssa_def *val)
+{
+ nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600);
+ store_tf->num_components = val->num_components;
+ store_tf->src[0] = nir_src_for_ssa(val);
+ nir_builder_instr_insert(b, &store_tf->instr);
+ return true;
+}
+
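+/* Append the tessellation factor export to the end of a TCS: the factors
+ * are read back from LDS and written via store_tf_r600 as (address,
+ * value) pairs, guarded by invocation_id == 0 so only one invocation per
+ * patch emits them. Nothing is added if the shader already contains a
+ * store_tf_r600.
+ */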
+bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) {
+ if (shader->info.stage != MESA_SHADER_TESS_CTRL)
+ return false;
+
+ nir_foreach_function(function, shader) {
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic == nir_intrinsic_store_tf_r600) {
+ return false;
+ }
+ }
+ }
+ }
+ nir_builder builder;
+ nir_builder *b = &builder;
+
+ assert(exec_list_length(&shader->functions) == 1);
+ nir_function *f = (nir_function *)shader->functions.get_head();
+ nir_builder_init(b, f->impl);
+
+ auto outer_comps = outer_tf_components(prim_type);
+ if (!outer_comps)
+ return false;
+
+ unsigned inner_comps = outer_comps - 2;
+ unsigned stride = (inner_comps + outer_comps) * 4;
+
+ b->cursor = nir_after_cf_list(&f->impl->body);
+
+ auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id);
+ nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest,
+ 1, 32, NULL);
+ nir_builder_instr_insert(b, &invocation_id->instr);
+
+ nir_push_if(b, nir_ieq_imm(b, &invocation_id->dest.ssa, 0));
+ auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600);
+ auto rel_patch_id = r600_load_rel_patch_id(b);
+
+ nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id);
+
+ nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps));
+ auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
+ tf_outer->num_components = outer_comps;
+ tf_outer->src[0] = nir_src_for_ssa(addr_outer);
+ nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest,
+ tf_outer->num_components, 32, NULL);
+ nir_builder_instr_insert(b, &tf_outer->instr);
+
+ std::vector<nir_ssa_def *> tf_out;
+
+
+ auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600);
+ nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest,
+ 1, 32, NULL);
+ nir_builder_instr_insert(b, &tf_out_base->instr);
+
+ auto out_addr0 = nir_build_alu(b, nir_op_umad24,
+ rel_patch_id,
+ nir_imm_int(b, stride),
+ &tf_out_base->dest.ssa,
+ NULL);
+ int chanx = 0;
+ int chany = 1;
+
+ if (prim_type == PIPE_PRIM_LINES)
+ std::swap(chanx, chany);
+
+
+ auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx),
+ nir_iadd(b, out_addr0, nir_imm_int(b, 4)),
+ nir_channel(b, &tf_outer->dest.ssa, chany));
+
+ tf_out.push_back(v0);
+ if (outer_comps > 2) {
+ auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
+ nir_channel(b, &tf_outer->dest.ssa, 2),
+ nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
+ nir_channel(b, &tf_outer->dest.ssa, 3)) :
+ nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)),
+ nir_channel(b, &tf_outer->dest.ssa, 2));
+ tf_out.push_back(v1);
+ }
+
+ if (inner_comps) {
+ nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps));
+ auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600);
+ tf_inner->num_components = inner_comps;
+ tf_inner->src[0] = nir_src_for_ssa(addr1);
+ nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest,
+ tf_inner->num_components, 32, NULL);
+ nir_builder_instr_insert(b, &tf_inner->instr);
+
+ auto v2 = (inner_comps > 1) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)),
+ nir_channel(b, &tf_inner->dest.ssa, 0),
+ nir_iadd(b, out_addr0, nir_imm_int(b, 20)),
+ nir_channel(b, &tf_inner->dest.ssa, 1)):
+ nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)),
+ nir_channel(b, &tf_inner->dest.ssa, 0));
+ tf_out.push_back(v2);
+ }
+
+ for (auto tf: tf_out)
+ r600_emit_tf(b, tf);
+
+ nir_pop_if(b, nullptr);
+
+ nir_metadata_preserve(f->impl, nir_metadata_none);
+
+ return true;
+}
+
+static bool
+r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+ auto intr = nir_instr_as_intrinsic(instr);
+ return intr->intrinsic == nir_intrinsic_load_tess_coord;
+}
+
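+/* Expand the two-component tess coordinate delivered by the hardware to
+ * the three components NIR expects: for triangles the third barycentric
+ * coordinate is 1 - x - y, for everything else it is zero.
+ */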
+static nir_ssa_def *
+r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options)
+{
+ pipe_prim_type prim_type = *(pipe_prim_type *)_options;
+
+ auto tc_xy = nir_load_tess_coord_r600(b);
+
+ auto tc_x = nir_channel(b, tc_xy, 0);
+ auto tc_y = nir_channel(b, tc_xy, 1);
+
+ if (prim_type == PIPE_PRIM_TRIANGLES)
+ return nir_vec3(b, tc_x, tc_y, nir_fsub(b, nir_imm_float(b, 1.0),
+ nir_fadd(b, tc_x, tc_y)));
+ else
+ return nir_vec3(b, tc_x, tc_y, nir_imm_float(b, 0.0));
+}
+
+
+bool r600_lower_tess_coord(nir_shader *sh, enum pipe_prim_type prim_type)
+{
+ return nir_shader_lower_instructions(sh, r600_lower_tess_coord_filter,
+ r600_lower_tess_coord_impl, &prim_type);
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c
new file mode 100644
index 000000000..2ff60cf6a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright © 2018 Timothy Arceri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_deref.h"
+#include "util/u_dynarray.h"
+#include "util/u_math.h"
+#define XXH_INLINE_ALL
+#include "util/xxhash.h"
+
+/** @file sfn_nir_vectorize_vs_inputs.c
+ *
+ * Replaces scalar loads of vertex attributes (nir_load_deref on shader
+ * inputs) with vectorized load instructions. Derived from
+ * nir_opt_vectorize_io.c.
+ */
+bool
+r600_vectorize_vs_inputs(nir_shader *shader);
+
+static nir_deref_instr *
+r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
+ const nir_deref_instr *src_head)
+{
+ const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
+
+ if (!parent)
+ return dst_tail;
+
+ assert(src_head->deref_type == nir_deref_type_array);
+
+ dst_tail = r600_clone_deref_array(b, dst_tail, parent);
+
+ return nir_build_deref_array(b, dst_tail,
+ nir_ssa_for_src(b, src_head->arr.index, 1));
+}
+
+static bool
+r600_variable_can_rewrite(nir_variable *var)
+{
+
+ /* Skip complex types we don't split in the first place */
+ if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
+ return false;
+
+
+ /* TODO: add 64/16bit support ? */
+ if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
+ return false;
+
+ /* We only check VS generic attribute inputs */
+ return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
+ var->data.location <= VERT_ATTRIB_GENERIC15);
+}
+
+static bool
+r600_instr_can_rewrite(nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return false;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ if (intr->num_components > 3)
+ return false;
+
+ if (intr->intrinsic != nir_intrinsic_load_deref)
+ return false;
+
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+ if (!nir_deref_mode_is(deref, nir_var_shader_in))
+ return false;
+
+ return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref));
+}
+
+static bool
+r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
+{
+ assert(instr1->type == nir_instr_type_intrinsic &&
+ instr2->type == nir_instr_type_intrinsic);
+
+ nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
+ nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);
+
+ nir_variable *var1 =
+ nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0]));
+ nir_variable *var2 =
+ nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
+
+ /* We don't handle combining vars of different base types, so skip those */
+ if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type))
+ return false;
+
+ if (var1->data.location != var2->data.location)
+ return false;
+
+ return true;
+}
+
+static struct util_dynarray *
+r600_vec_instr_stack_create(void *mem_ctx)
+{
+ struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
+ util_dynarray_init(stack, mem_ctx);
+ return stack;
+}
+
+static void
+r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
+{
+ util_dynarray_append(stack, nir_instr *, instr);
+}
+
+static unsigned r600_correct_location(nir_variable *var)
+{
+ return var->data.location - VERT_ATTRIB_GENERIC0;
+}
+
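+/* Replace a scalar attribute load with a load of the merged vector
+ * variable, then swizzle the originally requested components out of the
+ * wider destination and rewrite all uses to the swizzled value.
+ */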
+static void
+r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+ unsigned comp, unsigned num_comps, unsigned old_num_comps)
+{
+ unsigned channels[4];
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ assert(intr->dest.is_ssa);
+
+ nir_intrinsic_instr *new_intr =
+ nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+ nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps,
+ intr->dest.ssa.bit_size, NULL);
+ new_intr->num_components = num_comps;
+
+ nir_deref_instr *deref = nir_build_deref_var(b, var);
+ deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
+
+ new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+
+ if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
+ intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
+ nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr);
+
+ nir_builder_instr_insert(b, &new_intr->instr);
+
+ for (unsigned i = 0; i < old_num_comps; ++i)
+ channels[i] = comp - var->data.location_frac + i;
+ nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
+
+ /* Remove the old load intrinsic */
+ nir_instr_remove(&intr->instr);
+}
+
+
+static bool
+r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack,
+ nir_instr *instr,
+ nir_variable *updated_vars[16][4])
+{
+ nir_instr *last = util_dynarray_pop(stack, nir_instr *);
+
+ assert(last == instr);
+ assert(last->type == nir_instr_type_intrinsic);
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
+ nir_variable *var =
+ nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+ unsigned loc = r600_correct_location(var);
+
+ nir_variable *new_var;
+ new_var = updated_vars[loc][var->data.location_frac];
+
+ unsigned num_comps =
+ glsl_get_vector_elements(glsl_without_array(new_var->type));
+
+ unsigned old_num_comps =
+ glsl_get_vector_elements(glsl_without_array(var->type));
+
+ /* Don't bother walking the stack if this component can't be vectorised. */
+ if (old_num_comps > 3) {
+ return false;
+ }
+
+ if (new_var == var) {
+ return false;
+ }
+
+ r600_create_new_load(b, intr, new_var, var->data.location_frac,
+ num_comps, old_num_comps);
+ return true;
+}
+
+static bool
+r600_cmp_func(const void *data1, const void *data2)
+{
+ const struct util_dynarray *arr1 = data1;
+ const struct util_dynarray *arr2 = data2;
+
+ const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
+ const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
+
+ return r600_io_access_same_var(instr1, instr2);
+}
+
+#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash))
+
+static uint32_t
+r600_hash_instr(const nir_instr *instr)
+{
+ assert(instr->type == nir_instr_type_intrinsic);
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_variable *var =
+ nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+
+ uint32_t hash = 0;
+
+ hash = HASH(hash, var->type);
+ return HASH(hash, var->data.location);
+}
+
+static uint32_t
+r600_hash_stack(const void *data)
+{
+ const struct util_dynarray *stack = data;
+ const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
+ return r600_hash_instr(first);
+}
+
+static struct set *
+r600_vec_instr_set_create(void)
+{
+ return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
+}
+
+static void
+r600_vec_instr_set_destroy(struct set *instr_set)
+{
+ _mesa_set_destroy(instr_set, NULL);
+}
+
+static void
+r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
+{
+ if (!r600_instr_can_rewrite(instr)) {
+ return;
+ }
+
+ struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set);
+ r600_vec_instr_stack_push(new_stack, instr);
+
+ struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
+
+ if (entry) {
+ ralloc_free(new_stack);
+ struct util_dynarray *stack = (struct util_dynarray *) entry->key;
+ r600_vec_instr_stack_push(stack, instr);
+ return;
+ }
+
+ _mesa_set_add(instr_set, new_stack);
+
+ return;
+}
+
+static bool
+r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr,
+ nir_variable *updated_vars[16][4])
+{
+ if (!r600_instr_can_rewrite(instr)) {
+ return false;
+ }
+ /*
+ * It's pretty unfortunate that we have to do this, but it's a side effect
+ * of the hash set interfaces. The hash set assumes that we're only
+ * interested in storing one equivalent element at a time, and if we try to
+ * insert a duplicate element it will remove the original. We could hack up
+ * the comparison function to "know" which input is an instruction we
+ * passed in and which is an array that's part of the entry, but that
+ * wouldn't work because we need to pass an array to _mesa_set_add() in
+ * vec_instr_add() above, and _mesa_set_add() will call our comparison
+ * function as well.
+ */
+ struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
+ r600_vec_instr_stack_push(temp, instr);
+ struct set_entry *entry = _mesa_set_search(instr_set, temp);
+ ralloc_free(temp);
+
+ if (entry) {
+ struct util_dynarray *stack = (struct util_dynarray *) entry->key;
+ bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);
+
+ if (!util_dynarray_num_elements(stack, nir_instr *))
+ _mesa_set_remove(instr_set, entry);
+
+ return progress;
+ }
+
+ return false;
+}
+
+static bool
+r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set,
+ nir_variable *updated_vars[16][4])
+{
+ bool progress = false;
+
+ nir_foreach_instr_safe(instr, block) {
+ r600_vec_instr_set_add(instr_set, instr);
+ }
+
+ for (unsigned i = 0; i < block->num_dom_children; i++) {
+ nir_block *child = block->dom_children[i];
+ progress |= r600_vectorize_block(b, child, instr_set, updated_vars);
+ }
+
+ nir_foreach_instr_reverse_safe(instr, block) {
+ progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars);
+ }
+
+ return progress;
+}
+
+static void
+r600_create_new_io_var(nir_shader *shader,
+ nir_variable *vars[16][4],
+ unsigned location, unsigned comps)
+{
+ unsigned num_comps = util_bitcount(comps);
+ assert(num_comps > 1);
+
+ /* Note: u_bit_scan() strips a component of the comps bitfield here */
+ unsigned first_comp = u_bit_scan(&comps);
+
+ nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
+ var->data.location_frac = first_comp;
+ var->type = glsl_replace_vector_type(var->type, num_comps);
+
+ nir_shader_add_variable(shader, var);
+
+ vars[location][first_comp] = var;
+
+ while (comps) {
+ const int comp = u_bit_scan(&comps);
+ if (vars[location][comp]) {
+ vars[location][comp] = var;
+ }
+ }
+}
+
+static inline bool
+r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
+{
+ return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
+}
+
+static void
+r600_create_new_io_vars(nir_shader *shader, nir_variable_mode mode,
+ nir_variable *vars[16][4])
+{
+ bool can_rewrite_vars = false;
+ nir_foreach_variable_with_modes(var, shader, mode) {
+ if (r600_variable_can_rewrite(var)) {
+ can_rewrite_vars = true;
+ unsigned loc = r600_correct_location(var);
+ vars[loc][var->data.location_frac] = var;
+ }
+ }
+
+ if (!can_rewrite_vars)
+ return;
+
+ /* We don't handle combining vars of different type e.g. different array
+ * lengths.
+ */
+ for (unsigned i = 0; i < 16; i++) {
+ unsigned comps = 0;
+
+ for (unsigned j = 0; j < 3; j++) {
+
+ if (!vars[i][j])
+ continue;
+
+ for (unsigned k = j + 1; k < 4; k++) {
+ if (!vars[i][k])
+ continue;
+
+ if (!r600_variables_can_merge(vars[i][j], vars[i][k]))
+ continue;
+
+ /* Set comps */
+ for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n)
+ comps |= 1 << (vars[i][j]->data.location_frac + n);
+
+ for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n)
+ comps |= 1 << (vars[i][k]->data.location_frac + n);
+
+ }
+ }
+ if (comps)
+ r600_create_new_io_var(shader, vars, i, comps);
+ }
+}
+
+static bool
+r600_vectorize_io_impl(nir_function_impl *impl)
+{
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_metadata_require(impl, nir_metadata_dominance);
+
+ nir_shader *shader = impl->function->shader;
+ nir_variable *updated_vars[16][4] = {0};
+
+ r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars);
+
+ struct set *instr_set = r600_vec_instr_set_create();
+ bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set,
+ updated_vars);
+
+ if (progress) {
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ }
+
+ r600_vec_instr_set_destroy(instr_set);
+ return progress;
+}
+
+bool
+r600_vectorize_vs_inputs(nir_shader *shader)
+{
+ bool progress = false;
+
+ if (shader->info.stage != MESA_SHADER_VERTEX)
+ return false;
+
+ nir_foreach_function(function, shader) {
+ if (function->impl)
+ progress |= r600_vectorize_io_impl(function->impl);
+ }
+
+ return progress;
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
new file mode 100644
index 000000000..e37e2732b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
@@ -0,0 +1,1179 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "../r600_pipe.h"
+#include "../r600_shader.h"
+#include "sfn_shader_vertex.h"
+
+#include "sfn_shader_compute.h"
+#include "sfn_shader_fragment.h"
+#include "sfn_shader_geometry.h"
+#include "sfn_liverange.h"
+#include "sfn_ir_to_assembly.h"
+#include "sfn_nir.h"
+#include "sfn_instruction_misc.h"
+#include "sfn_instruction_fetch.h"
+#include "sfn_instruction_lds.h"
+
+#include <iostream>
+
+#define ENABLE_DEBUG 1
+
+#ifdef ENABLE_DEBUG
+#define DEBUG_SFN(X) \
+ do {\
+ X; \
+ } while (0)
+#else
+#define DEBUG_SFN(X)
+#endif
+
+namespace r600 {
+
+using namespace std;
+
+
+ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
+ r600_pipe_shader_selector& sel,
+ r600_shader &sh_info, int scratch_size,
+ enum chip_class chip_class,
+ int atomic_base):
+ m_processor_type(ptype),
+ m_nesting_depth(0),
+ m_block_number(0),
+ m_export_output(0, -1),
+ m_sh_info(sh_info),
+ m_chip_class(chip_class),
+ m_tex_instr(*this),
+ m_alu_instr(*this),
+ m_ssbo_instr(*this),
+ m_pending_else(nullptr),
+ m_scratch_size(scratch_size),
+ m_next_hwatomic_loc(0),
+ m_sel(sel),
+ m_atomic_base(atomic_base),
+ m_image_count(0),
+ last_emitted_alu(nullptr)
+{
+ m_sh_info.processor_type = ptype;
+}
+
+
+ShaderFromNirProcessor::~ShaderFromNirProcessor()
+{
+}
+
+bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_tex: {
+ nir_tex_instr *t = nir_instr_as_tex(instr);
+ if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ sh_info().uses_tex_buffers = true;
+ if (t->op == nir_texop_txs &&
+ t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+ t->is_array)
+ sh_info().has_txq_cube_array_z_comp = true;
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ auto *i = nir_instr_as_intrinsic(instr);
+ switch (i->intrinsic) {
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_image_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_image_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_image_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_image_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ m_sel.info.writes_memory = 1;
+ FALLTHROUGH;
+ case nir_intrinsic_image_load:
+ m_ssbo_instr.set_require_rat_return_address();
+ break;
+ case nir_intrinsic_image_size: {
+ if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
+ nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
+ sh_info().has_txq_cube_array_z_comp = true;
+ break;
+ }
+ default:
+ ;
+ }
+ break;
+ }
+ default:
+ ;
+ }
+
+ return scan_sysvalue_access(instr);
+}
+
+enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
+{
+ return m_chip_class;
+}
+
+bool ShaderFromNirProcessor::allocate_reserved_registers()
+{
+ bool retval = do_allocate_reserved_registers();
+ m_ssbo_instr.load_rat_return_address();
+ if (sh_info().uses_atomics)
+ m_ssbo_instr.load_atomic_inc_limits();
+ m_ssbo_instr.set_ssbo_offset(m_image_count);
+ return retval;
+}
+
+static void remap_shader_info(r600_shader& sh_info,
+ std::vector<rename_reg_pair>& map,
+ UNUSED ValueMap& values)
+{
+ for (unsigned i = 0; i < sh_info.num_arrays; ++i) {
+ auto new_index = map[sh_info.arrays[i].gpr_start];
+ if (new_index.valid)
+ sh_info.arrays[i].gpr_start = new_index.new_reg;
+ map[sh_info.arrays[i].gpr_start].used = true;
+ }
+
+ for (unsigned i = 0; i < sh_info.ninput; ++i) {
+ sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
+ << " of " << map.size() << "\n";
+
+ assert(sh_info.input[i].gpr < map.size());
+ auto new_index = map[sh_info.input[i].gpr];
+ if (new_index.valid)
+ sh_info.input[i].gpr = new_index.new_reg;
+ map[sh_info.input[i].gpr].used = true;
+ }
+
+ for (unsigned i = 0; i < sh_info.noutput; ++i) {
+ assert(sh_info.output[i].gpr < map.size());
+ auto new_index = map[sh_info.output[i].gpr];
+ if (new_index.valid)
+ sh_info.output[i].gpr = new_index.new_reg;
+ map[sh_info.output[i].gpr].used = true;
+ }
+}
+
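+/* Compact the GPR numbering: evaluate the live range of each temporary,
+ * compute a remapping of the register file, apply it to the instruction
+ * stream and the r600_shader info, then renumber the surviving registers
+ * densely while keeping input registers pinned. The remap is applied
+ * twice because the final dense numbering is only known after the first
+ * pass has marked which registers remain in use.
+ */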
+void ShaderFromNirProcessor::remap_registers()
+{
+ // register renumbering
+ auto rc = register_count();
+ if (!rc)
+ return;
+
+ std::vector<register_live_range> register_live_ranges(rc);
+
+ auto temp_register_map = get_temp_registers();
+
+ Shader sh{m_output, temp_register_map};
+ LiverangeEvaluator().run(sh, register_live_ranges);
+ auto register_map = get_temp_registers_remapping(register_live_ranges);
+
+ sfn_log << SfnLog::merge << "=========Mapping===========\n";
+ for (size_t i = 0; i < register_map.size(); ++i)
+ if (register_map[i].valid)
+ sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";
+
+ ValueRemapper vmap0(register_map, temp_register_map);
+ for (auto& block: m_output)
+ block.remap_registers(vmap0);
+
+ remap_shader_info(m_sh_info, register_map, temp_register_map);
+
+ /* Mark inputs as used registers, these registers should not be remapped */
+ for (auto& v: sh.m_temp) {
+ if (v.second->type() == Value::gpr) {
+ const auto& g = static_cast<const GPRValue&>(*v.second);
+ if (g.is_input())
+ register_map[g.sel()].used = true;
+ }
+ }
+
+ int new_index = 0;
+ for (auto& i : register_map) {
+ i.valid = i.used;
+ if (i.used)
+ i.new_reg = new_index++;
+ }
+
+ ValueRemapper vmap1(register_map, temp_register_map);
+ for (auto& ir: m_output)
+ ir.remap_registers(vmap1);
+
+ remap_shader_info(m_sh_info, register_map, temp_register_map);
+}
+
+bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
+{
+ // m_uniform_type_map
+ m_uniform_type_map[uniform->data.location] = uniform->type;
+
+ if (uniform->type->contains_atomic()) {
+ int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
+ sh_info().nhwatomic += natomics;
+
+ if (uniform->type->is_array())
+ sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
+
+ sh_info().uses_atomics = 1;
+
+ struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
+ ++sh_info().nhwatomic_ranges;
+ atom.buffer_id = uniform->data.binding;
+ atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
+
+ atom.start = uniform->data.offset >> 2;
+ atom.end = atom.start + natomics - 1;
+
+ if (m_atomic_base_map.find(uniform->data.binding) ==
+ m_atomic_base_map.end())
+ m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;
+
+ m_next_hwatomic_loc += natomics;
+
+ m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;
+
+ sfn_log << SfnLog::io << "HW_ATOMIC file count: "
+ << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
+ }
+
+ auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
+ if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
+ sh_info().uses_images = 1;
+ if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo))
+ sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE;
+ }
+
+ if (uniform->type->is_image()) {
+ ++m_image_count;
+ }
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh)
+{
+ return true;
+}
+
+void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
+{
+ auto& dest = instr->dest;
+ unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
+ assert(util_bitcount(instr->modes) == 1);
+ m_var_mode[instr->var] = instr->modes;
+ m_var_derefs[index] = instr->var;
+
+ sfn_log << SfnLog::io << "Add var deref:" << index
+ << " with DDL:" << instr->var->data.driver_location << "\n";
+}
+
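+/* Compute the SPI semantic id used to match the outputs of one shader
+ * stage with the inputs of the next: position-like system values get 0
+ * (no matching needed), generic/texcoord params use their semantic index
+ * directly, and all other names are packed together with the index into
+ * eight bits.
+ */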
+void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
+{
+ switch (io.name) {
+ case TGSI_SEMANTIC_POSITION:
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ case TGSI_SEMANTIC_FACE:
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ io.spi_sid = 0;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ case TGSI_SEMANTIC_TEXCOORD:
+ case TGSI_SEMANTIC_PCOORD:
+ io.spi_sid = io.sid + 1;
+ break;
+ default:
+ /* For non-generic params - pack name and sid into 8 bits */
+ io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
+ }
+}
+
+const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
+{
+ unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;
+
+ sfn_log << SfnLog::io << "Search for deref:" << index << "\n";
+
+ auto v = m_var_derefs.find(index);
+ if (v != m_var_derefs.end())
+ return v->second;
+
+ fprintf(stderr, "R600: could not find deref with index %d\n", index);
+
+ return nullptr;
+
+ /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
+ return nir_deref_instr_get_variable(deref); */
+}
+
+bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
+{
+ return m_tex_instr.emit(instr);
+}
+
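+/* Emit an ALU instruction. If the new instruction addresses a kcache
+ * constant indirectly while a previous ALU group is still open, close
+ * that group first by setting its last-instruction flag; indexed kcache
+ * access apparently cannot join an already started group.
+ */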
+void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
+{
+ if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) {
+ for (unsigned i = 0; i < ir->n_sources(); ++i) {
+ auto& s = ir->src(i);
+ if (s.type() == Value::kconst) {
+ auto& c = static_cast<UniformValue&>(s);
+ if (c.addr()) {
+ last_emitted_alu->set_flag(alu_last_instr);
+ break;
+ }
+ }
+ }
+ }
+ last_emitted_alu = ir;
+ emit_instruction_internal(ir);
+}
+
+
+void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
+{
+
+ emit_instruction_internal(ir);
+ last_emitted_alu = nullptr;
+}
+
+void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
+{
+ if (m_pending_else) {
+ append_block(-1);
+ m_output.back().emit(PInstruction(m_pending_else));
+ append_block(1);
+ m_pending_else = nullptr;
+ }
+
+ r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
+ if (m_output.empty())
+ append_block(0);
+
+ m_output.back().emit(Instruction::Pointer(ir));
+}
+
+void ShaderFromNirProcessor::emit_shader_start()
+{
+ /* placeholder, may become an abstract method */
+ m_ssbo_instr.set_ssbo_offset(m_image_count);
+}
+
+bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
+{
+ switch (instr->type) {
+ case nir_jump_break: {
+ auto b = new LoopBreakInstruction();
+ emit_instruction(b);
+ return true;
+ }
+ case nir_jump_continue: {
+ auto b = new LoopContInstruction();
+ emit_instruction(b);
+ return true;
+ }
+ default: {
+ nir_instr *i = reinterpret_cast<nir_instr*>(instr);
+ sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
+{
+ return m_alu_instr.emit(instr);
+}
+
+bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
+{
+ return false;
+}
+
+bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
+{
+ LoopBeginInstruction *loop = new LoopBeginInstruction();
+ emit_instruction(loop);
+ m_loop_begin_block_map[loop_id] = loop;
+ append_block(1);
+ return true;
+}
+bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
+{
+ auto start = m_loop_begin_block_map.find(loop_id);
+ if (start == m_loop_begin_block_map.end()) {
+ sfn_log << SfnLog::err << "End loop: Loop start for "
+ << loop_id << " not found\n";
+ return false;
+ }
+ m_nesting_depth--;
+ m_block_number++;
+ m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
+ LoopEndInstruction *loop = new LoopEndInstruction(start->second);
+ emit_instruction(loop);
+
+ m_loop_begin_block_map.erase(start);
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
+{
+
+ auto value = from_nir(if_stmt->condition, 0, 0);
+ AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
+ value, Value::zero, EmitInstruction::last);
+ pred->set_flag(alu_update_exec);
+ pred->set_flag(alu_update_pred);
+ pred->set_cf_type(cf_alu_push_before);
+
+ append_block(1);
+
+ IfInstruction *ir = new IfInstruction(pred);
+ emit_instruction(ir);
+ assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
+ m_if_block_start_map[if_id] = ir;
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_else_start(int if_id)
+{
+ auto iif = m_if_block_start_map.find(if_id);
+ if (iif == m_if_block_start_map.end()) {
+ std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
+ return false;
+ }
+
+ if (iif->second->type() != Instruction::cond_if) {
+ std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
+ return false;
+ }
+ IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
+ ElseInstruction *ir = new ElseInstruction(if_instr);
+ m_if_block_start_map[if_id] = ir;
+ m_pending_else = ir;
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
+{
+ auto ifelse = m_if_block_start_map.find(if_id);
+ if (ifelse == m_if_block_start_map.end()) {
+ std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
+ return false;
+ }
+
+ if (ifelse->second->type() != Instruction::cond_if &&
+ ifelse->second->type() != Instruction::cond_else) {
+ std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
+ return false;
+ }
+ /* Clear the pending else; if the else branch was empty, none will be emitted */
+
+ m_pending_else = nullptr;
+
+ append_block(-1);
+ IfElseEndInstruction *ir = new IfElseEndInstruction();
+ emit_instruction(ir);
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
+{
+ PValue src = get_temp_register();
+ emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));
+
+ GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
+ emit_instruction(new FetchTCSIOParam(dest, src, offset));
+
+ return true;
+
+}
+
+bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
+{
+ auto address = varvec_from_nir(instr->src[0], instr->num_components);
+ auto dest_value = varvec_from_nir(instr->dest, instr->num_components);
+
+ emit_instruction(new LDSReadInstruction(address, dest_value));
+ return true;
+}
+
+static unsigned
+lds_op_from_intrinsic(nir_intrinsic_op op) {
+ switch (op) {
+ case nir_intrinsic_shared_atomic_add:
+ return LDS_OP2_LDS_ADD_RET;
+ case nir_intrinsic_shared_atomic_and:
+ return LDS_OP2_LDS_AND_RET;
+ case nir_intrinsic_shared_atomic_or:
+ return LDS_OP2_LDS_OR_RET;
+ case nir_intrinsic_shared_atomic_imax:
+ return LDS_OP2_LDS_MAX_INT_RET;
+ case nir_intrinsic_shared_atomic_umax:
+ return LDS_OP2_LDS_MAX_UINT_RET;
+ case nir_intrinsic_shared_atomic_imin:
+ return LDS_OP2_LDS_MIN_INT_RET;
+ case nir_intrinsic_shared_atomic_umin:
+ return LDS_OP2_LDS_MIN_UINT_RET;
+ case nir_intrinsic_shared_atomic_xor:
+ return LDS_OP2_LDS_XOR_RET;
+ case nir_intrinsic_shared_atomic_exchange:
+ return LDS_OP2_LDS_XCHG_RET;
+ case nir_intrinsic_shared_atomic_comp_swap:
+ return LDS_OP3_LDS_CMP_XCHG_RET;
+ default:
+ unreachable("Unsupported shared atomic opcode");
+ }
+}
+
+bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
+{
+ auto address = from_nir(instr->src[0], 0);
+ auto dest_value = from_nir(instr->dest, 0);
+ auto value = from_nir(instr->src[1], 0);
+ auto op = lds_op_from_intrinsic(instr->intrinsic);
+
+ if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
+ auto value2 = from_nir(instr->src[2], 0);
+ emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
+ } else {
+ emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
+ }
+ return true;
+}
+
+
+bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
+{
+ unsigned write_mask = nir_intrinsic_write_mask(instr);
+
+ auto address = from_nir(instr->src[1], 0);
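+ /* LDS writes store at most two components: if only z/w are written, start at component 2 and fold the upper write-mask bits onto the lower ones. */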
+ int swizzle_base = (write_mask & 0x3) ? 0 : 2;
+ write_mask |= write_mask >> 2;
+
+ auto value = from_nir(instr->src[0], swizzle_base);
+ if (!(write_mask & 2)) {
+ emit_instruction(new LDSWriteInstruction(address, 0, value));
+ } else {
+ auto value1 = from_nir(instr->src[0], swizzle_base + 1);
+ emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
+ }
+
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ if (emit_intrinsic_instruction_override(instr))
+ return true;
+
+ if (m_ssbo_instr.emit(&instr->instr)) {
+ m_sel.info.writes_memory = true;
+ return true;
+ }
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_deref: {
+ auto var = get_deref_location(instr->src[0]);
+ if (!var)
+ return false;
+ auto mode_helper = m_var_mode.find(var);
+ if (mode_helper == m_var_mode.end()) {
+ cerr << "r600-nir: variable '" << var->name << "' not found\n";
+ return false;
+ }
+ switch (mode_helper->second) {
+ case nir_var_function_temp:
+ return emit_load_function_temp(var, instr);
+ default:
+ cerr << "r600-nir: Unsupported mode" << mode_helper->second
+ << "for src variable\n";
+ return false;
+ }
+ }
+ case nir_intrinsic_store_scratch:
+ return emit_store_scratch(instr);
+ case nir_intrinsic_load_scratch:
+ return emit_load_scratch(instr);
+ case nir_intrinsic_load_uniform:
+ return load_uniform(instr);
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ return emit_discard_if(instr);
+ case nir_intrinsic_load_ubo_vec4:
+ return emit_load_ubo_vec4(instr);
+ case nir_intrinsic_load_tcs_in_param_base_r600:
+ return emit_load_tcs_param_base(instr, 0);
+ case nir_intrinsic_load_tcs_out_param_base_r600:
+ return emit_load_tcs_param_base(instr, 16);
+ case nir_intrinsic_load_local_shared_r600:
+ case nir_intrinsic_load_shared:
+ return emit_load_local_shared(instr);
+ case nir_intrinsic_store_local_shared_r600:
+ case nir_intrinsic_store_shared:
+ return emit_store_local_shared(instr);
+ case nir_intrinsic_control_barrier:
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ case nir_intrinsic_memory_barrier_shared:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_group_memory_barrier:
+ return emit_barrier(instr);
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ return true;
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ return emit_atomic_local_shared(instr);
+ case nir_intrinsic_shader_clock:
+ return emit_shader_clock(instr);
+ case nir_intrinsic_copy_deref:
+ case nir_intrinsic_load_constant:
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_store_output:
+
+ default:
+ fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
+ return false;
+ }
+ return false;
+}
+
+bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
+{
+ return false;
+}
+
+bool
+ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
+{
+ return false;
+}
+
+bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
+{
+ AluInstruction *ir = new AluInstruction(op0_group_barrier);
+ ir->set_flag(alu_last_instr);
+ emit_instruction(ir);
+ return true;
+}
+
+
+bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
+{
+ if (!dest.is_ssa) {
+ auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
+ if (as_last)
+ ir->set_flag(alu_last_instr);
+ emit_instruction(ir);
+ } else {
+ inject_register(dest.ssa.index, chan, value, true);
+ }
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
+{
+ PValue address = from_nir(instr->src[1], 0, 0);
+
+ auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
+ swizzle_from_comps(instr->num_components));
+
+ int writemask = nir_intrinsic_write_mask(instr);
+ int align = nir_intrinsic_align_mul(instr);
+ int align_offset = nir_intrinsic_align_offset(instr);
+
+ WriteScratchInstruction *ir = nullptr;
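+ /* A literal address can be encoded in the instruction itself; an indirect address must first be copied into a GPR. */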
+ if (address->type() == Value::literal) {
+ const auto& lv = static_cast<const LiteralValue&>(*address);
+ ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
+ } else {
+ address = from_nir_with_fetch_constant(instr->src[1], 0);
+ ir = new WriteScratchInstruction(address, value, align, align_offset,
+ writemask, m_scratch_size);
+ }
+ emit_instruction(ir);
+ sh_info().needs_scratch_space = 1;
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
+{
+ PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
+ std::array<PValue, 4> dst_val;
+ for (int i = 0; i < 4; ++i)
+ dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);
+
+ GPRVector dst(dst_val);
+ auto ir = new LoadFromScratch(dst, address, m_scratch_size);
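+ /* Make sure earlier scratch writes have been acknowledged before the read is issued. */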
+ ir->prelude_append(new WaitAck(0));
+ emit_instruction(ir);
+ sh_info().needs_scratch_space = 1;
+ return true;
+}
+
+bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr)
+{
+ emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0),
+ PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write));
+ emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1),
+ PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write));
+ return true;
+}
+
+GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
+ unsigned mask,
+ const GPRVector::Swizzle& swizzle,
+ bool match)
+{
+ bool use_same = true;
+ GPRVector::Values v;
+
+ std::array<bool,4> used_swizzles = {false, false, false, false};
+
+ /* Check whether all sources come from a GPR, and,
+ * if requested, whether they are swizzled as expected */
+
+ for (int i = 0; i < 4 && use_same; ++i) {
+ if ((1 << i) & mask) {
+ if (swizzle[i] < 4) {
+ v[i] = from_nir(src, swizzle[i]);
+ assert(v[i]);
+ use_same &= (v[i]->type() == Value::gpr);
+ if (match) {
+ use_same &= (v[i]->chan() == swizzle[i]);
+ }
+ used_swizzles[v[i]->chan()] = true;
+ }
+ }
+ }
+
+
+ /* Now check whether all inputs come from the same GPR, and fill
+ * empty slots in the vector with unused swizzles, bail out if
+ * the sources are not from the same GPR
+ */
+
+ if (use_same) {
+ int next_free_swizzle = 0;
+ while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
+ next_free_swizzle++;
+
+ /* Find the first GPR index used */
+ int i = 0;
+ while (i < 4 && !v[i]) ++i;
+ assert(i < 4);
+ unsigned sel = v[i]->sel();
+
+
+ for (i = 0; i < 4 && use_same; ++i) {
+ if (!v[i]) {
+ if (swizzle[i] >= 4)
+ v[i] = PValue(new GPRValue(sel, swizzle[i]));
+ else {
+ assert(next_free_swizzle < 4);
+ v[i] = PValue(new GPRValue(sel, next_free_swizzle));
+ used_swizzles[next_free_swizzle] = true;
+ while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
+ next_free_swizzle++;
+ }
+ }
+ else
+ use_same &= v[i]->sel() == sel;
+ }
+ }
+
+ /* We can't re-use the source data because they either need re-swizzling, or
+ * they didn't come all from a GPR or the same GPR, so copy to a new vector
+ */
+ if (!use_same) {
+ AluInstruction *ir = nullptr;
+ GPRVector result = get_temp_vec4(swizzle);
+ for (int i = 0; i < 4; ++i) {
+ if (swizzle[i] < 4 && (mask & (1 << i))) {
+ ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]),
+ EmitInstruction::write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return result;
+ } else
+ return GPRVector(v);
+}
+
+bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
+{
+ auto bufid = nir_src_as_const_value(instr->src[0]);
+ auto buf_offset = nir_src_as_const_value(instr->src[1]);
+
+ if (!buf_offset) {
+ /* TODO: if buf_offset is constant then this can also be solved by using the CF index
+ * on the ALU block, and this would probably make sense when there is more than
+ * one load with the same buffer ID. */
+
+ PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
+ GPRVector trgt;
+ std::array<int, 4> swz = {7,7,7,7};
+ for (unsigned i = 0; i < 4; ++i) {
+ if (i < nir_dest_num_components(instr->dest)) {
+ trgt.set_reg_i(i, from_nir(instr->dest, i));
+ swz[i] = i + nir_intrinsic_component(instr);
+ } else {
+ trgt.set_reg_i(i, from_nir(instr->dest, 7));
+ }
+ }
+
+ FetchInstruction *ir;
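+ /* A constant buffer id is encoded directly in the fetch; a dynamic id is passed in a register using buffer index mode. */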
+ if (bufid) {
+ ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
+ 1 + bufid->u32, nullptr, bim_none);
+ } else {
+ PValue bufid = from_nir(instr->src[0], 0, 0);
+ ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
+ 1, bufid, bim_zero);
+ }
+ ir->set_dest_swizzle(swz);
+ emit_instruction(ir);
+ m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
+ return true;
+ }
+
+
+ if (bufid) {
+ int buf_cmp = nir_intrinsic_component(instr);
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ int cmp = buf_cmp + i;
+ assert(cmp < 4);
+ auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
+ if (instr->dest.is_ssa)
+ load_preloaded_value(instr->dest, i, u);
+ else {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+
+ } else {
+ int buf_cmp = nir_intrinsic_component(instr);
+ AluInstruction *ir = nullptr;
+ auto kc_id = from_nir(instr->src[0], 0);
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ int cmp = buf_cmp + i;
+ auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
+ if (instr->dest.is_ssa)
+ load_preloaded_value(instr->dest, i, u);
+ else {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+ }
+}
+
+bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "' (" << __func__ << ")\n";
+
+ if (instr->intrinsic == nir_intrinsic_discard_if) {
+ emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
+ {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));
+
+ } else {
+ emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
+ {Value::zero, Value::zero}, {alu_last_instr}));
+ }
+ m_sh_info.uses_kill = 1;
+ return true;
+}
+
+bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "'\n";
+
+
+ /* If the target register is an SSA register and the load is not
+ * indirect, then we can load lazily, i.e. the uniform value can
+ * be used directly. Otherwise we have to load the data for real
+ * right away.
+ */
+ auto literal = nir_src_as_const_value(instr->src[0]);
+ int base = nir_intrinsic_base(instr);
+
+ if (literal) {
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
+ sfn_log << SfnLog::io << "uniform "
+ << instr->dest.ssa.index << " const[" << i << "]: " << instr->const_index[i] << "\n";
+
+ if (instr->dest.is_ssa)
+ load_preloaded_value(instr->dest, i, u);
+ else {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
+ u, {alu_write});
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ } else {
+ PValue addr = from_nir(instr->src[0], 0, 0);
+ return load_uniform_indirect(instr, addr, 16 * base, 0);
+ }
+ return true;
+}
+
+bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
+{
+ if (!addr) {
+ std::cerr << "r600-nir: don't know how uniform is addressed\n";
+ return false;
+ }
+
+ GPRVector trgt;
+ std::array<int, 4> swz = {7,7,7,7};
+ for (int i = 0; i < 4; ++i) {
+ trgt.set_reg_i(i, from_nir(instr->dest, i));
+ swz[i] = i;
+ }
+
+ if (addr->type() != Value::gpr) {
+ emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
+ addr = trgt.reg_i(0);
+ }
+
+ auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
+ bufferid, PValue(), bim_none);
+ ir->set_dest_swizzle(swz);
+ emit_instruction(ir);
+ m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
+ return true;
+}
+
+AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < literal->def.num_components ; ++i) {
+ if (writemask & (1 << i)){
+ PValue lsrc;
+ switch (literal->def.bit_size) {
+
+ case 1:
+ sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
+ lsrc = literal->value[i].b ?
+ PValue(new LiteralValue( 0xffffffff, i)) :
+ Value::zero;
+ break;
+ case 32:
+ sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
+ if (literal->value[i].u32 == 0)
+ lsrc = Value::zero;
+ else if (literal->value[i].u32 == 1)
+ lsrc = Value::one_i;
+ else if (literal->value[i].f32 == 1.0f)
+ lsrc = Value::one_f;
+ else if (literal->value[i].f32 == 0.5f)
+ lsrc = Value::zero_dot_5;
+ else
+ lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
+ break;
+ default:
+ sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
+ << " falling back to 32 bit\n";
+ lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
+ }
+ ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);
+
+ emit_instruction(ir);
+ }
+ }
+ return ir;
+}
+
+PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
+{
+ PValue value = from_nir(src, component);
+ if (value->type() != Value::gpr &&
+ value->type() != Value::gpr_vector &&
+ value->type() != Value::gpr_array_value) {
+ PValue retval = get_temp_register(channel);
+ emit_instruction(new AluInstruction(op1_mov, retval, value,
+ EmitInstruction::last_write));
+ value = retval;
+ }
+ return value;
+}
+
+bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
+{
+ r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+ << *reinterpret_cast<nir_instr*>(instr)
+ << "'\n";
+
+ /* Give the specific shader type a chance to process this, i.e. geometry and
+ * tessellation shaders need a specialized deref_array; for the other shaders
+ * it is lowered.
+ */
+ if (emit_deref_instruction_override(instr))
+ return true;
+
+ switch (instr->deref_type) {
+ case nir_deref_type_var:
+ set_var_address(instr);
+ return true;
+ case nir_deref_type_array:
+ case nir_deref_type_array_wildcard:
+ case nir_deref_type_struct:
+ case nir_deref_type_cast:
+ default:
+ fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
+ }
+ return false;
+}
+
+bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> srcs,
+ const std::set<AluModifiers>& m_flags)
+{
+ AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
+ emit_instruction(ir);
+ return true;
+}
+
+void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
+{
+ m_output_register_map[loc] = gpr;
+}
+
+void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
+{
+ r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
+ m_export_output.emit(PInstruction(ir));
+}
+
+const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
+{
+ const GPRVector *retval = nullptr;
+ auto val = m_output_register_map.find(location);
+ if (val != m_output_register_map.end())
+ retval = val->second;
+ return retval;
+}
+
+void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
+{
+ r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
+ m_inputs[pos] = var;
+}
+
+void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
+{
+ r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n";
+ m_outputs[pos] = sel;
+}
+
+void ShaderFromNirProcessor::append_block(int nesting_change)
+{
+ m_nesting_depth += nesting_change;
+ m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
+}
+
+void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const
+{
+ shader.num_arrays = m_reg_arrays.size();
+ if (shader.num_arrays) {
+ shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array));
+ for (unsigned i = 0; i < shader.num_arrays; ++i) {
+ shader.arrays[i].comp_mask = m_reg_arrays[i]->mask();
+ shader.arrays[i].gpr_start = m_reg_arrays[i]->sel();
+ shader.arrays[i].gpr_count = m_reg_arrays[i]->size();
+ }
+ shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
+ }
+}
+
+void ShaderFromNirProcessor::finalize()
+{
+ do_finalize();
+
+ for (auto& i : m_inputs)
+ m_sh_info.input[i.first].gpr = i.second->sel();
+
+ for (auto& i : m_outputs)
+ m_sh_info.output[i.first].gpr = i.second;
+
+ m_output.push_back(m_export_output);
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h
new file mode 100644
index 000000000..a48674dab
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h
@@ -0,0 +1,224 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_shader_from_nir_h
+#define sfn_shader_from_nir_h
+
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir_types.h"
+
+#include "sfn_instruction_block.h"
+#include "sfn_instruction_export.h"
+#include "sfn_alu_defines.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+#include "sfn_instruction_cf.h"
+#include "sfn_emittexinstruction.h"
+#include "sfn_emitaluinstruction.h"
+#include "sfn_emitssboinstruction.h"
+
+#include <vector>
+#include <set>
+#include <stack>
+#include <unordered_map>
+
+struct nir_instr;
+
+namespace r600 {
+
+extern SfnLog sfn_log;
+
+class ShaderFromNirProcessor : public ValuePool {
+public:
+ ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel,
+ r600_shader& sh_info, int scratch_size, enum chip_class _chip_class,
+ int atomic_base);
+ virtual ~ShaderFromNirProcessor();
+
+ void emit_instruction(Instruction *ir);
+
+ PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1);
+ GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask,
+ const GPRVector::Swizzle& swizzle, bool match = false);
+
+ bool emit_instruction(EAluOp opcode, PValue dest,
+ std::vector<PValue> src0,
+ const std::set<AluModifiers>& m_flags);
+ void emit_export_instruction(WriteoutInstruction *ir);
+ void emit_instruction(AluInstruction *ir);
+
+ void split_constants(nir_alu_instr* instr);
+ void remap_registers();
+
+ const nir_variable *get_deref_location(const nir_src& src) const;
+
+ r600_shader& sh_info() {return m_sh_info;}
+ void add_param_output_reg(int loc, const GPRVector *gpr);
+ void set_output(unsigned pos, int sel);
+ const GPRVector *output_register(unsigned location) const;
+ void evaluate_spi_sid(r600_shader_io &io);
+
+ enum chip_class get_chip_class() const;
+
+ int remap_atomic_base(int base) {
+ return m_atomic_base_map[base];
+ }
+
+ void get_array_info(r600_shader& shader) const;
+
+ virtual bool scan_inputs_read(const nir_shader *sh);
+
+protected:
+
+ void set_var_address(nir_deref_instr *instr);
+ void set_input(unsigned pos, PValue var);
+
+ bool scan_instruction(nir_instr *instr);
+
+ virtual bool scan_sysvalue_access(nir_instr *instr) = 0;
+
+ bool emit_if_start(int if_id, nir_if *if_stmt);
+ bool emit_else_start(int if_id);
+ bool emit_ifelse_end(int if_id);
+
+ bool emit_loop_start(int loop_id);
+ bool emit_loop_end(int loop_id);
+ bool emit_jump_instruction(nir_jump_instr *instr);
+
+ bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset);
+ bool emit_load_local_shared(nir_intrinsic_instr* instr);
+ bool emit_store_local_shared(nir_intrinsic_instr* instr);
+ bool emit_atomic_local_shared(nir_intrinsic_instr* instr);
+
+ bool emit_barrier(nir_intrinsic_instr* instr);
+
+ bool load_preloaded_value(const nir_dest& dest, int chan, PValue value,
+ bool as_last = true);
+
+ void inc_atomic_file_count();
+
+ enum ESlots {
+ es_face,
+ es_instanceid,
+ es_invocation_id,
+ es_patch_id,
+ es_pos,
+ es_rel_patch_id,
+ es_sample_mask_in,
+ es_sample_id,
+ es_sample_pos,
+ es_tess_factor_base,
+ es_vertexid,
+ es_tess_coord,
+ es_primitive_id,
+ es_helper_invocation,
+ es_last
+ };
+
+ std::bitset<es_last> m_sv_values;
+
+ bool allocate_reserved_registers();
+
+
+private:
+ virtual bool do_allocate_reserved_registers() = 0;
+
+
+ void emit_instruction_internal(Instruction *ir);
+
+ bool emit_alu_instruction(nir_instr *instr);
+ bool emit_deref_instruction(nir_deref_instr* instr);
+ bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
+ virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
+ bool emit_tex_instruction(nir_instr* instr);
+ bool emit_discard_if(nir_intrinsic_instr* instr);
+ bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
+ bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
+ bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufid);
+
+ /* Code creating functions */
+ bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
+ AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
+
+ bool load_uniform(nir_intrinsic_instr* instr);
+ bool process_uniforms(nir_variable *uniform);
+
+ void append_block(int nesting_change);
+
+ virtual void emit_shader_start();
+ virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
+
+ bool emit_store_scratch(nir_intrinsic_instr* instr);
+ bool emit_load_scratch(nir_intrinsic_instr* instr);
+ bool emit_shader_clock(nir_intrinsic_instr* instr);
+ virtual void do_finalize() = 0;
+
+ void finalize();
+ friend class ShaderFromNir;
+
+ std::set<nir_variable*> m_arrays;
+
+ std::map<unsigned, PValue> m_inputs;
+ std::map<unsigned, int> m_outputs;
+
+ std::map<unsigned, nir_variable*> m_var_derefs;
+ std::map<const nir_variable *, nir_variable_mode> m_var_mode;
+
+ std::map<unsigned, const glsl_type*> m_uniform_type_map;
+ std::map<int, IfElseInstruction *> m_if_block_start_map;
+ std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
+
+ pipe_shader_type m_processor_type;
+
+ std::vector<InstructionBlock> m_output;
+ unsigned m_nesting_depth;
+ unsigned m_block_number;
+ InstructionBlock m_export_output;
+ r600_shader& m_sh_info;
+ enum chip_class m_chip_class;
+ EmitTexInstruction m_tex_instr;
+ EmitAluInstruction m_alu_instr;
+ EmitSSBOInstruction m_ssbo_instr;
+ OutputRegisterMap m_output_register_map;
+
+ IfElseInstruction *m_pending_else;
+ int m_scratch_size;
+ int m_next_hwatomic_loc;
+
+ r600_pipe_shader_selector& m_sel;
+ int m_atomic_base;
+ int m_image_count;
+
+ std::unordered_map<int, int> m_atomic_base_map;
+ AluInstruction *last_emitted_alu;
+};
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
new file mode 100644
index 000000000..26ac54981
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
@@ -0,0 +1,114 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_shader_compute.h"
+#include "sfn_instruction_fetch.h"
+
+namespace r600 {
+
+ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector& sel,
+ UNUSED const r600_shader_key& key,
+ enum chip_class chip_class):
+ ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader,
+ sh->scratch_space_needed, chip_class, 0),
+ m_reserved_registers(0)
+{
+}
+
+bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr)
+{
+ return true;
+}
+bool ComputeShaderFromNir::do_allocate_reserved_registers()
+{
+ int thread_id_sel = m_reserved_registers++;
+ int wg_id_sel = m_reserved_registers++;
+
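+ /* The local invocation id and the workgroup id are preloaded into the first two GPRs; mark them as inputs and keep them alive. */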
+ for (int i = 0; i < 3; ++i) {
+ auto tmp = new GPRValue(thread_id_sel, i);
+ tmp->set_as_input();
+ tmp->set_keep_alive();
+ m_local_invocation_id[i] = PValue(tmp);
+ inject_register(tmp->sel(), i, m_local_invocation_id[i], false);
+
+ tmp = new GPRValue(wg_id_sel, i);
+ tmp->set_as_input();
+ tmp->set_keep_alive();
+ m_workgroup_id[i] = PValue(tmp);
+ inject_register(tmp->sel(), i, m_workgroup_id[i], false);
+ }
+ return true;
+}
+
+bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_local_invocation_id:
+ return emit_load_3vec(instr, m_local_invocation_id);
+ case nir_intrinsic_load_work_group_id:
+ return emit_load_3vec(instr, m_workgroup_id);
+ case nir_intrinsic_load_num_work_groups:
+ return emit_load_num_work_groups(instr);
+ default:
+ return false;
+ }
+}
+
+bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr,
+ const std::array<PValue,3>& src)
+{
+ for (int i = 0; i < 3; ++i)
+ load_preloaded_value(instr->dest, i, src[i], i == 2);
+ return true;
+}
+
+bool ComputeShaderFromNir::emit_load_num_work_groups(nir_intrinsic_instr* instr)
+{
+ PValue a_zero = get_temp_register(1);
+ emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write));
+ GPRVector dest;
+ for (int i = 0; i < 3; ++i)
+ dest.set_reg_i(i, from_nir(instr->dest, i));
+ dest.set_reg_i(3, from_nir(instr->dest, 7));
+
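+ /* The grid size is provided by the driver in the buffer info constant buffer; fetch it from there as three 32-bit integers. */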
+ auto ir = new FetchInstruction(vc_fetch, no_index_offset,
+ fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16,
+ false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0,
+ bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7});
+ ir->set_flag(vtx_srf_mode);
+ emit_instruction(ir);
+ return true;
+}
+
+void ComputeShaderFromNir::do_finalize()
+{
+
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h
new file mode 100644
index 000000000..fea6f0122
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h
@@ -0,0 +1,62 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H
+#define SFN_COMPUTE_SHADER_FROM_NIR_H
+
+#include "sfn_shader_base.h"
+#include "sfn_shaderio.h"
+#include <bitset>
+
+namespace r600 {
+
+class ComputeShaderFromNir : public ShaderFromNirProcessor
+{
+public:
+ ComputeShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector& sel,
+ const r600_shader_key &key,
+ enum chip_class chip_class);
+
+ bool scan_sysvalue_access(nir_instr *instr) override;
+
+private:
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+
+ bool do_allocate_reserved_registers() override;
+ void do_finalize() override;
+
+ bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src);
+ bool emit_load_num_work_groups(nir_intrinsic_instr* instr);
+
+ int m_reserved_registers;
+ std::array<PValue,3> m_workgroup_id;
+ std::array<PValue,3> m_local_invocation_id;
+};
+
+}
+
+#endif // SFN_COMPUTE_SHADER_FROM_NIR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
new file mode 100644
index 000000000..b13cb8a8a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp
@@ -0,0 +1,1088 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "sfn_shader_fragment.h"
+#include "sfn_instruction_fetch.h"
+
+namespace r600 {
+
+FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
+ r600_shader& sh,
+ r600_pipe_shader_selector &sel,
+ const r600_shader_key &key,
+ enum chip_class chip_class):
+ ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
+ m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
+ m_max_counted_color_exports(0),
+ m_two_sided_color(key.ps.color_two_side),
+ m_last_pixel_export(nullptr),
+ m_nir(nir),
+ m_reserved_registers(0),
+ m_frag_pos_index(0),
+ m_need_back_color(false),
+ m_front_face_loaded(false),
+ m_depth_exports(0),
+ m_apply_sample_mask(key.ps.apply_sample_id_mask),
+ m_dual_source_blend(key.ps.dual_source_blend),
+ m_pos_input(nullptr)
+{
+ for (auto& i: m_interpolator) {
+ i.enabled = false;
+ i.ij_index= 0;
+ }
+
+ sh_info().rat_base = key.ps.nr_cbufs;
+ sh_info().atomic_base = key.ps.first_atomic_counter;
+}
+
+unsigned barycentric_ij_index(nir_intrinsic_instr *instr)
+{
+ unsigned index = 0;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_barycentric_sample:
+ index = 0;
+ break;
+ case nir_intrinsic_load_barycentric_at_sample:
+ case nir_intrinsic_load_barycentric_at_offset:
+ case nir_intrinsic_load_barycentric_pixel:
+ index = 1;
+ break;
+ case nir_intrinsic_load_barycentric_centroid:
+ index = 2;
+ break;
+ default:
+ unreachable("Unknown interpolator intrinsic");
+ }
+
+ switch (nir_intrinsic_interp_mode(instr)) {
+ case INTERP_MODE_NONE:
+ case INTERP_MODE_SMOOTH:
+ case INTERP_MODE_COLOR:
+ return index;
+ case INTERP_MODE_NOPERSPECTIVE:
+ return index + 3;
+ case INTERP_MODE_FLAT:
+ case INTERP_MODE_EXPLICIT:
+ default:
+ unreachable("unknown/unsupported mode for load_interpolated");
+ }
+ return 0;
+}
+
+bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr,
+ bool interpolated)
+{
+ sfn_log << SfnLog::io << "Parse " << instr->instr
+ << "\n";
+
+ auto index = nir_src_as_const_value(instr->src[interpolated ? 1 : 0]);
+ assert(index);
+
+ unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32;
+ auto semantic = r600_get_varying_semantic(location);
+ tgsi_semantic name = (tgsi_semantic)semantic.first;
+ unsigned sid = semantic.second;
+
+
+ if (location == VARYING_SLOT_POS) {
+ m_sv_values.set(es_pos);
+ m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
+ nir_intrinsic_component(instr),
+ nir_dest_num_components(instr->dest),
+ TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER);
+ m_shaderio.add_input(m_pos_input);
+ return true;
+ }
+
+ if (location == VARYING_SLOT_FACE) {
+ m_sv_values.set(es_face);
+ return true;
+ }
+
+
+ tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT;
+ tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
+
+ bool uses_interpol_at_centroid = false;
+
+ if (interpolated) {
+
+ glsl_interp_mode mode = INTERP_MODE_NONE;
+ auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
+ mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent);
+ switch (parent->intrinsic) {
+ case nir_intrinsic_load_barycentric_sample:
+ tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
+ break;
+ case nir_intrinsic_load_barycentric_at_sample:
+ case nir_intrinsic_load_barycentric_at_offset:
+ case nir_intrinsic_load_barycentric_pixel:
+ tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER;
+ break;
+ case nir_intrinsic_load_barycentric_centroid:
+ tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID;
+ uses_interpol_at_centroid = true;
+ break;
+ default:
+ std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of "
+ << nir_intrinsic_infos[instr->intrinsic].name
+ << " interpolator?\n";
+ assert(0);
+ }
+
+ switch (mode) {
+ case INTERP_MODE_NONE:
+ if (name == TGSI_SEMANTIC_COLOR) {
+ tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
+ break;
+ }
+ FALLTHROUGH;
+ case INTERP_MODE_SMOOTH:
+ tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ break;
+ case INTERP_MODE_NOPERSPECTIVE:
+ tgsi_interpolate = TGSI_INTERPOLATE_LINEAR;
+ break;
+ case INTERP_MODE_FLAT:
+ break;
+ case INTERP_MODE_COLOR:
+ tgsi_interpolate = TGSI_INTERPOLATE_COLOR;
+ break;
+ case INTERP_MODE_EXPLICIT:
+ default:
+ assert(0);
+ }
+
+ m_interpolators_used.set(barycentric_ij_index(parent));
+
+ }
+
+ switch (name) {
+ case TGSI_SEMANTIC_COLOR: {
+ auto input = m_shaderio.find_varying(name, sid);
+ if (!input) {
+ m_shaderio.add_input(new ShaderInputColor(name, sid,
+ nir_intrinsic_base(instr) + index->u32,
+ nir_intrinsic_component(instr),
+ nir_dest_num_components(instr->dest),
+ tgsi_interpolate, tgsi_loc));
+ } else {
+ if (uses_interpol_at_centroid)
+ input->set_uses_interpolate_at_centroid();
+
+ auto varying = static_cast<ShaderInputVarying&>(*input);
+ varying.update_mask(nir_dest_num_components(instr->dest),
+ nir_intrinsic_component(instr));
+ }
+
+ m_need_back_color = m_two_sided_color;
+ return true;
+ }
+ case TGSI_SEMANTIC_PRIMID:
+ sh_info().gs_prim_id_input = true;
+ sh_info().ps_prim_id_input = m_shaderio.inputs().size();
+ FALLTHROUGH;
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ case TGSI_SEMANTIC_TEXCOORD:
+ case TGSI_SEMANTIC_LAYER:
+ case TGSI_SEMANTIC_PCOORD:
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ case TGSI_SEMANTIC_CLIPDIST: {
+ auto input = m_shaderio.find_varying(name, sid);
+ if (!input) {
+ m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32,
+ nir_intrinsic_component(instr),
+ nir_dest_num_components(instr->dest),
+ tgsi_interpolate, tgsi_loc));
+ } else {
+ if (uses_interpol_at_centroid)
+ input->set_uses_interpolate_at_centroid();
+
+ auto varying = static_cast<ShaderInputVarying&>(*input);
+ varying.update_mask(nir_dest_num_components(instr->dest),
+ nir_intrinsic_component(instr));
+ }
+
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+
+bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+
+ switch (ii->intrinsic) {
+ case nir_intrinsic_load_front_face:
+ m_sv_values.set(es_face);
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ m_sv_values.set(es_sample_mask_in);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ m_sv_values.set(es_sample_pos);
+ FALLTHROUGH;
+ case nir_intrinsic_load_sample_id:
+ m_sv_values.set(es_sample_id);
+ break;
+ case nir_intrinsic_load_helper_invocation:
+ m_sv_values.set(es_helper_invocation);
+ sh_info().uses_helper_invocation = true;
+ break;
+ case nir_intrinsic_load_input:
+ return process_load_input(ii, false);
+ case nir_intrinsic_load_interpolated_input: {
+ return process_load_input(ii, true);
+ }
+ case nir_intrinsic_store_output:
+ return process_store_output(ii);
+
+ default:
+ ;
+ }
+ }
+ default:
+ ;
+ }
+ return true;
+}
+
+bool FragmentShaderFromNir::do_allocate_reserved_registers()
+{
+ assert(!m_reserved_registers);
+
+ int face_reg_index = -1;
+ int sample_id_index = -1;
+ // enable interpolators based on the inputs
+ for (unsigned i = 0; i < s_max_interpolators; ++i) {
+ if (m_interpolators_used.test(i)) {
+ sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
+ m_interpolator[i].enabled = true;
+ }
+ }
+
+ // sort the varying inputs
+ m_shaderio.sort_varying_inputs();
+
+ // handle interpolators
+ int num_baryc = 0;
+ for (int i = 0; i < 6; ++i) {
+ if (m_interpolator[i].enabled) {
+ sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
+
+ m_interpolator[i].ij_index = num_baryc;
+
+ unsigned sel = num_baryc / 2;
+ unsigned chan = 2 * (num_baryc % 2);
+
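+ /* Two barycentric (i,j) pairs are packed per GPR: j in the even channel, i in the odd one. */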
+ auto ip_i = new GPRValue(sel, chan + 1);
+ ip_i->set_as_input();
+ m_interpolator[i].i.reset(ip_i);
+ inject_register(sel, chan + 1, m_interpolator[i].i, false);
+
+ auto ip_j = new GPRValue(sel, chan);
+ ip_j->set_as_input();
+ m_interpolator[i].j.reset(ip_j);
+ inject_register(sel, chan, m_interpolator[i].j, false);
+
+ ++num_baryc;
+ }
+ }
+ m_reserved_registers += (num_baryc + 1) >> 1;
+
+ if (m_sv_values.test(es_pos)) {
+ m_frag_pos_index = m_reserved_registers++;
+ assert(m_pos_input);
+ m_pos_input->set_gpr(m_frag_pos_index);
+ }
+
+ // handle system values
+ if (m_sv_values.test(es_face) || m_need_back_color) {
+ face_reg_index = m_reserved_registers++;
+ m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0);
+ m_front_face_reg->set_as_input();
+ sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
+ inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false);
+
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
+ load_front_face();
+ }
+
+ if (m_sv_values.test(es_sample_mask_in)) {
+ if (face_reg_index < 0)
+ face_reg_index = m_reserved_registers++;
+
+ m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2);
+ m_sample_mask_reg->set_as_input();
+ sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
+ sh_info().nsys_inputs = 1;
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
+ }
+
+ if (m_sv_values.test(es_sample_id) ||
+ m_sv_values.test(es_sample_mask_in)) {
+ if (sample_id_index < 0)
+ sample_id_index = m_reserved_registers++;
+
+ m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
+ m_sample_id_reg->set_as_input();
+ sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
+ sh_info().nsys_inputs++;
+ m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
+ }
+
+ // Back color handling is not emitted by the generated code, so we have
+ // to add the inputs here, and later we also need to inject the code that
+ // selects the right color.
+ if (m_need_back_color) {
+ size_t ninputs = m_shaderio.inputs().size();
+ for (size_t k = 0; k < ninputs; ++k) {
+ ShaderInput& i = m_shaderio.input(k);
+
+ if (i.name() != TGSI_SEMANTIC_COLOR)
+ continue;
+
+ ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
+
+ size_t next_pos = m_shaderio.size();
+ auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
+ m_shaderio.add_input(bcol);
+ col.set_back_color(next_pos);
+ }
+ m_shaderio.set_two_sided();
+ }
+
+ m_shaderio.update_lds_pos();
+
+ set_reserved_registers(m_reserved_registers);
+
+ return true;
+}
+
+void FragmentShaderFromNir::emit_shader_start()
+{
+ if (m_sv_values.test(es_face))
+ load_front_face();
+
+ if (m_sv_values.test(es_pos)) {
+ for (int i = 0; i < 4; ++i) {
+ auto v = new GPRValue(m_frag_pos_index, i);
+ v->set_as_input();
+ auto reg = PValue(v);
+ if (i == 3)
+ emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
+ m_frag_pos[i] = reg;
+ }
+ }
+
+ if (m_sv_values.test(es_helper_invocation)) {
+ m_helper_invocation = get_temp_register();
+ auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
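+ /* Preload -1, then overwrite it with a buffer fetch that runs in valid-pixel mode, so helper invocations keep the -1. */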
+ emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
+ GPRVector dst({dummy, dummy, dummy, dummy});
+ std::array<int,4> swz = {7,7,7,7};
+ dst.set_reg_i(m_helper_invocation->chan(), m_helper_invocation);
+ swz[m_helper_invocation->chan()] = 4;
+
+ auto vtx = new FetchInstruction(dst, m_helper_invocation,
+ R600_BUFFER_INFO_CONST_BUFFER, bim_none);
+ vtx->set_flag(vtx_vpm);
+ vtx->set_flag(vtx_use_tc);
+ vtx->set_dest_swizzle(swz);
+ emit_instruction(vtx);
+ }
+}
+
+bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
+{
+
+ auto semantic = nir_intrinsic_io_semantics(instr);
+ unsigned driver_loc = nir_intrinsic_base(instr);
+
+ if (sh_info().noutput <= driver_loc)
+ sh_info().noutput = driver_loc + 1;
+
+ r600_shader_io& io = sh_info().output[driver_loc];
+ tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(semantic.location),
+ &io.name, &io.sid);
+
+ unsigned component = nir_intrinsic_component(instr);
+ io.write_mask |= nir_intrinsic_write_mask(instr) << component;
+
+ if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
+ sh_info().fs_write_all = true;
+ }
+
+ if (semantic.location == FRAG_RESULT_COLOR ||
+ (semantic.location >= FRAG_RESULT_DATA0 &&
+ semantic.location <= FRAG_RESULT_DATA7)) {
+ ++m_max_counted_color_exports;
+
+ /* Hack: force dual source output handling if one color output has a
+ * dual_source_blend_index > 0 */
+ if (semantic.location == FRAG_RESULT_COLOR &&
+ semantic.dual_source_blend_index > 0)
+ m_dual_source_blend = true;
+
+ if (m_max_counted_color_exports > 1)
+ sh_info().fs_write_all = false;
+ return true;
+ }
+
+ if (semantic.location == FRAG_RESULT_DEPTH ||
+ semantic.location == FRAG_RESULT_STENCIL ||
+ semantic.location == FRAG_RESULT_SAMPLE_MASK) {
+ io.write_mask = 15;
+ return true;
+ }
+
+ return false;
+
+
+}
+
+bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
+{
+ auto dest = from_nir(instr->dest, 0);
+ assert(m_sample_id_reg);
+ assert(m_sample_mask_reg);
+
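+ /* Reduce the input coverage mask to this sample's bit: (1 << sample_id) & sample_mask_in. */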
+ emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
+ emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
+ return true;
+}
+
+bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_sample_mask_in:
+ if (m_apply_sample_mask) {
+ return emit_load_sample_mask_in(instr);
+ } else
+ return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
+ case nir_intrinsic_load_sample_id:
+ return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
+ case nir_intrinsic_load_front_face:
+ return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+ case nir_intrinsic_load_sample_pos:
+ return emit_load_sample_pos(instr);
+ case nir_intrinsic_load_helper_invocation:
+ return load_preloaded_value(instr->dest, 0, m_helper_invocation);
+ case nir_intrinsic_load_input:
+ return emit_load_input(instr);
+ case nir_intrinsic_load_barycentric_sample:
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid: {
+ unsigned ij = barycentric_ij_index(instr);
+ return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
+ load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
+ }
+ case nir_intrinsic_load_barycentric_at_offset:
+ return load_barycentric_at_offset(instr);
+ case nir_intrinsic_load_barycentric_at_sample:
+ return load_barycentric_at_sample(instr);
+
+ case nir_intrinsic_load_interpolated_input: {
+ return emit_load_interpolated_input(instr);
+ }
+ case nir_intrinsic_store_output:
+ return emit_store_output(instr);
+
+ default:
+ return false;
+ }
+}
+
+bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
+{
+ auto location = nir_intrinsic_io_semantics(instr).location;
+
+ if (location == FRAG_RESULT_COLOR)
+ return emit_export_pixel(instr, m_dual_source_blend ? 1 : m_max_color_exports);
+
+ if ((location >= FRAG_RESULT_DATA0 &&
+ location <= FRAG_RESULT_DATA7) ||
+ location == FRAG_RESULT_DEPTH ||
+ location == FRAG_RESULT_STENCIL ||
+ location == FRAG_RESULT_SAMPLE_MASK)
+ return emit_export_pixel(instr, 1);
+
+ sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << ")\n";
+ return false;
+
+}
+
+bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
+{
+ unsigned loc = nir_intrinsic_io_semantics(instr).location;
+ switch (loc) {
+ case VARYING_SLOT_POS:
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ load_preloaded_value(instr->dest, i, m_frag_pos[i]);
+ }
+ return true;
+ case VARYING_SLOT_FACE:
+ return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+ default:
+ ;
+ }
+
+ auto param = nir_src_as_const_value(instr->src[1]);
+ assert(param && "Indirect PS inputs not (yet) supported");
+
+ auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
+ auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
+
+ io.set_gpr(dst.sel());
+
+ Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
+
+
+ if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
+ nir_intrinsic_component(instr)))
+ return false;
+
+ if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
+
+ auto & color_input = static_cast<ShaderInputColor&> (io);
+ auto& bgio = m_shaderio.input(color_input.back_color_input_index());
+
+ GPRVector bgcol = get_temp_vec4();
+ bgio.set_gpr(bgcol.sel());
+ load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
+
+ load_front_face();
+
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+
+ AluInstruction *ir = nullptr;
+ if (nir_intrinsic_component(instr) != 0) {
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ return true;
+}
+
+bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
+{
+ auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+ PValue dummy(new GPRValue(interpolator.i->sel(), 0));
+
+ GPRVector help = get_temp_vec4();
+ GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
+
+ auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
+ getgradh->set_dest_swizzle({0,1,7,7});
+ getgradh->set_flag(TexInstruction::x_unnormalized);
+ getgradh->set_flag(TexInstruction::y_unnormalized);
+ getgradh->set_flag(TexInstruction::z_unnormalized);
+ getgradh->set_flag(TexInstruction::w_unnormalized);
+ getgradh->set_flag(TexInstruction::grad_fine);
+ emit_instruction(getgradh);
+
+ auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
+ getgradv->set_dest_swizzle({7,7,0,1});
+ getgradv->set_flag(TexInstruction::x_unnormalized);
+ getgradv->set_flag(TexInstruction::y_unnormalized);
+ getgradv->set_flag(TexInstruction::z_unnormalized);
+ getgradv->set_flag(TexInstruction::w_unnormalized);
+ getgradv->set_flag(TexInstruction::grad_fine);
+ emit_instruction(getgradv);
+
+ PValue ofs_x = from_nir(instr->src[0], 0);
+ PValue ofs_y = from_nir(instr->src[0], 1);
+ emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
+ emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
+
+ return true;
+}
+
+bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
+{
+ GPRVector slope = get_temp_vec4();
+
+ auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
+ from_nir_with_fetch_constant(instr->src[0], 0),
+ 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
+ fetch->set_flag(vtx_srf_mode);
+ emit_instruction(fetch);
+
+ GPRVector grad = get_temp_vec4();
+
+ auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+ assert(interpolator.enabled);
+ PValue dummy(new GPRValue(interpolator.i->sel(), 0));
+
+ GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
+
+ auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
+ tex->set_flag(TexInstruction::grad_fine);
+ tex->set_flag(TexInstruction::x_unnormalized);
+ tex->set_flag(TexInstruction::y_unnormalized);
+ tex->set_flag(TexInstruction::z_unnormalized);
+ tex->set_flag(TexInstruction::w_unnormalized);
+ tex->set_dest_swizzle({0,1,7,7});
+ emit_instruction(tex);
+
+ tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
+ tex->set_flag(TexInstruction::x_unnormalized);
+ tex->set_flag(TexInstruction::y_unnormalized);
+ tex->set_flag(TexInstruction::z_unnormalized);
+ tex->set_flag(TexInstruction::w_unnormalized);
+ tex->set_flag(TexInstruction::grad_fine);
+ tex->set_dest_swizzle({7,7,0,1});
+ emit_instruction(tex);
+
+ emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
+
+ emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write}));
+ emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr}));
+
+ return true;
+}
+
+bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr)
+{
+ unsigned loc = nir_intrinsic_io_semantics(instr).location;
+ auto param = nir_src_as_const_value(instr->src[0]);
+ assert(param && "Indirect PS inputs not (yet) supported");
+
+ auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
+
+ assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
+
+ unsigned num_components = nir_dest_num_components(instr->dest);
+
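+   /* Position and face are preloaded into registers at shader start; all
+    * other inputs are read from their parameter slot further below. */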
+ switch (loc) {
+ case VARYING_SLOT_POS:
+ for (unsigned i = 0; i < num_components; ++i) {
+ load_preloaded_value(instr->dest, i, m_frag_pos[i]);
+ }
+ return true;
+ case VARYING_SLOT_FACE:
+ return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+ default:
+ ;
+ }
+
+ auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
+
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op1_interp_load_p0, dst[i],
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE +
+ io.lds_pos(), i)),
+ EmitInstruction::write);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ /* TODO: back color */
+ if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
+ Interpolator ip = {false, 0, NULL, NULL};
+
+ auto & color_input = static_cast<ShaderInputColor&> (io);
+ auto& bgio = m_shaderio.input(color_input.back_color_input_index());
+
+ GPRVector bgcol = get_temp_vec4();
+ bgio.set_gpr(bgcol.sel());
+ load_interpolated(bgcol, bgio, ip, num_components, 0);
+
+ load_front_face();
+
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ if (nir_intrinsic_component(instr) != 0) {
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
+ emit_instruction(ir);
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ }
+
+ return true;
+}
+
+void FragmentShaderFromNir::load_front_face()
+{
+ assert(m_front_face_reg);
+ if (m_front_face_loaded)
+ return;
+
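+   /* The face input is sign-encoded; SETGE_DX10 against zero converts it
+    * into the all-ones/zero mask consumed by the front/back color CNDE. */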
+ auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
+ Value::zero, {alu_write, alu_last_instr});
+ m_front_face_loaded = true;
+ emit_instruction(ir);
+}
+
+bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
+{
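+   /* The sample position is read from the buffer-info constant buffer,
+    * using the preloaded sample id register as index. */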
+ GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
+ auto fetch = new FetchInstruction(vc_fetch,
+ no_index_offset,
+ fmt_32_32_32_32_float,
+ vtx_nf_scaled,
+ vtx_es_none,
+ m_sample_id_reg,
+ dest,
+ 0,
+ false,
+ 0xf,
+ R600_BUFFER_INFO_CONST_BUFFER,
+ 0,
+ bim_none,
+ false,
+ false,
+ 0,
+ 0,
+ 0,
+ PValue(),
+ {0,1,2,3});
+ fetch->set_flag(vtx_srf_mode);
+ emit_instruction(fetch);
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
+ ShaderInput& io, const Interpolator &ip,
+ int num_components, int start_comp)
+{
+ // replace io with ShaderInputVarying
+ if (io.interpolate() > 0) {
+
+ sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i << ")" << "\n";
+
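+      /* The interpolation ops work on fixed channel pairs: interp_xy yields
+       * components 0/1 and interp_zw components 2/3, so pick the cheapest
+       * op combination for the requested component range. */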
+ if (num_components == 1) {
+ switch (start_comp) {
+ case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
+ case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
+ case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
+ case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
+ default:
+ assert(0);
+ }
+ }
+
+ if (num_components == 2) {
+ switch (start_comp) {
+ case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
+ case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
+ case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
+ load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
+ default:
+ assert(0);
+ }
+ }
+
+ if (num_components == 3 && start_comp == 0)
+ return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
+ load_interpolated_one_comp(dest, io, ip, op2_interp_z);
+
+ int full_write_mask = ((1 << num_components) - 1) << start_comp;
+
+ bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
+ success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
+ return success;
+
+ } else {
+ AluInstruction *ir = nullptr;
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op1_interp_load_p0, dest[i],
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
+ EmitInstruction::write);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ }
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
+ ShaderInput& io, const Interpolator& ip, EAluOp op)
+{
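+   /* Even for one component the interp op consumes the full (i, j) pair,
+    * so both slots are emitted and only the requested channel is written. */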
+ for (unsigned i = 0; i < 2 ; ++i) {
+ int chan = i;
+ if (op == op2_interp_z)
+ chan += 2;
+
+ auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
+ i == 0 ? EmitInstruction::write : EmitInstruction::last);
+ dest.pin_to_channel(chan);
+
+ ir->set_bank_swizzle(alu_vec_210);
+ emit_instruction(ir);
+ }
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
+ const Interpolator& ip, EAluOp op, int writemask)
+{
+ AluInstruction *ir = nullptr;
+ assert(ip.j);
+ assert(ip.i);
+ for (unsigned i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
+ (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
+ dest.pin_to_channel(i);
+ ir->set_bank_swizzle(alu_vec_210);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
+ ShaderInput& io, const Interpolator& ip,
+ EAluOp op, UNUSED int start, int comp)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
+ PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
+ i == comp ? EmitInstruction::write : EmitInstruction::empty);
+ ir->set_bank_swizzle(alu_vec_210);
+ dest.pin_to_channel(i);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+
+bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs)
+{
+ std::array<uint32_t,4> swizzle;
+ unsigned writemask = nir_intrinsic_write_mask(instr);
+ auto semantics = nir_intrinsic_io_semantics(instr);
+ unsigned driver_location = nir_intrinsic_base(instr);
+
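+   /* Depth, stencil, and sample mask leave through fixed channels of the
+    * depth export (x, y, and z respectively); a 7 in the swizzle marks a
+    * masked channel. Color results keep their component order. */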
+ switch (semantics.location) {
+ case FRAG_RESULT_DEPTH:
+ writemask = 1;
+ swizzle = {0,7,7,7};
+ break;
+ case FRAG_RESULT_STENCIL:
+ writemask = 2;
+ swizzle = {7,0,7,7};
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ writemask = 4;
+ swizzle = {7,7,0,7};
+ break;
+ default:
+ for (int i = 0; i < 4; ++i) {
+ swizzle[i] = (i < instr->num_components) ? i : 7;
+ }
+ }
+
+ auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle);
+
+ set_output(driver_location, value.sel());
+
+ if (semantics.location == FRAG_RESULT_COLOR ||
+ (semantics.location >= FRAG_RESULT_DATA0 &&
+ semantics.location <= FRAG_RESULT_DATA7)) {
+ for (int k = 0 ; k < outputs; ++k) {
+
+ unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR)
+ ? semantics.dual_source_blend_index : driver_location) + k - m_depth_exports;
+
+ sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n";
+
+ if (location >= m_max_color_exports) {
+ sfn_log << SfnLog::io << "Pixel output loc:" << location
+ << " dl:" << driver_location
+ << " skipped because we have only " << m_max_color_exports << " CBs\n";
+ continue;
+ }
+
+ m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
+
+ if (sh_info().ps_export_highest < location)
+ sh_info().ps_export_highest = location;
+
+ sh_info().nr_ps_color_exports++;
+
+ unsigned mask = (0xfu << (location * 4));
+ sh_info().ps_color_export_mask |= mask;
+
+ emit_export_instruction(m_last_pixel_export);
+      }
+ } else if (semantics.location == FRAG_RESULT_DEPTH ||
+ semantics.location == FRAG_RESULT_STENCIL ||
+ semantics.location == FRAG_RESULT_SAMPLE_MASK) {
+ m_depth_exports++;
+ emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
+ } else {
+ return false;
+ }
+ return true;
+}
+
+
+bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs)
+{
+ std::array<uint32_t,4> swizzle;
+ unsigned writemask = nir_intrinsic_write_mask(instr);
+ switch (out_var->data.location) {
+ case FRAG_RESULT_DEPTH:
+ writemask = 1;
+ swizzle = {0,7,7,7};
+ break;
+ case FRAG_RESULT_STENCIL:
+ writemask = 2;
+ swizzle = {7,0,7,7};
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ writemask = 4;
+ swizzle = {7,7,0,7};
+ break;
+ default:
+ for (int i = 0; i < 4; ++i) {
+ swizzle[i] = (i < instr->num_components) ? i : 7;
+ }
+ }
+
+ auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
+
+ set_output(out_var->data.driver_location, value.sel());
+
+ if (out_var->data.location == FRAG_RESULT_COLOR ||
+ (out_var->data.location >= FRAG_RESULT_DATA0 &&
+ out_var->data.location <= FRAG_RESULT_DATA7)) {
+ for (int k = 0 ; k < outputs; ++k) {
+
+ unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR)
+ ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports;
+
+ sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n";
+
+ if (location >= m_max_color_exports) {
+ sfn_log << SfnLog::io << "Pixel output loc:" << location
+ << " dl:" << out_var->data.location
+ << " skipped because we have only " << m_max_color_exports << " CBs\n";
+ continue;
+ }
+
+ m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
+
+ if (sh_info().ps_export_highest < location)
+ sh_info().ps_export_highest = location;
+
+ sh_info().nr_ps_color_exports++;
+
+ unsigned mask = (0xfu << (location * 4));
+ sh_info().ps_color_export_mask |= mask;
+
+ emit_export_instruction(m_last_pixel_export);
+      }
+ } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
+ out_var->data.location == FRAG_RESULT_STENCIL ||
+ out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
+ m_depth_exports++;
+ emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
+ } else {
+ return false;
+ }
+ return true;
+}
+
+void FragmentShaderFromNir::do_finalize()
+{
+ // update shader io info and set LDS etc.
+ sh_info().ninput = m_shaderio.inputs().size();
+
+ sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
+ for (size_t i = 0; i < sh_info().ninput; ++i) {
+ ShaderInput& input = m_shaderio.input(i);
+ int ij_idx = (input.ij_index() < 6 &&
+ input.ij_index() >= 0) ? input.ij_index() : 0;
+ input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
+ }
+
+ sh_info().two_side = m_shaderio.two_sided();
+ sh_info().nlds = m_shaderio.nlds();
+
+ sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
+
+ if (sh_info().fs_write_all) {
+ sh_info().nr_ps_max_color_exports = m_max_color_exports;
+ }
+
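+   /* The hardware expects at least one pixel export; if the shader wrote
+    * none, emit a dummy export with all channels masked. */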
+ if (!m_last_pixel_export) {
+ GPRVector v(0, {7,7,7,7});
+ m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
+ sh_info().nr_ps_color_exports++;
+ sh_info().ps_color_export_mask = 0xf;
+ emit_export_instruction(m_last_pixel_export);
+ }
+
+ m_last_pixel_export->set_last();
+
+ if (sh_info().fs_write_all)
+ sh_info().nr_ps_max_color_exports = 8;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
new file mode 100644
index 000000000..4755afbfe
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h
@@ -0,0 +1,117 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_fragment_shader_from_nir_h
+#define sfn_fragment_shader_from_nir_h
+
+#include "sfn_shader_base.h"
+#include "sfn_shaderio.h"
+#include <bitset>
+
+namespace r600 {
+
+class FragmentShaderFromNir : public ShaderFromNirProcessor {
+public:
+ FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info,
+ r600_pipe_shader_selector &sel, const r600_shader_key &key,
+ enum chip_class chip_class);
+ bool scan_sysvalue_access(nir_instr *instr) override;
+private:
+
+ struct Interpolator {
+ bool enabled;
+ unsigned ij_index;
+ PValue i;
+ PValue j;
+ };
+
+ void emit_shader_start() override;
+ bool do_allocate_reserved_registers() override;
+ bool process_store_output(nir_intrinsic_instr *instr);
+
+ bool emit_store_output(nir_intrinsic_instr* instr);
+
+ bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs);
+ bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs);
+ bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip,
+ int num_components, int start_comp);
+ bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op);
+ bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask);
+ bool load_interpolated_two_comp_for_one(GPRVector &dest,
+ ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp);
+
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+ void do_finalize() override;
+
+ void load_front_face();
+
+ bool emit_load_input(nir_intrinsic_instr* instr);
+ bool emit_load_front_face(nir_intrinsic_instr* instr);
+ bool emit_load_sample_mask_in(nir_intrinsic_instr* instr);
+ bool emit_load_sample_pos(nir_intrinsic_instr* instr);
+ bool emit_load_sample_id(nir_intrinsic_instr* instr);
+
+ bool process_load_input(nir_intrinsic_instr *instr, bool interpolated);
+ bool emit_load_interpolated_input(nir_intrinsic_instr* instr);
+ bool load_barycentric_at_offset(nir_intrinsic_instr* instr);
+ bool load_barycentric_at_sample(nir_intrinsic_instr* instr);
+
+ unsigned m_max_color_exports;
+ unsigned m_max_counted_color_exports;
+ bool m_two_sided_color;
+ ExportInstruction *m_last_pixel_export;
+ const nir_shader& m_nir;
+
+ std::array<Interpolator, 6> m_interpolator;
+ unsigned m_reserved_registers;
+ unsigned m_frag_pos_index;
+ PGPRValue m_front_face_reg;
+ PGPRValue m_sample_mask_reg;
+ PGPRValue m_sample_id_reg;
+ PGPRValue m_helper_invocation;
+ GPRVector m_frag_pos;
+ bool m_need_back_color;
+ bool m_front_face_loaded;
+ ShaderIO m_shaderio;
+ unsigned m_depth_exports;
+
+ std::map<unsigned, PValue> m_input_cache;
+
+ static const int s_max_interpolators = 6;
+
+ std::bitset<s_max_interpolators> m_interpolators_used;
+
+ unsigned m_apply_sample_mask;
+ bool m_dual_source_blend;
+ ShaderInput *m_pos_input;
+
+};
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp
new file mode 100644
index 000000000..0541e0ad0
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp
@@ -0,0 +1,343 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_shader_geometry.h"
+#include "sfn_instruction_misc.h"
+#include "sfn_instruction_fetch.h"
+#include "sfn_shaderio.h"
+
+namespace r600 {
+
+GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector &sel,
+ const r600_shader_key &key,
+ enum chip_class chip_class):
+ VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader,
+ sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter),
+ m_pipe_shader(sh),
+ m_so_info(&sel.so),
+ m_first_vertex_emitted(false),
+ m_offset(0),
+ m_next_input_ring_offset(0),
+ m_key(key),
+ m_clip_dist_mask(0),
+ m_cur_ring_output(0),
+ m_gs_tri_strip_adj_fix(false),
+ m_input_mask(0)
+{
+ sh_info().atomic_base = key.gs.first_atomic_counter;
+}
+
+bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr)
+{
+ auto location = nir_intrinsic_io_semantics(instr).location;
+ auto index = nir_src_as_const_value(instr->src[1]);
+ assert(index);
+ auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+ uint32_t write_mask = nir_intrinsic_write_mask(instr);
+ GPRVector::Swizzle swz = swizzle_from_mask(write_mask);
+
+ auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true);
+
+ sh_info().output[driver_location].write_mask = write_mask;
+
+ auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value,
+ 4 * driver_location,
+ instr->num_components, m_export_base[0]);
+ streamout_data[location] = ir;
+
+ return true;
+}
+
+bool GeometryShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return true;
+
+ nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+
+ switch (ii->intrinsic) {
+ case nir_intrinsic_store_output:
+ return process_store_output(ii);
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_per_vertex_input:
+ return process_load_input(ii);
+ default:
+ return true;
+ }
+}
+
+bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr)
+{
+ auto location = nir_intrinsic_io_semantics(instr).location;
+ auto index = nir_src_as_const_value(instr->src[1]);
+ assert(index);
+
+ auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+ if (location == VARYING_SLOT_COL0 ||
+ location == VARYING_SLOT_COL1 ||
+ (location >= VARYING_SLOT_VAR0 &&
+ location <= VARYING_SLOT_VAR31) ||
+ (location >= VARYING_SLOT_TEX0 &&
+ location <= VARYING_SLOT_TEX7) ||
+ location == VARYING_SLOT_BFC0 ||
+ location == VARYING_SLOT_BFC1 ||
+ location == VARYING_SLOT_PNTC ||
+ location == VARYING_SLOT_CLIP_VERTEX ||
+ location == VARYING_SLOT_CLIP_DIST0 ||
+ location == VARYING_SLOT_CLIP_DIST1 ||
+ location == VARYING_SLOT_PRIMITIVE_ID ||
+ location == VARYING_SLOT_POS ||
+ location == VARYING_SLOT_PSIZ ||
+ location == VARYING_SLOT_LAYER ||
+ location == VARYING_SLOT_VIEWPORT ||
+ location == VARYING_SLOT_FOGC) {
+ r600_shader_io& io = sh_info().output[driver_location];
+
+ auto semantic = r600_get_varying_semantic(location);
+ io.name = semantic.first;
+ io.sid = semantic.second;
+
+ evaluate_spi_sid(io);
+
+ if (sh_info().noutput <= driver_location)
+ sh_info().noutput = driver_location + 1;
+
+ if (location == VARYING_SLOT_CLIP_DIST0 ||
+ location == VARYING_SLOT_CLIP_DIST1) {
+ m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
+ }
+
+ if (location == VARYING_SLOT_VIEWPORT) {
+ sh_info().vs_out_viewport = 1;
+ sh_info().vs_out_misc_write = 1;
+ }
+ return true;
+ }
+ return false;
+}
+
+bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr)
+{
+ auto location = nir_intrinsic_io_semantics(instr).location;
+ auto index = nir_src_as_const_value(instr->src[1]);
+ assert(index);
+
+ auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+ if (location == VARYING_SLOT_POS ||
+ location == VARYING_SLOT_PSIZ ||
+ location == VARYING_SLOT_FOGC ||
+ location == VARYING_SLOT_CLIP_VERTEX ||
+ location == VARYING_SLOT_CLIP_DIST0 ||
+ location == VARYING_SLOT_CLIP_DIST1 ||
+ location == VARYING_SLOT_COL0 ||
+ location == VARYING_SLOT_COL1 ||
+ location == VARYING_SLOT_BFC0 ||
+ location == VARYING_SLOT_BFC1 ||
+ location == VARYING_SLOT_PNTC ||
+ (location >= VARYING_SLOT_VAR0 &&
+ location <= VARYING_SLOT_VAR31) ||
+ (location >= VARYING_SLOT_TEX0 &&
+ location <= VARYING_SLOT_TEX7)) {
+
+ uint64_t bit = 1ull << location;
+ if (!(bit & m_input_mask)) {
+ r600_shader_io& io = sh_info().input[driver_location];
+ auto semantic = r600_get_varying_semantic(location);
+ io.name = semantic.first;
+ io.sid = semantic.second;
+
+ io.ring_offset = 16 * driver_location;
+ ++sh_info().ninput;
+ m_next_input_ring_offset += 16;
+ m_input_mask |= bit;
+ }
+ return true;
+ }
+ return false;
+}
+
+bool GeometryShaderFromNir::do_allocate_reserved_registers()
+{
+   const int sel[6] = {0, 0, 0, 1, 1, 1};
+   const int chan[6] = {0, 1, 3, 0, 1, 2};
+
+ increment_reserved_registers();
+ increment_reserved_registers();
+
+   /* Reserve registers used by the shaders (should check how many
+    * components are actually used) */
+ for (int i = 0; i < 6; ++i) {
+ auto reg = new GPRValue(sel[i], chan[i]);
+ reg->set_as_input();
+ m_per_vertex_offsets[i].reset(reg);
+ inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
+ }
+ auto reg = new GPRValue(0, 2);
+ reg->set_as_input();
+ m_primitive_id.reset(reg);
+ inject_register(0, 2, m_primitive_id, false);
+
+ reg = new GPRValue(1, 3);
+ reg->set_as_input();
+ m_invocation_id.reset(reg);
+ inject_register(1, 3, m_invocation_id, false);
+
+ m_export_base[0] = get_temp_register(0);
+ m_export_base[1] = get_temp_register(0);
+ m_export_base[2] = get_temp_register(0);
+ m_export_base[3] = get_temp_register(0);
+ emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr}));
+ emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr}));
+ emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr}));
+ emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr}));
+
+ sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
+
+ if (m_key.gs.tri_strip_adj_fix)
+ emit_adj_fix();
+
+ return true;
+}
+
+void GeometryShaderFromNir::emit_adj_fix()
+{
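+   /* With triangle strip adjacency the per-vertex ring offsets arrive
+    * rotated on odd primitives; use the parity bit of the primitive id
+    * to select between the original and the rotated vertex order. */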
+ PValue adjhelp0(new GPRValue(m_export_base[0]->sel(), 1));
+ emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
+
+ int reg_indices[6];
+   int reg_channels[6] = {1, 2, 3, 1, 2, 3};
+
+ int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
+
+ reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
+ reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
+
+ std::array<PValue, 6> adjhelp;
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 6; i++) {
+      adjhelp[i].reset(new GPRValue(reg_indices[i], reg_channels[i]));
+ ir = new AluInstruction(op3_cnde_int, adjhelp[i],
+ {adjhelp0, m_per_vertex_offsets[i],
+ m_per_vertex_offsets[rotate_indices[i]]},
+ {alu_write});
+ if (i == 3)
+ ir->set_flag(alu_last_instr);
+ emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+
+ for (int i = 0; i < 6; i++)
+ m_per_vertex_offsets[i] = adjhelp[i];
+}
+
+
+bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_emit_vertex:
+ return emit_vertex(instr, false);
+ case nir_intrinsic_end_primitive:
+ return emit_vertex(instr, true);
+ case nir_intrinsic_load_primitive_id:
+ return load_preloaded_value(instr->dest, 0, m_primitive_id);
+ case nir_intrinsic_load_invocation_id:
+ return load_preloaded_value(instr->dest, 0, m_invocation_id);
+ case nir_intrinsic_store_output:
+ return emit_store(instr);
+ case nir_intrinsic_load_per_vertex_input:
+ return emit_load_per_vertex_input(instr);
+ default:
+ ;
+ }
+ return false;
+}
+
+bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
+{
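+   /* Flush the ring writes queued for this vertex (the position is only
+    * kept on stream 0), then emit the vertex and advance the ring export
+    * base so the next vertex lands behind this one. */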
+ int stream = nir_intrinsic_stream_id(instr);
+ assert(stream < 4);
+
+ for(auto v: streamout_data) {
+ if (stream == 0 || v.first != VARYING_SLOT_POS) {
+ v.second->patch_ring(stream, m_export_base[stream]);
+ emit_instruction(v.second);
+ } else
+ delete v.second;
+ }
+ streamout_data.clear();
+ emit_instruction(new EmitVertex(stream, cut));
+
+ if (!cut)
+ emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream],
+ PValue(new LiteralValue(sh_info().noutput)),
+ {alu_write, alu_last_instr}));
+
+ return true;
+}
+
+bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
+{
+ auto dest = vec_from_nir(instr->dest, 4);
+
+ std::array<int, 4> swz = {7,7,7,7};
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ swz[i] = i + nir_intrinsic_component(instr);
+ }
+
+ auto literal_index = nir_src_as_const_value(instr->src[0]);
+
+ if (!literal_index) {
+ sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
+ return false;
+ }
+ assert(literal_index->u32 < 6);
+ assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
+
+ PValue addr = m_per_vertex_offsets[literal_index->u32];
+ auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
+ 16 * nir_intrinsic_base(instr),
+ R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
+ fetch->set_dest_swizzle(swz);
+
+ emit_instruction(fetch);
+ return true;
+}
+
+void GeometryShaderFromNir::do_finalize()
+{
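+   /* Each clip-distance slot covers a vec4, so expand the two-bit slot
+    * mask into a per-component write mask. */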
+ if (m_clip_dist_mask) {
+ int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask);
+ sh_info().cc_dist_mask = (1 << num_clip_dist) - 1;
+ sh_info().clip_dist_write = (1 << num_clip_dist) - 1;
+ }
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h
new file mode 100644
index 000000000..b557b8f58
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h
@@ -0,0 +1,81 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SFN_GEOMETRYSHADERFROMNIR_H
+#define SFN_GEOMETRYSHADERFROMNIR_H
+
+#include "sfn_vertexstageexport.h"
+
+namespace r600 {
+
+class GeometryShaderFromNir : public VertexStage
+{
+public:
+ GeometryShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum chip_class chip_class);
+
+ bool scan_sysvalue_access(nir_instr *instr) override;
+ PValue primitive_id() override {return m_primitive_id;}
+
+private:
+
+ bool do_allocate_reserved_registers() override;
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+
+ bool emit_vertex(nir_intrinsic_instr* instr, bool cut);
+ void emit_adj_fix();
+
+ bool process_store_output(nir_intrinsic_instr* instr);
+ bool process_load_input(nir_intrinsic_instr* instr);
+
+ bool emit_store(nir_intrinsic_instr* instr);
+ bool emit_load_per_vertex_input(nir_intrinsic_instr* instr);
+
+ void do_finalize() override;
+
+ r600_pipe_shader *m_pipe_shader;
+ const pipe_stream_output_info *m_so_info;
+
+ std::array<PValue, 6> m_per_vertex_offsets;
+ PValue m_primitive_id;
+ PValue m_invocation_id;
+ PValue m_export_base[4];
+ bool m_first_vertex_emitted;
+
+ int m_offset;
+ int m_next_input_ring_offset;
+ r600_shader_key m_key;
+ int m_clip_dist_mask;
+ unsigned m_cur_ring_output;
+ bool m_gs_tri_strip_adj_fix;
+ uint64_t m_input_mask;
+
+ std::map<int, MemRingOutIntruction *> streamout_data;
+};
+
+}
+
+#endif // SFN_GEOMETRYSHADERFROMNIR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp
new file mode 100644
index 000000000..fb76695c6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp
@@ -0,0 +1,108 @@
+#include "sfn_shader_tcs.h"
+#include "sfn_instruction_gds.h"
+#include "tgsi/tgsi_from_mesa.h"
+
+namespace r600 {
+
+TcsShaderFromNir::TcsShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector& sel,
+ const r600_shader_key& key,
+ enum chip_class chip_class):
+ ShaderFromNirProcessor (PIPE_SHADER_TESS_CTRL, sel, sh->shader,
+ sh->scratch_space_needed, chip_class, key.tcs.first_atomic_counter),
+ m_reserved_registers(0)
+{
+ sh_info().tcs_prim_mode = key.tcs.prim_mode;
+}
+
+bool TcsShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return true;
+
+ auto intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_primitive_id:
+ m_sv_values.set(es_primitive_id);
+ break;
+ case nir_intrinsic_load_invocation_id:
+ m_sv_values.set(es_invocation_id);
+ break;
+ case nir_intrinsic_load_tcs_rel_patch_id_r600:
+ m_sv_values.set(es_rel_patch_id);
+ break;
+ case nir_intrinsic_load_tcs_tess_factor_base_r600:
+ m_sv_values.set(es_tess_factor_base);
+ break;
+ default:
+      ;
+ }
+ return true;
+}
+
+bool TcsShaderFromNir::do_allocate_reserved_registers()
+{
+ if (m_sv_values.test(es_primitive_id)) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,0);
+ gpr->set_as_input();
+ m_primitive_id.reset(gpr);
+ }
+
+ if (m_sv_values.test(es_invocation_id)) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,2);
+ gpr->set_as_input();
+ m_invocation_id.reset(gpr);
+ }
+
+ if (m_sv_values.test(es_rel_patch_id)) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,1);
+ gpr->set_as_input();
+ m_rel_patch_id.reset(gpr);
+ }
+
+ if (m_sv_values.test(es_tess_factor_base)) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,3);
+ gpr->set_as_input();
+ m_tess_factor_base.reset(gpr);
+ }
+
+ set_reserved_registers(m_reserved_registers);
+
+ return true;
+}
+
+bool TcsShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_tcs_rel_patch_id_r600:
+ return load_preloaded_value(instr->dest, 0, m_rel_patch_id);
+ case nir_intrinsic_load_invocation_id:
+ return load_preloaded_value(instr->dest, 0, m_invocation_id);
+ case nir_intrinsic_load_primitive_id:
+ return load_preloaded_value(instr->dest, 0, m_primitive_id);
+ case nir_intrinsic_load_tcs_tess_factor_base_r600:
+ return load_preloaded_value(instr->dest, 0, m_tess_factor_base);
+ case nir_intrinsic_store_tf_r600:
+ return store_tess_factor(instr);
+ default:
+ return false;
+ }
+}
+
+bool TcsShaderFromNir::store_tess_factor(nir_intrinsic_instr* instr)
+{
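+   /* Tessellation factors bypass the normal output path and are written
+    * to GDS; two or four components are stored depending on the source. */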
+ const GPRVector::Swizzle& swizzle = (instr->src[0].ssa->num_components == 4) ?
+ GPRVector::Swizzle({0, 1, 2, 3}) : GPRVector::Swizzle({0, 1, 7, 7});
+ auto val = vec_from_nir_with_fetch_constant(instr->src[0],
+ (1 << instr->src[0].ssa->num_components) - 1, swizzle);
+ emit_instruction(new GDSStoreTessFactor(val));
+ return true;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h
new file mode 100644
index 000000000..051078104
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h
@@ -0,0 +1,33 @@
+#ifndef TCSSHADERFROMNIR_H
+#define TCSSHADERFROMNIR_H
+
+#include "sfn_shader_base.h"
+
+namespace r600 {
+
+class TcsShaderFromNir : public ShaderFromNirProcessor
+{
+public:
+ TcsShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum chip_class chip_class);
+ bool scan_sysvalue_access(nir_instr *instr) override;
+
+private:
+ bool do_allocate_reserved_registers() override;
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+ bool store_tess_factor(nir_intrinsic_instr* instr);
+
+ void do_finalize() override {}
+
+ int m_reserved_registers;
+ PValue m_patch_id;
+ PValue m_rel_patch_id;
+ PValue m_invocation_id;
+ PValue m_primitive_id;
+ PValue m_tess_factor_base;
+
+};
+
+}
+
+#endif // TCSSHADERFROMNIR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp
new file mode 100644
index 000000000..d1c75515a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp
@@ -0,0 +1,123 @@
+#include "sfn_shader_tess_eval.h"
+#include "tgsi/tgsi_from_mesa.h"
+
+namespace r600 {
+
+TEvalShaderFromNir::TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel,
+ const r600_shader_key& key, r600_shader *gs_shader,
+ enum chip_class chip_class):
+ VertexStage(PIPE_SHADER_TESS_EVAL, sel, sh->shader,
+ sh->scratch_space_needed, chip_class, key.tes.first_atomic_counter),
+ m_reserved_registers(0),
+ m_key(key)
+{
+ sh->shader.tes_as_es = key.tes.as_es;
+ if (key.tes.as_es)
+ m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
+ else
+ m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
+}
+
+bool TEvalShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ if (instr->type != nir_instr_type_intrinsic)
+ return true;
+
+ auto ir = nir_instr_as_intrinsic(instr);
+
+ switch (ir->intrinsic) {
+ case nir_intrinsic_load_tess_coord_r600:
+ m_sv_values.set(es_tess_coord);
+ break;
+ case nir_intrinsic_load_primitive_id:
+ m_sv_values.set(es_primitive_id);
+ break;
+ case nir_intrinsic_load_tcs_rel_patch_id_r600:
+ m_sv_values.set(es_rel_patch_id);
+ break;
+ case nir_intrinsic_store_output:
+ m_export_processor->scan_store_output(ir);
+ break;
+ default:
+ ;
+ }
+ return true;
+}
+
+void TEvalShaderFromNir::emit_shader_start()
+{
+ m_export_processor->emit_shader_start();
+}
+
+bool TEvalShaderFromNir::do_allocate_reserved_registers()
+{
+ if (m_sv_values.test(es_tess_coord)) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,0);
+ gpr->set_as_input();
+ m_tess_coord[0].reset(gpr);
+ gpr = new GPRValue(0,1);
+ gpr->set_as_input();
+ m_tess_coord[1].reset(gpr);
+ }
+
+ if (m_sv_values.test(es_rel_patch_id)) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,2);
+ gpr->set_as_input();
+ m_rel_patch_id.reset(gpr);
+ }
+
+ if (m_sv_values.test(es_primitive_id) ||
+ m_key.vs.as_gs_a) {
+ m_reserved_registers = 1;
+ auto gpr = new GPRValue(0,3);
+ gpr->set_as_input();
+ m_primitive_id.reset(gpr);
+ if (m_key.vs.as_gs_a)
+ inject_register(0, 3, m_primitive_id, false);
+ }
+ set_reserved_registers(m_reserved_registers);
+ return true;
+}
+
+bool TEvalShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_tess_coord_r600:
+ return load_preloaded_value(instr->dest, 0, m_tess_coord[0]) &&
+ load_preloaded_value(instr->dest, 1, m_tess_coord[1]);
+ case nir_intrinsic_load_primitive_id:
+ return load_preloaded_value(instr->dest, 0, m_primitive_id);
+ case nir_intrinsic_load_tcs_rel_patch_id_r600:
+ return load_preloaded_value(instr->dest, 0, m_rel_patch_id);
+ case nir_intrinsic_store_output:
+ return m_export_processor->store_output(instr);
+ default:
+ return false;
+ }
+}
+
+void TEvalShaderFromNir::do_finalize()
+{
+ m_export_processor->finalize_exports();
+}
+
+
+bool TEvalShaderFromNir::emit_load_tess_coord(nir_intrinsic_instr* instr)
+{
+ bool result = load_preloaded_value(instr->dest, 0, m_tess_coord[0]) &&
+ load_preloaded_value(instr->dest, 1, m_tess_coord[1]);
+
+ m_tess_coord[2] = from_nir(instr->dest, 2);
+
+ emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2],
+ m_tess_coord[0], {alu_last_instr, alu_write, alu_src0_neg}));
+ emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2],
+ m_tess_coord[1], {alu_last_instr, alu_write, alu_src0_neg}));
+ return result;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h
new file mode 100644
index 000000000..a1b7d3a9c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h
@@ -0,0 +1,39 @@
+#ifndef TEVALSHADERFROMNIR_H
+#define TEVALSHADERFROMNIR_H
+
+#include "sfn_shader_base.h"
+#include "sfn_vertexstageexport.h"
+
+namespace r600 {
+
+class TEvalShaderFromNir : public VertexStage
+{
+public:
+ TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel,
+ const r600_shader_key& key, r600_shader *gs_shader,
+ enum chip_class chip_class);
+ bool scan_sysvalue_access(nir_instr *instr) override;
+ PValue primitive_id() override {return m_primitive_id;}
+ private:
+ void emit_shader_start() override;
+ bool do_allocate_reserved_registers() override;
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+ bool emit_load_tess_coord(nir_intrinsic_instr* instr);
+ bool load_tess_z_coord(nir_intrinsic_instr* instr);
+
+ void do_finalize() override;
+
+ unsigned m_reserved_registers;
+ PValue m_tess_coord[3];
+ PValue m_rel_patch_id;
+ PValue m_primitive_id;
+
+ std::unique_ptr<VertexStageExportBase> m_export_processor;
+ const r600_shader_key& m_key;
+};
+
+
+}
+
+#endif // TEVALSHADERFROMNIR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp
new file mode 100644
index 000000000..f2c4de3fa
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp
@@ -0,0 +1,230 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "sfn_shader_vertex.h"
+#include "sfn_instruction_lds.h"
+
+#include <queue>
+
+
+namespace r600 {
+
+using std::priority_queue;
+
+VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector& sel,
+ const r600_shader_key& key,
+ struct r600_shader* gs_shader,
+ enum chip_class chip_class):
+ VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader,
+ sh->scratch_space_needed, chip_class, key.vs.first_atomic_counter),
+ m_num_clip_dist(0),
+ m_last_param_export(nullptr),
+ m_last_pos_export(nullptr),
+ m_pipe_shader(sh),
+ m_enabled_stream_buffers_mask(0),
+ m_so_info(&sel.so),
+ m_vertex_id(),
+ m_key(key),
+ m_max_attrib(0)
+{
+ // reg 0 is used in the fetch shader
+ increment_reserved_registers();
+
+ sh_info().atomic_base = key.vs.first_atomic_counter;
+ sh_info().vs_as_gs_a = m_key.vs.as_gs_a;
+
+ if (key.vs.as_es) {
+ sh->shader.vs_as_es = true;
+ m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
+ } else if (key.vs.as_ls) {
+ sh->shader.vs_as_ls = true;
+ sfn_log << SfnLog::trans << "Start VS for GS\n";
+ m_export_processor.reset(new VertexStageExportForES(*this));
+ } else {
+ m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
+ }
+}
+
+bool VertexShaderFromNir::scan_inputs_read(const nir_shader *sh)
+{
+ uint64_t inputs = sh->info.inputs_read;
+
+ while (inputs) {
+ unsigned i = u_bit_scan64(&inputs);
+ if (i < VERT_ATTRIB_MAX) {
+ ++sh_info().ninput;
+ }
+ }
+ m_max_attrib = sh_info().ninput;
+ return true;
+}
+
+bool VertexShaderFromNir::do_allocate_reserved_registers()
+{
+ /* Since the vertex ID is nearly always used, we add it here as an input so
+ * that the registers used for vertex attributes don't get clobbered by the
+ * register merge step */
+ auto R0x = new GPRValue(0,0);
+ R0x->set_as_input();
+ m_vertex_id.reset(R0x);
+ inject_register(0, 0, m_vertex_id, false);
+
+ if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) {
+ auto R0z = new GPRValue(0,2);
+      R0z->set_as_input();
+ m_primitive_id.reset(R0z);
+ inject_register(0, 2, m_primitive_id, false);
+ }
+
+ if (m_sv_values.test(es_instanceid)) {
+ auto R0w = new GPRValue(0,3);
+ R0w->set_as_input();
+ m_instance_id.reset(R0w);
+ inject_register(0, 3, m_instance_id, false);
+ }
+
+ if (m_sv_values.test(es_rel_patch_id)) {
+ auto R0y = new GPRValue(0,1);
+ R0y->set_as_input();
+ m_rel_vertex_id.reset(R0y);
+ inject_register(0, 1, m_rel_vertex_id, false);
+ }
+
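+   /* Pin one input register per attribute starting at R1, so the values
+    * written by the fetch shader are not reassigned by the register
+    * merge pass. */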
+ m_attribs.resize(4 * m_max_attrib + 4);
+ for (unsigned i = 0; i < m_max_attrib + 1; ++i) {
+ for (unsigned k = 0; k < 4; ++k) {
+ auto gpr = std::make_shared<GPRValue>(i + 1, k);
+ gpr->set_as_input();
+ m_attribs[4 * i + k] = gpr;
+ inject_register(i + 1, k, gpr, false);
+ }
+ }
+
+ return true;
+}
+
+void VertexShaderFromNir::emit_shader_start()
+{
+ m_export_processor->emit_shader_start();
+}
+
+bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+ switch (ii->intrinsic) {
+ case nir_intrinsic_load_vertex_id:
+ m_sv_values.set(es_vertexid);
+ break;
+ case nir_intrinsic_load_instance_id:
+ m_sv_values.set(es_instanceid);
+ break;
+ case nir_intrinsic_load_tcs_rel_patch_id_r600:
+ m_sv_values.set(es_rel_patch_id);
+ break;
+ case nir_intrinsic_store_output:
+         m_export_processor->scan_store_output(ii);
+         break;
+ default:
+ ;
+ }
+ }
+ default:
+ ;
+ }
+ return true;
+}
+
+bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_vertex_id:
+ return load_preloaded_value(instr->dest, 0, m_vertex_id);
+ case nir_intrinsic_load_tcs_rel_patch_id_r600:
+ return load_preloaded_value(instr->dest, 0, m_rel_vertex_id);
+ case nir_intrinsic_load_instance_id:
+ return load_preloaded_value(instr->dest, 0, m_instance_id);
+ case nir_intrinsic_store_local_shared_r600:
+ return emit_store_local_shared(instr);
+ case nir_intrinsic_store_output:
+ return m_export_processor->store_output(instr);
+ case nir_intrinsic_load_input:
+ return load_input(instr);
+
+ default:
+ return false;
+ }
+}
+
+bool VertexShaderFromNir::load_input(nir_intrinsic_instr* instr)
+{
+ unsigned location = nir_intrinsic_base(instr);
+
+ if (location < VERT_ATTRIB_MAX) {
+ for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+ auto src = m_attribs[4 * location + i];
+
+ if (i == 0)
+ set_input(location, src);
+
+ load_preloaded_value(instr->dest, i, src, i == (unsigned)(instr->num_components - 1));
+ }
+ return true;
+ }
+   fprintf(stderr, "r600-NIR: Unimplemented load_input for %d\n", location);
+ return false;
+}
+
+bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr)
+{
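+   /* An LDS write stores at most two dwords, so fold the four-bit write
+    * mask onto the lower pair and pick the matching swizzle base. */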
+ unsigned write_mask = nir_intrinsic_write_mask(instr);
+
+ auto address = from_nir(instr->src[1], 0);
+ int swizzle_base = (write_mask & 0x3) ? 0 : 2;
+ write_mask |= write_mask >> 2;
+
+ auto value = from_nir(instr->src[0], swizzle_base);
+ if (!(write_mask & 2)) {
+ emit_instruction(new LDSWriteInstruction(address, 1, value));
+ } else {
+ auto value1 = from_nir(instr->src[0], swizzle_base + 1);
+ emit_instruction(new LDSWriteInstruction(address, 1, value, value1));
+ }
+
+ return true;
+}
+
+void VertexShaderFromNir::do_finalize()
+{
+ m_export_processor->finalize_exports();
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h
new file mode 100644
index 000000000..c1ba251de
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h
@@ -0,0 +1,83 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef sfn_vertex_shader_from_nir_h
+#define sfn_vertex_shader_from_nir_h
+
+#include "sfn_shader_base.h"
+#include "sfn_vertexstageexport.h"
+
+namespace r600 {
+
+class VertexShaderFromNir : public VertexStage {
+public:
+ VertexShaderFromNir(r600_pipe_shader *sh,
+ r600_pipe_shader_selector &sel,
+ const r600_shader_key &key, r600_shader *gs_shader,
+ enum chip_class chip_class);
+
+ bool scan_sysvalue_access(nir_instr *instr) override;
+
+ PValue primitive_id() override {return m_primitive_id;}
+protected:
+
+ // todo: encapsulate
+ unsigned m_num_clip_dist;
+ ExportInstruction *m_last_param_export;
+ ExportInstruction *m_last_pos_export;
+ r600_pipe_shader *m_pipe_shader;
+ unsigned m_enabled_stream_buffers_mask;
+ const pipe_stream_output_info *m_so_info;
+ void do_finalize() override;
+
+ std::map<unsigned, unsigned> m_param_map;
+
+ bool scan_inputs_read(const nir_shader *sh) override;
+
+private:
+ bool load_input(nir_intrinsic_instr* instr);
+
+ void finalize_exports();
+
+ void emit_shader_start() override;
+ bool do_allocate_reserved_registers() override;
+ bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override;
+ bool emit_store_local_shared(nir_intrinsic_instr* instr);
+
+ PValue m_vertex_id;
+ PValue m_instance_id;
+ PValue m_rel_vertex_id;
+ PValue m_primitive_id;
+ std::vector<PGPRValue> m_attribs;
+ r600_shader_key m_key;
+
+ std::unique_ptr<VertexStageExportBase> m_export_processor;
+ unsigned m_max_attrib;
+};
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
new file mode 100644
index 000000000..1ac94ccc7
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp
@@ -0,0 +1,448 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_shaderio.h"
+#include "sfn_debug.h"
+#include "tgsi/tgsi_from_mesa.h"
+
+#include <queue>
+
+namespace r600 {
+
+using std::vector;
+using std::priority_queue;
+
+ShaderIO::ShaderIO():
+ m_two_sided(false),
+ m_lds_pos(0)
+{
+}
+
+ShaderInput::ShaderInput(tgsi_semantic name):
+ m_name(name),
+ m_gpr(0),
+ m_uses_interpolate_at_centroid(false)
+{
+}
+
+ShaderInput::~ShaderInput()
+{
+}
+
+void ShaderInput::set_lds_pos(UNUSED int lds_pos)
+{
+}
+
+int ShaderInput::ij_index() const
+{
+ return -1;
+}
+
+bool ShaderInput::interpolate() const
+{
+ return false;
+}
+
+int ShaderInput::lds_pos() const
+{
+ return 0;
+}
+
+bool ShaderInput::is_varying() const
+{
+ return false;
+}
+
+void ShaderInput::set_uses_interpolate_at_centroid()
+{
+ m_uses_interpolate_at_centroid = true;
+}
+
+void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const
+{
+ io.name = m_name;
+ io.gpr = m_gpr;
+ io.ij_index = translated_ij_index;
+ io.lds_pos = lds_pos();
+ io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid;
+
+ set_specific_ioinfo(io);
+}
+
+void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const
+{
+}
+
+ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr):
+ ShaderInput(name),
+ m_gpr(gpr)
+{
+}
+
+void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const
+{
+ io.gpr = m_gpr;
+ io.ij_index = 0;
+}
+
+ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location,
+ unsigned frac, unsigned components,
+ tgsi_interpolate_mode interpolate,
+ tgsi_interpolate_loc interp_loc):
+ ShaderInput(_name),
+ m_driver_location(driver_location),
+ m_location_frac(frac),
+ m_sid(sid),
+ m_interpolate(interpolate),
+ m_interpolate_loc(interp_loc),
+ m_ij_index(-10),
+ m_lds_pos(0),
+ m_mask(((1 << components) - 1) << frac)
+{
+ evaluate_spi_sid();
+
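+   /* ij indices 0-2 select the perspective interpolators (sample, center,
+    * centroid), 3-5 the linear ones in the same order. */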
+ m_ij_index = interpolate == TGSI_INTERPOLATE_LINEAR ? 3 : 0;
+ switch (interp_loc) {
+ case TGSI_INTERPOLATE_LOC_CENTROID: m_ij_index += 2; break;
+ case TGSI_INTERPOLATE_LOC_CENTER: m_ij_index += 1; break;
+ default:
+ ;
+ }
+}
+
+ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input):
+ ShaderInput(_name),
+ m_driver_location(input->data.driver_location),
+ m_location_frac(input->data.location_frac),
+ m_sid(sid),
+ m_ij_index(-10),
+ m_lds_pos(0),
+ m_mask(((1 << input->type->components()) - 1) << input->data.location_frac)
+{
+ sfn_log << SfnLog::io << __func__
+ << "name:" << _name
+           << " name: " << _name
+ << " op: " << input->data.interpolation;
+
+ evaluate_spi_sid();
+
+ enum glsl_base_type base_type =
+ glsl_get_base_type(glsl_without_array(input->type));
+
+ switch (input->data.interpolation) {
+ case INTERP_MODE_NONE:
+ if (glsl_base_type_is_integer(base_type)) {
+ m_interpolate = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ if (name() == TGSI_SEMANTIC_COLOR) {
+ m_interpolate = TGSI_INTERPOLATE_COLOR;
+ m_ij_index = 0;
+ break;
+ }
+ FALLTHROUGH;
+
+ case INTERP_MODE_SMOOTH:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+ m_ij_index = 0;
+ break;
+
+ case INTERP_MODE_NOPERSPECTIVE:
+ assert(!glsl_base_type_is_integer(base_type));
+
+ m_interpolate = TGSI_INTERPOLATE_LINEAR;
+ m_ij_index = 3;
+ break;
+
+ case INTERP_MODE_FLAT:
+ m_interpolate = TGSI_INTERPOLATE_CONSTANT;
+ break;
+
+ default:
+ m_interpolate = TGSI_INTERPOLATE_CONSTANT;
+ break;
+ }
+
+ if (input->data.sample) {
+ m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
+ } else if (input->data.centroid) {
+ m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID;
+ m_ij_index += 2;
+ } else {
+ m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER;
+ m_ij_index += 1;
+ }
+ sfn_log << SfnLog::io
+ << " -> IP:" << m_interpolate
+ << " IJ:" << m_ij_index
+ << "\n";
+}
+
+bool ShaderInputVarying::is_varying() const
+{
+ return true;
+}
+
+void ShaderInputVarying::update_mask(int additional_comps, int frac)
+{
+ m_mask |= ((1 << additional_comps) - 1) << frac;
+}
+
+void ShaderInputVarying::evaluate_spi_sid()
+{
+ switch (name()) {
+ case TGSI_SEMANTIC_PSIZE:
+ case TGSI_SEMANTIC_EDGEFLAG:
+ case TGSI_SEMANTIC_FACE:
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ assert(0 && "System value used as varying");
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ m_spi_sid = 0;
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ case TGSI_SEMANTIC_TEXCOORD:
+ case TGSI_SEMANTIC_PCOORD:
+ m_spi_sid = m_sid + 1;
+ break;
+ default:
+ /* For non-generic params - pack name and sid into 8 bits */
+ m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1;
+ }
+}
+
+ShaderInputVarying::ShaderInputVarying(tgsi_semantic name,
+ const ShaderInputVarying& orig, size_t location):
+ ShaderInput(name),
+ m_driver_location(location),
+ m_location_frac(orig.location_frac()),
+
+ m_sid(orig.m_sid),
+ m_spi_sid(orig.m_spi_sid),
+ m_interpolate(orig.m_interpolate),
+ m_interpolate_loc(orig.m_interpolate_loc),
+ m_ij_index(orig.m_ij_index),
+ m_lds_pos(0),
+ m_mask(0)
+{
+ evaluate_spi_sid();
+}
+
+bool ShaderInputVarying::interpolate() const
+{
+ return m_interpolate > 0;
+}
+
+int ShaderInputVarying::ij_index() const
+{
+ return m_ij_index;
+}
+
+void ShaderInputVarying::set_lds_pos(int lds_pos)
+{
+ m_lds_pos = lds_pos;
+}
+
+int ShaderInputVarying::lds_pos() const
+{
+ return m_lds_pos;
+}
+
+void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const
+{
+ io.interpolate = m_interpolate;
+ io.interpolate_location = m_interpolate_loc;
+ io.sid = m_sid;
+ io.spi_sid = m_spi_sid;
+ set_color_ioinfo(io);
+}
+
+void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const
+{
+ sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input):
+ ShaderInputVarying(name, sid, input),
+ m_back_color_input_idx(0)
+{
+   sfn_log << SfnLog::io << __func__ << " name: " << name << " sid: " << sid << "\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location,
+ unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
+ tgsi_interpolate_loc interp_loc):
+   ShaderInputVarying(_name, sid, driver_location, frac, components, interpolate, interp_loc),
+ m_back_color_input_idx(0)
+{
+   sfn_log << SfnLog::io << __func__ << " name: " << _name << " sid: " << sid << "\n";
+}
+
+void ShaderInputColor::set_back_color(unsigned back_color_input_idx)
+{
+ sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n";
+ m_back_color_input_idx = back_color_input_idx;
+}
+
+void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const
+{
+ sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n";
+ io.back_color_input = m_back_color_input_idx;
+}
+
+size_t ShaderIO::add_input(ShaderInput *input)
+{
+ m_inputs.push_back(PShaderInput(input));
+ return m_inputs.size() - 1;
+}
+
+PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid)
+{
+ for (auto& a : m_inputs) {
+ if (a->name() == name) {
+ assert(a->is_varying());
+ auto& v = static_cast<ShaderInputVarying&>(*a);
+ if (v.sid() == sid)
+ return a;
+ }
+ }
+ return nullptr;
+}
+
+struct VaryingShaderIOLess {
+ bool operator () (PShaderInput lhs, PShaderInput rhs) const
+ {
+ const ShaderInputVarying& l = static_cast<ShaderInputVarying&>(*lhs);
+ const ShaderInputVarying& r = static_cast<ShaderInputVarying&>(*rhs);
+ return l.location() > r.location();
+ }
+};
+
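+/* Note: with the comparator above, the top of the priority queue is the
+ * varying with the smallest driver location, so the loop below rewrites
+ * the varyings back into their original slots in ascending location
+ * order while non-varying inputs keep their positions. */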
+void ShaderIO::sort_varying_inputs()
+{
+ priority_queue<PShaderInput, vector<PShaderInput>, VaryingShaderIOLess> q;
+
+ vector<int> idx;
+
+ for (auto i = 0u; i < m_inputs.size(); ++i) {
+ if (m_inputs[i]->is_varying()) {
+ q.push(m_inputs[i]);
+ idx.push_back(i);
+ }
+ }
+
+ auto next_index = idx.begin();
+ while (!q.empty()) {
+ auto si = q.top();
+ q.pop();
+ m_inputs[*next_index++] = si;
+ }
+}
+
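+/* Assign one LDS slot per distinct driver location (POSITION excluded).
+ * This relies on sort_varying_inputs() having made same-location varyings
+ * adjacent; e.g. locations {1, 1, 3} end up with lds_pos {0, 0, 1} and
+ * nlds() == 2 (illustrative sketch). */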
+void ShaderIO::update_lds_pos()
+{
+ m_lds_pos = -1;
+ m_ldspos.resize(m_inputs.size());
+ for (auto& i : m_inputs) {
+ if (!i->is_varying())
+ continue;
+
+ auto& v = static_cast<ShaderInputVarying&>(*i);
+      /* There are shaders that miss an input ... */
+ if (m_ldspos.size() <= static_cast<unsigned>(v.location()))
+ m_ldspos.resize(v.location() + 1);
+ }
+
+ std::fill(m_ldspos.begin(), m_ldspos.end(), -1);
+ for (auto& i : m_inputs) {
+ if (!i->is_varying())
+ continue;
+
+ auto& v = static_cast<ShaderInputVarying&>(*i);
+ if (v.name() == TGSI_SEMANTIC_POSITION)
+ continue;
+
+ if (m_ldspos[v.location()] < 0) {
+ ++m_lds_pos;
+ m_ldspos[v.location()] = m_lds_pos;
+ }
+ v.set_lds_pos(m_lds_pos);
+ }
+ ++m_lds_pos;
+}
+
+std::vector<PShaderInput> &ShaderIO::inputs()
+{
+ return m_inputs;
+}
+
+ShaderInput& ShaderIO::input(size_t k)
+{
+ assert(k < m_inputs.size());
+ return *m_inputs[k];
+}
+
+ShaderInput& ShaderIO::input(size_t driver_loc, int frac)
+{
+ for (auto& i: m_inputs) {
+ if (!i->is_varying())
+ continue;
+
+ auto& v = static_cast<ShaderInputVarying&>(*i);
+ if (v.location() == driver_loc && v.location_frac() == frac)
+ return v;
+ }
+ return input(driver_loc);
+}
+
+void ShaderIO::set_two_sided()
+{
+ m_two_sided = true;
+}
+
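+/* Presumably the offsets below keep the semantic indices disjoint:
+ * the eight TEXCOORDs occupy the low generic sids, PCOORD is pinned to
+ * 8, and free GENERIC varyings start at 9 (assumption, not normative). */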
+std::pair<unsigned, unsigned>
+r600_get_varying_semantic(unsigned varying_location)
+{
+ std::pair<unsigned, unsigned> result;
+ tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location),
+ true, &result.first, &result.second);
+
+ if (result.first == TGSI_SEMANTIC_GENERIC) {
+ result.second += 9;
+ } else if (result.first == TGSI_SEMANTIC_PCOORD) {
+ result.second = 8;
+ }
+ return result;
+}
+
+
+
+}
+
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h
new file mode 100644
index 000000000..855bbe143
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h
@@ -0,0 +1,176 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_SHADERIO_H
+#define SFN_SHADERIO_H
+
+#include "compiler/nir/nir.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallium/drivers/r600/r600_shader.h"
+
+#include <vector>
+#include <memory>
+
+namespace r600 {
+
+class ShaderInput {
+public:
+ ShaderInput();
+ virtual ~ShaderInput();
+
+ ShaderInput(tgsi_semantic name);
+ tgsi_semantic name() const {return m_name;}
+
+ void set_gpr(int gpr) {m_gpr = gpr;}
+ int gpr() const {return m_gpr;}
+ void set_ioinfo(r600_shader_io& io, int translated_ij_index) const;
+
+ virtual void set_lds_pos(int lds_pos);
+ virtual int ij_index() const;
+ virtual bool interpolate() const;
+ virtual int lds_pos() const;
+ void set_uses_interpolate_at_centroid();
+
+ virtual bool is_varying() const;
+
+private:
+ virtual void set_specific_ioinfo(r600_shader_io& io) const;
+
+ tgsi_semantic m_name;
+ int m_gpr;
+ bool m_uses_interpolate_at_centroid;
+};
+
+using PShaderInput = std::shared_ptr<ShaderInput>;
+
+class ShaderInputSystemValue: public ShaderInput {
+public:
+ ShaderInputSystemValue(tgsi_semantic name, int gpr);
+ void set_specific_ioinfo(r600_shader_io& io) const;
+ int m_gpr;
+};
+
+class ShaderInputVarying : public ShaderInput {
+public:
+ ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location,
+ unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
+ tgsi_interpolate_loc interp_loc);
+ ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input);
+ ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig,
+ size_t location);
+
+ void set_lds_pos(int lds_pos) override;
+
+ int ij_index() const override;
+
+ bool interpolate() const override;
+
+ int lds_pos() const override;
+
+ int sid() const {return m_sid;}
+
+ void update_mask(int additional_comps, int frac);
+
+ size_t location() const {return m_driver_location;}
+ int location_frac() const {return m_location_frac;}
+
+ bool is_varying() const override;
+
+private:
+ void evaluate_spi_sid();
+
+ virtual void set_color_ioinfo(r600_shader_io& io) const;
+ void set_specific_ioinfo(r600_shader_io& io) const override;
+ size_t m_driver_location;
+ int m_location_frac;
+ int m_sid;
+ int m_spi_sid;
+ tgsi_interpolate_mode m_interpolate;
+ tgsi_interpolate_loc m_interpolate_loc;
+ int m_ij_index;
+ int m_lds_pos;
+ int m_mask;
+};
+
+class ShaderInputColor: public ShaderInputVarying {
+public:
+ ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location,
+ unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
+ tgsi_interpolate_loc interp_loc);
+ ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input);
+ void set_back_color(unsigned back_color_input_idx);
+ unsigned back_color_input_index() const {
+ return m_back_color_input_idx;
+ }
+private:
+ void set_color_ioinfo(UNUSED r600_shader_io& io) const override;
+ unsigned m_back_color_input_idx;
+
+};
+
+class ShaderIO
+{
+public:
+ ShaderIO();
+
+ size_t add_input(ShaderInput *input);
+
+ std::vector<PShaderInput>& inputs();
+ ShaderInput& input(size_t k);
+
+ ShaderInput& input(size_t driver_loc, int frac);
+
+ void set_two_sided();
+ bool two_sided() {return m_two_sided;}
+
+ int nlds() const {
+ return m_lds_pos;
+ }
+
+ void sort_varying_inputs();
+
+ size_t size() const {return m_inputs.size();}
+
+ PShaderInput find_varying(tgsi_semantic name, int sid);
+
+ void update_lds_pos();
+
+private:
+ std::vector<PShaderInput> m_inputs;
+ std::vector<int> m_ldspos;
+ bool m_two_sided;
+ int m_lds_pos;
+
+};
+
+std::pair<unsigned, unsigned>
+r600_get_varying_semantic(unsigned varying_location);
+
+
+}
+
+#endif // SFN_SHADERIO_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp
new file mode 100644
index 000000000..3228b75fb
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp
@@ -0,0 +1,242 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_value.h"
+#include "util/macros.h"
+
+#include <iostream>
+#include <iomanip>
+#include <cassert>
+
+namespace r600 {
+
+using std::unique_ptr;
+using std::make_shared;
+
+const char *Value::component_names = "xyzw01?_!";
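+/* Channel-name convention used by the printers below: indices 0-3 map to
+ * x,y,z,w, 4 and 5 to the inline constants 0 and 1, and 7 to '_' for an
+ * unused slot; '?' and '!' flag unknown or out-of-range channels (sketch
+ * of the convention as this code uses it). */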
+
+Value::Value():
+ m_type(gpr),
+ m_chan(0)
+{
+}
+
+Value::Value(Type type, uint32_t chan):
+ m_type(type),
+ m_chan(chan)
+{
+
+}
+
+
+
+Value::Value(Type type):
+ Value(type, 0)
+{
+}
+
+Value::Type Value::type() const
+{
+ return m_type;
+}
+
+void Value::set_chan(uint32_t chan)
+{
+ m_chan = chan;
+}
+
+void Value::print(std::ostream& os) const
+{
+ do_print(os);
+}
+
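+/* Flag-driven decoration: with has_neg and has_abs set, a value V is
+ * printed as -|V|, e.g. a GPR R1.x would come out as "-|R1.x|"
+ * (illustrative only). */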
+void Value::print(std::ostream& os, const PrintFlags& flags) const
+{
+ if (flags.flags & PrintFlags::has_neg) os << '-';
+ if (flags.flags & PrintFlags::has_abs) os << '|';
+ do_print(os, flags);
+ if (flags.flags & PrintFlags::has_abs) os << '|';
+}
+
+void Value::do_print(std::ostream& os, const PrintFlags& flags) const
+{
+ (void)flags;
+ do_print(os);
+}
+
+bool Value::operator < (const Value& rhs) const
+{
+   return sel() < rhs.sel() ||
+          (sel() == rhs.sel() && chan() < rhs.chan());
+}
+
+
+LiteralValue::LiteralValue(float value, uint32_t chan):
+ Value(Value::literal, chan)
+{
+   m_value.f = value;
+}
+
+
+LiteralValue::LiteralValue(uint32_t value, uint32_t chan):
+ Value(Value::literal, chan)
+{
+   m_value.u = value;
+}
+
+LiteralValue::LiteralValue(int value, uint32_t chan):
+ Value(Value::literal, chan)
+{
+   m_value.u = value;
+}
+
+uint32_t LiteralValue::sel() const
+{
+ return ALU_SRC_LITERAL;
+}
+
+uint32_t LiteralValue::value() const
+{
+ return m_value.u;
+}
+
+float LiteralValue::value_float() const
+{
+ return m_value.f;
+}
+
+void LiteralValue::do_print(std::ostream& os) const
+{
+ os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10)
+ << m_value.f << "].";
+ os << component_names[chan()];
+}
+
+void LiteralValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
+{
+ os << "[0x" << std::setbase(16) << m_value.u << " "
+ << std::setbase(10);
+
+ os << m_value.f << "f";
+
+   os << "]";
+}
+
+bool LiteralValue::is_equal_to(const Value& other) const
+{
+ assert(other.type() == Value::Type::literal);
+ const auto& rhs = static_cast<const LiteralValue&>(other);
+ return (sel() == rhs.sel() &&
+ value() == rhs.value());
+}
+
+InlineConstValue::InlineConstValue(int value, int chan):
+ Value(Value::cinline, chan),
+ m_value(static_cast<AluInlineConstants>(value))
+{
+}
+
+uint32_t InlineConstValue::sel() const
+{
+ return m_value;
+}
+
+void InlineConstValue::do_print(std::ostream& os) const
+{
+ auto sv_info = alu_src_const.find(m_value);
+ if (sv_info != alu_src_const.end()) {
+ os << sv_info->second.descr;
+ if (sv_info->second.use_chan)
+ os << '.' << component_names[chan()];
+ else if (chan() > 0)
+ os << "." << component_names[chan()]
+ << " (W: Channel ignored)";
+ } else {
+ if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32)
+ os << " Param" << m_value - ALU_SRC_PARAM_BASE;
+ else
+ os << " E: unknown inline constant " << m_value;
+ }
+}
+
+bool InlineConstValue::is_equal_to(const Value& other) const
+{
+ assert(other.type() == Value::Type::cinline);
+ const auto& rhs = static_cast<const InlineConstValue&>(other);
+ return sel() == rhs.sel();
+}
+
+PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0));
+PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0));
+PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0));
+PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0));
+
+UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank):
+ Value(Value::kconst, chan)
+{
+ m_index = sel;
+ m_kcache_bank = kcache_bank;
+}
+
+UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr):
+ Value(Value::kconst, chan),
+ m_index(sel),
+ m_kcache_bank(1),
+ m_addr(addr)
+{
+
+}
+
+uint32_t UniformValue::sel() const
+{
+ const int bank_base[4] = {128, 160, 256, 288};
+ return m_index < 512 ? m_index + bank_base[m_kcache_bank] : m_index;
+}
+
+uint32_t UniformValue::kcache_bank() const
+{
+ return m_kcache_bank;
+}
+
+bool UniformValue::is_equal_to(const Value& other) const
+{
+ const UniformValue& o = static_cast<const UniformValue&>(other);
+ return sel() == o.sel() &&
+ m_kcache_bank == o.kcache_bank();
+}
+
+void UniformValue::do_print(std::ostream& os) const
+{
+ if (m_index < 512)
+ os << "KC" << m_kcache_bank << "[" << m_index;
+ else if (m_addr)
+ os << "KC[" << *m_addr << "][" << m_index;
+ else
+ os << "KCx[" << m_index;
+ os << "]." << component_names[chan()];
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h
new file mode 100644
index 000000000..7bc4528f9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h
@@ -0,0 +1,194 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_VALUE_H
+#define SFN_VALUE_H
+
+#include "sfn_alu_defines.h"
+#include "nir.h"
+
+#include <memory>
+#include <set>
+#include <bitset>
+#include <iostream>
+
+namespace r600 {
+
+class Value {
+public:
+ using Pointer=std::shared_ptr<Value>;
+
+ struct PrintFlags {
+ PrintFlags():index_mode(0),
+ flags(0)
+ {
+ }
+ PrintFlags(int im, int f):index_mode(im),
+ flags(f)
+ {
+ }
+ int index_mode;
+ int flags;
+ static const int is_rel = 1;
+ static const int has_abs = 2;
+ static const int has_neg = 4;
+ static const int literal_is_float = 8;
+ static const int index_ar = 16;
+ static const int index_loopidx = 32;
+ };
+
+ enum Type {
+ gpr,
+ kconst,
+ literal,
+ cinline,
+ lds_direct,
+ gpr_vector,
+ gpr_array_value,
+ unknown
+ };
+
+ static const char *component_names;
+
+ using LiteralFlags=std::bitset<4>;
+
+ Value();
+
+ Value(Type type);
+
+ virtual ~Value(){}
+
+ Type type() const;
+ virtual uint32_t sel() const = 0;
+ uint32_t chan() const {return m_chan;}
+
+ void set_chan(uint32_t chan);
+ virtual void set_pin_to_channel() { assert(0 && "Only GPRs can be pinned to a channel ");}
+ void print(std::ostream& os, const PrintFlags& flags) const;
+
+ void print(std::ostream& os) const;
+
+   bool operator < (const Value& rhs) const;
+
+ static Value::Pointer zero;
+ static Value::Pointer one_f;
+ static Value::Pointer zero_dot_5;
+ static Value::Pointer one_i;
+
+protected:
+ Value(Type type, uint32_t chan);
+
+private:
+ virtual void do_print(std::ostream& os) const = 0;
+ virtual void do_print(std::ostream& os, const PrintFlags& flags) const;
+
+ virtual bool is_equal_to(const Value& other) const = 0;
+
+ Type m_type;
+ uint32_t m_chan;
+
+ friend bool operator == (const Value& lhs, const Value& rhs);
+};
+
+
+inline std::ostream& operator << (std::ostream& os, const Value& v)
+{
+ v.print(os);
+ return os;
+}
+
+
+inline bool operator == (const Value& lhs, const Value& rhs)
+{
+ if (lhs.type() == rhs.type())
+ return lhs.is_equal_to(rhs);
+ return false;
+}
+
+inline bool operator != (const Value& lhs, const Value& rhs)
+{
+ return !(lhs == rhs);
+}
+
+using PValue=Value::Pointer;
+
+struct value_less {
+ inline bool operator () (PValue lhs, PValue rhs) const {
+ return *lhs < *rhs;
+ }
+};
+
+using ValueSet = std::set<PValue, value_less>;
+
+
+class LiteralValue: public Value {
+public:
+ LiteralValue(float value, uint32_t chan= 0);
+ LiteralValue(uint32_t value, uint32_t chan= 0);
+ LiteralValue(int value, uint32_t chan= 0);
+ uint32_t sel() const override final;
+ uint32_t value() const;
+ float value_float() const;
+private:
+ void do_print(std::ostream& os) const override;
+ void do_print(std::ostream& os, const PrintFlags& flags) const override;
+ bool is_equal_to(const Value& other) const override;
+ union {
+ uint32_t u;
+ float f;
+ } m_value;
+};
+
+class InlineConstValue: public Value {
+public:
+ InlineConstValue(int value, int chan);
+ uint32_t sel() const override final;
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Value& other) const override;
+ AluInlineConstants m_value;
+};
+
+class UniformValue: public Value {
+public:
+ UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0);
+ UniformValue(uint32_t sel, uint32_t chan, PValue addr);
+ uint32_t sel() const override;
+ uint32_t kcache_bank() const;
+ PValue addr() const {return m_addr;}
+ void reset_addr(PValue v) {m_addr = v;}
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Value& other) const override;
+
+ uint32_t m_index;
+ uint32_t m_kcache_bank;
+ PValue m_addr;
+};
+
+} // end ns r600
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp
new file mode 100644
index 000000000..c53b32527
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp
@@ -0,0 +1,380 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_value_gpr.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+#include "sfn_liverange.h"
+
+namespace r600 {
+
+using std::vector;
+using std::array;
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset):
+ Value(Value::gpr, chan),
+ m_sel(sel),
+ m_base_offset(base_offset),
+ m_input(false),
+ m_pin_to_channel(false),
+ m_keep_alive(false)
+{
+}
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan):
+ Value(Value::gpr, chan),
+ m_sel(sel),
+ m_base_offset(0),
+ m_input(false),
+ m_pin_to_channel(false),
+ m_keep_alive(false)
+{
+}
+
+uint32_t GPRValue::sel() const
+{
+ return m_sel;
+}
+
+void GPRValue::do_print(std::ostream& os) const
+{
+ os << 'R';
+ os << m_sel;
+ os << '.' << component_names[chan()];
+}
+
+bool GPRValue::is_equal_to(const Value& other) const
+{
+ assert(other.type() == Value::Type::gpr);
+ const auto& rhs = static_cast<const GPRValue&>(other);
+ return (sel() == rhs.sel() &&
+ chan() == rhs.chan());
+}
+
+void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
+{
+ os << 'R';
+ os << m_sel;
+ os << '.' << component_names[chan()];
+}
+
+GPRVector::GPRVector(const GPRVector& orig):
+ Value(gpr_vector),
+ m_elms(orig.m_elms),
+ m_valid(orig.m_valid)
+{
+}
+
+GPRVector::GPRVector(std::array<PValue,4> elms):
+ Value(gpr_vector),
+ m_elms(elms),
+ m_valid(false)
+{
+ for (unsigned i = 0; i < 4; ++i)
+ if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) {
+         assert(0 && "GPR vector not valid because an element is missing or not a GPR");
+ return;
+ }
+ unsigned sel = m_elms[0]->sel();
+ for (unsigned i = 1; i < 4; ++i)
+ if (m_elms[i]->sel() != sel) {
+ assert(0 && "GPR vector not valid because sel is not equal for all elements");
+ return;
+ }
+ m_valid = true;
+}
+
+GPRVector::GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle):
+ Value (gpr_vector),
+ m_valid(true)
+{
+ for (int i = 0; i < 4; ++i)
+ m_elms[i] = PValue(new GPRValue(sel, swizzle[i]));
+}
+
+GPRVector::GPRVector(const GPRVector& orig, const std::array<uint8_t,4>& swizzle)
+{
+ for (int i = 0; i < 4; ++i)
+ m_elms[i] = orig.reg_i(swizzle[i]);
+ m_valid = orig.m_valid;
+}
+
+void GPRVector::validate() const
+{
+ assert(m_elms[0]);
+ uint32_t sel = m_elms[0]->sel();
+ if (sel >= 124)
+ return;
+
+ for (unsigned i = 1; i < 4; ++i) {
+ assert(m_elms[i]);
+ if (sel != m_elms[i]->sel())
+ return;
+ }
+
+ m_valid = true;
+}
+
+uint32_t GPRVector::sel() const
+{
+ validate();
+ assert(m_valid);
+ return m_elms[0] ? m_elms[0]->sel() : 999;
+}
+
+void GPRVector::set_reg_i(int i, PValue reg)
+{
+ m_elms[i] = reg;
+}
+
+void GPRVector::pin_to_channel(int i)
+{
+ auto& v = static_cast<GPRValue&>(*m_elms[i]);
+ v.set_pin_to_channel();
+}
+
+void GPRVector::pin_all_to_channel()
+{
+ for (auto& v: m_elms) {
+ auto& c = static_cast<GPRValue&>(*v);
+ c.set_pin_to_channel();
+ }
+}
+
+void GPRVector::do_print(std::ostream& os) const
+{
+ os << "R" << sel() << ".";
+ for (int i = 0; i < 4; ++i)
+ os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? m_elms[i]->chan() : 8] : '?');
+}
+
+void GPRVector::swizzle(const Swizzle& swz)
+{
+ Values v(m_elms);
+ for (uint32_t i = 0; i < 4; ++i)
+ if (i != swz[i]) {
+ assert(swz[i] < 4);
+ m_elms[i] = v[swz[i]];
+ }
+}
+
+bool GPRVector::is_equal_to(const Value& other) const
+{
+ if (other.type() != gpr_vector) {
+ std::cerr << "t";
+ return false;
+ }
+
+ const GPRVector& o = static_cast<const GPRVector&>(other);
+
+ for (int i = 0; i < 4; ++i) {
+ if (*m_elms[i] != *o.m_elms[i]) {
+ std::cerr << "elm" << i;
+ return false;
+ }
+ }
+ return true;
+}
+
+
+GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array):
+ Value(gpr_array_value, value->chan()),
+ m_value(value),
+ m_addr(addr),
+ m_array(array)
+{
+}
+
+GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array):
+ Value(gpr_array_value, value->chan()),
+ m_value(value),
+ m_array(array)
+{
+}
+
+static const char *swz_char = "xyzw01_";
+
+void GPRArrayValue::do_print(std::ostream& os) const
+{
+ assert(m_array);
+ os << "R" << m_value->sel();
+ if (m_addr) {
+ os << "[" << *m_addr << "] ";
+ }
+ os << swz_char[m_value->chan()];
+
+ os << "(" << *m_array << ")";
+}
+
+bool GPRArrayValue::is_equal_to(const Value& other) const
+{
+ const GPRArrayValue& v = static_cast<const GPRArrayValue&>(other);
+
+ return *m_value == *v.m_value &&
+ *m_array == *v.m_array;
+}
+
+void GPRArrayValue::record_read(LiverangeEvaluator& ev) const
+{
+ if (m_addr) {
+ ev.record_read(*m_addr);
+ unsigned chan = m_value->chan();
+ assert(m_array);
+ m_array->record_read(ev, chan);
+ } else
+ ev.record_read(*m_value);
+}
+
+void GPRArrayValue::record_write(LiverangeEvaluator& ev) const
+{
+ if (m_addr) {
+ ev.record_read(*m_addr);
+ unsigned chan = m_value->chan();
+ assert(m_array);
+ m_array->record_write(ev, chan);
+ } else
+ ev.record_write(*m_value);
+}
+
+void GPRArrayValue::reset_value(PValue new_value)
+{
+ m_value = new_value;
+}
+
+void GPRArrayValue::reset_addr(PValue new_addr)
+{
+ m_addr = new_addr;
+}
+
+
+GPRArray::GPRArray(int base, int size, int mask, int frac):
+ Value (gpr_vector),
+ m_base_index(base),
+ m_component_mask(mask),
+ m_frac(frac)
+{
+ m_values.resize(size);
+ for (int i = 0; i < size; ++i) {
+ for (int j = 0; j < 4; ++j) {
+ if (mask & (1 << j)) {
+ auto gpr = new GPRValue(base + i, j);
+            /* If we want to use sb, we have to keep arrays
+             * alive for the whole shader range; otherwise the sb scheduler
+             * thinks it is unable to rename non-array uses of these registers */
+ gpr->set_as_input();
+ gpr->set_keep_alive();
+ m_values[i].set_reg_i(j, PValue(gpr));
+
+ }
+ }
+ }
+}
+
+uint32_t GPRArray::sel() const
+{
+ return m_base_index;
+}
+
+static const char *compchar = "xyzw";
+void GPRArray::do_print(std::ostream& os) const
+{
+ os << "ARRAY[R" << sel() << "..R" << sel() + m_values.size() - 1 << "].";
+ for (int j = 0; j < 4; ++j) {
+ if (m_component_mask & (1 << j))
+ os << compchar[j];
+ }
+}
+
+bool GPRArray::is_equal_to(const Value& other) const
+{
+ const GPRArray& o = static_cast<const GPRArray&>(other);
+ return o.sel() == sel() &&
+ o.m_values.size() == m_values.size() &&
+ o.m_component_mask == m_component_mask;
+}
+
+uint32_t GPRArrayValue::sel() const
+{
+ return m_value->sel();
+}
+
+PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component)
+{
+ assert(index < m_values.size());
+ assert(m_component_mask & (1 << (component + m_frac)));
+
+ sfn_log << SfnLog::reg << "Create indirect register from " << *this;
+
+ PValue v = m_values[index].reg_i(component + m_frac);
+ assert(v);
+
+ sfn_log << SfnLog::reg << " -> " << *v;
+
+ if (indirect) {
+ sfn_log << SfnLog::reg << "[" << *indirect << "]";
+ switch (indirect->type()) {
+ case Value::literal: {
+ const LiteralValue& lv = static_cast<const LiteralValue&>(*indirect);
+ v = m_values[lv.value()].reg_i(component + m_frac);
+ break;
+ }
+ case Value::gpr: {
+ v = PValue(new GPRArrayValue(v, indirect, this));
+ sfn_log << SfnLog::reg << "(" << *v << ")";
+ break;
+ }
+ default:
+         assert(0 && "Indirect addressing must be a literal value or a GPR");
+ }
+ }
+ sfn_log << SfnLog::reg <<" -> " << *v << "\n";
+ return v;
+}
+
+void GPRArray::record_read(LiverangeEvaluator& ev, int chan) const
+{
+ for (auto& v: m_values)
+ ev.record_read(*v.reg_i(chan), true);
+}
+
+void GPRArray::record_write(LiverangeEvaluator& ev, int chan) const
+{
+ for (auto& v: m_values)
+ ev.record_write(*v.reg_i(chan), true);
+}
+
+void GPRArray::collect_registers(ValueMap& output) const
+{
+ for (auto& v: m_values) {
+ for (int i = 0; i < 4; ++i) {
+ auto vv = v.reg_i(i);
+ if (vv)
+ output.insert(vv);
+ }
+ }
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h
new file mode 100644
index 000000000..789348875
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h
@@ -0,0 +1,208 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SFN_GPRARRAY_H
+#define SFN_GPRARRAY_H
+
+#include "sfn_value.h"
+#include <vector>
+#include <array>
+
+namespace r600 {
+
+class ValuePool;
+class ValueMap;
+class LiverangeEvaluator;
+
+class GPRValue : public Value {
+public:
+ GPRValue() = default;
+ GPRValue(GPRValue&& orig) = default;
+ GPRValue(const GPRValue& orig) = default;
+
+ GPRValue(uint32_t sel, uint32_t chan, int base_offset);
+
+ GPRValue(uint32_t sel, uint32_t chan);
+
+ GPRValue& operator = (const GPRValue& orig) = default;
+ GPRValue& operator = (GPRValue&& orig) = default;
+
+ uint32_t sel() const override final;
+
+ void set_as_input(){ m_input = true; }
+ bool is_input() const {return m_input; }
+ void set_keep_alive() { m_keep_alive = true; }
+ bool keep_alive() const {return m_keep_alive; }
+ void set_pin_to_channel() override { m_pin_to_channel = true;}
+ bool pin_to_channel() const { return m_pin_to_channel;}
+
+private:
+ void do_print(std::ostream& os) const override;
+ void do_print(std::ostream& os, const PrintFlags& flags) const override;
+ bool is_equal_to(const Value& other) const override;
+ uint32_t m_sel;
+   int m_base_offset;
+ bool m_input;
+ bool m_pin_to_channel;
+ bool m_keep_alive;
+};
+
+using PGPRValue = std::shared_ptr<GPRValue>;
+
+class GPRVector : public Value {
+public:
+ using Swizzle = std::array<uint32_t,4>;
+ using Values = std::array<PValue,4>;
+ GPRVector() = default;
+ GPRVector(GPRVector&& orig) = default;
+ GPRVector(const GPRVector& orig);
+
+ GPRVector(const GPRVector& orig, const std::array<uint8_t, 4>& swizzle);
+ GPRVector(std::array<PValue,4> elms);
+ GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle);
+
+ GPRVector& operator = (const GPRVector& orig) = default;
+ GPRVector& operator = (GPRVector&& orig) = default;
+
+ void swizzle(const Swizzle& swz);
+
+ uint32_t sel() const override final;
+
+ void set_reg_i(int i, PValue reg);
+
+ unsigned chan_i(int i) const {return m_elms[i]->chan();}
+ PValue reg_i(int i) const {return m_elms[i];}
+ PValue operator [] (int i) const {return m_elms[i];}
+ PValue& operator [] (int i) {return m_elms[i];}
+
+ void pin_to_channel(int i);
+ void pin_all_to_channel();
+
+ PValue x() const {return m_elms[0];}
+ PValue y() const {return m_elms[1];}
+ PValue z() const {return m_elms[2];}
+ PValue w() const {return m_elms[3];}
+
+ Values& values() { return m_elms;}
+
+private:
+ void do_print(std::ostream& os) const override;
+ bool is_equal_to(const Value& other) const override;
+ void validate() const;
+
+ Values m_elms;
+ mutable bool m_valid;
+};
+
+
+class GPRArray : public Value
+{
+public:
+ using Pointer = std::shared_ptr<GPRArray>;
+
+ GPRArray(int base, int size, int comp_mask, int frac);
+
+ uint32_t sel() const override;
+
+ uint32_t mask() const { return m_component_mask; };
+
+ size_t size() const {return m_values.size();}
+
+ PValue get_indirect(unsigned index, PValue indirect, unsigned component);
+
+ void record_read(LiverangeEvaluator& ev, int chan)const;
+ void record_write(LiverangeEvaluator& ev, int chan)const;
+
+ void collect_registers(ValueMap& output) const;
+
+private:
+ void do_print(std::ostream& os) const override;
+
+ bool is_equal_to(const Value& other) const override;
+
+ int m_base_index;
+ int m_component_mask;
+ int m_frac;
+
+ std::vector<GPRVector> m_values;
+};
+
+using PGPRArray = GPRArray::Pointer;
+
+class GPRArrayValue :public Value {
+public:
+ GPRArrayValue(PValue value, GPRArray *array);
+ GPRArrayValue(PValue value, PValue index, GPRArray *array);
+
+ void record_read(LiverangeEvaluator& ev) const;
+ void record_write(LiverangeEvaluator& ev) const;
+
+ size_t array_size() const;
+ uint32_t sel() const override;
+
+ PValue value() {return m_value;}
+
+ void reset_value(PValue new_value);
+ void reset_addr(PValue new_addr);
+
+ Value::Pointer indirect() const {return m_addr;}
+
+private:
+
+ void do_print(std::ostream& os) const override;
+
+ bool is_equal_to(const Value& other) const override;
+
+ PValue m_value;
+ PValue m_addr;
+ GPRArray *m_array;
+};
+
+inline size_t GPRArrayValue::array_size() const
+{
+ return m_array->size();
+}
+
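+/* Build a swizzle that keeps the first ncomp channels and marks the rest
+ * unused (7 prints as '_'); e.g. ncomp == 2 yields {0, 1, 7, 7} (sketch). */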
+inline GPRVector::Swizzle swizzle_from_comps(unsigned ncomp)
+{
+ GPRVector::Swizzle swz = {0,1,2,3};
+ for (int i = ncomp; i < 4; ++i)
+ swz[i] = 7;
+ return swz;
+}
+
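+/* Same idea driven by a component mask: mask 0b0101 yields {0, 7, 2, 7},
+ * i.e. only x and z survive (sketch). */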
+inline GPRVector::Swizzle swizzle_from_mask(unsigned mask)
+{
+ GPRVector::Swizzle swz;
+ for (int i = 0; i < 4; ++i)
+ swz[i] = ((1 << i) & mask) ? i : 7;
+ return swz;
+}
+
+
+}
+
+#endif // SFN_GPRARRAY_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
new file mode 100644
index 000000000..efc9efdca
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp
@@ -0,0 +1,526 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sfn_debug.h"
+#include "sfn_value_gpr.h"
+#include "sfn_valuepool.h"
+
+#include <iostream>
+#include <queue>
+
+namespace r600 {
+
+using std::vector;
+using std::pair;
+using std::make_pair;
+using std::queue;
+
+ValuePool::ValuePool():
+ m_next_register_index(0),
+ current_temp_reg_index(0),
+ next_temp_reg_comp(4)
+{
+}
+
+PValue ValuePool::m_undef = Value::zero;
+
+GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components)
+{
+ std::array<PValue, 4> result;
+ for (int i = 0; i < 4; ++i)
+ result[i] = from_nir(dst, i < num_components ? i : 7);
+ return GPRVector(result);
+}
+
+std::vector<PValue> ValuePool::varvec_from_nir(const nir_dest& dst, int num_components)
+{
+ std::vector<PValue> result(num_components);
+ for (int i = 0; i < num_components; ++i)
+ result[i] = from_nir(dst, i);
+ return result;
+}
+
+
+std::vector<PValue> ValuePool::varvec_from_nir(const nir_src& src, int num_components)
+{
+ std::vector<PValue> result(num_components);
+ int i;
+ for (i = 0; i < num_components; ++i)
+ result[i] = from_nir(src, i);
+
+ return result;
+}
+
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
+{
+ sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ")
+ << (v.is_ssa ? v.ssa->index : v.reg.reg->index);
+
+ if (!v.is_ssa) {
+ int idx = lookup_register_index(v);
+ sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+ if (idx >= 0) {
+ auto reg = lookup_register(idx, swizzled, false);
+ if (reg) {
+ if (reg->type() == Value::gpr_vector) {
+ auto& array = static_cast<GPRArray&>(*reg);
+ reg = array.get_indirect(v.reg.base_offset,
+ v.reg.indirect ?
+ from_nir(*v.reg.indirect, 0, 0) : nullptr,
+ component);
+ }
+ return reg;
+ }
+ }
+ assert(0 && "local registers should always be found");
+ }
+
+ unsigned index = v.ssa->index;
+   /* For undefs we use zero and let the (yet to be implemented) DCE deal with it */
+ if (m_ssa_undef.find(index) != m_ssa_undef.end())
+ return Value::zero;
+
+
+ int idx = lookup_register_index(v);
+ sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+ if (idx >= 0) {
+ auto reg = lookup_register(idx, swizzled, false);
+ if (reg)
+ return reg;
+ }
+
+ auto literal_val = nir_src_as_const_value(v);
+ if (literal_val) {
+ assert(v.is_ssa);
+ switch (v.ssa->bit_size) {
+ case 1:
+ return PValue(new LiteralValue(literal_val[swizzled].b ? 0xffffffff : 0, component));
+ case 32:
+ return literal(literal_val[swizzled].u32);
+ default:
+ sfn_log << SfnLog::reg << "Unsupported bit size " << v.ssa->bit_size
+ << " fall back to 32\n";
+ return PValue(new LiteralValue(literal_val[swizzled].u32, component));
+ }
+ }
+
+ return PValue();
+}
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component)
+{
+ return from_nir(v, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component)
+{
+ return from_nir(v.src, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component)
+{
+ return from_nir(v.src, component, v.swizzle[component]);
+}
+
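+/* Temp components are handed out register by register: consecutive calls
+ * with channel < 0 might yield R10.x, R10.y, R10.z, R10.w and then move
+ * on to R11.x (register numbers illustrative); a non-negative channel
+ * skips ahead so the result lands on that channel. */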
+PGPRValue ValuePool::get_temp_register(int channel)
+{
+ /* Skip to next register to get the channel we want */
+ if (channel >= 0) {
+ if (next_temp_reg_comp <= channel)
+ next_temp_reg_comp = channel;
+ else
+ next_temp_reg_comp = 4;
+ }
+
+   if (next_temp_reg_comp > 3) {
+      current_temp_reg_index = allocate_temp_register();
+      /* Land on the requested channel in the fresh register */
+      next_temp_reg_comp = channel >= 0 ? channel : 0;
+   }
+ return std::make_shared<GPRValue>(current_temp_reg_index, next_temp_reg_comp++);
+}
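+/* Remap a kcache index into the ALU constant-source range of its bank;
+ * e.g. index 3 in bank 0 becomes sel 131 (128 + 3), while indices >= 512
+ * pass through unchanged (example values follow from the table below). */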
+
+GPRVector ValuePool::get_temp_vec4(const GPRVector::Swizzle& swizzle)
+{
+ int sel = allocate_temp_register();
+ return GPRVector(sel, swizzle);
+}
+
+PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp)
+{
+ int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa):
+ get_local_register_index(*src.reg.reg);
+
+ auto retval = lookup_register(idx, comp, false);
+   if (!retval || (retval->type() != Value::gpr && retval->type() != Value::gpr_array_value))
+ retval = create_register(idx, comp);
+ return retval;
+}
+
+PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component)
+{
+ //assert(v->write_mask & (1 << component));
+ return from_nir(v.dest, component);
+}
+
+int ValuePool::lookup_register_index(const nir_dest& dst)
+{
+ return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa):
+ get_local_register_index(*dst.reg.reg);
+}
+
+int ValuePool::lookup_register_index(const nir_src& src) const
+{
+ int index = 0;
+
+ index = src.is_ssa ?
+ get_ssa_register_index(*src.ssa) :
+ get_local_register_index(*src.reg.reg);
+
+ sfn_log << SfnLog::reg << " LIDX:" << index;
+
+ auto r = m_register_map.find(index);
+ if (r == m_register_map.end()) {
+ return -1;
+ }
+ return static_cast<int>(r->second.index);
+}
+
+
+int ValuePool::allocate_temp_register()
+{
+ return m_next_register_index++;
+}
+
+
+PValue ValuePool::from_nir(const nir_dest& v, unsigned component)
+{
+ int idx = lookup_register_index(v);
+ sfn_log << SfnLog::reg << __func__ << ": ";
+ if (v.is_ssa)
+ sfn_log << "ssa_" << v.ssa.index;
+ else
+ sfn_log << "r" << v.reg.reg->index;
+ sfn_log << " -> " << idx << "\n";
+
+ auto retval = lookup_register(idx, component, false);
+ if (!retval)
+ retval = create_register(idx, component);
+
+ if (retval->type() == Value::gpr_vector) {
+ assert(!v.is_ssa);
+ auto& array = static_cast<GPRArray&>(*retval);
+ retval = array.get_indirect(v.reg.base_offset,
+ v.reg.indirect ?
+ from_nir(*v.reg.indirect, 0, 0) : nullptr,
+ component);
+ }
+
+ return retval;
+}
+
+ValueMap ValuePool::get_temp_registers() const
+{
+ ValueMap result;
+
+ for (auto& v : m_registers) {
+ if (v.second->type() == Value::gpr)
+ result.insert(v.second);
+ else if (v.second->type() == Value::gpr_vector) {
+ auto& array = static_cast<GPRArray&>(*v.second);
+ array.collect_registers(result);
+ }
+ }
+ return result;
+}
+
+static const char swz[] = "xyzw01?_";
+
+PValue ValuePool::create_register(unsigned sel, unsigned swizzle)
+{
+ sfn_log << SfnLog::reg
+ <<"Create register " << sel << '.' << swz[swizzle] << "\n";
+ auto retval = PValue(new GPRValue(sel, swizzle));
+ m_registers[(sel << 3) + swizzle] = retval;
+ return retval;
+}
+
+bool ValuePool::inject_register(unsigned sel, unsigned swizzle,
+ const PValue& reg, bool map)
+{
+ uint32_t ssa_index = sel;
+
+ if (map) {
+ auto pos = m_ssa_register_map.find(sel);
+ if (pos == m_ssa_register_map.end())
+ ssa_index = m_next_register_index++;
+ else
+ ssa_index = pos->second;
+ }
+
+ sfn_log << SfnLog::reg
+ << "Inject register " << sel << '.' << swz[swizzle]
+ << " at index " << ssa_index << " ...";
+
+ if (map)
+ m_ssa_register_map[sel] = ssa_index;
+
+ allocate_with_mask(ssa_index, swizzle, true);
+
+ unsigned idx = (ssa_index << 3) + swizzle;
+ auto p = m_registers.find(idx);
+ if ( (p != m_registers.end()) && *p->second != *reg) {
+ std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n";
+ assert(0);
+ return false;
+ }
+ sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n";
+ m_registers[idx] = reg;
+
+ if (m_next_register_index <= ssa_index)
+ m_next_register_index = ssa_index + 1;
+ return true;
+}
+
+
+PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle,
+ bool required)
+{
+
+ PValue retval;
+ sfn_log << SfnLog::reg
+ << "lookup register " << sel << '.' << swz[swizzle] << "("
+ << ((sel << 3) + swizzle) << ")...";
+
+
+ auto reg = m_registers.find((sel << 3) + swizzle);
+ if (reg != m_registers.end()) {
+ sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n";
+ retval = reg->second;
+   } else if (swizzle == 7) {
+      retval = create_register(sel, swizzle);
+      sfn_log << SfnLog::reg << " -> Created " << *retval << "\n";
+   } else if (required) {
+      sfn_log << SfnLog::reg << "Register (" << sel << ", "
+              << swizzle << ") not found but required\n";
+      assert(0 && "Unallocated register value requested\n");
+   } else {
+      sfn_log << SfnLog::reg << " -> Not required and not allocated\n";
+   }
+ return retval;
+}
+
+unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa)
+{
+ sfn_log << SfnLog::reg << __func__ << ": search dst ssa "
+ << ssa.index;
+
+ auto pos = m_ssa_register_map.find(ssa.index);
+ if (pos == m_ssa_register_map.end()) {
+ sfn_log << SfnLog::reg << " Need to allocate ...";
+ allocate_ssa_register(ssa);
+ pos = m_ssa_register_map.find(ssa.index);
+ assert(pos != m_ssa_register_map.end());
+ }
+ sfn_log << SfnLog::reg << "... got " << pos->second << "\n";
+ return pos->second;
+}
+
+unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const
+{
+ sfn_log << SfnLog::reg << __func__ << ": search ssa "
+ << ssa.index;
+
+   auto pos = m_ssa_register_map.find(ssa.index);
+   if (pos == m_ssa_register_map.end()) {
+      sfn_log << SfnLog::reg << __func__ << ": ssa register "
+              << ssa.index << " lookup failed\n";
+      return -1;
+   }
+   sfn_log << SfnLog::reg << " got " << pos->second << "\n";
+ return pos->second;
+}
+
+unsigned ValuePool::get_local_register_index(const nir_register& reg)
+{
+ unsigned index = reg.index | 0x80000000;
+
+ auto pos = m_ssa_register_map.find(index);
+ if (pos == m_ssa_register_map.end()) {
+ allocate_local_register(reg);
+ pos = m_ssa_register_map.find(index);
+ assert(pos != m_ssa_register_map.end());
+ }
+ return pos->second;
+}
+
+unsigned ValuePool::get_local_register_index(const nir_register& reg) const
+{
+ unsigned index = reg.index | 0x80000000;
+ auto pos = m_ssa_register_map.find(index);
+ if (pos == m_ssa_register_map.end()) {
+ sfn_log << SfnLog::err << __func__ << ": local register "
+ << reg.index << " lookup failed";
+ return -1;
+ }
+ return pos->second;
+}
+
+void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa)
+{
+ sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index
+ << " as " << m_next_register_index << "\n";
+ int index = m_next_register_index++;
+ m_ssa_register_map[ssa.index] = index;
+ allocate_with_mask(index, 0xf, true);
+}
+
+void ValuePool::allocate_arrays(array_list& arrays)
+{
+ int ncomponents = 0;
+ int current_index = m_next_register_index;
+ unsigned instance = 0;
+
+ while (!arrays.empty()) {
+ auto a = arrays.top();
+ arrays.pop();
+
+      /* This is a bit hackish: return an id that encodes the array merge. To
+       * make sure the mapping doesn't go wrong, the array must be longer than
+       * the number of instances already placed in this array slot */
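+      /* Illustration (hypothetical sizes): an array A (length 8, 2
+       * components) and an array B (length 6, 1 component) can share one
+       * slot; A takes mask 0x3 (xy) and B mask 0x4 (z) at the same base
+       * index, because B is long enough to cover instance 1. */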
+ if (a.ncomponents + ncomponents > 4 ||
+ a.length < instance) {
+ current_index = m_next_register_index;
+ ncomponents = 0;
+ instance = 0;
+ }
+
+ if (ncomponents == 0)
+ m_next_register_index += a.length;
+
+ uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents;
+
+ PGPRArray array = PGPRArray(new GPRArray(current_index, a.length, mask, ncomponents));
+
+ m_reg_arrays.push_back(array);
+
+ sfn_log << SfnLog::reg << "Add array at "<< current_index
+ << " of size " << a.length << " with " << a.ncomponents
+ << " components, mask " << mask << "\n";
+
+ m_ssa_register_map[a.index | 0x80000000] = current_index + instance;
+
+ for (unsigned i = 0; i < a.ncomponents; ++i)
+ m_registers[((current_index + instance) << 3) + i] = array;
+
+ VRec next_reg = {current_index + instance, mask, mask};
+ m_register_map[current_index + instance] = next_reg;
+
+ ncomponents += a.ncomponents;
+ ++instance;
+ }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg)
+{
+ int index = m_next_register_index++;
+ m_ssa_register_map[reg.index | 0x80000000] = index;
+ allocate_with_mask(index, 0xf, true);
+
+   /* Create the actual registers and map them */
+ for (int i = 0; i < 4; ++i) {
+ int k = (index << 3) + i;
+ m_registers[k] = std::make_shared<GPRValue>(index, i);
+ }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays)
+{
+ sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index
+ << " as " << m_next_register_index << "\n";
+
+ if (reg.num_array_elems) {
+ array_entry ae = {reg.index, reg.num_array_elems, reg.num_components};
+ arrays.push(ae);
+ }
+ else
+ allocate_local_register(reg);
+}
+
+bool ValuePool::create_undef(nir_ssa_undef_instr* instr)
+{
+ m_ssa_undef.insert(instr->def.index);
+ return true;
+}
+
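+/* Bookkeeping sketch: allocate_with_mask(3, 0x3, true) pre-allocates
+ * R3.xy; allocate_with_mask(3, 0x4, false) then takes R3.z and the mask
+ * grows to 0x7; a second non-pre-alloc request for 0x4 overlaps a
+ * component that was never pre-allocated and fails with -1. */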
+int ValuePool::allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc)
+{
+ int retval;
+ VRec next_register = { index, mask };
+
+ sfn_log << SfnLog::reg << (pre_alloc ? "Pre-alloc" : "Allocate")
+ << " register (" << index << ", " << mask << ")\n";
+ retval = index;
+ auto r = m_register_map.find(index);
+
+ if (r != m_register_map.end()) {
+ if ((r->second.mask & next_register.mask) &&
+ !(r->second.pre_alloc_mask & next_register.mask)) {
+ std::cerr << "r600 ERR: register ("
+ << index << ", " << mask
+ << ") already allocated as (" << r->second.index << ", "
+ << r->second.mask << ", " << r->second.pre_alloc_mask
+ << ") \n";
+ retval = -1;
+ } else {
+ r->second.mask |= next_register.mask;
+ if (pre_alloc)
+ r->second.pre_alloc_mask |= next_register.mask;
+ retval = r->second.index;
+ }
+ } else {
+ if (pre_alloc)
+ next_register.pre_alloc_mask = mask;
+ m_register_map[index] = next_register;
+ retval = next_register.index;
+ }
+
+ sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R"
+ << retval << "\n";
+
+ return retval;
+}
+
+PValue ValuePool::literal(uint32_t value)
+{
+ auto l = m_literals.find(value);
+ if (l != m_literals.end())
+ return l->second;
+
+ m_literals[value] = PValue(new LiteralValue(value));
+ return m_literals[value];
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h
new file mode 100644
index 000000000..fa1e5507f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h
@@ -0,0 +1,242 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SFN_VALUEPOOL_H
+#define SFN_VALUEPOOL_H
+
+#include "sfn_value.h"
+#include "sfn_value_gpr.h"
+
+#include <set>
+#include <queue>
+
+namespace r600 {
+
+using LiteralBuffer = std::map<unsigned, const nir_load_const_instr *>;
+
+class ValueMap {
+public:
+ void insert(const PValue& v) {
+ auto idx = index_from(v->sel(), v->chan());
+ m_map[idx] = v;
+ }
+ PValue get_or_inject(uint32_t index, uint32_t chan) {
+ auto idx = index_from(index, chan);
+ auto v = m_map.find(idx);
+ if (v == m_map.end()) {
+ insert(PValue(new GPRValue(index, chan)));
+ v = m_map.find(idx);
+ }
+ return v->second;
+ }
+ std::map<uint32_t, PValue>::const_iterator begin() const {return m_map.begin();}
+ std::map<uint32_t, PValue>::const_iterator end() const {return m_map.end();}
+
+private:
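+   /* Keys pack (sel, chan) into one integer, e.g. sel 5, chan 2 maps to
+    * (5 << 3) + 2 == 42; three bits suffice since chan never exceeds 7. */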
+ uint32_t index_from(uint32_t index, uint32_t chan) {
+ return (index << 3) + chan;
+ }
+ std::map<uint32_t, PValue> m_map;
+};
+
+/** \brief Class to keep track of registers, uniforms, and literals
+ * This class holds the references to the uniforms and the literals
+ * and is responsible for allocating the registers.
+ */
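+/* Minimal usage sketch (values illustrative, not from this patch):
+ *
+ *    ValuePool pool;
+ *    PValue forty_two = pool.literal(42);      // cached; a second call
+ *                                              // returns the same value
+ *    PGPRValue tmp = pool.get_temp_register(); // next free temp component
+ */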
+class ValuePool
+{
+public:
+
+ struct array_entry {
+ unsigned index;
+ unsigned length;
+ unsigned ncomponents;
+
+ bool operator ()(const array_entry& a, const array_entry& b) const {
+ return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents);
+ }
+ };
+
+ using array_list = std::priority_queue<array_entry, std::vector<array_entry>,
+ array_entry>;
+
+ ValuePool();
+
+
+ GPRVector vec_from_nir(const nir_dest& dst, int num_components);
+
+ std::vector<PValue> varvec_from_nir(const nir_dest& src, int num_components);
+ std::vector<PValue> varvec_from_nir(const nir_src& src, int num_components);
+
+ PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
+
+ PValue from_nir(const nir_src& v, unsigned component);
+ /** Get a register that is used as source register in an ALU instruction
+    * The PValue holds one component as specified. If the register refers to
+ * a GPR it must already have been allocated, uniforms and literals on
+ * the other hand might be pre-loaded.
+ */
+ PValue from_nir(const nir_alu_src& v, unsigned component);
+
+   /** Get a register that is used as source register in a texture instruction
+    * The PValue holds one component as specified.
+ */
+ PValue from_nir(const nir_tex_src& v, unsigned component);
+
+ /** Allocate a register that is used as destination register in an ALU
+    * instruction. The PValue holds one component as specified.
+ */
+ PValue from_nir(const nir_alu_dest& v, unsigned component);
+
+ /** Allocate a register that is used as destination register in any
+    * instruction. The PValue holds one component as specified.
+ */
+ PValue from_nir(const nir_dest& v, unsigned component);
+
+
+ /** Inject a register into a given ssa index position
+ * This is used to redirect loads from system values and vertex attributes
+ * that are already loaded into registers */
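+   /* E.g. (hypothetical): a vertex attribute preloaded into R1.x could be
+    * exposed as ssa_7 via inject_register(7, 0, r1_x, true). */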
+ bool inject_register(unsigned sel, unsigned swizzle, const PValue &reg, bool map);
+
+ /** Reserve space for a local register */
+ void allocate_local_register(const nir_register& reg);
+ void allocate_local_register(const nir_register &reg, array_list& arrays);
+
+ void allocate_arrays(array_list& arrays);
+
+
+ void increment_reserved_registers() {
+ ++m_next_register_index;
+ }
+
+ void set_reserved_registers(unsigned rr) {
+      m_next_register_index = rr;
+ }
+
+   /** Reserve an undef register; currently it uses (0,7).
+    * \todo should be eliminated in the final pass
+ */
+ bool create_undef(nir_ssa_undef_instr* instr);
+
+ /** Create a new register with the given index and store it in the
+ * lookup map
+ */
+ PValue create_register_from_nir_src(const nir_src& sel, int comp);
+
+ ValueMap get_temp_registers() const;
+
+ PValue lookup_register(unsigned sel, unsigned swizzle, bool required);
+
+ size_t register_count() const {return m_next_register_index;}
+
+ PValue literal(uint32_t value);
+
+ PGPRValue get_temp_register(int channel = -1);
+
+ GPRVector get_temp_vec4(const GPRVector::Swizzle &swizzle = {0,1,2,3});
+
+protected:
+ std::vector<PGPRArray> m_reg_arrays;
+
+private:
+
+ /** Get the register index mapped from the NIR code to the r600 IR.
+ * \param src NIR register source
+ * \returns r600 IR index
+ */
+ int lookup_register_index(const nir_src& src) const;
+
+ /** Get the register index mapped from the NIR code to the r600 IR.
+ * \param dst NIR register destination
+ * \returns r600 IR index
+ */
+ int lookup_register_index(const nir_dest& dst);
+
+ /** Allocate a register that is needed for lowering an instruction
+ * that requires complex calculations.
+ */
+ int allocate_temp_register();
+
+
+ PValue create_register(unsigned index, unsigned swizzle);
+
+ unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa);
+
+ unsigned get_ssa_register_index(const nir_ssa_def& ssa) const;
+
+ unsigned get_local_register_index(const nir_register& reg);
+
+ unsigned get_local_register_index(const nir_register& reg) const;
+
+ void allocate_ssa_register(const nir_ssa_def& ssa);
+
+ void allocate_array(const nir_register& reg);
+
+
+ /** Allocate a register index with the given component mask.
+ * If one of the components has already been allocated, the function
+ * signals an error by returning -1; otherwise a register index is
+ * returned.
+ */
+ int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc);
+
+ /** Search the lookup map for a register with the given index
+ * (documents lookup_register above).
+ * \param sel register sel value
+ * \param swizzle register component; can also be 4, 5, and 7
+ * \param required true: in debug mode, assert when the register doesn't exist;
+ * false: return nullptr on failure
+ */
+
+ std::set<unsigned> m_ssa_undef;
+
+ std::map<unsigned, unsigned> m_ssa_register_map;
+
+ std::map<unsigned, PValue> m_registers;
+
+ static PValue m_undef;
+
+ struct VRec {
+ unsigned index;
+ unsigned mask;
+ unsigned pre_alloc_mask;
+ };
+ std::map<unsigned, VRec> m_register_map;
+
+ unsigned m_next_register_index;
+
+
+ std::map<uint32_t, PValue> m_literals;
+
+ int current_temp_reg_index;
+ int next_temp_reg_comp;
+};
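+
+/* Illustrative sketch (not part of the driver) of the typical flow:
+ * the pool hands out r600 values for NIR operands during translation.
+ *
+ * ValuePool pool;
+ * auto one = pool.literal(0x3f800000); // the constant 1.0f
+ * auto tmp = pool.get_temp_register(); // one scratch GPR component
+ *
+ * from_nir() lookups of GPRs assume the register was allocated before.
+ */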
+
+}
+
+#endif // SFN_VALUEPOOL_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
new file mode 100644
index 000000000..ff49216a9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp
@@ -0,0 +1,535 @@
+#include "sfn_vertexstageexport.h"
+
+#include "sfn_shaderio.h"
+
+namespace r600 {
+
+using std::priority_queue;
+
+VertexStageExportBase::VertexStageExportBase(VertexStage& proc):
+ m_proc(proc),
+ m_cur_clip_pos(1)
+{
+
+}
+
+VertexStageExportBase::~VertexStageExportBase()
+{
+
+}
+
+bool VertexStageExportBase::do_process_outputs(nir_variable *output)
+{
+ return true;
+}
+
+void VertexStageExportBase::emit_shader_start()
+{
+
+}
+
+void VertexStageExportBase::scan_store_output(nir_intrinsic_instr* instr)
+{
+
+}
+
+bool VertexStageExportBase::store_output(nir_intrinsic_instr* instr)
+{
+ auto index = nir_src_as_const_value(instr->src[1]);
+ assert(index && "Indirect outputs not supported");
+
+ const store_loc store_info = {
+ nir_intrinsic_component(instr),
+ nir_intrinsic_io_semantics(instr).location,
+ (unsigned)nir_intrinsic_base(instr) + index->u32,
+ 0
+ };
+
+ return do_store_output(store_info, instr);
+}
+
+VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc,
+ const pipe_stream_output_info *so_info,
+ r600_pipe_shader *pipe_shader, const r600_shader_key &key):
+ VertexStageWithOutputInfo(proc),
+ m_last_param_export(nullptr),
+ m_last_pos_export(nullptr),
+ m_num_clip_dist(0),
+ m_enabled_stream_buffers_mask(0),
+ m_so_info(so_info),
+ m_pipe_shader(pipe_shader),
+ m_key(key)
+{
+}
+
+bool VertexStageWithOutputInfo::do_process_outputs(nir_variable *output)
+{
+ if (output->data.location == VARYING_SLOT_COL0 ||
+ output->data.location == VARYING_SLOT_COL1 ||
+ (output->data.location >= VARYING_SLOT_VAR0 &&
+ output->data.location <= VARYING_SLOT_VAR31) ||
+ (output->data.location >= VARYING_SLOT_TEX0 &&
+ output->data.location <= VARYING_SLOT_TEX7) ||
+ output->data.location == VARYING_SLOT_BFC0 ||
+ output->data.location == VARYING_SLOT_BFC1 ||
+ output->data.location == VARYING_SLOT_CLIP_VERTEX ||
+ output->data.location == VARYING_SLOT_CLIP_DIST0 ||
+ output->data.location == VARYING_SLOT_CLIP_DIST1 ||
+ output->data.location == VARYING_SLOT_POS ||
+ output->data.location == VARYING_SLOT_PSIZ ||
+ output->data.location == VARYING_SLOT_FOGC ||
+ output->data.location == VARYING_SLOT_LAYER ||
+ output->data.location == VARYING_SLOT_EDGE ||
+ output->data.location == VARYING_SLOT_VIEWPORT
+ ) {
+
+ r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location];
+ auto semantic = r600_get_varying_semantic(output->data.location);
+ io.name = semantic.first;
+ io.sid = semantic.second;
+
+ m_proc.evaluate_spi_sid(io);
+ io.write_mask = ((1 << glsl_get_components(output->type)) - 1)
+ << output->data.location_frac;
+ ++m_proc.sh_info().noutput;
+
+ if (output->data.location == VARYING_SLOT_PSIZ ||
+ output->data.location == VARYING_SLOT_EDGE ||
+ output->data.location == VARYING_SLOT_LAYER) // VIEWPORT?
+ m_cur_clip_pos = 2;
+
+ if (output->data.location != VARYING_SLOT_POS &&
+ output->data.location != VARYING_SLOT_EDGE &&
+ output->data.location != VARYING_SLOT_PSIZ &&
+ output->data.location != VARYING_SLOT_CLIP_VERTEX)
+ m_param_driver_locations.push(output->data.driver_location);
+
+ return true;
+ }
+ return false;
+}
+
+bool VertexStageExportForFS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
+{
+ switch (store_info.location) {
+ case VARYING_SLOT_PSIZ:
+ m_proc.sh_info().vs_out_point_size = 1;
+ m_proc.sh_info().vs_out_misc_write = 1;
+ FALLTHROUGH;
+ case VARYING_SLOT_POS:
+ return emit_varying_pos(store_info, instr);
+ case VARYING_SLOT_EDGE: {
+ std::array<uint32_t, 4> swizzle_override = {7, 0, 7, 7};
+ return emit_varying_pos(store_info, instr, &swizzle_override);
+ }
+ case VARYING_SLOT_VIEWPORT: {
+ std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0};
+ return emit_varying_pos(store_info, instr, &swizzle_override) &&
+ emit_varying_param(store_info, instr);
+ }
+ case VARYING_SLOT_CLIP_VERTEX:
+ return emit_clip_vertices(store_info, instr);
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ m_num_clip_dist += 4;
+ return emit_varying_param(store_info, instr) && emit_varying_pos(store_info, instr);
+ case VARYING_SLOT_LAYER: {
+ m_proc.sh_info().vs_out_misc_write = 1;
+ m_proc.sh_info().vs_out_layer = 1;
+ std::array<uint32_t, 4> swz = {7,7,0,7};
+ return emit_varying_pos(store_info, instr, &swz) &&
+ emit_varying_param(store_info, instr);
+ }
+ case VARYING_SLOT_VIEW_INDEX:
+ return emit_varying_pos(store_info, instr) &&
+ emit_varying_param(store_info, instr);
+
+ default:
+ return emit_varying_param(store_info, instr);
+ }
+
+ fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n",
+ store_info.location);
+ return false;
+}
+
+bool VertexStageExportForFS::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr* instr,
+ std::array<uint32_t, 4> *swizzle_override)
+{
+ std::array<uint32_t,4> swizzle;
+ uint32_t write_mask = 0;
+
+ if (swizzle_override) {
+ swizzle = *swizzle_override;
+ for (int i = 0; i < 4; ++i) {
+ if (swizzle[i] < 6)
+ write_mask |= 1 << i;
+ }
+ } else {
+ write_mask = nir_intrinsic_write_mask(instr) << store_info.frac;
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
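+ /* Example: a NIR write mask of 0b11 with frac == 2 yields
+ * write_mask 0b1100 and swizzle {7,7,0,1}, i.e. source x,y
+ * land in output channels z,w. */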
+ }
+
+ m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask;
+
+ GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle);
+ m_proc.set_output(store_info.driver_location, value.sel());
+
+ int export_slot = 0;
+
+ switch (store_info.location) {
+ case VARYING_SLOT_EDGE: {
+ m_proc.sh_info().vs_out_misc_write = 1;
+ m_proc.sh_info().vs_out_edgeflag = 1;
+ m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr});
+ m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr});
+ m_proc.sh_info().output[store_info.driver_location].write_mask = 0xf;
+ }
+ FALLTHROUGH;
+ case VARYING_SLOT_PSIZ:
+ case VARYING_SLOT_LAYER:
+ export_slot = 1;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ m_proc.sh_info().vs_out_misc_write = 1;
+ m_proc.sh_info().vs_out_viewport = 1;
+ export_slot = 1;
+ break;
+ case VARYING_SLOT_POS:
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ export_slot = m_cur_clip_pos++;
+ break;
+ default:
+ sfn_log << SfnLog::err << __func__ << ": Unsupported location "
+ << store_info.location << "\n";
+ return false;
+ }
+
+ m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+ m_proc.add_param_output_reg(store_info.driver_location, m_last_pos_export->gpr_ptr());
+ return true;
+}
+
+bool VertexStageExportForFS::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr* instr)
+{
+ assert(store_info.driver_location < m_proc.sh_info().noutput);
+ sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n";
+
+ int write_mask = nir_intrinsic_write_mask(instr) << store_info.frac;
+ std::array<uint32_t,4> swizzle;
+ for (int i = 0; i < 4; ++i)
+ swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7;
+
+ //m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask;
+
+ GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle, true);
+ m_proc.sh_info().output[store_info.driver_location].gpr = value.sel();
+
+ /* This should use the registers!! */
+ m_proc.set_output(store_info.driver_location, value.sel());
+
+ m_last_param_export = new ExportInstruction(param_id(store_info.driver_location),
+ value, ExportInstruction::et_param);
+ m_proc.emit_export_instruction(m_last_param_export);
+ m_proc.add_param_output_reg(store_info.driver_location, m_last_param_export->gpr_ptr());
+ return true;
+}
+
+bool VertexStageExportForFS::emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr)
+{
+ m_proc.sh_info().cc_dist_mask = 0xff;
+ m_proc.sh_info().clip_dist_write = 0xff;
+
+ m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], 0xf, {0,1,2,3});
+ m_proc.add_param_output_reg(store_info.driver_location, &m_clip_vertex);
+
+ for (int i = 0; i < 4; ++i)
+ m_proc.sh_info().output[store_info.driver_location].write_mask |= 1 << i;
+
+ GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()};
+
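+ /* Each of the eight clip distances is the DOT4 of the clip vertex
+ * with one user clip plane read from the driver constant buffer
+ * (R600_BUFFER_INFO_CONST_BUFFER, slots 512..519); only the lane
+ * matching the target channel carries the write flag. */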
+ for (int i = 0; i < 8; i++) {
+ int oreg = i >> 2;
+ int ochan = i & 3;
+ AluInstruction *ir = nullptr;
+ for (int j = 0; j < 4; j++) {
+ ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j),
+ PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)),
+ (j == ochan) ? EmitInstruction::write : EmitInstruction::empty);
+ m_proc.emit_instruction(ir);
+ }
+ ir->set_flag(alu_last_instr);
+ }
+
+ m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+
+ m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+
+ return true;
+}
+
+VertexStageWithOutputInfo::VertexStageWithOutputInfo(VertexStage& proc):
+ VertexStageExportBase(proc),
+ m_current_param(0)
+{
+
+}
+
+void VertexStageWithOutputInfo::scan_store_output(nir_intrinsic_instr* instr)
+{
+ auto location = nir_intrinsic_io_semantics(instr).location;
+ auto driver_location = nir_intrinsic_base(instr);
+ auto index = nir_src_as_const_value(instr->src[1]);
+ assert(index);
+
+ unsigned noutputs = driver_location + index->u32 + 1;
+ if (m_proc.sh_info().noutput < noutputs)
+ m_proc.sh_info().noutput = noutputs;
+
+ r600_shader_io& io = m_proc.sh_info().output[driver_location + index->u32];
+ auto semantic = r600_get_varying_semantic(location + index->u32);
+ io.name = semantic.first;
+ io.sid = semantic.second;
+ m_proc.evaluate_spi_sid(io);
+ io.write_mask = nir_intrinsic_write_mask(instr);
+
+ if (location == VARYING_SLOT_PSIZ ||
+ location == VARYING_SLOT_EDGE ||
+ location == VARYING_SLOT_LAYER) // VIEWPORT?
+ m_cur_clip_pos = 2;
+
+ if (location != VARYING_SLOT_POS &&
+ location != VARYING_SLOT_EDGE &&
+ location != VARYING_SLOT_PSIZ &&
+ location != VARYING_SLOT_CLIP_VERTEX) {
+ m_param_driver_locations.push(driver_location + index->u32);
+ }
+}
+
+unsigned VertexStageWithOutputInfo::param_id(unsigned driver_location)
+{
+ auto param_loc = m_param_map.find(driver_location);
+ assert(param_loc != m_param_map.end());
+ return param_loc->second;
+}
+
+void VertexStageWithOutputInfo::emit_shader_start()
+{
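+ /* m_param_driver_locations is a min-heap (std::greater), so the
+ * parameters get numbered in ascending driver_location order. */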
+ while (!m_param_driver_locations.empty()) {
+ auto loc = m_param_driver_locations.top();
+ m_param_driver_locations.pop();
+ m_param_map[loc] = m_current_param++;
+ }
+}
+
+unsigned VertexStageWithOutputInfo::current_param() const
+{
+ return m_current_param;
+}
+
+void VertexStageExportForFS::finalize_exports()
+{
+ if (m_key.vs.as_gs_a) {
+ PValue o(new GPRValue(0,PIPE_SWIZZLE_0));
+ GPRVector primid({m_proc.primitive_id(), o,o,o});
+ m_last_param_export = new ExportInstruction(current_param(), primid, ExportInstruction::et_param);
+ m_proc.emit_export_instruction(m_last_param_export);
+ int i;
+ i = m_proc.sh_info().noutput++;
+ auto& io = m_proc.sh_info().output[i];
+ io.name = TGSI_SEMANTIC_PRIMID;
+ io.sid = 0;
+ io.gpr = 0;
+ io.interpolate = TGSI_INTERPOLATE_CONSTANT;
+ io.write_mask = 0x1;
+ io.spi_sid = m_key.vs.prim_id_out;
+ m_proc.sh_info().vs_as_gs_a = 1;
+ }
+
+ if (m_so_info && m_so_info->num_outputs)
+ emit_stream(-1);
+
+ m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask;
+
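+ /* The hardware expects at least one parameter and one position export
+ * from a vertex shader, so emit dummy exports with all channels
+ * masked (swizzle 7) if the shader produced none. */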
+ if (!m_last_param_export) {
+ GPRVector value(0,{7,7,7,7});
+ m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param);
+ m_proc.emit_export_instruction(m_last_param_export);
+ }
+ m_last_param_export->set_last();
+
+ if (!m_last_pos_export) {
+ GPRVector value(0,{7,7,7,7});
+ m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos);
+ m_proc.emit_export_instruction(m_last_pos_export);
+ }
+ m_last_pos_export->set_last();
+}
+
+bool VertexStageExportForFS::emit_stream(int stream)
+{
+ assert(m_so_info);
+ if (m_so_info->num_outputs > PIPE_MAX_SO_OUTPUTS) {
+ R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs);
+ return false;
+ }
+ for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+ if (m_so_info->output[i].output_buffer >= 4) {
+ R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+ m_so_info->output[i].output_buffer);
+ return false;
+ }
+ }
+ const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+ unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
+ std::vector<GPRVector> tmp(m_so_info->num_outputs);
+
+ /* Initialize locations where the outputs are stored. */
+ for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+ if (stream != -1 && stream != m_so_info->output[i].stream)
+ continue;
+
+ sfn_log << SfnLog::instr << "Emit stream " << i
+ << " with register index " << m_so_info->output[i].register_index << " so_gpr:";
+
+
+ so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index);
+
+ if (!so_gpr[i]) {
+ sfn_log << SfnLog::err << "\nERR: register index "
+ << m_so_info->output[i].register_index
+ << " doesn't correspond to an output register\n";
+ return false;
+ }
+ start_comp[i] = m_so_info->output[i].start_component;
+ /* Lower outputs with dst_offset < start_component.
+ *
+ * We can only output 4D vectors with a write mask, e.g. we can
+ * only output the W component at offset 3, etc. If we want
+ * to store Y, Z, or W at buffer offset 0, we need to use MOV
+ * to move it to X and output X. */
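+ /* Example: start_component == 3, dst_offset == 0 becomes
+ * MOV so_gpr.w -> tmp.x, and tmp.x is streamed out at offset 0. */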
+ if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) {
+
+ GPRVector::Swizzle swizzle = {0,1,2,3};
+ for (auto j = m_so_info->output[i].num_components; j < 4; ++j)
+ swizzle[j] = 7;
+ tmp[i] = m_proc.get_temp_vec4(swizzle);
+
+ int sc = m_so_info->output[i].start_component;
+ AluInstruction *alu = nullptr;
+ for (int j = 0; j < m_so_info->output[i].num_components; j++) {
+ alu = new AluInstruction(op1_mov, tmp[i][j], so_gpr[i]->reg_i(j + sc), {alu_write});
+ m_proc.emit_instruction(alu);
+ }
+ if (alu)
+ alu->set_flag(alu_last_instr);
+
+ start_comp[i] = 0;
+ so_gpr[i] = &tmp[i];
+ }
+ sfn_log << SfnLog::instr << *so_gpr[i] << "\n";
+ }
+
+ /* Write outputs to buffers. */
+ for (unsigned i = 0; i < m_so_info->num_outputs; i++) {
+ sfn_log << SfnLog::instr << "Write output buffer " << i
+ << " with register index " << m_so_info->output[i].register_index << "\n";
+
+ StreamOutIntruction *out_stream =
+ new StreamOutIntruction(*so_gpr[i],
+ m_so_info->output[i].num_components,
+ m_so_info->output[i].dst_offset - start_comp[i],
+ ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i],
+ m_so_info->output[i].output_buffer,
+ m_so_info->output[i].stream);
+ m_proc.emit_export_instruction(out_stream);
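+ /* Four enable bits per stream: buffer b of stream s maps to
+ * bit (s * 4 + b). */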
+ m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4;
+ }
+ return true;
+}
+
+
+VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc,
+ const r600_shader *gs_shader):
+ VertexStageWithOutputInfo(proc),
+ m_num_clip_dist(0),
+ m_gs_shader(gs_shader)
+{
+
+}
+
+bool VertexStageExportForGS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
+{
+ int ring_offset = -1;
+ const r600_shader_io& out_io = m_proc.sh_info().output[store_info.driver_location];
+
+ sfn_log << SfnLog::io << "check output " << store_info.driver_location
+ << " name=" << out_io.name<< " sid=" << out_io.sid << "\n";
+ for (unsigned k = 0; k < m_gs_shader->ninput; ++k) {
+ auto& in_io = m_gs_shader->input[k];
+ sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name << " sid=" << in_io.sid << "\n";
+
+ if (in_io.name == out_io.name &&
+ in_io.sid == out_io.sid) {
+ ring_offset = in_io.ring_offset;
+ break;
+ }
+ }
+
+ if (store_info.location == VARYING_SLOT_VIEWPORT) {
+ m_proc.sh_info().vs_out_viewport = 1;
+ m_proc.sh_info().vs_out_misc_write = 1;
+ return true;
+ }
+
+ if (ring_offset == -1) {
+ sfn_log << SfnLog::err << "VS defines output at "
+ << store_info.driver_location << " name=" << out_io.name
+ << " sid=" << out_io.sid << " that is not consumed as GS input\n";
+ return true;
+ }
+
+ uint32_t write_mask = (1 << instr->num_components) - 1;
+
+ GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask,
+ swizzle_from_comps(instr->num_components), true);
+
+ auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value,
+ ring_offset >> 2, 4, PValue());
+ m_proc.emit_export_instruction(ir);
+
+ m_proc.sh_info().output[store_info.driver_location].write_mask |= write_mask;
+ if (store_info.location == VARYING_SLOT_CLIP_DIST0 ||
+ store_info.location == VARYING_SLOT_CLIP_DIST1)
+ m_num_clip_dist += 4;
+
+ return true;
+}
+
+void VertexStageExportForGS::finalize_exports()
+{
+
+}
+
+VertexStageExportForES::VertexStageExportForES(VertexStage& proc):
+ VertexStageExportBase(proc)
+{
+}
+
+bool VertexStageExportForES::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr)
+{
+ return true;
+}
+
+void VertexStageExportForES::finalize_exports()
+{
+
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
new file mode 100644
index 000000000..46aee8071
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h
@@ -0,0 +1,116 @@
+#ifndef VERTEXSTAGEEXPORT_H
+#define VERTEXSTAGEEXPORT_H
+
+#include "sfn_shader_base.h"
+#include <queue>
+
+namespace r600 {
+
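+/* Export handling for the vertex pipeline stages: VertexStageExportForFS
+ * emits position and parameter exports (plus stream-out) for a following
+ * fragment shader, VertexStageExportForGS writes the outputs to the GS
+ * ring buffer, and VertexStageExportForES is currently a stub. */
+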
+class VertexStage : public ShaderFromNirProcessor {
+public:
+ using ShaderFromNirProcessor::ShaderFromNirProcessor;
+
+ virtual PValue primitive_id() = 0;
+};
+
+class VertexStageExportBase
+{
+public:
+ VertexStageExportBase(VertexStage& proc);
+ virtual ~VertexStageExportBase();
+ virtual void finalize_exports() = 0;
+ virtual bool do_process_outputs(nir_variable *output);
+
+ virtual void emit_shader_start();
+
+ virtual void scan_store_output(nir_intrinsic_instr* instr);
+ bool store_output(nir_intrinsic_instr* instr);
+protected:
+
+ struct store_loc {
+ unsigned frac;
+ unsigned location;
+ unsigned driver_location;
+ int data_loc;
+ };
+ virtual bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) = 0;
+
+ VertexStage& m_proc;
+ int m_cur_clip_pos;
+ GPRVector m_clip_vertex;
+};
+
+
+class VertexStageWithOutputInfo: public VertexStageExportBase
+{
+protected:
+ VertexStageWithOutputInfo(VertexStage& proc);
+ void scan_store_output(nir_intrinsic_instr* instr) override;
+ void emit_shader_start() override;
+ bool do_process_outputs(nir_variable *output) override;
+protected:
+ unsigned param_id(unsigned driver_location);
+ unsigned current_param() const;
+private:
+ std::priority_queue<unsigned, std::vector<unsigned>, std::greater<unsigned> > m_param_driver_locations;
+ std::map<unsigned, unsigned> m_param_map;
+ unsigned m_current_param;
+};
+
+
+class VertexStageExportForFS : public VertexStageWithOutputInfo
+{
+public:
+ VertexStageExportForFS(VertexStage& proc,
+ const pipe_stream_output_info *so_info,
+ r600_pipe_shader *pipe_shader,
+ const r600_shader_key& key);
+
+ void finalize_exports() override;
+private:
+ bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
+
+ bool emit_varying_param(const store_loc& store_info, nir_intrinsic_instr* instr);
+ bool emit_varying_pos(const store_loc& store_info, nir_intrinsic_instr* instr,
+ std::array<uint32_t, 4> *swizzle_override = nullptr);
+ bool emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr);
+ bool emit_stream(int stream);
+
+ ExportInstruction *m_last_param_export;
+ ExportInstruction *m_last_pos_export;
+
+ int m_num_clip_dist;
+ int m_enabled_stream_buffers_mask;
+ const pipe_stream_output_info *m_so_info;
+ r600_pipe_shader *m_pipe_shader;
+ const r600_shader_key& m_key;
+
+
+};
+
+class VertexStageExportForGS : public VertexStageWithOutputInfo
+{
+public:
+ VertexStageExportForGS(VertexStage& proc,
+ const r600_shader *gs_shader);
+ void finalize_exports() override;
+
+private:
+ bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
+ unsigned m_num_clip_dist;
+ const r600_shader *m_gs_shader;
+};
+
+class VertexStageExportForES : public VertexStageExportBase
+{
+public:
+ VertexStageExportForES(VertexStage& proc);
+ void finalize_exports() override;
+private:
+ bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override;
+};
+
+
+}
+
+#endif // VERTEXSTAGEEXPORT_H