author     Jonathan Gray <jsg@cvs.openbsd.org>    2021-07-22 10:50:50 +0000
committer  Jonathan Gray <jsg@cvs.openbsd.org>    2021-07-22 10:50:50 +0000
commit     9130ec005fbc78a62420643414d8354d0929ca50 (patch)
tree       6762777acdd2d4eee17ef87290e80dc7afe2b73d /lib/mesa/src/gallium/drivers/r600
parent     ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (diff)
Merge Mesa 21.1.5
Diffstat (limited to 'lib/mesa/src/gallium/drivers/r600')
96 files changed, 21571 insertions, 382 deletions
diff --git a/lib/mesa/src/gallium/drivers/r600/Makefile.sources b/lib/mesa/src/gallium/drivers/r600/Makefile.sources
index 763a49a07..52563b257 100644
--- a/lib/mesa/src/gallium/drivers/r600/Makefile.sources
+++ b/lib/mesa/src/gallium/drivers/r600/Makefile.sources
@@ -87,7 +87,81 @@ CXX_SOURCES = \
 	sb/sb_shader.cpp \
 	sb/sb_shader.h \
 	sb/sb_ssa_builder.cpp \
-	sb/sb_valtable.cpp
+	sb/sb_valtable.cpp \
+	sfn/sfn_alu_defines.cpp \
+	sfn/sfn_alu_defines.h \
+	sfn/sfn_callstack.cpp \
+	sfn/sfn_callstack.h \
+	sfn/sfn_conditionaljumptracker.cpp \
+	sfn/sfn_conditionaljumptracker.h \
+	sfn/sfn_defines.h \
+	sfn/sfn_debug.cpp \
+	sfn/sfn_debug.h \
+	sfn/sfn_emitaluinstruction.cpp \
+	sfn/sfn_emitaluinstruction.h \
+	sfn/sfn_emitinstruction.cpp \
+	sfn/sfn_emitinstruction.h \
+	sfn/sfn_emitssboinstruction.cpp \
+	sfn/sfn_emitssboinstruction.h \
+	sfn/sfn_emittexinstruction.cpp \
+	sfn/sfn_emittexinstruction.h \
+	sfn/sfn_emitinstruction.h \
+	sfn/sfn_instruction_alu.cpp \
+	sfn/sfn_instruction_alu.h \
+	sfn/sfn_instruction_base.cpp \
+	sfn/sfn_instruction_base.h \
+	sfn/sfn_instruction_block.cpp \
+	sfn/sfn_instruction_block.h \
+	sfn/sfn_instruction_cf.cpp \
+	sfn/sfn_instruction_cf.h \
+	sfn/sfn_instruction_export.cpp \
+	sfn/sfn_instruction_export.h \
+	sfn/sfn_instruction_fetch.cpp \
+	sfn/sfn_instruction_fetch.h \
+	sfn/sfn_instruction_lds.cpp \
+	sfn/sfn_instruction_lds.h \
+	sfn/sfn_instruction_gds.cpp \
+	sfn/sfn_instruction_gds.h \
+	sfn/sfn_instruction_misc.cpp \
+	sfn/sfn_instruction_misc.h \
+	sfn/sfn_instruction_tex.cpp \
+	sfn/sfn_instruction_tex.h \
+	sfn/sfn_ir_to_assembly.cpp \
+	sfn/sfn_ir_to_assembly.h \
+	sfn/sfn_liverange.cpp \
+	sfn/sfn_liverange.h \
+	sfn/sfn_nir.cpp \
+	sfn/sfn_nir.h \
+	sfn/sfn_nir_lower_64bit.cpp \
+	sfn/sfn_nir_lower_fs_out_to_vector.cpp \
+	sfn/sfn_nir_lower_fs_out_to_vector.h \
+	sfn/sfn_nir_lower_tess_io.cpp \
+	sfn/sfn_nir_vectorize_vs_inputs.c \
+	sfn/sfn_shader_base.cpp \
+	sfn/sfn_shader_base.h \
+	sfn/sfn_shader_compute.cpp \
+	sfn/sfn_shader_compute.h \
+	sfn/sfn_shader_fragment.cpp \
+	sfn/sfn_shader_fragment.h \
+	sfn/sfn_shader_geometry.cpp \
+	sfn/sfn_shader_geometry.h \
+	sfn/sfn_shader_tcs.cpp \
+	sfn/sfn_shader_tcs.h \
+	sfn/sfn_shader_tess_eval.cpp \
+	sfn/sfn_shader_tess_eval.h \
+	sfn/sfn_shader_vertex.cpp \
+	sfn/sfn_shader_vertex.h \
+	sfn/sfn_shaderio.cpp \
+	sfn/sfn_shaderio.h \
+	sfn/sfn_value.cpp \
+	sfn/sfn_value.h \
+	sfn/sfn_value_gpr.cpp \
+	sfn/sfn_value_gpr.h \
+	sfn/sfn_valuepool.cpp \
+	sfn/sfn_valuepool.h \
+	sfn/sfn_vertexstageexport.cpp \
+	sfn/sfn_vertexstageexport.h
 
 R600_GENERATED_FILES = \
-	egd_tables.h
\ No newline at end of file
+	egd_tables.h \
+	sfn_nir_algebraic.c
diff --git a/lib/mesa/src/gallium/drivers/r600/eg_asm.c b/lib/mesa/src/gallium/drivers/r600/eg_asm.c
index acf3fd374..9468e4b01 100644
--- a/lib/mesa/src/gallium/drivers/r600/eg_asm.c
+++ b/lib/mesa/src/gallium/drivers/r600/eg_asm.c
@@ -189,7 +189,7 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
 	memset(&alu, 0, sizeof(alu));
 	alu.op = ALU_OP1_MOVA_INT;
 	alu.src[0].sel = bc->index_reg[id];
-	alu.src[0].chan = 0;
+	alu.src[0].chan = bc->index_reg_chan[id];
 	if (bc->chip_class == CAYMAN)
 		alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
diff --git a/lib/mesa/src/gallium/drivers/r600/eg_debug.c b/lib/mesa/src/gallium/drivers/r600/eg_debug.c
index 56195df29..853b61044 100644
--- a/lib/mesa/src/gallium/drivers/r600/eg_debug.c
+++ b/lib/mesa/src/gallium/drivers/r600/eg_debug.c
@@ -256,7 +256,7 @@ static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
 				COLOR_RESET "\n");
 			break;
 		}
-		/* fall through, print all dwords */
+		FALLTHROUGH; /* print all dwords */
 	default:
 		for (i = 0; i < count+1; i++) {
 			print_spaces(f, INDENT_PKT);
@@ -305,7 +305,7 @@ static void eg_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
 			num_dw--;
 			break;
 		}
-		/* fall through */
+		FALLTHROUGH;
 	default:
 		fprintf(f, "Unknown packet type %i\n", type);
 		return;
@@ -332,10 +332,10 @@ static void eg_dump_last_ib(struct r600_context *rctx, FILE *f)
 	 * waited for the context, so this buffer should be idle.
 	 * If the GPU is hung, there is no point in waiting for it.
 	 */
-	uint32_t *map = rctx->b.ws->buffer_map(rctx->last_trace_buf->buf,
+	uint32_t *map = rctx->b.ws->buffer_map(rctx->b.ws, rctx->last_trace_buf->buf,
 					       NULL,
-					       PIPE_TRANSFER_UNSYNCHRONIZED |
-					       PIPE_TRANSFER_READ);
+					       PIPE_MAP_UNSYNCHRONIZED |
+					       PIPE_MAP_READ);
 	if (map)
 		last_trace_id = *map;
 }
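The eg_debug.c hunks above show two API migrations this merge applies throughout the driver: radeon_winsys::buffer_map/buffer_unmap now take the winsys itself as their first argument, and the PIPE_TRANSFER_* usage flags are renamed to PIPE_MAP_*. A minimal before/after sketch of the calling convention (the ws/buf variable names are illustrative, not from the diff):

	/* Mesa <= 20.x style */
	uint32_t *map = ws->buffer_map(buf, NULL,
	                               PIPE_TRANSFER_UNSYNCHRONIZED | PIPE_TRANSFER_READ);
	/* ... use map ... */
	ws->buffer_unmap(buf);

	/* Mesa 21.1 style, as used in the hunks above and below */
	uint32_t *map = ws->buffer_map(ws, buf, NULL,
	                               PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ);
	/* ... use map ... */
	ws->buffer_unmap(ws, buf);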
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c b/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
index 419738eec..0602a54dc 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_compute.c
@@ -193,7 +193,7 @@ static void evergreen_cs_set_constant_buffer(struct r600_context *rctx,
 	cb.buffer = buffer;
 	cb.user_buffer = NULL;
-	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, &cb);
+	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, false, &cb);
 }
 
 /* We need to define these R600 registers here, because we can't include
@@ -441,8 +441,9 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
 	shader->ir_type = cso->ir_type;
 
-	if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
-		shader->sel = r600_create_shader_state_tokens(ctx, cso->prog, PIPE_SHADER_COMPUTE);
+	if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    shader->ir_type == PIPE_SHADER_IR_NIR) {
+		shader->sel = r600_create_shader_state_tokens(ctx, cso->prog, cso->ir_type, PIPE_SHADER_COMPUTE);
 		return shader;
 	}
 #ifdef HAVE_OPENCL
@@ -457,10 +458,10 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx,
 					shader->bc.ndw * 4);
 	p = r600_buffer_map_sync_with_rings(
 		&rctx->b, shader->code_bo,
-		PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY);
+		PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
 	//TODO: use util_memcpy_cpu_to_le32 ?
 	memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
-	rctx->b.ws->buffer_unmap(shader->code_bo->buf);
+	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->code_bo->buf);
 #endif
 
 	return shader;
@@ -476,7 +477,8 @@ static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state
 	if (!shader)
 		return;
 
-	if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    shader->ir_type == PIPE_SHADER_IR_NIR) {
 		r600_delete_shader_selector(ctx, shader->sel);
 	} else {
 #ifdef HAVE_OPENCL
@@ -500,12 +502,14 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx, void *state)
 		return;
 	}
 
-	if (cstate->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (cstate->ir_type == PIPE_SHADER_IR_TGSI ||
+	    cstate->ir_type == PIPE_SHADER_IR_NIR) {
 		bool compute_dirty;
-
-		r600_shader_select(ctx, cstate->sel, &compute_dirty);
+		cstate->sel->ir_type = cstate->ir_type;
+		if (r600_shader_select(ctx, cstate->sel, &compute_dirty))
+			R600_ERR("Failed to select compute shader\n");
 	}
-
+
 	rctx->cs_shader_state.shader = (struct r600_pipe_compute *)state;
 }
@@ -553,7 +557,7 @@ static void evergreen_compute_upload_input(struct pipe_context *ctx,
 	u_box_1d(0, input_size, &box);
 	num_work_groups_start = ctx->transfer_map(ctx,
 			(struct pipe_resource*)shader->kernel_param,
-			0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE,
+			0, PIPE_MAP_WRITE | PIPE_MAP_DISCARD_RANGE,
 			&box, &transfer);
 	global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4));
 	local_size_start = global_size_start + (3 * (sizeof(uint)) / 4);
@@ -594,7 +598,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
 				    uint32_t indirect_grid[3])
 {
 	int i;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
 	bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
 	unsigned num_waves;
@@ -604,9 +608,10 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
 	int grid_size = 1;
 	unsigned lds_size = shader->local_size / 4;
 
-	if (shader->ir_type != PIPE_SHADER_IR_TGSI)
+	if (shader->ir_type != PIPE_SHADER_IR_TGSI &&
+	    shader->ir_type != PIPE_SHADER_IR_NIR)
 		lds_size += shader->bc.nlds_dw;
-
+
 	/* Calculate group_size/grid_size */
 	for (i = 0; i < 3; i++) {
 		group_size *= info->block[i];
@@ -673,7 +678,7 @@ static void evergreen_emit_dispatch(struct r600_context *rctx,
 
 static void compute_setup_cbs(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned i;
 
 	/* Emit colorbuffers. */
@@ -715,7 +720,7 @@ static void compute_setup_cbs(struct r600_context *rctx)
 static void compute_emit_cs(struct r600_context *rctx,
 			    const struct pipe_grid_info *info)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	bool compute_dirty = false;
 	struct r600_pipe_shader *current;
 	struct r600_shader_atomic combined_atomics[8];
@@ -723,7 +728,7 @@ static void compute_emit_cs(struct r600_context *rctx,
 	uint32_t indirect_grid[3] = { 0, 0, 0 };
 
 	/* make sure that the gfx ring is only one active */
-	if (radeon_emitted(rctx->b.dma.cs, 0)) {
+	if (radeon_emitted(&rctx->b.dma.cs, 0)) {
 		rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
 	}
 
@@ -734,8 +739,13 @@ static void compute_emit_cs(struct r600_context *rctx,
 		rctx->cmd_buf_is_compute = true;
 	}
 
-	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
-		r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
+	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI||
+	    rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR) {
+		if (r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty)) {
+			R600_ERR("Failed to select compute shader\n");
+			return;
+		}
+
 		current = rctx->cs_shader_state.shader->sel->current;
 		if (compute_dirty) {
 			rctx->cs_shader_state.atom.num_dw = current->command_buffer.num_dw;
@@ -748,7 +758,7 @@ static void compute_emit_cs(struct r600_context *rctx,
 
 	if (info->indirect) {
 		struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect;
-		unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_TRANSFER_READ);
+		unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_MAP_READ);
 		unsigned offset = info->indirect_offset / 4;
 		indirect_grid[0] = data[offset];
 		indirect_grid[1] = data[offset + 1];
@@ -786,7 +796,8 @@ static void compute_emit_cs(struct r600_context *rctx,
 
 	/* emit config state */
 	if (rctx->b.chip_class == EVERGREEN) {
-		if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
+		if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI||
+		    rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR) {
 			radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
 			radeon_emit(cs, S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx->r6xx_num_clause_temp_gprs));
 			radeon_emit(cs, 0);
@@ -799,7 +810,8 @@ static void compute_emit_cs(struct r600_context *rctx,
 	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
 	r600_flush_emit(rctx);
 
-	if (rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_TGSI) {
+	if (rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_TGSI &&
+	    rctx->cs_shader_state.shader->ir_type != PIPE_SHADER_IR_NIR) {
 
 		compute_setup_cbs(rctx);
 
@@ -855,7 +867,8 @@ static void compute_emit_cs(struct r600_context *rctx,
 		radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0));
 		radeon_emit(cs, 0);
 	}
-	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI)
+	if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_NIR)
 		evergreen_emit_atomic_buffer_save(rctx, true, combined_atomics, &atomic_used_mask);
 
 #if 0
@@ -877,12 +890,13 @@ void evergreen_emit_cs_shader(struct r600_context *rctx,
 	struct r600_cs_shader_state *state =
 		(struct r600_cs_shader_state*)atom;
 	struct r600_pipe_compute *shader = state->shader;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint64_t va;
 	struct r600_resource *code_bo;
 	unsigned ngpr, nstack;
 
-	if (shader->ir_type == PIPE_SHADER_IR_TGSI) {
+	if (shader->ir_type == PIPE_SHADER_IR_TGSI ||
+	    shader->ir_type == PIPE_SHADER_IR_NIR) {
 		code_bo = shader->sel->current->bo;
 		va = shader->sel->current->bo->gpu_address;
 		ngpr = shader->sel->current->shader.bc.ngpr;
@@ -916,7 +930,8 @@ static void evergreen_launch_grid(struct pipe_context *ctx,
 	struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
 	boolean use_kill;
 
-	if (shader->ir_type != PIPE_SHADER_IR_TGSI) {
+	if (shader->ir_type != PIPE_SHADER_IR_TGSI &&
+	    shader->ir_type != PIPE_SHADER_IR_NIR) {
 		rctx->cs_shader_state.pc = info->pc;
 		/* Get the config information for this kernel. */
 		r600_shader_binary_read_config(&shader->binary, &shader->bc,
@@ -1243,7 +1258,7 @@ static void *r600_compute_global_transfer_map(struct pipe_context *ctx,
 
 	dst = (struct pipe_resource*)item->real_buffer;
 
-	if (usage & PIPE_TRANSFER_READ)
+	if (usage & PIPE_MAP_READ)
 		buffer->chunk->status |= ITEM_MAPPED_FOR_READING;
 
 	COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"
@@ -1273,7 +1288,7 @@ static void r600_compute_global_transfer_unmap(struct pipe_context *ctx,
 	 * to an offset within the compute memory pool. The function
 	 * r600_compute_global_transfer_map() maps the memory pool
 	 * resource rather than the struct r600_resource_global passed to
-	 * it as an argument and then initalizes ptransfer->resource with
+	 * it as an argument and then initializes ptransfer->resource with
 	 * the memory pool resource (via pipe_buffer_map_range).
 	 * When transfer_unmap is called it uses the memory pool's
 	 * vtable which calls r600_buffer_transfer_map() rather than
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c b/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
index da8553886..54bd19fbc 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 			       uint64_t src_offset,
 			       uint64_t size)
 {
-	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
 	unsigned i, ncopy, csize, sub_cmd, shift;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -85,7 +85,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 				   unsigned size, uint32_t clear_value,
 				   enum r600_coherency coher)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 
 	assert(size);
 	assert(rctx->screen->b.has_cp_dma);
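A pattern recurring in the files above and below: the gfx and dma command buffers are now embedded in r600_common_context rather than held as pointers, so `cs = rctx->b.gfx.cs` becomes `cs = &rctx->b.gfx.cs`, and "is there a DMA ring" is tested via the embedded struct (`rctx->b.dma.cs.priv == NULL`). A self-contained toy model of the pattern (all types and names here are illustrative, not Mesa's):

	struct cmdbuf { void *priv; unsigned cdw; };

	struct context_old { struct cmdbuf *cs; };   /* 20.x: pointer, returned by cs_create() */
	struct context_new { struct cmdbuf cs; };    /* 21.1: embedded, initialized in place  */

	static void emit(struct cmdbuf *cs) { cs->cdw++; }

	static void example(struct context_new *rctx)
	{
	        struct cmdbuf *cs = &rctx->cs;       /* was: rctx->cs          */
	        if (cs->priv == NULL)                /* was: rctx->cs == NULL  */
	                return;
	        emit(cs);
	}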
diff --git a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
index 9c103c590..f76b1e331 100644
--- a/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
+++ b/lib/mesa/src/gallium/drivers/r600/evergreen_state.c
@@ -427,11 +427,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	dsa->valuemask[1] = state->stencil[1].valuemask;
 	dsa->writemask[0] = state->stencil[0].writemask;
 	dsa->writemask[1] = state->stencil[1].writemask;
-	dsa->zwritemask = state->depth.writemask;
+	dsa->zwritemask = state->depth_writemask;
 
-	db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
-		S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
-		S_028800_ZFUNC(state->depth.func);
+	db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) |
+		S_028800_Z_WRITE_ENABLE(state->depth_writemask) |
+		S_028800_ZFUNC(state->depth_func);
 
 	/* stencil */
 	if (state->stencil[0].enabled) {
@@ -453,10 +453,10 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
 	/* alpha */
 	alpha_test_control = 0;
 	alpha_ref = 0;
-	if (state->alpha.enabled) {
-		alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func);
+	if (state->alpha_enabled) {
+		alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func);
 		alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
-		alpha_ref = fui(state->alpha.ref_value);
+		alpha_ref = fui(state->alpha_ref_value);
 	}
 	dsa->sx_alpha_test_control = alpha_test_control & 0xff;
 	dsa->alpha_ref = alpha_ref;
@@ -514,15 +514,13 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
 	}
 
 	spi_interp = S_0286D4_FLAT_SHADE_ENA(1);
-	if (state->sprite_coord_enable) {
-		spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
-			S_0286D4_PNT_SPRITE_OVRD_X(2) |
-			S_0286D4_PNT_SPRITE_OVRD_Y(3) |
-			S_0286D4_PNT_SPRITE_OVRD_Z(0) |
-			S_0286D4_PNT_SPRITE_OVRD_W(1);
-		if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
-			spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
-		}
+	spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) |
+		S_0286D4_PNT_SPRITE_OVRD_X(2) |
+		S_0286D4_PNT_SPRITE_OVRD_Y(3) |
+		S_0286D4_PNT_SPRITE_OVRD_Z(0) |
+		S_0286D4_PNT_SPRITE_OVRD_W(1);
+	if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) {
+		spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1);
 	}
 
 	r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3);
@@ -576,6 +574,8 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
 	unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
 						       : state->max_anisotropy;
 	unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
+	bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+			   state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
 	float max_lod = state->max_lod;
 
 	if (!ss) {
@@ -610,6 +610,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
 	ss->tex_sampler_words[2] =
 		S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
 		(state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
+		S_03C008_TRUNCATE_COORD(trunc_coord) |
 		S_03C008_TYPE(1);
 
 	if (ss->border_color_use) {
@@ -755,7 +756,7 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
 	case PIPE_FORMAT_X32_S8X24_UINT:
 		params->pipe_format = PIPE_FORMAT_S8_UINT;
 		tile_split = tmp->surface.u.legacy.stencil_tile_split;
-		surflevel = tmp->surface.u.legacy.stencil_level;
+		surflevel = tmp->surface.u.legacy.zs.stencil_level;
 		break;
 	default:;
 	}
@@ -846,7 +847,7 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
 	tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
 				 S_030004_TEX_DEPTH(depth - 1) |
 				 S_030004_ARRAY_MODE(array_mode));
-	tex_resource_words[2] = (surflevel[base_level].offset + va) >> 8;
+	tex_resource_words[2] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
 
 	*skip_mip_address_reloc = false;
 	/* TEX_RESOURCE_WORD3.MIP_ADDRESS */
@@ -860,9 +861,9 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
 			tex_resource_words[3] = (tmp->fmask.offset + va) >> 8;
 		}
 	} else if (last_level && texture->nr_samples <= 1) {
-		tex_resource_words[3] = (surflevel[1].offset + va) >> 8;
+		tex_resource_words[3] = ((uint64_t)surflevel[1].offset_256B * 256 + va) >> 8;
 	} else {
-		tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
+		tex_resource_words[3] = ((uint64_t)surflevel[base_level].offset_256B * 256 + va) >> 8;
 	}
 
 	last_layer = params->last_layer;
@@ -974,7 +975,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
 static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_config_state *a = (struct r600_config_state*)atom;
 
 	radeon_set_config_reg_seq(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 3);
@@ -1001,7 +1002,7 @@ static void evergreen_emit_config_state(struct r600_context *rctx, struct r600_a
 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
@@ -1123,7 +1124,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
 	bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;
 	int i;
 
-	color->offset = rtex->surface.u.legacy.level[level].offset;
+	color->offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
 	color->view = S_028C6C_SLICE_START(first_layer) |
 		      S_028C6C_SLICE_MAX(last_layer);
@@ -1251,7 +1252,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
 		color->info |= S_028C70_COMPRESSION(1);
 	}
 
-	/* EXPORT_NORM is an optimzation that can be enabled for better
+	/* EXPORT_NORM is an optimization that can be enabled for better
 	 * performance in certain cases.
 	 * EXPORT_NORM can be enabled if:
 	 * - 11-bit or smaller UNORM/SNORM/SRGB
@@ -1281,7 +1282,7 @@ static void evergreen_set_color_surface_common(struct r600_context *rctx,
 }
 
 /**
- * This function intializes the CB* register values for RATs.  It is meant
+ * This function initializes the CB* register values for RATs.  It is meant
  * to be used for 1D aligned buffers that do not have an associated
  * radeon_surf.
 */
@@ -1360,7 +1361,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
 	assert(format != ~0);
 	offset = rtex->resource.gpu_address;
-	offset += rtex->surface.u.legacy.level[level].offset;
+	offset += (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256;
 
 	switch (rtex->surface.u.legacy.level[level].mode) {
 	case RADEON_SURF_MODE_2D:
@@ -1410,7 +1411,7 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
 		stile_split = eg_tile_split(stile_split);
 
-		stencil_offset = rtex->surface.u.legacy.stencil_level[level].offset;
+		stencil_offset = (uint64_t)rtex->surface.u.legacy.zs.stencil_level[level].offset_256B * 256;
 		stencil_offset += rtex->resource.gpu_address;
 
 		surf->db_stencil_base = stencil_offset >> 8;
@@ -1657,7 +1658,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
 
 static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned max_dist = 0;
 
 	switch (nr_samples) {
@@ -1706,7 +1707,7 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at
 {
 	struct r600_image_state *state = (struct r600_image_state *)atom;
 	struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_texture *rtex;
 	struct r600_resource *resource;
 	int i;
@@ -1833,7 +1834,7 @@ static void evergreen_emit_compute_buffer_state(struct r600_context *rctx, struc
 
 static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
 	unsigned nr_cbufs = state->nr_cbufs;
 	unsigned i, tl, br;
@@ -1972,7 +1973,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
 	float offset_units = state->offset_units;
 	float offset_scale = state->offset_scale;
@@ -2030,7 +2031,7 @@ uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_
 
 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 	unsigned fb_colormask = a->bound_cbufs_target_mask;
 	unsigned ps_colormask = a->ps_color_export_mask;
@@ -2045,7 +2046,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_db_state *a = (struct r600_db_state*)atom;
 
 	if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -2068,7 +2069,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
 	unsigned db_count_control = 0;
@@ -2123,7 +2124,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 					  unsigned resource_offset,
 					  unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2182,7 +2183,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 					    unsigned reg_alu_const_cache,
 					    unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2334,7 +2335,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 					 struct r600_samplerview_state *state,
 					 unsigned resource_id_base, unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2443,7 +2444,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 				unsigned border_index_reg,
 				unsigned pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
 	union pipe_color_union border_color = {{0,0,0,1}};
 	union pipe_color_union *border_color_ptr = &border_color;
@@ -2527,14 +2528,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+	radeon_set_context_reg(&rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
 static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
 {
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint16_t mask = s->sample_mask;
 
 	radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -2544,7 +2545,7 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
@@ -2561,7 +2562,7 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
 
 static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
 	uint32_t v = 0, v2 = 0, primid = 0, tf_param = 0;
 
@@ -2665,7 +2666,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
 	struct r600_resource *rbuffer;
 
@@ -3389,8 +3390,9 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 			tmp |= S_028644_FLAT_SHADE(1);
 		}
 
-		if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
-		    (sprite_coord_enable & (1 << rshader->input[i].sid))) {
+		if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD ||
+		    (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD &&
+		     (sprite_coord_enable & (1 << rshader->input[i].sid)))) {
 			tmp |= S_028644_PT_SPRITE_TEX(1);
 		}
 
@@ -3712,7 +3714,7 @@ void *evergreen_create_fastclear_blend(struct r600_context *rctx)
 
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 {
-	struct pipe_depth_stencil_alpha_state dsa = {{0}};
+	struct pipe_depth_stencil_alpha_state dsa = {{{0}}};
 
 	return rctx->b.b.create_depth_stencil_alpha_state(&rctx->b.b, &dsa);
 }
@@ -3774,7 +3776,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 				    unsigned pitch,
 				    unsigned bpp)
 {
-	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -3811,8 +3813,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 		x = src_x;
 		y = src_y;
 		z = src_z;
-		base = rsrc->surface.u.legacy.level[src_level].offset;
-		addr = rdst->surface.u.legacy.level[dst_level].offset;
+		base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
+		addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
 		addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
 		addr += dst_y * pitch + dst_x * bpp;
 		bank_h = eg_bank_wh(rsrc->surface.u.legacy.bankh);
@@ -3836,8 +3838,8 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 		x = dst_x;
 		y = dst_y;
 		z = dst_z;
-		base = rdst->surface.u.legacy.level[dst_level].offset;
-		addr = rsrc->surface.u.legacy.level[src_level].offset;
+		base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
+		addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
 		addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z;
 		addr += src_y * pitch + src_x * bpp;
 		bank_h = eg_bank_wh(rdst->surface.u.legacy.bankh);
@@ -3896,7 +3898,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (rctx->b.dma.cs == NULL) {
+	if (rctx->b.dma.cs.priv == NULL) {
 		goto fallback;
 	}
 
@@ -3959,10 +3961,10 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
 		 * dst_x/y == 0
 		 * dst_pitch == src_pitch
 		 */
-		src_offset= rsrc->surface.u.legacy.level[src_level].offset;
+		src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256;
 		src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z;
 		src_offset += src_y * src_pitch + src_x * bpp;
-		dst_offset = rdst->surface.u.legacy.level[dst_level].offset;
+		dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256;
 		dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z;
 		dst_offset += dst_y * dst_pitch + dst_x * bpp;
 		evergreen_dma_copy_buffer(rctx, dst, src, dst_offset, src_offset,
@@ -4148,7 +4150,7 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
 
 static void evergreen_set_shader_images(struct pipe_context *ctx,
 				enum pipe_shader_type shader, unsigned start_slot,
-				unsigned count,
+				unsigned count, unsigned unbind_num_trailing_slots,
 				const struct pipe_image_view *images)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
@@ -4162,7 +4164,9 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
 	unsigned old_mask;
 	struct r600_image_state *istate = NULL;
 	int idx;
-	if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE && count == 0)
+	if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_COMPUTE)
+		return;
+	if (!count && !unbind_num_trailing_slots)
 		return;
 
 	if (shader == PIPE_SHADER_FRAGMENT)
@@ -4305,6 +4309,16 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
 		istate->enabled_mask |= (1 << i);
 	}
 
+	for (i = start_slot + count, idx = 0;
+	     i < start_slot + count + unbind_num_trailing_slots; i++, idx++) {
+		rview = &istate->views[i];
+
+		pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
+		istate->enabled_mask &= ~(1 << i);
+		istate->compressed_colortex_mask &= ~(1 << i);
+		istate->compressed_depthtex_mask &= ~(1 << i);
+	}
+
 	istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
 	istate->dirty_buffer_constants = TRUE;
 	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
@@ -4523,11 +4537,11 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 	if (!rctx->tes_shader) {
 		rctx->lds_alloc = 0;
 		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
-					      R600_LDS_INFO_CONST_BUFFER, NULL);
+					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
 		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
-					      R600_LDS_INFO_CONST_BUFFER, NULL);
+					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
 		rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
-					      R600_LDS_INFO_CONST_BUFFER, NULL);
+					      R600_LDS_INFO_CONST_BUFFER, false, NULL);
 		return;
 	}
 
@@ -4587,12 +4601,11 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 	constbuf.buffer_size = 8 * 4;
 
 	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
-				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+				      R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
 	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
-				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
+				      R600_LDS_INFO_CONST_BUFFER, false, &constbuf);
 	rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,
-				      R600_LDS_INFO_CONST_BUFFER, &constbuf);
-	pipe_resource_reference(&constbuf.buffer, NULL);
+				      R600_LDS_INFO_CONST_BUFFER, true, &constbuf);
 }
 
 uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
@@ -4750,7 +4763,7 @@ bool evergreen_adjust_gprs(struct r600_context *rctx)
 
 void eg_trace_emit(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned reloc;
 
 	if (rctx->b.chip_class < EVERGREEN)
@@ -4780,7 +4793,7 @@ static void evergreen_emit_set_append_cnt(struct r600_context *rctx,
 					  struct r600_resource *resource,
 					  uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 						   resource,
 						   RADEON_USAGE_READ,
@@ -4803,7 +4816,7 @@ static void evergreen_emit_event_write_eos(struct r600_context *rctx,
 					   struct r600_resource *resource,
 					   uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t event = EVENT_TYPE_PS_DONE;
 	uint32_t base_reg_0 = R_02872C_GDS_APPEND_COUNT_0;
 	uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
@@ -4830,7 +4843,7 @@ static void cayman_emit_event_write_eos(struct r600_context *rctx,
 					struct r600_resource *resource,
 					uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	uint32_t event = EVENT_TYPE_PS_DONE;
 	uint32_t reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						    resource,
@@ -4856,7 +4869,7 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
 				      struct r600_resource *resource,
 				      uint32_t pkt_flags)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
						    resource,
						    RADEON_USAGE_READ,
@@ -4951,7 +4964,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
 				       struct r600_shader_atomic *combined_atomics,
 				       uint8_t *atomic_used_mask_p)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
 	uint32_t pkt_flags = 0;
 	uint32_t event = EVENT_TYPE_PS_DONE;
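In the tessellation-constant hunks just above, pipe_context::set_constant_buffer gains a take_ownership parameter. Passing false keeps the caller's reference to constbuf.buffer; passing true on the final bind hands the reference to the driver, which is why the trailing pipe_resource_reference(&constbuf.buffer, NULL) is deleted. A sketch of the pattern (assuming a filled-in pipe_constant_buffer and a slot variable, as in the code above):

	/* old: driver takes its own reference, caller must drop its own */
	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_EVAL, slot, &constbuf);
	pipe_resource_reference(&constbuf.buffer, NULL);

	/* new: earlier binds keep the reference, the last bind gives it away */
	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL, slot, false, &constbuf);
	ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_EVAL, slot, true, &constbuf);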
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_asm.c b/lib/mesa/src/gallium/drivers/r600/r600_asm.c
index 6affa3d3a..6a9690f69 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_asm.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_asm.c
@@ -362,7 +362,7 @@ static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *
 		}
 		assignment[4] = alu;
 	} else {
-		if (assignment[chan]) {
+		if (assignment[chan]) {
 			assert(0); /* ALU.chan has already been allocated. */
 			return -1;
 		}
@@ -686,7 +686,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 	return 0;
 }
 
-void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
 {
 	switch(value) {
 	case 0:
@@ -704,14 +704,6 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne
 	case 0x3F000000: /* 0.5f */
 		*sel = V_SQ_ALU_SRC_0_5;
 		break;
-	case 0xBF800000: /* -1.0f */
-		*sel = V_SQ_ALU_SRC_1;
-		*neg ^= !abs;
-		break;
-	case 0xBF000000: /* -0.5f */
-		*sel = V_SQ_ALU_SRC_0_5;
-		*neg ^= !abs;
-		break;
 	default:
 		*sel = V_SQ_ALU_SRC_LITERAL;
 		break;
@@ -1232,7 +1224,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 	/* Load index register if required */
 	if (bc->chip_class >= EVERGREEN) {
 		for (i = 0; i < 3; i++)
-			if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
+			if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
 				egcm_load_index_reg(bc, 0, true);
 	}
@@ -1261,7 +1253,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 			r600_bytecode_special_constants(nalu->src[i].value,
-				&nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
+				&nalu->src[i].sel);
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
 		bc->ngpr = nalu->dst.sel + 1;
@@ -1450,7 +1442,9 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t
 	    bc->cf_last->op == CF_OP_TEX) {
 		struct r600_bytecode_tex *ttex;
 		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
-			if (ttex->dst_gpr == ntex->src_gpr) {
+			if (ttex->dst_gpr == ntex->src_gpr &&
+			    (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
+			     ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
 				bc->force_add_cf = 1;
 				break;
 			}
@@ -2638,7 +2632,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	uint32_t *bytecode;
 	int i, j, r, fs_size;
 	struct r600_fetch_shader *shader;
-	unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB;
+	unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB ||
+		(rctx->screen->b.debug_flags & DBG_NIR);
 	unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
 
 	assert(count < 32);
@@ -2763,7 +2758,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
+	u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
 			     &shader->offset,
 			     (struct pipe_resource**)&shader->buffer);
 	if (!shader->buffer) {
@@ -2774,7 +2769,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 
 	bytecode = r600_buffer_map_sync_with_rings
 		(&rctx->b, shader->buffer,
-		PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
+		PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
 	bytecode += shader->offset / 4;
 
 	if (R600_BIG_ENDIAN) {
@@ -2784,7 +2779,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 	} else {
 		memcpy(bytecode, bc.bytecode, fs_size);
 	}
-	rctx->b.ws->buffer_unmap(shader->buffer->buf);
+	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);
 
 	r600_bytecode_clear(&bc);
 	return shader;
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_asm.h b/lib/mesa/src/gallium/drivers/r600/r600_asm.h
index 71a3ae1ba..a526993b3 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_asm.h
+++ b/lib/mesa/src/gallium/drivers/r600/r600_asm.h
@@ -214,6 +214,8 @@ struct r600_bytecode_cf {
 	struct r600_bytecode_alu *prev_bs_head;
 	struct r600_bytecode_alu *prev2_bs_head;
 	unsigned isa[2];
+	unsigned nlds_read;
+	unsigned nqueue_read;
 };
 
 #define FC_NONE 0
@@ -276,6 +278,7 @@ struct r600_bytecode {
 	unsigned r6xx_nop_after_rel_dst;
 	bool index_loaded[2];
 	unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
+	unsigned index_reg_chan[2]; /* indexing register chanel CF_INDEX_[01] */
 	unsigned debug_id;
 	struct r600_isa* isa;
 	struct r600_bytecode_output pending_outputs[5];
@@ -318,8 +321,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
 		unsigned op);
 int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		const struct r600_bytecode_alu *alu, unsigned type);
-void r600_bytecode_special_constants(uint32_t value,
-		unsigned *sel, unsigned *neg, unsigned abs);
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
 void r600_bytecode_disasm(struct r600_bytecode *bc);
 void r600_bytecode_alu_read(struct r600_bytecode *bc,
 		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_blit.c b/lib/mesa/src/gallium/drivers/r600/r600_blit.c
index 606b3892e..b8924f826 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_blit.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_blit.c
@@ -463,6 +463,7 @@ static bool r600_decompress_subresource(struct pipe_context *ctx,
 }
 
 static void r600_clear(struct pipe_context *ctx, unsigned buffers,
+		       const struct pipe_scissor_state *scissor_state,
 		       const union pipe_color_union *color,
 		       double depth, unsigned stencil)
 {
@@ -660,7 +661,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds
 		r600_blitter_end(ctx);
 	} else {
 		uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst),
-								PIPE_TRANSFER_WRITE);
+								PIPE_MAP_WRITE);
 		map += offset / 4;
 		size /= 4;
 		for (unsigned i = 0; i < size; i++)
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c b/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
index 494b7ed69..de032c6dc 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_hw_context.c
@@ -34,17 +34,17 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 			boolean count_draw_in, unsigned num_atomics)
 {
 	/* Flush the DMA IB if it's not empty. */
-	if (radeon_emitted(ctx->b.dma.cs, 0))
+	if (radeon_emitted(&ctx->b.dma.cs, 0))
 		ctx->b.dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 
-	if (!radeon_cs_memory_below_limit(ctx->b.screen, ctx->b.gfx.cs,
+	if (!radeon_cs_memory_below_limit(ctx->b.screen, &ctx->b.gfx.cs,
 					  ctx->b.vram, ctx->b.gtt)) {
 		ctx->b.gtt = 0;
 		ctx->b.vram = 0;
 		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 		return;
 	}
-	/* all will be accounted once relocation are emited */
+	/* all will be accounted once relocation are emitted */
 	ctx->b.gtt = 0;
 	ctx->b.vram = 0;
 
@@ -84,14 +84,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 		num_dw += 10;
 
 	/* Flush if there's not enough space. */
-	if (!ctx->b.ws->cs_check_space(ctx->b.gfx.cs, num_dw, false)) {
+	if (!ctx->b.ws->cs_check_space(&ctx->b.gfx.cs, num_dw, false)) {
 		ctx->b.gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL);
 	}
 }
 
 void r600_flush_emit(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 	unsigned cp_coher_cntl = 0;
 	unsigned wait_until = 0;
 
@@ -260,7 +260,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 			    struct pipe_fence_handle **fence)
 {
 	struct r600_context *ctx = context;
-	struct radeon_cmdbuf *cs = ctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &ctx->b.gfx.cs;
 	struct radeon_winsys *ws = ctx->b.ws;
 
 	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
@@ -345,7 +345,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->b.vram = 0;
 
 	/* Begin a new CS. */
-	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);
+	r600_emit_command_buffer(&ctx->b.gfx.cs, &ctx->start_cs_cmd);
 
 	/* Re-emit states. */
 	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
@@ -430,13 +430,13 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->last_rast_prim = -1;
 	ctx->current_rast_prim = -1;
 
-	assert(!ctx->b.gfx.cs->prev_dw);
-	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
+	assert(!ctx->b.gfx.cs.prev_dw);
+	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs.current.cdw;
 }
 
 void r600_emit_pfp_sync_me(struct r600_context *rctx)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 
 	if (rctx->b.chip_class >= EVERGREEN &&
 	    rctx->b.screen->info.drm_minor >= 46) {
@@ -451,7 +451,7 @@ void r600_emit_pfp_sync_me(struct r600_context *rctx)
 		uint64_t va;
 
 		/* 16-byte address alignment is required by WAIT_REG_MEM. */
-		u_suballocator_alloc(rctx->b.allocator_zeroed_memory, 4, 16,
+		u_suballocator_alloc(&rctx->b.allocator_zeroed_memory, 4, 16,
 				     &offset, (struct pipe_resource**)&buf);
 		if (!buf) {
 			/* This is too heavyweight, but will work. */
@@ -502,7 +502,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 			     struct pipe_resource *src, uint64_t src_offset,
 			     unsigned size)
 {
-	struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;
 
 	assert(size);
 	assert(rctx->screen->b.has_cp_dma);
@@ -584,7 +584,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 			  uint64_t src_offset,
 			  uint64_t size)
 {
-	struct radeon_cmdbuf *cs = rctx->b.dma.cs;
+	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
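The r600_pipe.c diff below converts the fetch-shader suballocator from a heap-allocated object to one embedded in r600_context, mirroring the command-buffer change. A schematic before/after, using only calls that appear in this merge (the size/offset/buf variables are illustrative):

	/* old */
	rctx->allocator_fetch_shader =
	        u_suballocator_create(&rctx->b.b, 64 * 1024, 0, PIPE_USAGE_DEFAULT, 0, FALSE);
	u_suballocator_alloc(rctx->allocator_fetch_shader, size, 256, &offset, &buf);
	u_suballocator_destroy(rctx->allocator_fetch_shader);

	/* new: r600_context holds "struct u_suballocator allocator_fetch_shader;" */
	u_suballocator_init(&rctx->allocator_fetch_shader, &rctx->b.b, 64 * 1024,
	                    0, PIPE_USAGE_DEFAULT, 0, FALSE);
	u_suballocator_alloc(&rctx->allocator_fetch_shader, size, 256, &offset, &buf);
	u_suballocator_destroy(&rctx->allocator_fetch_shader);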
diff --git a/lib/mesa/src/gallium/drivers/r600/r600_pipe.c b/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
index f26da31d2..9e11c7442 100644
--- a/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
+++ b/lib/mesa/src/gallium/drivers/r600/r600_pipe.c
@@ -55,6 +55,7 @@ static const struct debug_named_value r600_debug_options[] = {
 	{ "sbnofallback", DBG_SB_NO_FALLBACK, "Abort on errors instead of fallback" },
 	{ "sbdisasm", DBG_SB_DISASM, "Use sb disassembler for shader dumps" },
 	{ "sbsafemath", DBG_SB_SAFEMATH, "Disable unsafe math optimizations" },
+	{ "nirsb", DBG_NIR_SB, "Enable NIR with SB optimizer"},
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
@@ -81,7 +82,7 @@ static void r600_destroy_context(struct pipe_context *context)
 	if (rctx->append_fence)
 		pipe_resource_reference((struct pipe_resource**)&rctx->append_fence, NULL);
 	for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
-		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, NULL);
+		rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, NULL);
 		free(rctx->driver_consts[sh].constants);
 	}
 
@@ -113,14 +114,12 @@ static void r600_destroy_context(struct pipe_context *context)
 
 	for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh)
 		for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i)
-			rctx->b.b.set_constant_buffer(context, sh, i, NULL);
+			rctx->b.b.set_constant_buffer(context, sh, i, false, NULL);
 
 	if (rctx->blitter) {
 		util_blitter_destroy(rctx->blitter);
 	}
-	if (rctx->allocator_fetch_shader) {
-		u_suballocator_destroy(rctx->allocator_fetch_shader);
-	}
+	u_suballocator_destroy(&rctx->allocator_fetch_shader);
 
 	r600_release_command_buffer(&rctx->start_cs_cmd);
 
@@ -211,15 +210,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
 		goto fail;
 	}
 
-	rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
-				       r600_context_gfx_flush, rctx, false);
+	ws->cs_create(&rctx->b.gfx.cs, rctx->b.ctx, RING_GFX,
+		      r600_context_gfx_flush, rctx, false);
 	rctx->b.gfx.flush = r600_context_gfx_flush;
 
-	rctx->allocator_fetch_shader =
-		u_suballocator_create(&rctx->b.b, 64 * 1024,
-				      0, PIPE_USAGE_DEFAULT, 0, FALSE);
-	if (!rctx->allocator_fetch_shader)
-		goto fail;
+	u_suballocator_init(&rctx->allocator_fetch_shader, &rctx->b.b, 64 * 1024,
+			    0, PIPE_USAGE_DEFAULT, 0, FALSE);
 
 	rctx->isa = calloc(1, sizeof(struct r600_isa));
 	if (!rctx->isa || r600_isa_init(rctx, rctx->isa))
@@ -249,6 +245,12 @@ fail:
 	return NULL;
 }
 
+static bool is_nir_enabled(struct r600_common_screen *screen) {
+	return ((screen->debug_flags & DBG_NIR_PREFERRED) &&
+		screen->family >= CHIP_CEDAR &&
+		screen->family < CHIP_CAYMAN);
+}
+
 /*
  * pipe_screen
 */
@@ -282,6 +284,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_SHADER_SATURATE:
 	case PIPE_CAP_SEAMLESS_CUBE_MAP:
 	case PIPE_CAP_PRIMITIVE_RESTART:
+	case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
 	case PIPE_CAP_CONDITIONAL_RENDER:
 	case PIPE_CAP_TEXTURE_BARRIER:
 	case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
@@ -317,8 +320,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
 	case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 	case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+	case PIPE_CAP_NIR_ATOMICS_AS_DEREF:
 		return 1;
 
+	case PIPE_CAP_SHAREABLE_SHADERS:
+		return 0;
+
 	case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
 		/* Optimal number for good TexSubImage performance on Polaris10. */
 		return 64 * 1024 * 1024;
@@ -333,8 +340,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return rscreen->b.chip_class > R700;
 
 	case PIPE_CAP_TGSI_TEXCOORD:
-		return 0;
+		return 1;
 
+	case PIPE_CAP_NIR_IMAGES_AS_DEREF:
 	case PIPE_CAP_FAKE_SW_MSAA:
 		return 0;
 
@@ -348,11 +356,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 256;
 
 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-		return 1;
+		return 4;
 
 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
 		if (family >= CHIP_CEDAR)
-			return 430;
+			return is_nir_enabled(&rscreen->b) ? 450 : 430;
 		/* pre-evergreen geom shaders need newer kernel */
 		if (rscreen->b.info.drm_minor >= 37)
 			return 330;
@@ -403,13 +411,21 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
 		return 0;
 
+	case PIPE_CAP_INT64:
 	case PIPE_CAP_DOUBLES:
 		if (rscreen->b.family == CHIP_ARUBA ||
 		    rscreen->b.family == CHIP_CAYMAN ||
 		    rscreen->b.family == CHIP_CYPRESS ||
 		    rscreen->b.family == CHIP_HEMLOCK)
 			return 1;
+		if (is_nir_enabled(&rscreen->b))
+			return 1;
 		return 0;
+	case PIPE_CAP_INT64_DIVMOD:
+		/* it is actually not supported, but the nir lowering hdanles this corectly wheras
+		 * the glsl lowering path seems to not initialize the buildins correctly.
+		 */
+		return is_nir_enabled(&rscreen->b);
 
 	case PIPE_CAP_CULL_DISTANCE:
 		return 1;
@@ -542,7 +558,6 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	{
 	case PIPE_SHADER_FRAGMENT:
 	case PIPE_SHADER_VERTEX:
-	case PIPE_SHADER_COMPUTE:
 		break;
 	case PIPE_SHADER_GEOMETRY:
 		if (rscreen->b.family >= CHIP_CEDAR)
@@ -553,8 +568,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 		return 0;
 	case PIPE_SHADER_TESS_CTRL:
 	case PIPE_SHADER_TESS_EVAL:
+	case PIPE_SHADER_COMPUTE:
 		if (rscreen->b.family >= CHIP_CEDAR)
 			break;
+		FALLTHROUGH;
 	default:
 		return 0;
 	}
@@ -576,9 +593,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
 		if (shader == PIPE_SHADER_COMPUTE) {
 			uint64_t max_const_buffer_size;
-			pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
-						   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
-						   &max_const_buffer_size);
+			enum pipe_shader_ir ir_type = is_nir_enabled(&rscreen->b) ?
+				PIPE_SHADER_IR_NIR: PIPE_SHADER_IR_TGSI;
+			pscreen->get_compute_param(pscreen, ir_type,
+						   PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+						   &max_const_buffer_size);
 			return MIN2(max_const_buffer_size, INT_MAX);
 		} else {
@@ -598,6 +617,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_SUBROUTINES:
 	case PIPE_SHADER_CAP_INT64_ATOMICS:
 	case PIPE_SHADER_CAP_FP16:
+	case PIPE_SHADER_CAP_FP16_DERIVATIVES:
+	case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
+	case PIPE_SHADER_CAP_INT16:
+	case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
 		return 0;
 	case PIPE_SHADER_CAP_INTEGERS:
 	case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
@@ -605,14 +628,19 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
 	case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
 	case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
 		return 16;
-	case PIPE_SHADER_CAP_PREFERRED_IR:
+	case PIPE_SHADER_CAP_PREFERRED_IR:
+		if (is_nir_enabled(&rscreen->b))
+			return PIPE_SHADER_IR_NIR;
 		return PIPE_SHADER_IR_TGSI;
 	case PIPE_SHADER_CAP_SUPPORTED_IRS: {
 		int ir = 0;
 		if (shader == PIPE_SHADER_COMPUTE)
 			ir = 1 << PIPE_SHADER_IR_NATIVE;
-		if (rscreen->b.family >= CHIP_CEDAR)
+		if (rscreen->b.family >= CHIP_CEDAR) {
 			ir |= 1 << PIPE_SHADER_IR_TGSI;
+			if (is_nir_enabled(&rscreen->b))
+				ir |= 1 << PIPE_SHADER_IR_NIR;
+		}
 		return ir;
 	}
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
@@ -791,7 +819,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
 		templ.usage = PIPE_USAGE_DEFAULT;
 
 		struct r600_resource *res = r600_resource(rscreen->screen.resource_create(&rscreen->screen, &templ));
-		unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_TRANSFER_WRITE);
+		unsigned char *map = ws->buffer_map(res->buf, NULL, PIPE_MAP_WRITE);
 
 		memset(map, 0, 256);
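r600_pipe.c above gates the new NIR backend on the DBG_NIR_PREFERRED debug bits and the Evergreen GPU range; r600_pipe.h below defines those bits. A user would opt in through the existing R600_DEBUG mechanism (the invocations are an assumed example, not part of this diff):

	/* from the diff: NIR is only preferred on Evergreen, not Cayman */
	static bool is_nir_enabled(struct r600_common_screen *screen) {
	        return ((screen->debug_flags & DBG_NIR_PREFERRED) &&
	                screen->family >= CHIP_CEDAR &&
	                screen->family < CHIP_CAYMAN);
	}

	$ R600_DEBUG=nir   <app>   # NIR without the sb optimizer
	$ R600_DEBUG=nirsb <app>   # NIR followed by sb (DBG_NIR_SB)

Note that DBG_NIR_SB is defined below as (1 << 28), the same bit as DBG_SB_SAFEMATH, so the two options alias in this snapshot.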
*/ boolean has_vertex_cache; @@ -1055,7 +1060,8 @@ void eg_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags); struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx, - const struct tgsi_token *tokens, + const void *tokens, + enum pipe_shader_ir, unsigned pipe_shader_type); int r600_shader_select(struct pipe_context *ctx, struct r600_pipe_shader_selector* sel, diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.c b/lib/mesa/src/gallium/drivers/r600/r600_shader.c index 85e584baf..c23adf2ea 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_shader.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.c @@ -24,7 +24,9 @@ #include "r600_formats.h" #include "r600_opcodes.h" #include "r600_shader.h" +#include "r600_dump.h" #include "r600d.h" +#include "sfn/sfn_nir.h" #include "sb/sb_public.h" @@ -33,6 +35,10 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_from_mesa.h" +#include "nir/tgsi_to_nir.h" +#include "nir/nir_to_tgsi_info.h" +#include "compiler/nir/nir.h" #include "util/u_bitcast.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -143,7 +149,7 @@ static int store_shader(struct pipe_context *ctx, } ptr = r600_buffer_map_sync_with_rings( &rctx->b, shader->bo, - PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); + PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY); if (R600_BIG_ENDIAN) { for (i = 0; i < shader->shader.bc.ndw; ++i) { ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); @@ -151,12 +157,14 @@ static int store_shader(struct pipe_context *ctx, } else { memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); } - rctx->b.ws->buffer_unmap(shader->bo->buf); + rctx->b.ws->buffer_unmap(rctx->b.ws, shader->bo->buf); } return 0; } +extern const struct nir_shader_compiler_options r600_nir_options; +static int nshader = 0; int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, union r600_shader_key key) @@ -164,27 +172,76 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; int r; - bool dump = r600_can_dump_shader(&rctx->screen->b, - tgsi_get_processor_type(sel->tokens)); - unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); + struct r600_screen *rscreen = (struct r600_screen *)ctx->screen; + + int processor = sel->ir_type == PIPE_SHADER_IR_TGSI ? 
+ tgsi_get_processor_type(sel->tokens): + pipe_shader_type_from_mesa(sel->nir->info.stage); + + bool dump = r600_can_dump_shader(&rctx->screen->b, processor); + unsigned use_sb = !(rctx->screen->b.debug_flags & (DBG_NO_SB | DBG_NIR)) || + (rctx->screen->b.debug_flags & DBG_NIR_SB); unsigned sb_disasm; unsigned export_shader; - + shader->shader.bc.isa = rctx->isa; + + if (!(rscreen->b.debug_flags & DBG_NIR_PREFERRED)) { + assert(sel->ir_type == PIPE_SHADER_IR_TGSI); + r = r600_shader_from_tgsi(rctx, shader, key); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + goto error; + } + } else { + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + sel->nir = tgsi_to_nir(sel->tokens, ctx->screen, true); + const nir_shader_compiler_options *nir_options = + (const nir_shader_compiler_options *) + ctx->screen->get_compiler_options(ctx->screen, + PIPE_SHADER_IR_NIR, + shader->shader.processor_type); + /* Lower int64 ops because we have some r600 built-in shaders that use them */ + if (nir_options->lower_int64_options) { + NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS_V(sel->nir, nir_lower_int64); + NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL); + } + NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false); + } + nir_tgsi_scan_shader(sel->nir, &sel->info, true); + r = r600_shader_from_nir(rctx, shader, &key); + if (r) { + fprintf(stderr, "--Failed shader--------------------------------------------------\n"); + + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(stderr, "--TGSI--------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); + } + + if (rscreen->b.debug_flags & (DBG_NIR_PREFERRED)) { + fprintf(stderr, "--NIR --------------------------------------------------------\n"); + nir_print_shader(sel->nir, stderr); + } + + R600_ERR("translation from NIR failed !\n"); + goto error; + } + } + if (dump) { - fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(sel->tokens, 0); - + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(stderr, "--TGSI--------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); + } + if (sel->so.num_outputs) { r600_dump_streamout(&sel->so); } } - r = r600_shader_from_tgsi(rctx, shader, key); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - goto error; - } + if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { /* only disable for vertex shaders in tess paths */ if (key.vs.as_ls) @@ -216,7 +273,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_bytecode_disasm(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } else if ((dump && sb_disasm) || use_sb) { - r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, + r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, dump, use_sb); if (r) { R600_ERR("r600_sb_bytecode_process failed !\n"); @@ -224,6 +281,30 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } + if (dump) { + FILE *f; + char fname[1024]; + snprintf(fname, 1024, "shader_from_%s_%d.cpp", + (sel->ir_type == PIPE_SHADER_IR_TGSI ? + (rscreen->b.debug_flags & DBG_NIR_PREFERRED ?
"tgsi-nir" : "tgsi") + : "nir"), nshader); + f = fopen(fname, "w"); + print_shader_info(f, nshader++, &shader->shader); + print_shader_info(stderr, nshader++, &shader->shader); + print_pipe_info(stderr, &sel->info); + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(f, "/****TGSI**********************************\n"); + tgsi_dump_to_file(sel->tokens, 0, f); + } + + if (rscreen->b.debug_flags & DBG_NIR_PREFERRED){ + fprintf(f, "/****NIR **********************************\n"); + nir_print_shader(sel->nir, f); + } + fprintf(f, "******************************************/\n"); + fclose(f); + } + if (shader->gs_copy_shader) { if (dump) { // dump copy shader @@ -301,7 +382,8 @@ error: void r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader) { r600_resource_reference(&shader->bo, NULL); - r600_bytecode_clear(&shader->shader.bc); + if (list_is_linked(&shader->shader.bc.cf)) + r600_bytecode_clear(&shader->shader.bc); r600_release_command_buffer(&shader->command_buffer); } @@ -433,24 +515,26 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Dimension) { - switch (i->Src[j].Register.File) { - case TGSI_FILE_CONSTANT: - case TGSI_FILE_HW_ATOMIC: - break; - case TGSI_FILE_INPUT: - if (ctx->type == PIPE_SHADER_GEOMETRY || - ctx->type == PIPE_SHADER_TESS_CTRL || - ctx->type == PIPE_SHADER_TESS_EVAL) - break; - case TGSI_FILE_OUTPUT: - if (ctx->type == PIPE_SHADER_TESS_CTRL) - break; - default: - R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, - i->Src[j].Register.File, - i->Src[j].Register.Dimension); - return -EINVAL; - } + switch (i->Src[j].Register.File) { + case TGSI_FILE_CONSTANT: + case TGSI_FILE_HW_ATOMIC: + break; + case TGSI_FILE_INPUT: + if (ctx->type == PIPE_SHADER_GEOMETRY || + ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_TESS_EVAL) + break; + FALLTHROUGH; + case TGSI_FILE_OUTPUT: + if (ctx->type == PIPE_SHADER_TESS_CTRL) + break; + FALLTHROUGH; + default: + R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, + i->Src[j].Register.File, + i->Src[j].Register.Dimension); + return -EINVAL; + } } } for (j = 0; j < i->Instruction.NumDstRegs; j++) { @@ -620,6 +704,8 @@ static int r600_spi_sid(struct r600_shader_io * io) else { if (name == TGSI_SEMANTIC_GENERIC) { /* For generic params simply use sid from tgsi */ + index = 9 + io->sid; + } else if (name == TGSI_SEMANTIC_TEXCOORD) { index = io->sid; } else { /* For non-generic params - pack name and sid into 8 bits */ @@ -646,9 +732,11 @@ int r600_get_lds_unique_index(unsigned semantic_name, unsigned index) case TGSI_SEMANTIC_CLIPDIST: assert(index <= 1); return 2 + index; + case TGSI_SEMANTIC_TEXCOORD: + return 4 + index; case TGSI_SEMANTIC_GENERIC: if (index <= 63-4) - return 4 + index - 9; + return 4 + index; else /* same explanation as in the default statement, * the only user hitting this is st/nine. 
@@ -1614,7 +1702,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); + r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel); if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return; } @@ -2469,9 +2557,9 @@ static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) r600_bytecode_add_alu(ctx->bc, &alu); } -static int generate_gs_copy_shader(struct r600_context *rctx, - struct r600_pipe_shader *gs, - struct pipe_stream_output_info *so) +int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs, + struct pipe_stream_output_info *so) { struct r600_shader_ctx ctx = {}; struct r600_shader *gs_shader = &gs->shader; @@ -2969,7 +3057,8 @@ static int emit_lds_vs_writes(struct r600_shader_ctx *ctx) for (i = 0; i < ctx->shader->noutput; i++) { struct r600_bytecode_alu alu; - int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid); + int param = r600_get_lds_unique_index(ctx->shader->output[i].name, + ctx->shader->output[i].sid); if (param) { r = single_alu_op2(ctx, ALU_OP2_ADD_INT, @@ -4625,6 +4714,14 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) op = ALU_OP2_MUL; + /* nir_to_tgsi lowers nir_op_isub to UADD + negate, since r600 doesn't support + * source modifiers with integer ops we switch back to SUB_INT */ + bool src1_neg = ctx->src[1].neg; + if (op == ALU_OP2_ADD_INT && src1_neg) { + src1_neg = false; + op = ALU_OP2_SUB_INT; + } + for (i = 0; i <= lasti; i++) { if (!(write_mask & (1 << i))) continue; @@ -4642,6 +4739,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } + alu.src[1].neg = src1_neg; } else { r600_bytecode_src(&alu.src[0], &ctx->src[1], i); r600_bytecode_src(&alu.src[1], &ctx->src[0], i); @@ -8090,7 +8188,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - /* fall through */ + FALLTHROUGH; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -8111,7 +8209,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - /* fall through */ + FALLTHROUGH; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -8135,7 +8233,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) switch (inst->Texture.Texture) { case TGSI_TEXTURE_3D: offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; - /* fallthrough */ + FALLTHROUGH; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_RECT: @@ -8143,7 +8241,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; - /* fallthrough */ + FALLTHROUGH; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: @@ -10346,7 +10444,7 @@ static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx, * elements */ elements += 2; - /* fallthrough */ + FALLTHROUGH; /* FIXME: do the two elements added above cover the cases for the * r8xx+ below? 
*/ @@ -11050,6 +11148,76 @@ static int egcm_u64add(struct r600_shader_ctx *ctx) return 0; } + +static int egcm_i64neg(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int treg = ctx->temp_reg; + const int op = ALU_OP2_SUB_INT; + const int opc = ALU_OP2_SUBB_UINT; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.src[0].sel = V_SQ_ALU_SRC_0; + r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + alu.dst.sel = treg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.src[0].sel = V_SQ_ALU_SRC_0; + r600_bytecode_src(&alu.src[1], &ctx->src[0], 1); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = opc; + alu.dst.sel = treg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + alu.src[0].sel = V_SQ_ALU_SRC_0; + r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); + alu.src[1].neg = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = op; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 1; + alu.src[1].sel = treg; + alu.src[1].chan = 2; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.src[0].sel = treg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + /* result.y = mul_high a, b result.x = mul a,b result.y += a.x * b.y + a.y * b.x; @@ -12007,6 +12175,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, + [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg }, [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; @@ -12233,5 +12402,6 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add }, [TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul }, [TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div }, + [TGSI_OPCODE_I64NEG] = { ALU_OP0_NOP, egcm_i64neg }, [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, }; diff --git a/lib/mesa/src/gallium/drivers/r600/r600_shader.h b/lib/mesa/src/gallium/drivers/r600/r600_shader.h index 7dffd592a..8acd9a3af 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_shader.h +++ b/lib/mesa/src/gallium/drivers/r600/r600_shader.h @@ -72,8 +72,8 @@ struct r600_shader { unsigned nhwatomic; unsigned nlds; unsigned nsys_inputs; - struct r600_shader_io input[64]; - struct r600_shader_io output[64]; + struct r600_shader_io input[PIPE_MAX_SHADER_INPUTS]; + struct r600_shader_io output[PIPE_MAX_SHADER_OUTPUTS]; struct r600_shader_atomic atomics[8]; unsigned nhwatomic_ranges; boolean uses_kill; @@ -136,6 +136,8 @@ union r600_shader_key { unsigned image_size_const_offset:5; unsigned color_two_side:1; unsigned alpha_to_one:1; + unsigned apply_sample_id_mask:1; + unsigned dual_source_blend:1; } ps; struct { unsigned prim_id_out:8;
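The egcm_i64neg() handler added above open-codes a 64-bit integer negate out of 32-bit ALU operations: SUB_INT computes 0 minus each dword, SUBB_UINT recovers the borrow out of the low dword, and a final SUB_INT folds that borrow into the high dword before the results are moved to the destination. A host-side model of the same arithmetic, given only to illustrate the expansion (i64neg_model is a hypothetical name, not driver code):

#include <stdint.h>

static uint64_t i64neg_model(uint64_t x)
{
	uint32_t lo = (uint32_t)x;
	uint32_t hi = (uint32_t)(x >> 32);

	uint32_t neg_lo = 0u - lo;      /* SUB_INT   0, lo */
	uint32_t neg_hi = 0u - hi;      /* SUB_INT   0, hi */
	uint32_t borrow = lo != 0;      /* SUBB_UINT 0, lo: borrow out of the low half */

	return ((uint64_t)(neg_hi - borrow) << 32) | neg_lo;
}

For instance, i64neg_model(1) returns 0xffffffffffffffff, the two's-complement encoding of -1.
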
@@ -191,6 +193,10 @@ int eg_get_interpolator_index(unsigned interpolate, unsigned location); int r600_get_lds_unique_index(unsigned semantic_name, unsigned index); +int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs, + struct pipe_stream_output_info *so); + #ifdef __cplusplus } // extern "C" #endif diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state.c b/lib/mesa/src/gallium/drivers/r600/r600_state.c index b20a9d2a2..6eb2bd42b 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_state.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_state.c @@ -246,7 +246,7 @@ bool r600_is_format_supported(struct pipe_screen *screen, static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -415,11 +415,11 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, dsa->valuemask[1] = state->stencil[1].valuemask; dsa->writemask[0] = state->stencil[0].writemask; dsa->writemask[1] = state->stencil[1].writemask; - dsa->zwritemask = state->depth.writemask; + dsa->zwritemask = state->depth_writemask; - db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) | - S_028800_Z_WRITE_ENABLE(state->depth.writemask) | - S_028800_ZFUNC(state->depth.func); + db_depth_control = S_028800_Z_ENABLE(state->depth_enabled) | + S_028800_Z_WRITE_ENABLE(state->depth_writemask) | + S_028800_ZFUNC(state->depth_func); /* stencil */ if (state->stencil[0].enabled) { @@ -441,10 +441,10 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, /* alpha */ alpha_test_control = 0; alpha_ref = 0; - if (state->alpha.enabled) { - alpha_test_control = S_028410_ALPHA_FUNC(state->alpha.func); + if (state->alpha_enabled) { + alpha_test_control = S_028410_ALPHA_FUNC(state->alpha_func); alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); - alpha_ref = fui(state->alpha.ref_value); + alpha_ref = fui(state->alpha_ref_value); } dsa->sx_alpha_test_control = alpha_test_control & 0xff; dsa->alpha_ref = alpha_ref; @@ -520,15 +520,13 @@ static void *r600_create_rs_state(struct pipe_context *ctx, } spi_interp = S_0286D4_FLAT_SHADE_ENA(1); - if (state->sprite_coord_enable) { - spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) | - S_0286D4_PNT_SPRITE_OVRD_X(2) | - S_0286D4_PNT_SPRITE_OVRD_Y(3) | - S_0286D4_PNT_SPRITE_OVRD_Z(0) | - S_0286D4_PNT_SPRITE_OVRD_W(1); - if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { - spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1); - } + spi_interp |= S_0286D4_PNT_SPRITE_ENA(1) | + S_0286D4_PNT_SPRITE_OVRD_X(2) | + S_0286D4_PNT_SPRITE_OVRD_Y(3) | + S_0286D4_PNT_SPRITE_OVRD_Z(0) | + S_0286D4_PNT_SPRITE_OVRD_W(1); + if (state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT) { + spi_interp |= S_0286D4_PNT_SPRITE_TOP_1(1); } r600_store_context_reg_seq(&rs->buffer, R_028A00_PA_SU_POINT_SIZE, 3); @@ -757,11 +755,11 @@ r600_create_sampler_view_custom(struct pipe_context *ctx, view->tex_resource_words[1] = (S_038004_TEX_HEIGHT(height - 1) | S_038004_TEX_DEPTH(depth - 1) | S_038004_DATA_FORMAT(format)); - view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset >> 8; + view->tex_resource_words[2] = tmp->surface.u.legacy.level[offset_level].offset_256B; if (offset_level >= tmp->resource.b.b.last_level) { - view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset >> 8; 
+ view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level].offset_256B; } else { - view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset >> 8; + view->tex_resource_words[3] = tmp->surface.u.legacy.level[offset_level + 1].offset_256B; } view->tex_resource_words[4] = (word4 | S_038010_REQUEST_SIZE(1) | @@ -792,7 +790,7 @@ r600_create_sampler_view(struct pipe_context *ctx, static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4); @@ -826,7 +824,7 @@ static void r600_init_color_surface(struct r600_context *rctx, assert(rtex); } - offset = rtex->surface.u.legacy.level[level].offset; + offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; color_view = S_028080_SLICE_START(surf->base.u.tex.first_layer) | S_028080_SLICE_MAX(surf->base.u.tex.last_layer); @@ -910,7 +908,7 @@ static void r600_init_color_surface(struct r600_context *rctx, S_0280A0_NUMBER_TYPE(ntype) | S_0280A0_ENDIAN(endian); - /* EXPORT_NORM is an optimzation that can be enabled for better + /* EXPORT_NORM is an optimization that can be enabled for better * performance in certain cases */ if (rctx->b.chip_class == R600) { @@ -984,7 +982,7 @@ static void r600_init_color_surface(struct r600_context *rctx, /* CMASK. */ if (!rctx->dummy_cmask || rctx->dummy_cmask->b.b.width0 < cmask.size || - rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) { + (1 << rctx->dummy_cmask->buf->alignment_log2) % cmask.alignment != 0) { struct pipe_transfer *transfer; void *ptr; @@ -1000,7 +998,7 @@ static void r600_init_color_surface(struct r600_context *rctx, } /* Set the contents to 0xCC. */ - ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_TRANSFER_WRITE, &transfer); + ptr = pipe_buffer_map(&rctx->b.b, &rctx->dummy_cmask->b.b, PIPE_MAP_WRITE, &transfer); memset(ptr, 0xCC, cmask.size); pipe_buffer_unmap(&rctx->b.b, transfer); } @@ -1009,7 +1007,7 @@ static void r600_init_color_surface(struct r600_context *rctx, /* FMASK. 
*/ if (!rctx->dummy_fmask || rctx->dummy_fmask->b.b.width0 < fmask.size || - rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) { + (1 << rctx->dummy_fmask->buf->alignment_log2) % fmask.alignment != 0) { r600_resource_reference(&rctx->dummy_fmask, NULL); rctx->dummy_fmask = (struct r600_resource*) r600_aligned_buffer_create(&rscreen->b.b, 0, @@ -1043,7 +1041,7 @@ static void r600_init_depth_surface(struct r600_context *rctx, unsigned level, pitch, slice, format, offset, array_mode; level = surf->base.u.tex.level; - offset = rtex->surface.u.legacy.level[level].offset; + offset = (uint64_t)rtex->surface.u.legacy.level[level].offset_256B * 256; pitch = rtex->surface.u.legacy.level[level].nblk_x / 8 - 1; slice = (rtex->surface.u.legacy.level[level].nblk_x * rtex->surface.u.legacy.level[level].nblk_y) / 64; if (slice) { @@ -1284,7 +1282,7 @@ static void r600_get_sample_position(struct pipe_context *ctx, static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned max_dist = 0; if (rctx->b.family == CHIP_R600) { @@ -1351,7 +1349,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0]; @@ -1517,7 +1515,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { @@ -1547,7 +1545,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->db_htile_surface) { @@ -1568,7 +1566,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_render_override = @@ -1653,7 +1651,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_config_state *a = (struct r600_config_state*)atom; radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1); @@ -1662,7 +1660,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom * static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = 
rctx->vertex_buffer_state.dirty_mask; while (dirty_mask) { @@ -1702,7 +1700,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_constbuf_size, unsigned reg_alu_const_cache) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1776,7 +1774,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -1823,7 +1821,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx, unsigned resource_id_base, unsigned border_color_reg) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -1883,7 +1881,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; unsigned tmp; tmp = S_009508_DISABLE_CUBE_ANISO(1) | @@ -1901,13 +1899,13 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, + radeon_set_context_reg(&rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; @@ -1923,7 +1921,7 @@ static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600 static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; uint32_t v2 = 0, primid = 0; @@ -1958,7 +1956,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; struct r600_resource *rbuffer; @@ -2474,8 +2472,9 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha rctx->rasterizer && rctx->rasterizer->flatshade)) tmp |= S_028644_FLAT_SHADE(1); - if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && - sprite_coord_enable & (1 << rshader->input[i].sid)) { + if (rshader->input[i].name == TGSI_SEMANTIC_PCOORD || + (rshader->input[i].name == TGSI_SEMANTIC_TEXCOORD && + sprite_coord_enable & (1 << rshader->input[i].sid))) { tmp |= S_028644_PT_SPRITE_TEX(1); } @@ -2777,8 +2776,8 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx) memset(&dsa, 0, sizeof(dsa)); if (quirk) { - dsa.depth.enabled = 1; - dsa.depth.func = PIPE_FUNC_LEQUAL; + dsa.depth_enabled = 1; + dsa.depth_func = PIPE_FUNC_LEQUAL; dsa.stencil[0].enabled = 1; 
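/* Note on the dsa hunks in this file: they track the flattened
 * pipe_depth_stencil_alpha_state, where the old nested accessors
 * state->depth.enabled, depth.writemask, depth.func, alpha.enabled,
 * alpha.func and alpha.ref_value become the depth_enabled,
 * depth_writemask, depth_func, alpha_enabled, alpha_func and
 * alpha_ref_value scalars used here. */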
dsa.stencil[0].func = PIPE_FUNC_ALWAYS; dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP; @@ -2855,7 +2854,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, unsigned pitch, unsigned bpp) { - struct radeon_cmdbuf *cs = rctx->b.dma.cs; + struct radeon_cmdbuf *cs = &rctx->b.dma.cs; struct r600_texture *rsrc = (struct r600_texture*)src; struct r600_texture *rdst = (struct r600_texture*)dst; unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size; @@ -2885,8 +2884,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, x = src_x; y = src_y; z = src_z; - base = rsrc->surface.u.legacy.level[src_level].offset; - addr = rdst->surface.u.legacy.level[dst_level].offset; + base = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; + addr = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; addr += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; addr += dst_y * pitch + dst_x * bpp; } else { @@ -2904,8 +2903,8 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, x = dst_x; y = dst_y; z = dst_z; - base = rdst->surface.u.legacy.level[dst_level].offset; - addr = rsrc->surface.u.legacy.level[src_level].offset; + base = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; + addr = (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; addr += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_z; addr += src_y * pitch + src_x * bpp; } @@ -2959,7 +2958,7 @@ static void r600_dma_copy(struct pipe_context *ctx, unsigned src_x, src_y; unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz; - if (rctx->b.dma.cs == NULL) { + if (rctx->b.dma.cs.priv == NULL) { goto fallback; } @@ -3008,10 +3007,10 @@ static void r600_dma_copy(struct pipe_context *ctx, * dst_x/y == 0 * dst_pitch == src_pitch */ - src_offset= rsrc->surface.u.legacy.level[src_level].offset; + src_offset= (uint64_t)rsrc->surface.u.legacy.level[src_level].offset_256B * 256; src_offset += (uint64_t)rsrc->surface.u.legacy.level[src_level].slice_size_dw * 4 * src_box->z; src_offset += src_y * src_pitch + src_x * bpp; - dst_offset = rdst->surface.u.legacy.level[dst_level].offset; + dst_offset = (uint64_t)rdst->surface.u.legacy.level[dst_level].offset_256B * 256; dst_offset += (uint64_t)rdst->surface.u.legacy.level[dst_level].slice_size_dw * 4 * dst_z; dst_offset += dst_y * dst_pitch + dst_x * bpp; size = src_box->height * src_pitch; diff --git a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c index 4718286bd..2ded6c822 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_state_common.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_state_common.c @@ -29,6 +29,7 @@ #include "r600d.h" #include "util/format/u_format_s3tc.h" +#include "util/u_draw.h" #include "util/u_index_modify.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -37,6 +38,10 @@ #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_ureg.h" +#include "nir.h" +#include "nir/nir_to_tgsi_info.h" +#include "tgsi/tgsi_from_mesa.h" + void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) { assert(!cb->buf); @@ -72,12 +77,12 @@ void r600_init_atom(struct r600_context *rctx, void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) { - r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); + r600_emit_command_buffer(&rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb); } void r600_emit_alphatest_state(struct r600_context 
*rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; unsigned alpha_ref = a->sx_alpha_ref; @@ -245,7 +250,7 @@ static void r600_set_blend_color(struct pipe_context *ctx, void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct pipe_blend_color *state = &rctx->blend_color.state; radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); @@ -257,7 +262,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_vgt_state *a = (struct r600_vgt_state *)atom; radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en); @@ -281,17 +286,17 @@ static void r600_set_clip_state(struct pipe_context *ctx, } static void r600_set_stencil_ref(struct pipe_context *ctx, - const struct r600_stencil_ref *state) + const struct r600_stencil_ref state) { struct r600_context *rctx = (struct r600_context *)ctx; - rctx->stencil_ref.state = *state; + rctx->stencil_ref.state = state; r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom); } void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom; radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); @@ -306,25 +311,25 @@ void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) } static void r600_set_pipe_stencil_ref(struct pipe_context *ctx, - const struct pipe_stencil_ref *state) + const struct pipe_stencil_ref state) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_dsa_state *dsa = (struct r600_dsa_state*)rctx->dsa_state.cso; struct r600_stencil_ref ref; - rctx->stencil_ref.pipe_state = *state; + rctx->stencil_ref.pipe_state = state; if (!dsa) return; - ref.ref_value[0] = state->ref_value[0]; - ref.ref_value[1] = state->ref_value[1]; + ref.ref_value[0] = state.ref_value[0]; + ref.ref_value[1] = state.ref_value[1]; ref.valuemask[0] = dsa->valuemask[0]; ref.valuemask[1] = dsa->valuemask[1]; ref.writemask[0] = dsa->writemask[0]; ref.writemask[1] = dsa->writemask[1]; - r600_set_stencil_ref(ctx, &ref); + r600_set_stencil_ref(ctx, ref); } static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) @@ -357,7 +362,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) } } - r600_set_stencil_ref(ctx, &ref); + r600_set_stencil_ref(ctx, ref); /* Update alphatest state. 
*/ if (rctx->alphatest_state.sx_alpha_test_control != dsa->sx_alpha_test_control || @@ -562,6 +567,8 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx) static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, + unsigned unbind_num_trailing_slots, + bool take_ownership, const struct pipe_vertex_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -582,7 +589,13 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, if (input[i].buffer.resource) { vb[i].stride = input[i].stride; vb[i].buffer_offset = input[i].buffer_offset; - pipe_resource_reference(&vb[i].buffer.resource, input[i].buffer.resource); + if (take_ownership) { + pipe_resource_reference(&vb[i].buffer.resource, NULL); + vb[i].buffer.resource = input[i].buffer.resource; + } else { + pipe_resource_reference(&vb[i].buffer.resource, + input[i].buffer.resource); + } new_buffer_mask |= 1 << i; r600_context_add_resource_size(ctx, input[i].buffer.resource); } else { @@ -598,6 +611,11 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, disable_mask = ((1ull << count) - 1); } + for (i = 0; i < unbind_num_trailing_slots; i++) { + pipe_resource_reference(&vb[count + i].buffer.resource, NULL); + } + disable_mask |= ((1ull << unbind_num_trailing_slots) - 1) << count; + disable_mask <<= start_slot; new_buffer_mask <<= start_slot; @@ -622,6 +640,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, static void r600_set_sampler_views(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start, unsigned count, + unsigned unbind_num_trailing_slots, struct pipe_sampler_view **views) { struct r600_context *rctx = (struct r600_context *) pipe; @@ -815,9 +834,12 @@ static inline void r600_shader_selector_key(const struct pipe_context *ctx, rctx->rasterizer && rctx->rasterizer->multisample_enable && !rctx->framebuffer.cb0_is_integer; key->ps.nr_cbufs = rctx->framebuffer.state.nr_cbufs; + key->ps.apply_sample_id_mask = (rctx->ps_iter_samples > 1) || !rctx->rasterizer->multisample_enable; /* Dual-source blending only makes sense with nr_cbufs == 1. 
*/ - if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) + if (key->ps.nr_cbufs == 1 && rctx->dual_src_blend) { key->ps.nr_cbufs = 2; + key->ps.dual_source_blend = 1; + } break; } case PIPE_SHADER_TESS_EVAL: @@ -906,14 +928,19 @@ int r600_shader_select(struct pipe_context *ctx, } struct r600_pipe_shader_selector *r600_create_shader_state_tokens(struct pipe_context *ctx, - const struct tgsi_token *tokens, + const void *prog, enum pipe_shader_ir ir, unsigned pipe_shader_type) { struct r600_pipe_shader_selector *sel = CALLOC_STRUCT(r600_pipe_shader_selector); sel->type = pipe_shader_type; - sel->tokens = tgsi_dup_tokens(tokens); - tgsi_scan_shader(tokens, &sel->info); + if (ir == PIPE_SHADER_IR_TGSI) { + sel->tokens = tgsi_dup_tokens((const struct tgsi_token *)prog); + tgsi_scan_shader(sel->tokens, &sel->info); + } else if (ir == PIPE_SHADER_IR_NIR){ + sel->nir = nir_shader_clone(NULL, (const nir_shader *)prog); + nir_tgsi_scan_shader(sel->nir, &sel->info, true); + } return sel; } @@ -922,8 +949,16 @@ static void *r600_create_shader_state(struct pipe_context *ctx, unsigned pipe_shader_type) { int i; - struct r600_pipe_shader_selector *sel = r600_create_shader_state_tokens(ctx, state->tokens, pipe_shader_type); - + struct r600_pipe_shader_selector *sel; + + if (state->type == PIPE_SHADER_IR_TGSI) + sel = r600_create_shader_state_tokens(ctx, state->tokens, state->type, pipe_shader_type); + else if (state->type == PIPE_SHADER_IR_NIR) { + sel = r600_create_shader_state_tokens(ctx, state->ir.nir, state->type, pipe_shader_type); + } else + assert(0 && "Unknown shader type\n"); + + sel->ir_type = state->type; sel->so = state->stream_output; switch (pipe_shader_type) { @@ -1082,7 +1117,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx, p = c; } - free(sel->tokens); + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + free(sel->tokens); + /* We might have converted the TGSI shader to a NIR shader */ + if (sel->nir) + ralloc_free(sel->nir); + } + else if (sel->ir_type == PIPE_SHADER_IR_NIR) + ralloc_free(sel->nir); free(sel); } @@ -1159,6 +1201,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf static void r600_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader, uint index, + bool take_ownership, const struct pipe_constant_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -1166,7 +1209,7 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, struct pipe_constant_buffer *cb; const uint8_t *ptr; - /* Note that the state tracker can unbind constant buffers by + /* Note that the gallium frontend can unbind constant buffers by * passing NULL here. */ if (unlikely(!input || (!input->buffer && !input->user_buffer))) { @@ -1209,7 +1252,12 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, } else { /* Setup the hw buffer. 
*/ cb->buffer_offset = input->buffer_offset; - pipe_resource_reference(&cb->buffer, input->buffer); + if (take_ownership) { + pipe_resource_reference(&cb->buffer, NULL); + cb->buffer = input->buffer; + } else { + pipe_resource_reference(&cb->buffer, input->buffer); + } r600_context_add_resource_size(ctx, input->buffer); } @@ -1315,7 +1363,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on cb.user_buffer = ptr; cb.buffer_offset = 0; cb.buffer_size = size; - rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb); + rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, false, &cb); pipe_resource_reference(&cb.buffer, NULL); } } @@ -1504,21 +1552,21 @@ static void update_gs_block_state(struct r600_context *rctx, unsigned enable) if (enable) { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, - R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring); + R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.esgs_ring); if (rctx->tes_shader) { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, - R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring); + R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring); } else { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, - R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring); + R600_GS_RING_CONST_BUFFER, false, &rctx->gs_rings.gsvs_ring); } } else { r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, - R600_GS_RING_CONST_BUFFER, NULL); + R600_GS_RING_CONST_BUFFER, false, NULL); r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, - R600_GS_RING_CONST_BUFFER, NULL); + R600_GS_RING_CONST_BUFFER, false, NULL); r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL, - R600_GS_RING_CONST_BUFFER, NULL); + R600_GS_RING_CONST_BUFFER, false, NULL); } } } @@ -1638,7 +1686,7 @@ void r600_setup_scratch_area_for_shader(struct r600_context *rctx, if (scratch->dirty || unlikely(shader->scratch_space_needed != scratch->item_size || size > scratch->size)) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; scratch->dirty = false; @@ -1846,7 +1894,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) * to LS slots and won't reflect what is dirty as VS stage even if the * TES didn't overwrite it. The story for re-enabled TES is similar. * In any case, we're not allowed to submit any TES state when - * TES is disabled (the state tracker may not do this but this looks + * TES is disabled (the gallium frontend may not do this but this looks * like an optimization to me, not something which can be relied on). */ @@ -1982,7 +2030,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_clip_misc_state *state = &rctx->clip_misc_state; radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, @@ -2002,7 +2050,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom /* rast_prim is the primitive type after GS. */ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; enum pipe_prim_type rast_prim = rctx->current_rast_prim; /* Skip this if not rendering lines. 
*/ @@ -2025,21 +2073,35 @@ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx) rctx->last_rast_prim = rast_prim; } -static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) +static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count *draws, + unsigned num_draws) { + if (num_draws > 1) { + util_draw_multi(ctx, info, indirect, draws, num_draws); + return; + } + struct r600_context *rctx = (struct r600_context *)ctx; - struct pipe_resource *indexbuf = info->has_user_indices ? NULL : info->index.resource; - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct pipe_resource *indexbuf = !info->index_size || info->has_user_indices ? NULL : info->index.resource; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off; - bool has_user_indices = info->has_user_indices; + bool has_user_indices = info->index_size && info->has_user_indices; uint64_t mask; unsigned num_patches, dirty_tex_counter, index_offset = 0; unsigned index_size = info->index_size; int index_bias; struct r600_shader_atomic combined_atomics[8]; - uint8_t atomic_used_mask; + uint8_t atomic_used_mask = 0; + struct pipe_stream_output_target *count_from_so = NULL; + + if (indirect && indirect->count_from_stream_output) { + count_from_so = indirect->count_from_stream_output; + indirect = NULL; + } - if (!info->indirect && !info->count && (index_size || !info->count_from_stream_output)) { + if (!indirect && !draws[0].count && (index_size || !count_from_so)) { return; } @@ -2054,7 +2116,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* make sure that the gfx ring is only one active */ - if (radeon_emitted(rctx->b.dma.cs, 0)) { + if (radeon_emitted(&rctx->b.dma.cs, 0)) { rctx->b.dma.flush(rctx, PIPE_FLUSH_ASYNC, NULL); } @@ -2101,7 +2163,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } if (index_size) { - index_offset += info->start * index_size; + index_offset += draws[0].start * index_size; /* Translate 8-bit indices to 16-bit. */ if (unlikely(index_size == 1)) { @@ -2110,17 +2172,17 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info void *ptr; unsigned start, count; - if (likely(!info->indirect)) { + if (likely(!indirect)) { start = 0; - count = info->count; + count = draws[0].count; } else { /* Have to get start/count from indirect buffer, slow path ahead... */ - struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect->buffer; + struct r600_resource *indirect_resource = (struct r600_resource *)indirect->buffer; unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, - PIPE_TRANSFER_READ); + PIPE_MAP_READ); if (data) { - data += info->indirect->offset / sizeof(unsigned); + data += indirect->offset / sizeof(unsigned); start = data[2] * index_size; count = data[0]; } @@ -2149,25 +2211,28 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info * and the indices are emitted via PKT3_DRAW_INDEX_IMMD. * Indirect draws never use immediate indices. * Note: Instanced rendering in combination with immediate indices hangs. 
*/ - if (has_user_indices && (R600_BIG_ENDIAN || info->indirect || + if (has_user_indices && (R600_BIG_ENDIAN || indirect || info->instance_count > 1 || - info->count*index_size > 20)) { + draws[0].count*index_size > 20)) { + unsigned start_offset = draws[0].start * index_size; indexbuf = NULL; - u_upload_data(ctx->stream_uploader, 0, - info->count * index_size, 256, - info->index.user, &index_offset, &indexbuf); + u_upload_data(ctx->stream_uploader, start_offset, + draws[0].count * index_size, 256, + (char*)info->index.user + start_offset, + &index_offset, &indexbuf); + index_offset -= start_offset; has_user_indices = false; } index_bias = info->index_bias; } else { - index_bias = info->start; + index_bias = indirect ? 0 : draws[0].start; } /* Set the index offset and primitive restart. */ if (rctx->vgt_state.vgt_multi_prim_ib_reset_en != info->primitive_restart || rctx->vgt_state.vgt_multi_prim_ib_reset_indx != info->restart_index || rctx->vgt_state.vgt_indx_offset != index_bias || - (rctx->vgt_state.last_draw_was_indirect && !info->indirect)) { + (rctx->vgt_state.last_draw_was_indirect && !indirect)) { rctx->vgt_state.vgt_multi_prim_ib_reset_en = info->primitive_restart; rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info->restart_index; rctx->vgt_state.vgt_indx_offset = index_bias; @@ -2247,7 +2312,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* Update start instance. */ - if (!info->indirect && rctx->last_start_instance != info->start_instance) { + if (!indirect && rctx->last_start_instance != info->start_instance) { radeon_set_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info->start_instance); rctx->last_start_instance = info->start_instance; } @@ -2262,11 +2327,11 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } /* Draw packets. */ - if (likely(!info->indirect)) { + if (likely(!indirect)) { radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cs, info->instance_count); } else { - uint64_t va = r600_resource(info->indirect->buffer)->gpu_address; + uint64_t va = r600_resource(indirect->buffer)->gpu_address; assert(rctx->b.chip_class >= EVERGREEN); // Invalidate so non-indirect draw calls reset this state @@ -2280,7 +2345,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, - (struct r600_resource*)info->indirect->buffer, + (struct r600_resource*)indirect->buffer, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT)); } @@ -2292,20 +2357,20 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info (VGT_INDEX_16 | (R600_BIG_ENDIAN ? 
VGT_DMA_SWAP_16_BIT : 0))); if (has_user_indices) { - unsigned size_bytes = info->count*index_size; + unsigned size_bytes = draws[0].count*index_size; unsigned size_dw = align(size_bytes, 4) / 4; radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit)); - radeon_emit(cs, info->count); + radeon_emit(cs, draws[0].count); radeon_emit(cs, V_0287F0_DI_SRC_SEL_IMMEDIATE); - radeon_emit_array(cs, info->index.user, size_dw); + radeon_emit_array(cs, info->index.user + draws[0].start * index_size, size_dw); } else { uint64_t va = r600_resource(indexbuf)->gpu_address + index_offset; - if (likely(!info->indirect)) { + if (likely(!indirect)) { radeon_emit(cs, PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit)); radeon_emit(cs, va); radeon_emit(cs, (va >> 32UL) & 0xFF); - radeon_emit(cs, info->count); + radeon_emit(cs, draws[0].count); radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, @@ -2330,13 +2395,13 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, max_size); radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit)); - radeon_emit(cs, info->indirect->offset); + radeon_emit(cs, indirect->offset); radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); } } } else { - if (unlikely(info->count_from_stream_output)) { - struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output; + if (unlikely(count_from_so)) { + struct r600_so_target *t = (struct r600_so_target*)count_from_so; uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset; radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); @@ -2354,16 +2419,16 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info RADEON_PRIO_SO_FILLED_SIZE)); } - if (likely(!info->indirect)) { + if (likely(!indirect)) { radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); - radeon_emit(cs, info->count); + radeon_emit(cs, draws[0].count); } else { radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit)); - radeon_emit(cs, info->indirect->offset); + radeon_emit(cs, indirect->offset); } radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | - (info->count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0)); + (count_from_so ? 
S_0287F0_USE_OPAQUE(1) : 0)); } /* SMX returns CONTEXT_DONE too early workaround */ @@ -2549,7 +2614,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_cmdbuf *cs = rctx->b.gfx.cs; + struct radeon_cmdbuf *cs = &rctx->b.gfx.cs; struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader; if (!shader) @@ -2757,6 +2822,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_LATC1_SNORM: word4 |= sign_bit[0]; + FALLTHROUGH; case PIPE_FORMAT_RGTC1_UNORM: case PIPE_FORMAT_LATC1_UNORM: result = FMT_BC4; @@ -2764,6 +2830,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, case PIPE_FORMAT_RGTC2_SNORM: case PIPE_FORMAT_LATC2_SNORM: word4 |= sign_bit[0] | sign_bit[1]; + FALLTHROUGH; case PIPE_FORMAT_RGTC2_UNORM: case PIPE_FORMAT_LATC2_UNORM: result = FMT_BC5; @@ -2809,7 +2876,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, goto out_word4; case PIPE_FORMAT_BPTC_RGB_FLOAT: word4 |= sign_bit[0] | sign_bit[1] | sign_bit[2]; - /* fall through */ + FALLTHROUGH; case PIPE_FORMAT_BPTC_RGB_UFLOAT: result = FMT_BC6; goto out_word4; diff --git a/lib/mesa/src/gallium/drivers/r600/r600_uvd.c b/lib/mesa/src/gallium/drivers/r600/r600_uvd.c index 2e7d7ee4d..18ac073da 100644 --- a/lib/mesa/src/gallium/drivers/r600/r600_uvd.c +++ b/lib/mesa/src/gallium/drivers/r600/r600_uvd.c @@ -66,6 +66,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, struct pipe_video_buffer template; struct pipe_resource templ; unsigned i, array_size; + enum pipe_video_chroma_format chroma_format = + pipe_format_to_chroma_format(tmpl->buffer_format); assert(pipe); @@ -77,7 +79,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH); template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT); - vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_DEFAULT, 0); + vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, + PIPE_USAGE_DEFAULT, 0, chroma_format); if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING) templ.bind = PIPE_BIND_LINEAR; resources[0] = (struct r600_texture *) @@ -86,7 +89,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, goto error; if (resource_formats[1] != PIPE_FORMAT_NONE) { - vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_DEFAULT, 1); + vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, + PIPE_USAGE_DEFAULT, 1, chroma_format); if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING) templ.bind = PIPE_BIND_LINEAR; resources[1] = (struct r600_texture *) @@ -96,7 +100,8 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, } if (resource_formats[2] != PIPE_FORMAT_NONE) { - vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_DEFAULT, 2); + vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, + PIPE_USAGE_DEFAULT, 2, chroma_format); if (ctx->b.chip_class < EVERGREEN || tmpl->interlaced || !R600_UVD_ENABLE_TILING) templ.bind = PIPE_BIND_LINEAR; resources[2] = (struct r600_texture *) diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h 
b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h index e7231702d..ef2f39855 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_bc.h @@ -495,6 +495,15 @@ struct bc_alu_src { unsigned abs:1; unsigned rel:1; literal value; + + void clear() { + sel = 0; + chan = 0; + neg = 0; + abs = 0; + rel = 0; + value = 0; + } }; struct bc_alu { @@ -529,6 +538,31 @@ struct bc_alu { this->op = op; op_ptr = r600_isa_alu(op); } + void clear() { + op_ptr = nullptr; + op = 0; + for (int i = 0; i < 3; ++i) + src[i].clear(); + dst_gpr = 0; + dst_chan = 0; + dst_rel = 0; + clamp = 0; + omod = 0; + bank_swizzle = 0; + index_mode = 0; + last = 0; + pred_sel = 0; + fog_merge = 0; + write_mask = 0; + update_exec_mask = 0; + update_pred = 0; + slot = 0; + lds_idx_offset = 0; + slot_flags = AF_NONE; + } + bc_alu() { + clear(); + } }; struct bc_fetch { @@ -658,7 +692,12 @@ public: static unsigned dskip_mode; sb_context() : src_stats(), opt_stats(), isa(0), - hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {} + hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN), + alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0), + num_slots(0), uses_mova_gpr(false), + r6xx_gpr_index_workaround(false), stack_workaround_8xx(false), + stack_workaround_9xx(false), wavefront_size(0), + stack_entry_size(0) {} int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp index 05674ff24..36361a251 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -326,7 +326,7 @@ void expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src, const bc_alu_src &s = bc.src[src]; if (s.abs) - v = fabs(v.f); + v = fabsf(v.f); if (s.neg) v = -v.f; } @@ -424,21 +424,21 @@ bool expr_handler::fold_alu_op1(alu_node& n) { apply_alu_src_mod(n.bc, 0, cv); switch (n.bc.op) { - case ALU_OP1_CEIL: dv = ceil(cv.f); break; + case ALU_OP1_CEIL: dv = ceilf(cv.f); break; case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break; - case ALU_OP1_EXP_IEEE: dv = exp2(cv.f); break; - case ALU_OP1_FLOOR: dv = floor(cv.f); break; + case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break; + case ALU_OP1_FLOOR: dv = floorf(cv.f); break; case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ???? 
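// The expr_handler constant-folding hunks around here move the libm calls
// from double to single precision (fabs->fabsf, ceil->ceilf, floor->floorf,
// trunc->truncf, sqrt->sqrtf, exp2->exp2f, log2->log2f), presumably so the
// compile-time fold stays in the same 32-bit float domain as the hardware
// ALU results it replaces, rather than taking a float -> double -> float
// round trip through the default promotions.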
- case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floor(cv.f); break; - case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floor(cv.f + 0.5f); break; - case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)trunc(cv.f); break; + case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break; + case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break; + case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break; case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break; - case ALU_OP1_FRACT: dv = cv.f - floor(cv.f); break; + case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break; case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break; case ALU_OP1_LOG_CLAMPED: case ALU_OP1_LOG_IEEE: if (cv.f != 0.0f) - dv = log2(cv.f); + dv = log2f(cv.f); else // don't fold to NAN, let the GPU handle it for now // (prevents degenerate LIT tests from failing) @@ -454,7 +454,7 @@ bool expr_handler::fold_alu_op1(alu_node& n) { case ALU_OP1_PRED_SET_RESTORE: dv = cv; break; case ALU_OP1_RECIPSQRT_CLAMPED: case ALU_OP1_RECIPSQRT_FF: - case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrt(cv.f); break; + case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break; case ALU_OP1_RECIP_CLAMPED: case ALU_OP1_RECIP_FF: case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break; @@ -462,8 +462,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) { case ALU_OP1_RECIP_UINT: dv.u = (1ull << 32) / cv.u; break; // case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break; case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break; - case ALU_OP1_SQRT_IEEE: dv = sqrt(cv.f); break; - case ALU_OP1_TRUNC: dv = trunc(cv.f); break; + case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break; + case ALU_OP1_TRUNC: dv = truncf(cv.f); break; default: return false; @@ -719,7 +719,7 @@ bool expr_handler::fold_assoc(alu_node *n) { n->src[0] = n->src[2]; n->bc.src[0] = n->bc.src[2]; n->src[1] = sh.get_const_value(cr); - memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); + n->bc.src[1].clear(); n->src.resize(2); n->bc.set_op(ALU_OP2_ADD); @@ -729,7 +729,7 @@ bool expr_handler::fold_assoc(alu_node *n) { n->bc.src[0] = a->bc.src[last_arg]; n->bc.src[0].neg ^= cur_neg; n->src[1] = sh.get_const_value(cr); - memset(&n->bc.src[1], 0, sizeof(bc_alu_src)); + n->bc.src[1].clear(); } return false; @@ -770,7 +770,7 @@ bool expr_handler::fold_alu_op2(alu_node& n) { case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2) if (!sh.safe_math) { n.src[1] = sh.get_const_value(2.0f); - memset(&n.bc.src[1], 0, sizeof(bc_alu_src)); + n.bc.src[1].clear(); n.bc.set_op(ALU_OP2_MUL); return fold_alu_op2(n); } @@ -1070,7 +1070,7 @@ bool expr_handler::fold_alu_op3(alu_node& n) { } n.src[1] = t; - memset(&n.bc.src[1], 0, sizeof(bc_alu_src)); + n.bc.src[1].clear(); n.src.resize(2); @@ -1101,7 +1101,7 @@ bool expr_handler::fold_alu_op3(alu_node& n) { dv = cv0.f * cv1.f; n.bc.set_op(ALU_OP2_ADD); n.src[0] = sh.get_const_value(dv); - memset(&n.bc.src[0], 0, sizeof(bc_alu_src)); + n.bc.src[0].clear(); n.src[1] = n.src[2]; n.bc.src[1] = n.bc.src[2]; n.src.resize(2); diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp index 017153434..48355e8d6 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_if_conversion.cpp @@ -99,8 +99,8 @@ void if_conversion::convert_kill_instructions(region_node *r, a->src[0] = cnd; a->src[1] = sh.get_const_value(0); // clear modifiers - memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); - memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); + a->bc.src[0].clear(); + 
a->bc.src[1].clear(); } else { // kill with constant 'false' condition, this shouldn't happen // but remove it anyway diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h index ef0fbd4e6..eecf17d28 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ir.h @@ -713,7 +713,8 @@ enum node_flags { NF_SCHEDULE_EARLY = (1 << 9), // for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU - NF_ALU_STACK_WORKAROUND = (1 << 10) + NF_ALU_STACK_WORKAROUND = (1 << 10), + NF_ALU_2SLOT = (1 << 11), }; inline node_flags operator |(node_flags l, node_flags r) { @@ -929,7 +930,7 @@ public: bool empty() { assert(first != NULL || first == last); return !first; } unsigned count(); - // used with node containers that represent shceduling queues + // used with node containers that represent scheduling queues // ignores copies and takes into account alu_packed_node items unsigned real_alu_count(); @@ -1012,7 +1013,7 @@ public: class alu_node : public node { protected: - alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); } + alu_node() : node(NT_OP, NST_ALU_INST) { } public: bc_alu bc; @@ -1021,8 +1022,9 @@ public: virtual bool fold_dispatch(expr_handler *ex); unsigned forced_bank_swizzle() { - return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ? - VEC_210 : 0; + return ((bc.op_ptr->flags & AF_INTERP) && + ((bc.slot_flags == AF_4V) || + (bc.slot_flags == AF_2V))) ? VEC_210 : 0; } // return param index + 1 if instruction references interpolation param, diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp index c557b8687..e14b187de 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_ra_init.cpp @@ -313,24 +313,26 @@ int ra_init::run() { alloc_arrays(); - ra_node(sh.root); - return 0; + return ra_node(sh.root) ? 
0 : 1; } -void ra_init::ra_node(container_node* c) { +bool ra_init::ra_node(container_node* c) { for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->type == NT_OP) { - process_op(n); + if (!process_op(n)) + return false; } if (n->is_container() && !n->is_alu_packed()) { - ra_node(static_cast<container_node*>(n)); + if (!ra_node(static_cast<container_node*>(n))) + return false; } } + return true; } -void ra_init::process_op(node* n) { +bool ra_init::process_op(node* n) { bool copy = n->is_copy_mov(); @@ -355,7 +357,8 @@ void ra_init::process_op(node* n) { for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) { value *v = *I; if (v && v->is_sgpr()) - color(v); + if (!color(v)) + return false; } } @@ -372,10 +375,12 @@ void ra_init::process_op(node* n) { assign_color(v, s->gpr); } } else - color(v); + if (!color(v)) + return false; } } } + return true; } void ra_init::color_bs_constraint(ra_constraint* c) { @@ -476,15 +481,15 @@ void ra_init::color_bs_constraint(ra_constraint* c) { } } -void ra_init::color(value* v) { +bool ra_init::color(value* v) { if (v->constraint && v->constraint->kind == CK_PACKED_BS) { color_bs_constraint(v->constraint); - return; + return true; } if (v->chunk && v->chunk->is_fixed()) - return; + return true; RA_DUMP( sblog << "coloring "; @@ -497,24 +502,24 @@ void ra_init::color(value* v) { if (v->is_reg_pinned()) { assert(v->is_chan_pinned()); assign_color(v, v->pin_gpr); - return; + return true; } regbits rb(sh, v->interferences); sel_chan c; if (v->is_chan_pinned()) { - RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << " "; ); unsigned mask = 1 << v->pin_gpr.chan(); c = rb.find_free_chans(mask) + v->pin_gpr.chan(); } else { unsigned cm = get_preferable_chan_mask(); - RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; ); c = rb.find_free_chan_by_mask(cm); - } + } - assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed"); + if (!c || c.sel() >= 128 - ctx.alu_temp_gprs) + return false; assign_color(v, c); + return true; } void ra_init::assign_color(value* v, sel_chan c) { diff --git a/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp b/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp index fe887c84c..2d5fbfdb2 100644 --- a/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/lib/mesa/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -1950,7 +1950,10 @@ void post_scheduler::release_src_vec(vvec& vv, bool src) { } void literal_tracker::reset() { - memset(lt, 0, sizeof(lt)); + lt[0].u = 0; + lt[1].u = 0; + lt[2].u = 0; + lt[3].u = 0; memset(uc, 0, sizeof(uc)); } diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig b/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig new file mode 100644 index 000000000..9cb67618b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/.editorconfig @@ -0,0 +1,2 @@ +[*.{cpp,c,h}] +indent_style = space diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp new file mode 100644 index 000000000..8690fc269 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.cpp @@ -0,0 +1,325 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, 
modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_alu_defines.h" + +namespace r600 { + +const std::map<EAluOp, AluOp> alu_ops = { + {op0_nop ,AluOp(0, 0, AluOp::a,"NOP")}, + {op0_group_barrier ,AluOp(0, 0, AluOp::a,"GROUP_BARRIER")}, + {op0_group_seq_begin ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_BEGIN")}, + {op0_group_seq_end ,AluOp(0, 0, AluOp::a,"GROUP_SEQ_END")}, + {op0_pred_set_clr ,AluOp(0, 1, AluOp::a,"PRED_SET_CLR")}, + {op0_store_flags ,AluOp(0, 0, AluOp::v,"STORE_FLAGS")}, + {op0_lds_1a ,AluOp(0, 0, AluOp::v,"LDS_1A")}, + {op0_lds_1a1d ,AluOp(0, 0, AluOp::v,"LDS_1A1D")}, + {op0_lds_2a ,AluOp(0, 0, AluOp::v,"LDS_2A")}, + + {op1_bcnt_int ,AluOp(1, 0, AluOp::v,"BCNT_INT")}, + {op1_bcnt_accum_prev_int ,AluOp(1, 0, AluOp::v,"BCNT_ACCUM_PREV_INT")}, + {op1_bfrev_int ,AluOp(1, 0, AluOp::a,"BFREV_INT")}, + {op1_ceil ,AluOp(1, 1, AluOp::a,"CEIL")}, + {op1_cos ,AluOp(1, 1, AluOp::t,"COS")}, + {op1_exp_ieee ,AluOp(1, 1, AluOp::t,"EXP_IEEE")}, + {op1_floor ,AluOp(1, 1, AluOp::a,"FLOOR")}, + {op1_flt_to_int ,AluOp(1, 0, AluOp::a,"FLT_TO_INT")}, + {op1_flt_to_uint ,AluOp(1, 1, AluOp::t,"FLT_TO_UINT")}, + {op1_flt_to_int_rpi ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_RPI")}, + {op1_flt_to_int_floor ,AluOp(1, 1, AluOp::v,"FLT_TO_INT_FLOOR")}, + {op1_flt16_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT16_TO_FLT32")}, + {op1_flt32_to_flt16 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT16")}, + {op1_flt32_to_flt64 ,AluOp(1, 1, AluOp::v,"FLT32_TO_FLT64")}, + {op1_flt64_to_flt32 ,AluOp(1, 1, AluOp::a,"FLT64_TO_FLT32")}, + {op1_fract ,AluOp(1, 1, AluOp::a,"FRACT")}, + {op1_fract_64 ,AluOp(1, 1, AluOp::v,"FRACT_64")}, + {op1_frexp_64 ,AluOp(1, 1, AluOp::v,"FREXP_64")}, + {op1_int_to_flt ,AluOp(1, 0, AluOp::t,"INT_TO_FLT")}, + {op1_ldexp_64 ,AluOp(1, 1, AluOp::v,"LDEXP_64")}, + {op1_interp_load_p0 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P0")}, + {op1_interp_load_p10 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P10")}, + {op1_interp_load_p20 ,AluOp(1, 1, AluOp::v,"INTERP_LOAD_P20")}, + {op1_load_store_flags ,AluOp(1, 0, AluOp::v,"LOAD_STORE_FLAGS")}, + {op1_log_clamped ,AluOp(1, 1, AluOp::t,"LOG_CLAMPED")}, + {op1_log_ieee ,AluOp(1, 1, AluOp::t,"LOG_IEEE")}, + {op1_max4 ,AluOp(1, 1, AluOp::v,"MAX4")}, + {op1_mbcnt_32hi_int ,AluOp(1, 0, AluOp::v,"MBCNT_32HI_INT")}, + {op1_mbcnt_32lo_accum_prev_int ,AluOp(1, 0, AluOp::v,"MBCNT_32LO_ACCUM_PREV_INT")}, + {op1_mov ,AluOp(1, 0, AluOp::a,"MOV")}, + {op1_mova_int ,AluOp(1, 0, AluOp::v,"MOVA_INT")}, + {op1_not_int ,AluOp(1, 0, AluOp::a,"NOT_INT")}, + {op1_offset_to_flt ,AluOp(1, 0, AluOp::v,"OFFSET_TO_FLT")}, + {op1_pred_set_inv ,AluOp(1, 1, AluOp::a,"PRED_SET_INV")}, + {op1_pred_set_restore ,AluOp(1, 1, AluOp::a,"PRED_SET_RESTORE")}, + {op1_set_cf_idx0 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX0")}, 
/* Reads from AR register? */ + {op1_set_cf_idx1 ,AluOp(1, 0, AluOp::a,"SET_CF_IDX1")}, /* Reads from AR register? */ + {op1_recip_clamped ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED")}, + {op1_recip_ff ,AluOp(1, 1, AluOp::t,"RECIP_FF")}, + {op1_recip_ieee ,AluOp(1, 1, AluOp::t,"RECIP_IEEE")}, + {op1_recipsqrt_clamped ,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED")}, + {op1_recipsqrt_ff ,AluOp(1, 1, AluOp::t,"RECIPSQRT_FF")}, + {op1_recipsqrt_ieee1 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_IEEE")}, + {op1_recip_int ,AluOp(1, 0, AluOp::t,"RECIP_INT")}, + {op1_recip_uint ,AluOp(1, 0, AluOp::t,"RECIP_UINT")}, + {op1_recip_64 ,AluOp(1, 1, AluOp::t,"RECIP_64")}, + {op1_recip_clamped_64 ,AluOp(1, 1, AluOp::t,"RECIP_CLAMPED_64")}, + {op1_recipsqrt_64 ,AluOp(1, 1, AluOp::t,"RECIPSQRT_64")}, + {op1_recipsqrt_clamped_64,AluOp(1, 1, AluOp::t,"RECIPSQRT_CLAMPED_64")}, + {op1_rndne ,AluOp(1, 1, AluOp::a,"RNDNE")}, + {op1_sqrt_ieee ,AluOp(1, 1, AluOp::t,"SQRT_IEEE")}, + {op1_sin ,AluOp(1, 1, AluOp::t,"SIN")}, + {op1_trunc ,AluOp(1, 1, AluOp::a,"TRUNC")}, + {op1_sqrt_64 ,AluOp(1, 1, AluOp::t,"SQRT_64")}, + {op1_ubyte0_flt ,AluOp(1, 1, AluOp::v,"UBYTE0_FLT")}, + {op1_ubyte1_flt ,AluOp(1, 1, AluOp::v,"UBYTE1_FLT")}, + {op1_ubyte2_flt ,AluOp(1, 1, AluOp::v,"UBYTE2_FLT")}, + {op1_ubyte3_flt ,AluOp(1, 1, AluOp::v,"UBYTE3_FLT")}, + {op1_uint_to_flt ,AluOp(1, 0, AluOp::t,"UINT_TO_FLT")}, + {op1_ffbh_uint ,AluOp(1, 0, AluOp::v,"FFBH_UINT")}, + {op1_ffbl_int ,AluOp(1, 0, AluOp::v,"FFBL_INT")}, + {op1_ffbh_int ,AluOp(1, 0, AluOp::v,"FFBH_INT")}, + {op1_flt_to_uint4 ,AluOp(1, 1, AluOp::v,"FLT_TO_UINT4")}, + {op1v_flt32_to_flt64 ,AluOp(1, 1, AluOp::a,"FLT32_TO_FLT64")}, + {op1v_flt64_to_flt32 ,AluOp(1, 1, AluOp::v,"FLT64_TO_FLT32")}, + + {op2_add ,AluOp(2, 1, AluOp::a,"ADD")}, + {op2_bfm_int ,AluOp(2, 0, AluOp::v,"BFM_INT")}, + {op2_mul ,AluOp(2, 1, AluOp::a,"MUL")}, + {op2_mul_ieee ,AluOp(2, 1, AluOp::a,"MUL_IEEE")}, + {op2_max ,AluOp(2, 1, AluOp::a,"MAX")}, + {op2_min ,AluOp(2, 1, AluOp::a,"MIN")}, + {op2_max_dx10 ,AluOp(2, 1, AluOp::a,"MAX_DX10")}, + {op2_min_dx10 ,AluOp(2, 1, AluOp::a,"MIN_DX10")}, + {op2_sete ,AluOp(2, 1, AluOp::a,"SETE")}, + {op2_setgt ,AluOp(2, 1, AluOp::a,"SETGT")}, + {op2_setge ,AluOp(2, 1, AluOp::a,"SETGE")}, + {op2_setne ,AluOp(2, 1, AluOp::a,"SETNE")}, + {op2_sete_dx10 ,AluOp(2, 1, AluOp::a,"SETE_DX10")}, + {op2_setgt_dx10 ,AluOp(2, 1, AluOp::a,"SETGT_DX10")}, + {op2_setge_dx10 ,AluOp(2, 1, AluOp::a,"SETGE_DX10")}, + {op2_setne_dx10 ,AluOp(2, 1, AluOp::a,"SETNE_DX10")}, + {op2_ashr_int ,AluOp(2, 0, AluOp::a,"ASHR_INT")}, + {op2_lshr_int ,AluOp(2, 0, AluOp::a,"LSHR_INT")}, + {op2_lshl_int ,AluOp(2, 0, AluOp::a,"LSHL_INT")}, + {op2_mul_64 ,AluOp(2, 1, AluOp::a,"MUL_64")}, + {op2_pred_setgt_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGT_UINT")}, + {op2_pred_setge_uint ,AluOp(2, 0, AluOp::a,"PRED_SETGE_UINT")}, + {op2_pred_sete ,AluOp(2, 1, AluOp::a,"PRED_SETE")}, + {op2_pred_setgt ,AluOp(2, 1, AluOp::a,"PRED_SETGT")}, + {op2_pred_setge ,AluOp(2, 1, AluOp::a,"PRED_SETGE")}, + {op2_pred_setne ,AluOp(2, 1, AluOp::a,"PRED_SETNE")}, + {op2_pred_set_pop ,AluOp(2, 1, AluOp::a,"PRED_SET_POP")}, + {op2_pred_sete_push ,AluOp(2, 1, AluOp::a,"PRED_SETE_PUSH")}, + {op2_pred_setgt_push ,AluOp(2, 1, AluOp::a,"PRED_SETGT_PUSH")}, + {op2_pred_setge_push ,AluOp(2, 1, AluOp::a,"PRED_SETGE_PUSH")}, + {op2_pred_setne_push ,AluOp(2, 1, AluOp::a,"PRED_SETNE_PUSH")}, + {op2_kille ,AluOp(2, 1, AluOp::a,"KILLE")}, + {op2_killgt ,AluOp(2, 1, AluOp::a,"KILLGT")}, + {op2_killge ,AluOp(2, 1, AluOp::a,"KILLGE")}, + {op2_killne ,AluOp(2, 1, 
AluOp::a,"KILLNE")}, + {op2_and_int ,AluOp(2, 0, AluOp::a,"AND_INT")}, + {op2_or_int ,AluOp(2, 0, AluOp::a,"OR_INT")}, + {op2_xor_int ,AluOp(2, 0, AluOp::a,"XOR_INT")}, + {op2_add_int ,AluOp(2, 0, AluOp::a,"ADD_INT")}, + {op2_sub_int ,AluOp(2, 0, AluOp::a,"SUB_INT")}, + {op2_max_int ,AluOp(2, 0, AluOp::a,"MAX_INT")}, + {op2_min_int ,AluOp(2, 0, AluOp::a,"MIN_INT")}, + {op2_max_uint ,AluOp(2, 0, AluOp::a,"MAX_UINT")}, + {op2_min_uint ,AluOp(2, 0, AluOp::a,"MIN_UINT")}, + {op2_sete_int ,AluOp(2, 0, AluOp::a,"SETE_INT")}, + {op2_setgt_int ,AluOp(2, 0, AluOp::a,"SETGT_INT")}, + {op2_setge_int ,AluOp(2, 0, AluOp::a,"SETGE_INT")}, + {op2_setne_int ,AluOp(2, 0, AluOp::a,"SETNE_INT")}, + {op2_setgt_uint ,AluOp(2, 0, AluOp::a,"SETGT_UINT")}, + {op2_setge_uint ,AluOp(2, 0, AluOp::a,"SETGE_UINT")}, + {op2_killgt_uint ,AluOp(2, 0, AluOp::a,"KILLGT_UINT")}, + {op2_killge_uint ,AluOp(2, 0, AluOp::a,"KILLGE_UINT")}, + {op2_prede_int ,AluOp(2, 0, AluOp::a,"PREDE_INT")}, + {op2_pred_setgt_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_INT")}, + {op2_pred_setge_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_INT")}, + {op2_pred_setne_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_INT")}, + {op2_kille_int ,AluOp(2, 0, AluOp::a,"KILLE_INT")}, + {op2_killgt_int ,AluOp(2, 0, AluOp::a,"KILLGT_INT")}, + {op2_killge_int ,AluOp(2, 0, AluOp::a,"KILLGE_INT")}, + {op2_killne_int ,AluOp(2, 0, AluOp::a,"KILLNE_INT")}, + {op2_pred_sete_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETE_PUSH_INT")}, + {op2_pred_setgt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGT_PUSH_INT")}, + {op2_pred_setge_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETGE_PUSH_INT")}, + {op2_pred_setne_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETNE_PUSH_INT")}, + {op2_pred_setlt_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLT_PUSH_INT")}, + {op2_pred_setle_push_int ,AluOp(2, 0, AluOp::a,"PRED_SETLE_PUSH_INT")}, + {op2_addc_uint ,AluOp(2, 0, AluOp::a,"ADDC_UINT")}, + {op2_subb_uint ,AluOp(2, 0, AluOp::a,"SUBB_UINT")}, + {op2_set_mode ,AluOp(2, 0, AluOp::a,"SET_MODE")}, + {op2_set_lds_size ,AluOp(2, 0, AluOp::a,"SET_LDS_SIZE")}, + {op2_mullo_int ,AluOp(2, 0, AluOp::t,"MULLO_INT")}, + {op2_mulhi_int ,AluOp(2, 0, AluOp::t,"MULHI_INT")}, + {op2_mullo_uint ,AluOp(2, 0, AluOp::t,"MULLO_UINT")}, + {op2_mulhi_uint ,AluOp(2, 0, AluOp::t,"MULHI_UINT")}, + {op2_dot_ieee ,AluOp(2, 1, AluOp::v,"DOT_IEEE")}, + {op2_mulhi_uint24 ,AluOp(2, 0, AluOp::v,"MULHI_UINT24")}, + {op2_mul_uint24 ,AluOp(2, 0, AluOp::v,"MUL_UINT24")}, + {op2_sete_64 ,AluOp(2, 1, AluOp::v,"SETE_64")}, + {op2_setne_64 ,AluOp(2, 1, AluOp::v,"SETNE_64")}, + {op2_setgt_64 ,AluOp(2, 1, AluOp::v,"SETGT_64")}, + {op2_setge_64 ,AluOp(2, 1, AluOp::v,"SETGE_64")}, + {op2_min_64 ,AluOp(2, 1, AluOp::v,"MIN_64")}, + {op2_max_64 ,AluOp(2, 1, AluOp::v,"MAX_64")}, + {op2_dot4 ,AluOp(2, 1, AluOp::v,"DOT4")}, + {op2_dot4_ieee ,AluOp(2, 1, AluOp::v,"DOT4_IEEE")}, + {op2_cube ,AluOp(2, 1, AluOp::v,"CUBE")}, + {op2_pred_setgt_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGT_64")}, + {op2_pred_sete_64 ,AluOp(2, 1, AluOp::v,"PRED_SETE_64")}, + {op2_pred_setge_64 ,AluOp(2, 1, AluOp::v,"PRED_SETGE_64")}, + {OP2V_MUL_64 ,AluOp(2, 1, AluOp::v,"MUL_64")}, + {op2_add_64 ,AluOp(2, 1, AluOp::v,"ADD_64")}, + {op2_sad_accum_prev_uint ,AluOp(2, 0, AluOp::v,"SAD_ACCUM_PREV_UINT")}, + {op2_dot ,AluOp(2, 1, AluOp::v,"DOT")}, + {op2_mul_prev ,AluOp(2, 1, AluOp::v,"MUL_PREV")}, + {op2_mul_ieee_prev ,AluOp(2, 1, AluOp::v,"MUL_IEEE_PREV")}, + {op2_add_prev ,AluOp(2, 1, AluOp::v,"ADD_PREV")}, + {op2_muladd_prev ,AluOp(2, 1, AluOp::v,"MULADD_PREV")}, + {op2_muladd_ieee_prev ,AluOp(2, 1, 
AluOp::v,"MULADD_IEEE_PREV")}, + {op2_interp_xy ,AluOp(2, 1, AluOp::v,"INTERP_XY")}, + {op2_interp_zw ,AluOp(2, 1, AluOp::v,"INTERP_ZW")}, + {op2_interp_x ,AluOp(2, 1, AluOp::v,"INTERP_X")}, + {op2_interp_z ,AluOp(2, 1, AluOp::v,"INTERP_Z")}, + + {op3_bfe_uint ,AluOp(3, 0, AluOp::v,"BFE_UINT")}, + {op3_bfe_int ,AluOp(3, 0, AluOp::v,"BFE_INT")}, + {op3_bfi_int ,AluOp(3, 0, AluOp::v,"BFI_INT")}, + {op3_fma ,AluOp(3, 1, AluOp::v,"FMA")}, + {op3_cndne_64 ,AluOp(3, 1, AluOp::v,"CNDNE_64")}, + {op3_fma_64 ,AluOp(3, 1, AluOp::v,"FMA_64")}, + {op3_lerp_uint ,AluOp(3, 0, AluOp::v,"LERP_UINT")}, + {op3_bit_align_int ,AluOp(3, 0, AluOp::v,"BIT_ALIGN_INT")}, + {op3_byte_align_int ,AluOp(3, 0, AluOp::v,"BYTE_ALIGN_INT")}, + {op3_sad_accum_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_UINT")}, + {op3_sad_accum_hi_uint ,AluOp(3, 0, AluOp::v,"SAD_ACCUM_HI_UINT")}, + {op3_muladd_uint24 ,AluOp(3, 0, AluOp::v,"MULADD_UINT24")}, + {op3_lds_idx_op ,AluOp(3, 0, AluOp::x,"LDS_IDX_OP")}, + {op3_muladd ,AluOp(3, 1, AluOp::a,"MULADD")}, + {op3_muladd_m2 ,AluOp(3, 1, AluOp::a,"MULADD_M2")}, + {op3_muladd_m4 ,AluOp(3, 1, AluOp::a,"MULADD_M4")}, + {op3_muladd_d2 ,AluOp(3, 1, AluOp::a,"MULADD_D2")}, + {op3_muladd_ieee ,AluOp(3, 1, AluOp::a,"MULADD_IEEE")}, + {op3_cnde ,AluOp(3, 1, AluOp::a,"CNDE")}, + {op3_cndgt ,AluOp(3, 1, AluOp::a,"CNDGT")}, + {op3_cndge ,AluOp(3, 1, AluOp::a,"CNDGE")}, + {op3_cnde_int ,AluOp(3, 0, AluOp::a,"CNDE_INT")}, + {op3_cndgt_int ,AluOp(3, 0, AluOp::a,"CNDGT_INT")}, + {op3_cndge_int ,AluOp(3, 0, AluOp::a,"CNDGE_INT")}, + {op3_mul_lit ,AluOp(3, 1, AluOp::t,"MUL_LIT")} +}; + +const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const = { + {ALU_SRC_LDS_OQ_A, {false, "LDS_OQ_A"}}, + {ALU_SRC_LDS_OQ_B, {false, "LDS_OQ_B"}}, + {ALU_SRC_LDS_OQ_A_POP, {false, "LDS_OQ_A_POP"}}, + {ALU_SRC_LDS_OQ_B_POP, {false, "LDS_OQ_B_POP"}}, + {ALU_SRC_LDS_DIRECT_A, {false, "LDS_DIRECT_A"}}, + {ALU_SRC_LDS_DIRECT_B, {false, "LDS_DIRECT_B"}}, + {ALU_SRC_TIME_HI, {false, "TIME_HI"}}, + {ALU_SRC_TIME_LO, {false, "TIME_LO"}}, + {ALU_SRC_MASK_HI, {false, "MASK_HI"}}, + {ALU_SRC_MASK_LO, {false, "MASK_LO"}}, + {ALU_SRC_HW_WAVE_ID, {false, "HW_WAVE_ID"}}, + {ALU_SRC_SIMD_ID, {false, "SIMD_ID"}}, + {ALU_SRC_SE_ID, {false, "SE_ID"}}, + {ALU_SRC_HW_THREADGRP_ID, {false, "HW_THREADGRP_ID"}}, + {ALU_SRC_WAVE_ID_IN_GRP, {false, "WAVE_ID_IN_GRP"}}, + {ALU_SRC_NUM_THREADGRP_WAVES, {false, "NUM_THREADGRP_WAVES"}}, + {ALU_SRC_HW_ALU_ODD, {false, "HW_ALU_ODD"}}, + {ALU_SRC_LOOP_IDX, {false, "LOOP_IDX"}}, + {ALU_SRC_PARAM_BASE_ADDR, {false, "PARAM_BASE_ADDR"}}, + {ALU_SRC_NEW_PRIM_MASK, {false, "NEW_PRIM_MASK"}}, + {ALU_SRC_PRIM_MASK_HI, {false, "PRIM_MASK_HI"}}, + {ALU_SRC_PRIM_MASK_LO, {false, "PRIM_MASK_LO"}}, + {ALU_SRC_1_DBL_L, {false, "1.0L"}}, + {ALU_SRC_1_DBL_M, {false, "1.0H"}}, + {ALU_SRC_0_5_DBL_L, {false, "0.5L"}}, + {ALU_SRC_0_5_DBL_M, {false, "0.5H"}}, + {ALU_SRC_0, {false, "0"}}, + {ALU_SRC_1, {false, "1.0"}}, + {ALU_SRC_1_INT, {false, "1"}}, + {ALU_SRC_M_1_INT, {false, "-1"}}, + {ALU_SRC_0_5, {false, "0.5"}}, + {ALU_SRC_LITERAL, {true, "ALU_SRC_LITERAL"}}, + {ALU_SRC_PV, {true, "PV"}}, + {ALU_SRC_PS, {false, "PS"}} +}; + +const std::map<ESDOp, LDSOp> lds_ops = { + {DS_OP_ADD , {2, "DS_ADD"}}, + {DS_OP_SUB , {2, "DS_SUB"}}, + {DS_OP_RSUB , {2, "DS_RSUB"}}, + {DS_OP_INC , {2, "DS_INC"}}, + {DS_OP_DEC , {2, "DS_DEC"}}, + {DS_OP_MIN_INT , {2, "DS_MIN_INT"}}, + {DS_OP_MAX_INT , {2, "DS_MAX_INT"}}, + {DS_OP_MIN_UINT , {2, "DS_MIN_UINT"}}, + {DS_OP_MAX_UINT , {2, "DS_MAX_UINT"}}, + {DS_OP_AND , {2, "DS_AND"}}, + 
{DS_OP_OR , {2, "DS_OR"}}, + {DS_OP_XOR , {2, "DS_XOR"}}, + {DS_OP_MSKOR , {3, "DS_MSKOR"}}, + {DS_OP_WRITE , {2, "DS_WRITE"}}, + {DS_OP_WRITE_REL , {3, "DS_WRITE_REL"}}, + {DS_OP_WRITE2 , {3, "DS_WRITE2"}}, + {DS_OP_CMP_STORE , {3, "DS_CMP_STORE"}}, + {DS_OP_CMP_STORE_SPF , {3, "DS_CMP_STORE_SPF"}}, + {DS_OP_BYTE_WRITE , {2, "DS_BYTE_WRITE"}}, + {DS_OP_SHORT_WRITE , {2, "DS_SHORT_WRITE"}}, + {DS_OP_ADD_RET , {2, "DS_ADD_RET"}}, + {DS_OP_SUB_RET , {2, "DS_SUB_RET"}}, + {DS_OP_RSUB_RET , {2, "DS_RSUB_RET"}}, + {DS_OP_INC_RET , {2, "DS_INC_RET"}}, + {DS_OP_DEC_RET , {2, "DS_DEC_RET"}}, + {DS_OP_MIN_INT_RET , {2, "DS_MIN_INT_RET"}}, + {DS_OP_MAX_INT_RET , {2, "DS_MAX_INT_RET"}}, + {DS_OP_MIN_UINT_RET , {2, "DS_MIN_UINT_RET"}}, + {DS_OP_MAX_UINT_RET , {2, "DS_MAX_UINT_RET"}}, + {DS_OP_AND_RET , {2, "DS_AND_RET"}}, + {DS_OP_OR_RET , {2, "DS_OR_RET"}}, + {DS_OP_XOR_RET , {2, "DS_XOR_RET"}}, + {DS_OP_MSKOR_RET , {3, "DS_MSKOR_RET"}}, + {DS_OP_XCHG_RET , {2, "DS_XCHG_RET"}}, + {DS_OP_XCHG_REL_RET , {3, "DS_XCHG_REL_RET"}}, + {DS_OP_XCHG2_RET , {3, "DS_XCHG2_RET"}}, + {DS_OP_CMP_XCHG_RET , {3, "DS_CMP_XCHG_RET"}}, + {DS_OP_CMP_XCHG_SPF_RET, {3, "DS_CMP_XCHG_SPF_RET"}}, + {DS_OP_READ_RET , {1, "DS_READ_RET"}}, + {DS_OP_READ_REL_RET , {1, "DS_READ_REL_RET"}}, + {DS_OP_READ2_RET , {2, "DS_READ2_RET"}}, + {DS_OP_READWRITE_RET , {3, "DS_READWRITE_RET"}}, + {DS_OP_BYTE_READ_RET , {1, "DS_BYTE_READ_RET"}}, + {DS_OP_UBYTE_READ_RET, {1, "DS_UBYTE_READ_RET"}}, + {DS_OP_SHORT_READ_RET, {1, "DS_SHORT_READ_RET"}}, + {DS_OP_USHORT_READ_RET, {1, "DS_USHORT_READ_RET"}}, + {DS_OP_ATOMIC_ORDERED_ALLOC_RET , {3, "DS_ATOMIC_ORDERED_ALLOC_RET"}} +}; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h new file mode 100644 index 000000000..4481c49db --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_alu_defines.h @@ -0,0 +1,377 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef r600_sfn_alu_defines_h +#define r600_sfn_alu_defines_h + +#include <map> +#include <bitset> + +namespace r600 { + +/* ALU op2 instructions 17:7 top three bits always zero. 
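+ * (The OP3 opcodes near the end of the enum use a separate 5-bit field;
+ * they are stored shifted left by 6, see the "op 3 all left shift 6"
+ * comment below, so their values stay disjoint from the OP2 range.)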
*/ +enum EAluOp { + op2_add = 0, + op2_mul = 1, + op2_mul_ieee = 2, + op2_max = 3, + op2_min = 4, + op2_max_dx10 = 5, + op2_min_dx10 = 6, + op2_sete = 8, + op2_setgt = 9, + op2_setge = 10, + op2_setne = 11, + op2_sete_dx10 = 12, + op2_setgt_dx10 = 13, + op2_setge_dx10 = 14, + op2_setne_dx10 = 15, + op1_fract = 16, + op1_trunc = 17, + op1_ceil = 18, + op1_rndne = 19, + op1_floor = 20, + op2_ashr_int = 21, + op2_lshr_int = 22, + op2_lshl_int = 23, + op1_mov = 25, + op0_nop = 26, + op2_mul_64 = 27, + op1_flt64_to_flt32 = 28, + op1_flt32_to_flt64 = 29, + op2_pred_setgt_uint = 30, + op2_pred_setge_uint = 31, + op2_pred_sete = 32, + op2_pred_setgt = 33, + op2_pred_setge = 34, + op2_pred_setne = 35, + op1_pred_set_inv = 36, + op2_pred_set_pop = 37, + op0_pred_set_clr = 38, + op1_pred_set_restore = 39, + op2_pred_sete_push = 40, + op2_pred_setgt_push = 41, + op2_pred_setge_push = 42, + op2_pred_setne_push = 43, + op2_kille = 44, + op2_killgt = 45, + op2_killge = 46, + op2_killne = 47, + op2_and_int = 48, + op2_or_int = 49, + op2_xor_int = 50, + op1_not_int = 51, + op2_add_int = 52, + op2_sub_int = 53, + op2_max_int = 54, + op2_min_int = 55, + op2_max_uint = 56, + op2_min_uint = 57, + op2_sete_int = 58, + op2_setgt_int = 59, + op2_setge_int = 60, + op2_setne_int = 61, + op2_setgt_uint = 62, + op2_setge_uint = 63, + op2_killgt_uint = 64, + op2_killge_uint = 65, + op2_prede_int = 66, + op2_pred_setgt_int = 67, + op2_pred_setge_int = 68, + op2_pred_setne_int = 69, + op2_kille_int = 70, + op2_killgt_int = 71, + op2_killge_int = 72, + op2_killne_int = 73, + op2_pred_sete_push_int = 74, + op2_pred_setgt_push_int = 75, + op2_pred_setge_push_int = 76, + op2_pred_setne_push_int = 77, + op2_pred_setlt_push_int = 78, + op2_pred_setle_push_int = 79, + op1_flt_to_int = 80, + op1_bfrev_int = 81, + op2_addc_uint = 82, + op2_subb_uint = 83, + op0_group_barrier = 84, + op0_group_seq_begin = 85, + op0_group_seq_end = 86, + op2_set_mode = 87, + op1_set_cf_idx0 = 88, + op1_set_cf_idx1 = 89, + op2_set_lds_size = 90, + op1_exp_ieee = 129, + op1_log_clamped = 130, + op1_log_ieee = 131, + op1_recip_clamped = 132, + op1_recip_ff = 133, + op1_recip_ieee = 134, + op1_recipsqrt_clamped = 135, + op1_recipsqrt_ff = 136, + op1_recipsqrt_ieee1 = 137, + op1_sqrt_ieee = 138, + op1_sin = 141, + op1_cos = 142, + op2_mullo_int = 143, + op2_mulhi_int = 144, + op2_mullo_uint = 145, + op2_mulhi_uint = 146, + op1_recip_int = 147, + op1_recip_uint = 148, + op1_recip_64 = 149, + op1_recip_clamped_64 = 150, + op1_recipsqrt_64 = 151, + op1_recipsqrt_clamped_64 = 152, + op1_sqrt_64 = 153, + op1_flt_to_uint = 154, + op1_int_to_flt = 155, + op1_uint_to_flt = 156, + op2_bfm_int = 160, + op1_flt32_to_flt16 = 162, + op1_flt16_to_flt32 = 163, + op1_ubyte0_flt = 164, + op1_ubyte1_flt = 165, + op1_ubyte2_flt = 166, + op1_ubyte3_flt = 167, + op1_bcnt_int = 170, + op1_ffbh_uint = 171, + op1_ffbl_int = 172, + op1_ffbh_int = 173, + op1_flt_to_uint4 = 174, + op2_dot_ieee = 175, + op1_flt_to_int_rpi = 176, + op1_flt_to_int_floor = 177, + op2_mulhi_uint24 = 178, + op1_mbcnt_32hi_int = 179, + op1_offset_to_flt = 180, + op2_mul_uint24 = 181, + op1_bcnt_accum_prev_int = 182, + op1_mbcnt_32lo_accum_prev_int = 183, + op2_sete_64 = 184, + op2_setne_64 = 185, + op2_setgt_64 = 186, + op2_setge_64 = 187, + op2_min_64 = 188, + op2_max_64 = 189, + op2_dot4 = 190, + op2_dot4_ieee = 191, + op2_cube = 192, + op1_max4 = 193, + op1_frexp_64 = 196, + op1_ldexp_64 = 197, + op1_fract_64 = 198, + op2_pred_setgt_64 = 199, + op2_pred_sete_64 = 198, + op2_pred_setge_64 = 201, + 
OP2V_MUL_64 = 202, + op2_add_64 = 203, + op1_mova_int = 204, + op1v_flt64_to_flt32 = 205, + op1v_flt32_to_flt64 = 206, + op2_sad_accum_prev_uint = 207, + op2_dot = 208, + op2_mul_prev = 209, + op2_mul_ieee_prev = 210, + op2_add_prev = 211, + op2_muladd_prev = 212, + op2_muladd_ieee_prev = 213, + op2_interp_xy = 214, + op2_interp_zw = 215, + op2_interp_x = 216, + op2_interp_z = 217, + op0_store_flags = 218, + op1_load_store_flags = 219, + op0_lds_1a = 220, + op0_lds_1a1d = 221, + op0_lds_2a = 223, + op1_interp_load_p0 = 224, + op1_interp_load_p10 = 125, + op1_interp_load_p20 = 126, + // op 3 all left shift 6 + op3_bfe_uint = 4<< 6, + op3_bfe_int = 5<< 6, + op3_bfi_int = 6<< 6, + op3_fma = 7<< 6, + op3_cndne_64 = 9<< 6, + op3_fma_64 = 10<< 6, + op3_lerp_uint = 11<< 6, + op3_bit_align_int = 12<< 6, + op3_byte_align_int = 13<< 6, + op3_sad_accum_uint = 14<< 6, + op3_sad_accum_hi_uint = 15<< 6, + op3_muladd_uint24 = 16<< 6, + op3_lds_idx_op = 17<< 6, + op3_muladd = 20<< 6, + op3_muladd_m2 = 21<< 6, + op3_muladd_m4 = 22<< 6, + op3_muladd_d2 = 23<< 6, + op3_muladd_ieee = 24<< 6, + op3_cnde = 25<< 6, + op3_cndgt = 26<< 6, + op3_cndge = 27<< 6, + op3_cnde_int = 28<< 6, + op3_cndgt_int = 29<< 6, + op3_cndge_int = 30<< 6, + op3_mul_lit = 31<< 6 +}; + + + +using AluOpFlags=std::bitset<32>; + +struct AluOp { + static constexpr int x = 1; + static constexpr int y = 2; + static constexpr int z = 4; + static constexpr int w = 8; + static constexpr int v = 15; + static constexpr int t = 16; + static constexpr int a = 31; + + AluOp(int ns, int f, int um, const char *n): + nsrc(ns), is_float(f), unit_mask(um), name(n) + { + } + + bool can_channel(int flags) const { + return flags & unit_mask; + } + + int nsrc: 4; + int is_float:1; + int unit_mask: 5; + const char *name; +}; + +extern const std::map<EAluOp, AluOp> alu_ops; + +enum AluInlineConstants { + ALU_SRC_LDS_OQ_A = 219, + ALU_SRC_LDS_OQ_B = 220, + ALU_SRC_LDS_OQ_A_POP = 221, + ALU_SRC_LDS_OQ_B_POP = 222, + ALU_SRC_LDS_DIRECT_A = 223, + ALU_SRC_LDS_DIRECT_B = 224, + ALU_SRC_TIME_HI = 227, + ALU_SRC_TIME_LO = 228, + ALU_SRC_MASK_HI = 229, + ALU_SRC_MASK_LO = 230, + ALU_SRC_HW_WAVE_ID = 231, + ALU_SRC_SIMD_ID = 232, + ALU_SRC_SE_ID = 233, + ALU_SRC_HW_THREADGRP_ID = 234, + ALU_SRC_WAVE_ID_IN_GRP = 235, + ALU_SRC_NUM_THREADGRP_WAVES = 236, + ALU_SRC_HW_ALU_ODD = 237, + ALU_SRC_LOOP_IDX = 238, + ALU_SRC_PARAM_BASE_ADDR = 240, + ALU_SRC_NEW_PRIM_MASK = 241, + ALU_SRC_PRIM_MASK_HI = 242, + ALU_SRC_PRIM_MASK_LO = 243, + ALU_SRC_1_DBL_L = 244, + ALU_SRC_1_DBL_M = 245, + ALU_SRC_0_5_DBL_L = 246, + ALU_SRC_0_5_DBL_M = 247, + ALU_SRC_0 = 248, + ALU_SRC_1 = 249, + ALU_SRC_1_INT = 250, + ALU_SRC_M_1_INT = 251, + ALU_SRC_0_5 = 252, + ALU_SRC_LITERAL = 253, + ALU_SRC_PV = 254, + ALU_SRC_PS = 255, + ALU_SRC_PARAM_BASE = 0x1C0, + ALU_SRC_UNKNOWN +}; + +struct AluInlineConstantDescr { + bool use_chan; + const char *descr; +}; + +extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const; + +enum ESDOp { + DS_OP_ADD = 0, + DS_OP_SUB = 1, + DS_OP_RSUB = 2, + DS_OP_INC = 3, + DS_OP_DEC = 4, + DS_OP_MIN_INT = 5, + DS_OP_MAX_INT = 6, + DS_OP_MIN_UINT = 7, + DS_OP_MAX_UINT = 8, + DS_OP_AND = 9, + DS_OP_OR = 10, + DS_OP_XOR = 11, + DS_OP_MSKOR = 12, + DS_OP_WRITE = 13, + DS_OP_WRITE_REL = 14, + DS_OP_WRITE2 = 15, + DS_OP_CMP_STORE = 16, + DS_OP_CMP_STORE_SPF = 17, + DS_OP_BYTE_WRITE = 18, + DS_OP_SHORT_WRITE = 19, + DS_OP_ADD_RET = 32, + DS_OP_SUB_RET = 33, + DS_OP_RSUB_RET = 34, + DS_OP_INC_RET = 35, + DS_OP_DEC_RET = 36, + DS_OP_MIN_INT_RET = 37, + 
DS_OP_MAX_INT_RET = 38, + DS_OP_MIN_UINT_RET = 39, + DS_OP_MAX_UINT_RET = 40, + DS_OP_AND_RET = 41, + DS_OP_OR_RET = 42, + DS_OP_XOR_RET = 43, + DS_OP_MSKOR_RET = 44, + DS_OP_XCHG_RET = 45, + DS_OP_XCHG_REL_RET = 46, + DS_OP_XCHG2_RET = 47, + DS_OP_CMP_XCHG_RET = 48, + DS_OP_CMP_XCHG_SPF_RET = 49, + DS_OP_READ_RET = 50, + DS_OP_READ_REL_RET = 51, + DS_OP_READ2_RET = 52, + DS_OP_READWRITE_RET = 53, + DS_OP_BYTE_READ_RET = 54, + DS_OP_UBYTE_READ_RET = 55, + DS_OP_SHORT_READ_RET = 56, + DS_OP_USHORT_READ_RET = 57, + DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63, + DS_OP_INVALID = 64 +}; + +struct LDSOp { + int nsrc; + const char *name; +}; + +extern const std::map<ESDOp, LDSOp> lds_ops; + +} + +#endif // ALU_DEFINES_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp new file mode 100644 index 000000000..681b89d86 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.cpp @@ -0,0 +1,139 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_callstack.h" + +namespace r600 { + +CallStack::CallStack(r600_bytecode& bc): + m_bc(bc) +{ + +} + +CallStack::~CallStack() +{ +} + +int CallStack::push(unsigned type) +{ + switch (type) { + case FC_PUSH_VPM: + ++m_bc.stack.push; + break; + case FC_PUSH_WQM: + ++m_bc.stack.push_wqm; + break; + case FC_LOOP: + ++m_bc.stack.loop; + break; + default: + assert(0); + } + + return update_max_depth(type); +} + +void CallStack::pop(unsigned type) +{ + switch(type) { + case FC_PUSH_VPM: + --m_bc.stack.push; + assert(m_bc.stack.push >= 0); + break; + case FC_PUSH_WQM: + --m_bc.stack.push_wqm; + assert(m_bc.stack.push_wqm >= 0); + break; + case FC_LOOP: + --m_bc.stack.loop; + assert(m_bc.stack.loop >= 0); + break; + default: + assert(0); + break; + } +} + +int CallStack::update_max_depth(unsigned type) +{ + + r600_stack_info& stack = m_bc.stack; + int elements; + int entries; + + int entry_size = stack.entry_size; + + elements = (stack.loop + stack.push_wqm ) * entry_size; + elements += stack.push; + + switch (m_bc.chip_class) { + case R600: + case R700: + /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on + * the stack must be reserved to hold the current active/continue + * masks */ + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 2; + } + break; + case CAYMAN: + /* r9xx: any stack operation on empty stack consumes 2 additional + * elements */ + elements += 2; + break; + case EVERGREEN: + /* r8xx+: 2 extra elements are not always required, but one extra + * element must be added for each of the following cases: + * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest + * stack usage. + * (Currently we don't use ALU_ELSE_AFTER.) + * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM + * PUSH instruction executed. + * + * NOTE: it seems we also need to reserve additional element in some + * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, + * then STACK_SIZE should be 2 instead of 1 */ + if (type == FC_PUSH_VPM || stack.push > 0) { + elements += 1; + } + break; + default: + assert(0); + break; + } + + entry_size = 4; + + entries = (elements + (entry_size - 1)) / entry_size; + + if (entries > stack.max_entries) + stack.max_entries = entries; + + return elements; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h new file mode 100644 index 000000000..e1babb7c1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_callstack.h @@ -0,0 +1,47 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_CALLSTACK_HH +#define SFN_CALLSTACK_HH + +#include "gallium/drivers/r600/r600_asm.h" + +namespace r600 { + +class CallStack { +public: + CallStack(r600_bytecode& bc); + ~CallStack(); + int push(unsigned type); + void pop(unsigned type); + int update_max_depth(unsigned type); +private: + r600_bytecode& m_bc; +}; + +} + +#endif // SFN_CALLSTACK_HH diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp new file mode 100644 index 000000000..ad9a03f8f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.cpp @@ -0,0 +1,195 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_conditionaljumptracker.h" +#include "sfn_debug.h" + +#include <stack> +#include <vector> +#include <memory> +#include <iostream> + +namespace r600 { + +using std::stack; +using std::vector; +using std::shared_ptr; + +struct StackFrame { + + StackFrame(r600_bytecode_cf *s, JumpType t): + type(t), + start(s) + {} + + virtual ~StackFrame(); + + JumpType type; + r600_bytecode_cf *start; + vector<r600_bytecode_cf *> mid; + + virtual void fixup_mid(r600_bytecode_cf *cf) = 0; + virtual void fixup_pop(r600_bytecode_cf *final) = 0; +}; + +using PStackFrame = shared_ptr<StackFrame>; + +struct IfFrame : public StackFrame { + IfFrame(r600_bytecode_cf *s); + void fixup_mid(r600_bytecode_cf *cf) override; + void fixup_pop(r600_bytecode_cf *final) override; +}; + +struct LoopFrame : public StackFrame { + LoopFrame(r600_bytecode_cf *s); + void fixup_mid(r600_bytecode_cf *cf) override; + void fixup_pop(r600_bytecode_cf *final) override; +}; + +struct ConditionalJumpTrackerImpl { + ConditionalJumpTrackerImpl(); + stack<PStackFrame> m_jump_stack; + stack<PStackFrame> m_loop_stack; + int m_current_loop_stack_pos; +}; + +ConditionalJumpTrackerImpl::ConditionalJumpTrackerImpl(): + m_current_loop_stack_pos(0) +{ + +} + +ConditionalJumpTracker::~ConditionalJumpTracker() +{ + delete impl; +} + +ConditionalJumpTracker::ConditionalJumpTracker() +{ + impl = new ConditionalJumpTrackerImpl(); +} + +void ConditionalJumpTracker::push(r600_bytecode_cf *start, JumpType type) +{ + PStackFrame f; + switch (type) { + case jt_if: + f.reset(new IfFrame(start)); + break; + case jt_loop: + f.reset(new LoopFrame(start)); + impl->m_loop_stack.push(f); + break; + } + impl->m_jump_stack.push(f); +} + +bool ConditionalJumpTracker::pop(r600_bytecode_cf *final, JumpType type) +{ + if (impl->m_jump_stack.empty()) + return false; + + auto& frame = *impl->m_jump_stack.top(); + if (frame.type != type) + return false; + + frame.fixup_pop(final); + if (frame.type == jt_loop) + impl->m_loop_stack.pop(); + impl->m_jump_stack.pop(); + return true; +} + +bool ConditionalJumpTracker::add_mid(r600_bytecode_cf *source, JumpType type) +{ + if (impl->m_jump_stack.empty()) { + sfn_log << "Jump stack empty\n"; + return false; + } + + PStackFrame pframe; + if (type == jt_loop) { + if (impl->m_loop_stack.empty()) { + sfn_log << "Loop jump stack empty\n"; + return false; + } + pframe = impl->m_loop_stack.top(); + } else { + pframe = impl->m_jump_stack.top(); + } + + pframe->mid.push_back(source); + pframe->fixup_mid(source); + return true; +} + +IfFrame::IfFrame(r600_bytecode_cf *s): + StackFrame (s, jt_if) +{ +} + +StackFrame::~StackFrame() +{ +} + +void IfFrame::fixup_mid(r600_bytecode_cf *source) +{ + /* JUMP target is ELSE */ + start->cf_addr = source->id; +} + +void IfFrame::fixup_pop(r600_bytecode_cf *final) +{ + /* JUMP or ELSE target is one past last CF instruction */ + unsigned offset = final->eg_alu_extended ? 4 : 2; + auto src = mid.empty() ? 
start : mid[0]; + src->cf_addr = final->id + offset; + src->pop_count = 1; +} + +LoopFrame::LoopFrame(r600_bytecode_cf *s): + StackFrame(s, jt_loop) +{ +} + +void LoopFrame::fixup_mid(UNUSED r600_bytecode_cf *mid) +{ +} + +void LoopFrame::fixup_pop(r600_bytecode_cf *final) +{ + /* LOOP END address is past LOOP START */ + final->cf_addr = start->id + 2; + + /* LOOP START address is past LOOP END*/ + start->cf_addr = final->id + 2; + + /* BREAK and CONTINUE point at LOOP END*/ + for (auto m : mid) + m->cf_addr = final->id; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h new file mode 100644 index 000000000..76cc02a27 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_conditionaljumptracker.h @@ -0,0 +1,69 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_CONDITIONALJUMPTRACKER_H +#define SFN_CONDITIONALJUMPTRACKER_H + +#include "gallium/drivers/r600/r600_asm.h" + +namespace r600 { + +enum JumpType { + jt_loop, + jt_if +}; + +/** + Class to link the jump locations + +*/ + + +class ConditionalJumpTracker +{ +public: + ConditionalJumpTracker(); + ~ConditionalJumpTracker(); + + /* Mark the start of a loop or a if/else */ + + void push(r600_bytecode_cf *start, JumpType type); + + /* Mark the end of a loop or a if/else and fixup the jump sites */ + bool pop(r600_bytecode_cf *final, JumpType type); + + /* Add middle sites to the call frame i.e. continue, + * break inside loops, and else in if-then-else constructs. 
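+ * A typical if/else sequence, as a sketch (the CF node names here are
+ * hypothetical):
+ *   tracker.push(jump_cf, jt_if);     // at the JUMP instruction
+ *   tracker.add_mid(else_cf, jt_if);  // at the ELSE
+ *   tracker.pop(pop_cf, jt_if);       // at the POP, fixes the jump targets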
+ */ + bool add_mid(r600_bytecode_cf *source, JumpType type); + +private: + struct ConditionalJumpTrackerImpl * impl; +}; + +} + +#endif // SFN_CONDITIONALJUMPTRACKER_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp new file mode 100644 index 000000000..d993d42af --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.cpp @@ -0,0 +1,139 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "util/u_debug.h" +#include "sfn_debug.h" + +namespace r600 { + +class stderr_streambuf : public std::streambuf +{ +public: + stderr_streambuf(); +protected: + int sync(); + int overflow(int c); + std::streamsize xsputn ( const char *s, std::streamsize n ); +}; + +stderr_streambuf::stderr_streambuf() +{ + +} + +int stderr_streambuf::sync() +{ + fflush(stderr); + return 0; +} + +int stderr_streambuf::overflow(int c) +{ + fputc(c, stderr); + return 0; +} + +static const struct debug_named_value sfn_debug_options[] = { + {"instr", SfnLog::instr, "Log all consumed nir instructions"}, + {"ir", SfnLog::r600ir, "Log created R600 IR"}, + {"cc", SfnLog::cc, "Log R600 IR to assembly code creation"}, + {"noerr", SfnLog::err, "Don't log shader conversion errors"}, + {"si", SfnLog::shader_info, "Log shader info (non-zero values)"}, + {"ts", SfnLog::test_shader, "Log shaders in tests"}, + {"reg", SfnLog::reg, "Log register allocation and lookup"}, + {"io", SfnLog::io, "Log shader in and output"}, + {"ass", SfnLog::assembly, "Log IR to assembly conversion"}, + {"flow", SfnLog::flow, "Log Flow instructions"}, + {"merge", SfnLog::merge, "Log register merge operations"}, + {"nomerge", SfnLog::nomerge, "Skip register merge step"}, + {"tex", SfnLog::tex, "Log texture ops"}, + {"trans", SfnLog::trans, "Log generic translation messages"}, + DEBUG_NAMED_VALUE_END +}; + +SfnLog sfn_log; + +std::streamsize stderr_streambuf::xsputn ( const char *s, std::streamsize n ) +{ + std::streamsize i = n; + while (i--) + fputc(*s++, stderr); + return n; +} + +SfnLog::SfnLog(): + m_active_log_flags(0), + m_log_mask(0), + m_output(new stderr_streambuf()) +{ + m_log_mask = debug_get_flags_option("R600_NIR_DEBUG", sfn_debug_options, 0); + m_log_mask ^= err; +} + +SfnLog& SfnLog::operator << (SfnLog::LogFlag const l) +{ + m_active_log_flags = l; + return *this; +} + 
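+/* Usage sketch (annotation): a LogFlag streamed into sfn_log selects the
+ * mask for the items that follow, e.g. as the emitters further down do:
+ *
+ *   sfn_log << SfnLog::instr << "emit '" << *ir << "'\n";
+ *   if (sfn_log.has_debug_flag(SfnLog::reg))
+ *      sfn_log << SfnLog::reg << "lookup register\n";
+ *
+ * Messages appear only if the flag was enabled at run time through the
+ * R600_NIR_DEBUG environment variable (a comma-separated list of the
+ * option names defined in sfn_debug_options above).
+ */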
+SfnLog& SfnLog::operator << (UNUSED std::ostream & (*f)(std::ostream&)) +{ + if (m_active_log_flags & m_log_mask) + m_output << f; + return *this; +} + +SfnLog& SfnLog::operator << (nir_shader& sh) +{ + if (m_active_log_flags & m_log_mask) + nir_print_shader(&sh, stderr); + return *this; +} + +SfnLog& SfnLog::operator << (nir_instr &instr) +{ + if (m_active_log_flags & m_log_mask) + nir_print_instr(&instr, stderr); + return *this; +} + +SfnTrace::SfnTrace(SfnLog::LogFlag flag, const char *msg): + m_flag(flag), + m_msg(msg) +{ + sfn_log << m_flag << std::string(" ", 2 * m_indention++) + << "BEGIN: " << m_msg << "\n"; +} + +SfnTrace::~SfnTrace() +{ + sfn_log << m_flag << std::string(" ", 2 * m_indention--) + << "END: " << m_msg << "\n"; +} + +int SfnTrace::m_indention = 0; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h new file mode 100644 index 000000000..372379c66 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_debug.h @@ -0,0 +1,121 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_STDERR_STREAMLOG_H +#define SFN_STDERR_STREAMLOG_H + + +#include <streambuf> +#include <ostream> +#include <fstream> +#include "compiler/nir/nir.h" + +namespace r600 { +/* Implement some logging for shader-from-nir + +*/ + +class SfnLog { +public: + enum LogFlag { + instr = 1 << 0, + r600ir = 1 << 1, + cc = 1 << 2, + err = 1 << 3, + shader_info = 1 << 4, + test_shader = 1 << 5, + reg = 1 << 6, + io = 1 << 7, + assembly = 1 << 8, + flow = 1 << 9, + merge = 1 << 10, + tex = 1 << 11, + trans = 1 << 12, + all = (1 << 13) - 1, + nomerge = 1 << 16, + }; + + SfnLog(); + + /** a special handling to set the output level "inline" + \param l the level of the following messages + */ + SfnLog& operator << (LogFlag const l); + + /* general output routine; output is only given, if the log flags and the + * currently active log mask overlap + \returns a reference to this object + */ + template <class T> + SfnLog& operator << (const T& text) + { + if (m_active_log_flags & m_log_mask) + m_output << text; + + return *this; + } + + /* A funny construct to enable std::endl to work on this stream + idea of Dave Brondsema: + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8567 + */ + SfnLog& operator << (std::ostream & (*f)(std::ostream&)); + + SfnLog& operator << (nir_shader &sh); + + SfnLog& operator << (nir_instr& instr); + + int has_debug_flag(uint64_t flag) { + return (m_log_mask & flag) == flag; + } + +private: + uint64_t m_active_log_flags; + uint64_t m_log_mask; + std::ostream m_output; +}; + +class SfnTrace { +public: + SfnTrace(SfnLog::LogFlag flag, const char *msg); + ~SfnTrace(); +private: + SfnLog::LogFlag m_flag; + const char *m_msg; + static int m_indention; +}; + + +#ifndef NDEBUG +#define SFN_TRACE_FUNC(LEVEL, MSG) SfnTrace __trace(LEVEL, MSG) +#else +#define SFN_TRACE_FUNC(LEVEL, MSG) +#endif + +extern SfnLog sfn_log; + +} +#endif // SFN_STDERR_STREAMBUF_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h new file mode 100644 index 000000000..31a10ae2f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_defines.h @@ -0,0 +1,318 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_defines_h +#define sfn_defines_h + +#include "../r600_isa.h" +#include "amd_family.h" +namespace r600 { + + +enum EGWSOpCode { + cf_sema_v = 0, + cf_sema_p = 1, + cf_gws_barrier = 2, + cf_gws_init = 3, +}; + +/* CF ALU instructions [29:26], highest bit always set. */ +enum ECFAluOpCode { + cf_alu_undefined = 0, + cf_alu = CF_OP_ALU, + cf_alu_push_before = CF_OP_ALU_PUSH_BEFORE, + cf_alu_pop_after = CF_OP_ALU_POP_AFTER, + cf_alu_pop2_after = CF_OP_ALU_POP2_AFTER, + cf_alu_extended = CF_OP_ALU_EXT, + cf_alu_continue = CF_OP_ALU_CONTINUE, + cf_alu_break = CF_OP_ALU_BREAK, + cf_alu_else_after = CF_OP_ALU_ELSE_AFTER, +}; + +enum ECFAluOpCodeEG { + eg_cf_alu_undefined = 0, + eg_cf_alu = 8, + eg_cf_alu_push_before = 9, + eg_cf_alu_pop_after = 10, + eg_cf_alu_pop2_after = 11, + eg_cf_alu_extended = 12, + eg_cf_alu_continue = 13, + eg_cf_alu_break = 14, + eg_cf_alu_else_after = 15, +}; + + +enum ECFOpCode { + cf_nop = CF_OP_NOP, + cf_tc = CF_OP_TEX, + cf_vc = CF_OP_VTX, + cf_gds = CF_OP_GDS, + cf_loop_start = CF_OP_LOOP_START, + cf_loop_end = CF_OP_LOOP_END, + cf_loop_start_dx10 = CF_OP_LOOP_START_DX10, + cf_loop_start_no_al = CF_OP_LOOP_START_NO_AL, + cf_loop_continue = CF_OP_LOOP_CONTINUE, + cf_loop_break = CF_OP_LOOP_BREAK, + cf_jump = CF_OP_JUMP, + cf_push = CF_OP_PUSH, + cf_else = CF_OP_ELSE, + cf_pop = CF_OP_POP, + /* 15 - 17 reserved */ + cf_call = CF_OP_CALL, + cf_call_fs = CF_OP_CALL_FS, + cf_return = CF_OP_RET, + cf_emit_vertex = CF_OP_EMIT_VERTEX, + cf_emit_cut_vertex = CF_OP_EMIT_CUT_VERTEX, + cf_cut_vertex = CF_OP_CUT_VERTEX, + cf_kill = CF_OP_KILL, + /* 25 reserved */ + cf_wait_ack = CF_OP_WAIT_ACK, + cf_tc_ack = CF_OP_TEX_ACK, + cf_vc_ack = CF_OP_VTX_ACK, + cf_jump_table = CF_OP_JUMPTABLE, + cf_global_wave_sync = CF_OP_WAVE_SYNC, + cf_halt = CF_OP_HALT, + /* gap 32-63*/ + cf_mem_stream0_buf0 = CF_OP_MEM_STREAM0_BUF0, + cf_mem_stream0_buf1 = CF_OP_MEM_STREAM0_BUF1, + cf_mem_stream0_buf2 = CF_OP_MEM_STREAM0_BUF2, + cf_mem_stream0_buf3 = CF_OP_MEM_STREAM0_BUF3, + + cf_mem_stream1_buf0 = CF_OP_MEM_STREAM1_BUF0, + cf_mem_stream1_buf1 = CF_OP_MEM_STREAM1_BUF1, + cf_mem_stream1_buf2 = CF_OP_MEM_STREAM1_BUF2, + cf_mem_stream1_buf3 = CF_OP_MEM_STREAM1_BUF3, + + cf_mem_stream2_buf0 = CF_OP_MEM_STREAM2_BUF0, + cf_mem_stream2_buf1 = CF_OP_MEM_STREAM2_BUF1, + cf_mem_stream2_buf2 = CF_OP_MEM_STREAM2_BUF2, + cf_mem_stream2_buf3 = CF_OP_MEM_STREAM2_BUF3, + + cf_mem_stream3_buf0 = CF_OP_MEM_STREAM3_BUF0, + cf_mem_stream3_buf1 = CF_OP_MEM_STREAM3_BUF1, + cf_mem_stream3_buf2 = CF_OP_MEM_STREAM3_BUF2, + cf_mem_stream3_buf3 = CF_OP_MEM_STREAM3_BUF3, + + cf_mem_write_scratch = CF_OP_MEM_SCRATCH , + /* reserved 81 */ + cf_mem_ring = CF_OP_MEM_RING, + cf_export = CF_OP_EXPORT, + cf_export_done = CF_OP_EXPORT_DONE, + cf_mem_export = CF_OP_MEM_EXPORT, + cf_mem_rat = CF_OP_MEM_RAT, + cf_mem_rat_cacheless = CF_OP_MEM_RAT_NOCACHE, + + cf_mem_ring1 = CF_OP_MEM_RING1, + cf_mem_ring2 = CF_OP_MEM_RING2, + cf_mem_ring3 = CF_OP_MEM_RING3, + cf_mem_export_combined = CF_OP_MEM_MEM_COMBINED, + cf_mem_rat_combined_cacheless = CF_OP_MEM_RAT_COMBINED_NOCACHE + +}; + +enum ECFOpCodeEG { + eg_cf_nop = 0, + eg_cf_tc = 1, + eg_cf_vc = 2, + eg_cf_gds = 3, + eg_cf_loop_start = 4, + eg_cf_loop_end = 5, + eg_cf_loop_start_dx10 = 6, + eg_cf_loop_start_no_al = 7, + eg_cf_loop_continue = 8, + eg_cf_loop_break = 9, + eg_cf_jump = 10, + eg_cf_push = 11, + eg_cf_else = 13, + eg_cf_pop = 14, + /* 15 - 17 reserved */ + eg_cf_call = 18, + eg_cf_call_fs, + eg_cf_return, + eg_cf_emit_vertex, + eg_cf_emit_cut_vertex, + 
eg_cf_cut_vertex, + eg_cf_kill, + /* 25 reserved */ + eg_cf_wait_ack = 26, + eg_cf_tc_ack, + eg_cf_vc_ack, + eg_cf_jump_table, + eg_cf_global_wave_sync, + eg_cf_halt, + /* gap 32-63*/ + eg_cf_mem_stream0_buf0 = 64, + eg_cf_mem_stream0_buf1, + eg_cf_mem_stream0_buf2, + eg_cf_mem_stream0_buf3, + + eg_cf_mem_stream1_buf0, + eg_cf_mem_stream1_buf1, + eg_cf_mem_stream1_buf2, + eg_cf_mem_stream1_buf3, + + eg_cf_mem_stream2_buf0, + eg_cf_mem_stream2_buf1, + eg_cf_mem_stream2_buf2, + eg_cf_mem_stream2_buf3, + + eg_cf_mem_stream3_buf0, + eg_cf_mem_stream3_buf1, + eg_cf_mem_stream3_buf2, + eg_cf_mem_stream3_buf3, + + eg_cf_mem_write_scratch, + /* reserved 81 */ + eg_cf_mem_ring = 82, + eg_cf_export, + eg_cf_export_done, + eg_cf_mem_export, + eg_cf_mem_rat, + eg_cf_mem_rat_cacheless, + + eg_cf_mem_ring1, + eg_cf_mem_ring2, + eg_cf_mem_ring3, + eg_cf_mem_export_combined, + eg_cf_mem_rat_combined_cacheless +}; + + +enum EVFetchInstr { + vc_fetch = FETCH_OP_VFETCH, + vc_semantic = FETCH_OP_SEMFETCH, + vc_get_buf_resinfo = FETCH_OP_GET_BUFFER_RESINFO, + vc_read_scratch = FETCH_OP_READ_SCRATCH, + vc_unknown +}; + +enum EVFetchType { + vertex_data = 0, + instance_data = 1, + no_index_offset = 2 +}; + +enum EVTXDataFormat { + fmt_invalid = 0, + fmt_8 = 1, + fmt_4_4 = 2, + fmt_3_3_2 = 3, + fmt_reserved_4 = 4, + fmt_16 = 5, + fmt_16_float = 6, + fmt_8_8 = 7, + fmt_5_6_5 = 8, + fmt_6_5_5 = 9, + fmt_1_5_5_5 = 10, + fmt_4_4_4_4 = 11, + fmt_5_5_5_1 = 12, + fmt_32 = 13, + fmt_32_float = 14, + fmt_16_16 = 15, + fmt_16_16_float = 16, + fmt_8_24 = 17, + fmt_8_24_float = 18, + fmt_24_8 = 19, + fmt_24_8_float = 20, + fmt_10_11_11 = 21, + fmt_10_11_11_float = 22, + fmt_11_11_10 = 23, + fmt_11_11_10_float = 24, + fmt_2_10_10_10 = 25, + fmt_8_8_8_8 = 26, + fmt_10_10_10_2 = 27, + fmt_x24_8_32_float = 28, + fmt_32_32 = 29, + fmt_32_32_float = 30, + fmt_16_16_16_16 = 31, + fmt_16_16_16_16_float = 32, + fmt_reserved_33 = 33, + fmt_32_32_32_32 = 34, + fmt_32_32_32_32_float = 35, + fmt_reserved_36 = 36, + fmt_1 = 37, + fmt_1_reversed = 38, + fmt_gb_gr = 39, + fmt_bg_rg = 40, + fmt_32_as_8 = 41, + fmt_32_as_8_8 = 42, + fmt_5_9_9_9_sharedexp = 43, + fmt_8_8_8 = 44, + fmt_16_16_16 = 45, + fmt_16_16_16_float = 46, + fmt_32_32_32 = 47, + fmt_32_32_32_float = 48, + fmt_bc1 = 49, + fmt_bc2 = 50, + fmt_bc3 = 51, + fmt_bc4 = 52, + fmt_bc5 = 53, + fmt_apc0 = 54, + fmt_apc1 = 55, + fmt_apc2 = 56, + fmt_apc3 = 57, + fmt_apc4 = 58, + fmt_apc5 = 59, + fmt_apc6 = 60, + fmt_apc7 = 61, + fmt_ctx1 = 62, + fmt_reserved_63 = 63 +}; + +enum EVFetchNumFormat { + vtx_nf_norm = 0, + vtx_nf_int = 1, + vtx_nf_scaled = 2 +}; + +enum EVFetchEndianSwap { + vtx_es_none = 0, + vtx_es_8in16 = 1, + vtx_es_8in32 = 2 +}; + +enum EVFetchFlagShift { + vtx_fetch_whole_quad, + vtx_use_const_field, + vtx_format_comp_signed, + vtx_srf_mode, + vtx_buf_no_stride, + vtx_alt_const, + vtx_use_tc, + vtx_vpm, + vtx_unknown +}; + +enum EBufferIndexMode { + bim_none, + bim_zero, + bim_one, + bim_invalid +}; + +} + +#endif // DEFINES_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt new file mode 100644 index 000000000..97a9c3658 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_docu.txt @@ -0,0 +1,45 @@ +# R600 shader from NIR + +This code is an attempt to implement a NIR backend for r600. 
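+
+As a rough sketch of the approach (the helper below is hypothetical; it
+only illustrates the mapping style and is not the backend's API): each
+NIR ALU opcode is mapped onto an r600 ALU opcode from sfn_alu_defines.h,
+which EmitAluInstruction::do_emit in sfn_emitaluinstruction.cpp further
+down does with one large switch:
+
+    // hypothetical sketch; needs sfn_alu_defines.h and compiler/nir/nir.h
+    static r600::EAluOp map_simple_alu_op(nir_op op)
+    {
+       switch (op) {
+       case nir_op_mov:  return r600::op1_mov;
+       case nir_op_fadd: return r600::op2_add;
+       case nir_op_ffma: return r600::op3_muladd_ieee; // assumed mapping
+       default:          return r600::op0_nop;         // sketch-only fallback
+       }
+    }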
+
+## State
+
+Supported hardware: Evergreen and NI (tested on CEDAR and BARTS)
+
+Thanks to soft fp64, the OpenGL version is now 4.5.
+
+sb has been enabled for NIR to be able to run some more demanding workloads. The aim is
+still to get rid of it.
+
+
+The piglit gpu profile mostly passes like with TGSI; there are some fixes but also a few regressions.
+
+CTS gles:
+ - GLES2: passes like with TGSI
+ - GLES3: no regressions, a few fixes compared to TGSI
+ - GLES31:
+ * a few fixes with interpolation specifiers
+ * synchronization has some unstable tests; this might be because global synchronization is missing (in both)
+
+GL CTS:
+ * a few regressions and a hang with KHR-GL43.compute_shader.shared-max
+
+piglit:
+ * spilling arrays is broken on Barts (but it works on Cedar)
+ * a few tests fail because the register limit is exhausted, needlessly so: with a
+ better register allocator they would pass
+
+## Needed optimizations:
+
+ - Register allocator and scheduler (Could the sb allocator and scheduler
+ be ported?)
+
+ - peepholes:
+ - compare + set predicate
+
+ - copy propagation:
+ - Moves from inputs are usually not required; they could be forwarded
+ - texture operations often move additional parameters in extra registers
+ but they are actually needed in the same registers they come from and
+ could just be swizzled into the right place
+ (lower in NIR like it is done e.g. in ETNAVIV)
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
new file mode 100644
index 000000000..44e43c1b5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp
@@ -0,0 +1,985 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "sfn_emitaluinstruction.h"
+#include "sfn_debug.h"
+
+#include "gallium/drivers/r600/r600_shader.h"
+
+namespace r600 {
+
+using std::vector;
+
+EmitAluInstruction::EmitAluInstruction(ShaderFromNirProcessor& processor):
+ EmitInstruction (processor)
+{
+
+}
+
+bool EmitAluInstruction::do_emit(nir_instr* ir)
+{
+ const nir_alu_instr& instr = *nir_instr_as_alu(ir);
+
+ r600::sfn_log << SfnLog::instr << "emit '"
+ << *ir
+ << " bitsize: " << static_cast<int>(instr.dest.dest.ssa.bit_size)
+ << "' (" << __func__ << ")\n";
+
+ preload_src(instr);
+
+ switch (instr.op) {
+ case nir_op_b2b1: return emit_mov(instr);
+ case nir_op_b2b32: return emit_mov(instr);
+ case nir_op_b2f32: return emit_alu_b2f(instr);
+ case nir_op_b2i32: return emit_b2i32(instr);
+ case nir_op_b32all_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_b32all_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_b32all_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+ case nir_op_b32all_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_b32all_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_b32all_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+ case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_b32any_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_b32any_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+ case nir_op_b32any_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_b32any_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_b32any_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+ case nir_op_b32csel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_ball_fequal2: return emit_any_all_fcomp2(instr, op2_sete_dx10, true);
+ case nir_op_ball_fequal3: return emit_any_all_fcomp(instr, op2_sete, 3, true);
+ case nir_op_ball_fequal4: return emit_any_all_fcomp(instr, op2_sete, 4, true);
+ case nir_op_ball_iequal2: return emit_any_all_icomp(instr, op2_sete_int, 2, true);
+ case nir_op_ball_iequal3: return emit_any_all_icomp(instr, op2_sete_int, 3, true);
+ case nir_op_ball_iequal4: return emit_any_all_icomp(instr, op2_sete_int, 4, true);
+ case nir_op_bany_fnequal2: return emit_any_all_fcomp2(instr, op2_setne_dx10, false);
+ case nir_op_bany_fnequal3: return emit_any_all_fcomp(instr, op2_setne, 3, false);
+ case nir_op_bany_fnequal4: return emit_any_all_fcomp(instr, op2_setne, 4, false);
+ case nir_op_bany_inequal2: return emit_any_all_icomp(instr, op2_setne_int, 2, false);
+ case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
+ case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
+ case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
+ case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
+ case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
+
+ case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
+ case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
+ case nir_op_cube_r600: return emit_cube(instr);
+ case nir_op_f2b1: return emit_alu_i2orf2_b1(instr, op2_setne_dx10);
+ case nir_op_f2b32: return emit_alu_f2b32(instr);
+ case nir_op_f2i32: return
emit_alu_f2i32_or_u32(instr, op1_flt_to_int); + case nir_op_f2u32: return emit_alu_f2i32_or_u32(instr, op1_flt_to_uint); + case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs}); + case nir_op_fadd: return emit_alu_op2(instr, op2_add); + case nir_op_fceil: return emit_alu_op1(instr, op1_ceil); + case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos); + case nir_op_fcsel: return emit_alu_op3(instr, op3_cnde, {0, 2, 1}); + case nir_op_fcsel_ge: return emit_alu_op3(instr, op3_cndge, {0, 1, 2}); + case nir_op_fcsel_gt: return emit_alu_op3(instr, op3_cndgt, {0, 1, 2}); + + /* These are in the ALU instruction list, but they should be texture instructions */ + case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); + case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); + case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true); + case nir_op_fddy: return emit_tex_fdd(instr,TexInstruction::get_gradient_v, false); + case nir_op_fddy_coarse: + case nir_op_fddy_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_v, true); + case nir_op_fdot2: return emit_dot(instr, 2); + case nir_op_fdot3: return emit_dot(instr, 3); + case nir_op_fdot4: return emit_dot(instr, 4); + case nir_op_fdph: return emit_fdph(instr); + case nir_op_feq32: return emit_alu_op2(instr, op2_sete_dx10); + case nir_op_feq: return emit_alu_op2(instr, op2_sete_dx10); + case nir_op_fexp2: return emit_alu_trans_op1(instr, op1_exp_ieee); + case nir_op_ffloor: return emit_alu_op1(instr, op1_floor); + case nir_op_ffma: return emit_alu_op3(instr, op3_muladd_ieee); + case nir_op_ffract: return emit_alu_op1(instr, op1_fract); + case nir_op_fge32: return emit_alu_op2(instr, op2_setge_dx10); + case nir_op_fge: return emit_alu_op2(instr, op2_setge_dx10); + case nir_op_find_lsb: return emit_alu_op1(instr, op1_ffbl_int); + case nir_op_flog2: return emit_alu_trans_op1(instr, op1_log_clamped); + case nir_op_flt32: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); + case nir_op_flt: return emit_alu_op2(instr, op2_setgt_dx10, op2_opt_reverse); + case nir_op_fmax: return emit_alu_op2(instr, op2_max_dx10); + case nir_op_fmin: return emit_alu_op2(instr, op2_min_dx10); + case nir_op_fmul: return emit_alu_op2(instr, op2_mul_ieee); + case nir_op_fneg: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_neg}); + case nir_op_fneu32: return emit_alu_op2(instr, op2_setne_dx10); + case nir_op_fneu: return emit_alu_op2(instr, op2_setne_dx10); + case nir_op_frcp: return emit_alu_trans_op1(instr, op1_recip_ieee); + case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne); + case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1); + case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp}); + case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin); + case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee); + case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1); + case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc); + case nir_op_i2b1: return emit_alu_i2orf2_b1(instr, op2_setne_int); + case nir_op_i2b32: return emit_alu_i2orf2_b1(instr, op2_setne_int); + case nir_op_i2f32: return emit_alu_trans_op1(instr, op1_int_to_flt); + case nir_op_iadd: return emit_alu_op2_int(instr, op2_add_int); + case nir_op_iand: return emit_alu_op2_int(instr, op2_and_int); + case nir_op_ibfe: return emit_alu_op3(instr, op3_bfe_int); + case nir_op_i32csel_ge: 
return emit_alu_op3(instr, op3_cndge_int, {0, 1, 2}); + case nir_op_i32csel_gt: return emit_alu_op3(instr, op3_cndgt_int, {0, 1, 2}); + case nir_op_ieq32: return emit_alu_op2_int(instr, op2_sete_int); + case nir_op_ieq: return emit_alu_op2_int(instr, op2_sete_int); + case nir_op_ifind_msb_rev: return emit_alu_op1(instr, op1_ffbh_int); + case nir_op_ige32: return emit_alu_op2_int(instr, op2_setge_int); + case nir_op_ige: return emit_alu_op2_int(instr, op2_setge_int); + case nir_op_ilt32: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); + case nir_op_ilt: return emit_alu_op2_int(instr, op2_setgt_int, op2_opt_reverse); + case nir_op_imax: return emit_alu_op2_int(instr, op2_max_int); + case nir_op_imin: return emit_alu_op2_int(instr, op2_min_int); + case nir_op_imul: return emit_alu_trans_op2(instr, op2_mullo_int); + case nir_op_imul_high: return emit_alu_trans_op2(instr, op2_mulhi_int); + case nir_op_ine32: return emit_alu_op2_int(instr, op2_setne_int); + case nir_op_ine: return emit_alu_op2_int(instr, op2_setne_int); + case nir_op_ineg: return emit_alu_ineg(instr); + case nir_op_inot: return emit_alu_op1(instr, op1_not_int); + case nir_op_ior: return emit_alu_op2_int(instr, op2_or_int); + case nir_op_ishl: return emit_alu_op2_int(instr, op2_lshl_int); + case nir_op_ishr: return emit_alu_op2_int(instr, op2_ashr_int); + case nir_op_isub: return emit_alu_op2_int(instr, op2_sub_int); + case nir_op_ixor: return emit_alu_op2_int(instr, op2_xor_int); + case nir_op_mov:return emit_mov(instr); + case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(instr); + case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(instr); + case nir_op_slt: return emit_alu_op2(instr, op2_setgt, op2_opt_reverse); + case nir_op_sge: return emit_alu_op2(instr, op2_setge); + case nir_op_u2f32: return emit_alu_trans_op1(instr, op1_uint_to_flt); + case nir_op_ubfe: return emit_alu_op3(instr, op3_bfe_uint); + case nir_op_ufind_msb_rev: return emit_alu_op1(instr, op1_ffbh_uint); + case nir_op_uge32: return emit_alu_op2_int(instr, op2_setge_uint); + case nir_op_uge: return emit_alu_op2_int(instr, op2_setge_uint); + case nir_op_ult32: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); + case nir_op_ult: return emit_alu_op2_int(instr, op2_setgt_uint, op2_opt_reverse); + case nir_op_umad24: return emit_alu_op3(instr, op3_muladd_uint24, {0, 1, 2}); + case nir_op_umax: return emit_alu_op2_int(instr, op2_max_uint); + case nir_op_umin: return emit_alu_op2_int(instr, op2_min_uint); + case nir_op_umul24: return emit_alu_op2(instr, op2_mul_uint24); + case nir_op_umul_high: return emit_alu_trans_op2(instr, op2_mulhi_uint); + case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(instr, 0); + case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(instr, 1); + case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(instr); + case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(instr); + case nir_op_ushr: return emit_alu_op2_int(instr, op2_lshr_int); + case nir_op_vec2: return emit_create_vec(instr, 2); + case nir_op_vec3: return emit_create_vec(instr, 3); + case nir_op_vec4: return emit_create_vec(instr, 4); + default: + return false; + } +} + +void EmitAluInstruction::preload_src(const nir_alu_instr& instr) +{ + const nir_op_info *op_info = &nir_op_infos[instr.op]; + assert(op_info->num_inputs <= 4); + + unsigned nsrc_comp = num_src_comp(instr); + sfn_log << SfnLog::reg << "Preload:\n"; + for (unsigned i = 0; i < 
op_info->num_inputs; ++i) { + for (unsigned c = 0; c < nsrc_comp; ++c) { + m_src[i][c] = from_nir(instr.src[i], c); + sfn_log << SfnLog::reg << " " << *m_src[i][c]; + + } + sfn_log << SfnLog::reg << "\n"; + } + if (instr.op == nir_op_fdph) { + m_src[1][3] = from_nir(instr.src[1], 3); + sfn_log << SfnLog::reg << " extra:" << *m_src[1][3] << "\n"; + } + + split_constants(instr, nsrc_comp); +} + +unsigned EmitAluInstruction::num_src_comp(const nir_alu_instr& instr) +{ + switch (instr.op) { + case nir_op_fdot2: + case nir_op_bany_inequal2: + case nir_op_ball_iequal2: + case nir_op_bany_fnequal2: + case nir_op_ball_fequal2: + case nir_op_b32any_inequal2: + case nir_op_b32all_iequal2: + case nir_op_b32any_fnequal2: + case nir_op_b32all_fequal2: + case nir_op_unpack_64_2x32_split_y: + return 2; + + case nir_op_fdot3: + case nir_op_bany_inequal3: + case nir_op_ball_iequal3: + case nir_op_bany_fnequal3: + case nir_op_ball_fequal3: + case nir_op_b32any_inequal3: + case nir_op_b32all_iequal3: + case nir_op_b32any_fnequal3: + case nir_op_b32all_fequal3: + case nir_op_cube_r600: + return 3; + + case nir_op_fdot4: + case nir_op_fdph: + case nir_op_bany_inequal4: + case nir_op_ball_iequal4: + case nir_op_bany_fnequal4: + case nir_op_ball_fequal4: + case nir_op_b32any_inequal4: + case nir_op_b32all_iequal4: + case nir_op_b32any_fnequal4: + case nir_op_b32all_fequal4: + return 4; + + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + return 1; + + default: + return nir_dest_num_components(instr.dest.dest); + + } +} + +bool EmitAluInstruction::emit_cube(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + const uint16_t src0_chan[4] = {2, 2, 0, 1}; + const uint16_t src1_chan[4] = {1, 0, 2, 2}; + + for (int i = 0; i < 4; ++i) { + ir = new AluInstruction(op2_cube, from_nir(instr.dest, i), + from_nir(instr.src[0], src0_chan[i]), + from_nir(instr.src[0], src1_chan[i]), {alu_write}); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +void EmitAluInstruction::split_constants(const nir_alu_instr& instr, unsigned nsrc_comp) +{ + const nir_op_info *op_info = &nir_op_infos[instr.op]; + if (op_info->num_inputs < 2) + return; + + int nconst = 0; + std::array<const UniformValue *,4> c; + std::array<int,4> idx; + for (unsigned i = 0; i < op_info->num_inputs; ++i) { + PValue& src = m_src[i][0]; + assert(src); + sfn_log << SfnLog::reg << "Split test " << *src; + + if (src->type() == Value::kconst) { + c[nconst] = static_cast<const UniformValue *>(src.get()); + idx[nconst++] = i; + sfn_log << SfnLog::reg << " is constant " << i; + } + sfn_log << SfnLog::reg << "\n"; + } + + if (nconst < 2) + return; + + unsigned sel = c[0]->sel(); + unsigned kcache = c[0]->kcache_bank(); + sfn_log << SfnLog::reg << "split " << nconst << " constants, sel[0] = " << sel; ; + + for (int i = 1; i < nconst; ++i) { + sfn_log << "sel[" << i << "] = " << c[i]->sel() << "\n"; + if (c[i]->sel() != sel || c[i]->kcache_bank() != kcache) { + AluInstruction *ir = nullptr; + auto v = get_temp_vec4(); + for (unsigned k = 0; k < nsrc_comp; ++k) { + ir = new AluInstruction(op1_mov, v[k], m_src[idx[i]][k], {write}); + emit_instruction(ir); + m_src[idx[i]][k] = v[k]; + } + make_last(ir); + } + } +} + +bool EmitAluInstruction::emit_alu_inot(const nir_alu_instr& instr) +{ + if (instr.src[0].negate || instr.src[0].abs) { + std::cerr << "source modifiers not supported with int ops\n"; + return false; + } + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir 
= new AluInstruction(op1_not_int, from_nir(instr.dest, i),
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode,
+ const AluOpFlags& flags)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][i], write);
+
+ if (flags.test(alu_src0_abs) || instr.src[0].abs)
+ ir->set_flag(alu_src0_abs);
+
+ if (instr.src[0].negate ^ flags.test(alu_src0_neg))
+ ir->set_flag(alu_src0_neg);
+
+ if (flags.test(alu_dst_clamp) || instr.dest.saturate)
+ ir->set_flag(alu_dst_clamp);
+
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
+{
+ /* If the op is a plain move between SSA values we can just forward
+ * the register reference to the original register */
+ if (instr.dest.dest.is_ssa && instr.src[0].src.is_ssa &&
+ !instr.src[0].abs && !instr.src[0].negate && !instr.dest.saturate) {
+ bool result = true;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ result &= inject_register(instr.dest.dest.ssa.index, i,
+ m_src[0][i], true);
+ }
+ }
+ return result;
+ } else {
+ return emit_alu_op1(instr, op1_mov);
+ }
+}
+
+bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
+ bool absolute)
+{
+ AluInstruction *ir = nullptr;
+ std::set<int> src_idx;
+
+ if (get_chip_class() == CAYMAN) {
+ int last_slot = (instr.dest.write_mask & 0x8) ? 4 : 3;
+ for (int i = 0; i < last_slot; ++i) {
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][0], instr.dest.write_mask & (1 << i) ? write : empty);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+
+ if (i == (last_slot - 1)) ir->set_flag(alu_last_instr);
+
+ emit_instruction(ir);
+ }
+ } else {
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[0][i], last_write);
+ if (absolute || instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ }
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op)
+{
+ AluInstruction *ir = nullptr;
+ std::array<PValue, 4> v;
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ v[i] = from_nir(instr.dest, i);
+ ir = new AluInstruction(op1_trunc, v[i], m_src[0][i], {alu_write});
+ if (instr.src[0].abs) ir->set_flag(alu_src0_abs);
+ if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
+ emit_instruction(ir);
+ }
+ make_last(ir);
+
+ for (int i = 0; i < 4; ++i) {
+ if (!(instr.dest.write_mask & (1 << i)))
+ continue;
+ ir = new AluInstruction(op, v[i], v[i], {alu_write});
+ emit_instruction(ir);
+ if (op == op1_flt_to_uint)
+ make_last(ir);
+ }
+ make_last(ir);
+
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_f2b32(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_setne_dx10, from_nir(instr.dest, i),
+ m_src[0][i], literal(0.0f), write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+} + +bool EmitAluInstruction::emit_b2i32(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + + ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), + m_src[0][i], Value::one_i, write); + emit_instruction(ir); + } + make_last(ir); + + return true; +} + +bool EmitAluInstruction::emit_pack_64_2x32_split(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 2; ++i) { + if (!(instr.dest.write_mask & (1 << i))) + continue; + ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), + m_src[0][i], write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp) +{ + emit_instruction(new AluInstruction(op1_mov, from_nir(instr.dest, 0), + m_src[0][comp], last_write)); + return true; +} + +bool EmitAluInstruction::emit_create_vec(const nir_alu_instr& instr, unsigned nc) +{ + AluInstruction *ir = nullptr; + std::set<int> src_slot; + for(unsigned i = 0; i < nc; ++i) { + if (instr.dest.write_mask & (1 << i)){ + auto src = m_src[i][0]; + ir = new AluInstruction(op1_mov, from_nir(instr.dest, i), src, write); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + + // FIXME: This is a rather crude approach to fix the problem that + // r600 can't read from four different slots of the same component + // here we check only for the register index + if (src->type() == Value::gpr) + src_slot.insert(src->sel()); + if (src_slot.size() >= 3) { + src_slot.clear(); + ir->set_flag(alu_last_instr); + } + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_dot(const nir_alu_instr& instr, int n) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < n ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + m_src[0][i], m_src[1][i], + instr.dest.write_mask & (1 << i) ? write : empty); + + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + for (int i = n; i < 4 ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + Value::zero, Value::zero, + instr.dest.write_mask & (1 << i) ? write : empty); + emit_instruction(ir); + } + + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_fdph(const nir_alu_instr& instr) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 3 ; ++i) { + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, i), + m_src[0][i], m_src[1][i], + instr.dest.write_mask & (1 << i) ? write : empty); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + + ir = new AluInstruction(op2_dot4_ieee, from_nir(instr.dest, 3), Value::one_f, + m_src[1][3], (instr.dest.write_mask) & (1 << 3) ? 
write : empty); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + emit_instruction(ir); + + ir->set_flag(alu_last_instr); + return true; + +} + +bool EmitAluInstruction::emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)) { + ir = new AluInstruction(op, from_nir(instr.dest, i), + m_src[0][i], Value::zero, + write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_alu_b2f(const nir_alu_instr& instr) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(op2_and_int, from_nir(instr.dest, i), + m_src[0][i], Value::one_f, write); + if (instr.src[0].negate) ir->set_flag(alu_src0_neg); + if (instr.src[0].abs) ir->set_flag(alu_src0_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; +} + +bool EmitAluInstruction::emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) +{ + + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + EAluOp combine = all ? op2_and_int : op2_or_int; + + /* For integers we can not use the modifiers, so this needs some emulation */ + /* Should actually be lowered with NIR */ + if (instr.src[0].negate == instr.src[1].negate && + instr.src[0].abs == instr.src[1].abs) { + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } else { + std::cerr << "Negate in iequal/inequal not (yet) supported\n"; + return false; + } + + for (unsigned i = 0; i < nc/2 ; ++i) { + ir = new AluInstruction(combine, v[2 * i], v[2 * i], v[2 * i + 1], write); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + if (nc > 2) { + ir = new AluInstruction(combine, v[0], v[0], v[2], last_write); + emit_instruction(ir); + } + + return true; +} + +bool EmitAluInstruction::emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all) +{ + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); + + if (instr.src[0].abs) + ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) + ir->set_flag(alu_src0_neg); + + if (instr.src[1].abs) + ir->set_flag(alu_src1_abs); + if (instr.src[1].negate) + ir->set_flag(alu_src1_neg); + + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + for (unsigned i = 0; i < nc ; ++i) { + ir = new AluInstruction(op1_max4, v[i], v[i], write); + if (all) ir->set_flag(alu_src0_neg); + emit_instruction(ir); + } + + for (unsigned i = nc; i < 4 ; ++i) { + ir = new AluInstruction(op1_max4, v[i], + all ? Value::one_f : Value::zero, write); + if (all) + ir->set_flag(alu_src0_neg); + + emit_instruction(ir); + } + + ir->set_flag(alu_last_instr); + + if (all) + op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10; + else + op = (op == op2_sete) ? 
op2_setne_dx10: op2_sete_dx10; + + ir = new AluInstruction(op, v[0], v[0], Value::one_f, last_write); + if (all) + ir->set_flag(alu_src1_neg); + emit_instruction(ir); + + return true; +} + +bool EmitAluInstruction::emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all) +{ + AluInstruction *ir = nullptr; + PValue v[4]; // this might need some additional temp register creation + for (unsigned i = 0; i < 4 ; ++i) + v[i] = from_nir(instr.dest, i); + + for (unsigned i = 0; i < 2 ; ++i) { + ir = new AluInstruction(op, v[i], m_src[0][i], m_src[1][i], write); + if (instr.src[0].abs) + ir->set_flag(alu_src0_abs); + if (instr.src[0].negate) + ir->set_flag(alu_src0_neg); + + if (instr.src[1].abs) + ir->set_flag(alu_src1_abs); + if (instr.src[1].negate) + ir->set_flag(alu_src1_neg); + + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + + op = (op == op2_setne_dx10) ? op2_or_int: op2_and_int; + ir = new AluInstruction(op, v[0], v[0], v[1], last_write); + emit_instruction(ir); + + return true; +} + +bool EmitAluInstruction::emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode) +{ + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + AluInstruction *ir = nullptr; + + if (get_chip_class() == CAYMAN) { + int lasti = util_last_bit(instr.dest.write_mask); + for (int k = 0; k < lasti ; ++k) { + if (instr.dest.write_mask & (1 << k)) { + + for (int i = 0; i < 4; i++) { + ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][k], m_src[0][k], (i == k) ? write : empty); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + if (i == 3) ir->set_flag(alu_last_instr); + emit_instruction(ir); + } + } + } + } else { + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), m_src[0][i], m_src[1][i], last_write); + if (src0.negate) ir->set_flag(alu_src0_neg); + if (src0.abs) ir->set_flag(alu_src0_abs); + if (src1.negate) ir->set_flag(alu_src1_neg); + if (src1.abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) ir->set_flag(alu_dst_clamp); + emit_instruction(ir); + } + } + } + return true; +} + +bool EmitAluInstruction::emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts opts) +{ + + const nir_alu_src& src0 = instr.src[0]; + const nir_alu_src& src1 = instr.src[1]; + + if (src0.negate || src1.negate || + src0.abs || src1.abs) { + std::cerr << "R600: don't support modifiers with integer operations"; + return false; + } + return emit_alu_op2(instr, opcode, opts); +} + +bool EmitAluInstruction::emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops) +{ + const nir_alu_src *src0 = &instr.src[0]; + const nir_alu_src *src1 = &instr.src[1]; + + int idx0 = 0; + int idx1 = 1; + if (ops & op2_opt_reverse) { + std::swap(src0, src1); + std::swap(idx0, idx1); + } + + bool src1_negate = (ops & op2_opt_neg_src1) ^ src1->negate; + + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + if (instr.dest.write_mask & (1 << i)){ + ir = new AluInstruction(opcode, from_nir(instr.dest, i), + m_src[idx0][i], m_src[idx1][i], write); + + if (src0->negate) ir->set_flag(alu_src0_neg); + if (src0->abs) ir->set_flag(alu_src0_abs); + if (src1_negate) ir->set_flag(alu_src1_neg); + if (src1->abs) ir->set_flag(alu_src1_abs); + if (instr.dest.saturate) 
ir->set_flag(alu_dst_clamp);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode,
+ std::array<uint8_t, 3> reorder)
+{
+ const nir_alu_src *src[3];
+ src[0] = &instr.src[reorder[0]];
+ src[1] = &instr.src[reorder[1]];
+ src[2] = &instr.src[reorder[2]];
+
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(opcode, from_nir(instr.dest, i),
+ m_src[reorder[0]][i],
+ m_src[reorder[1]][i],
+ m_src[reorder[2]][i],
+ write);
+
+ if (src[0]->negate) ir->set_flag(alu_src0_neg);
+ if (src[1]->negate) ir->set_flag(alu_src1_neg);
+ if (src[2]->negate) ir->set_flag(alu_src2_neg);
+
+ if (instr.dest.saturate) ir->set_flag(alu_dst_clamp);
+ ir->set_flag(alu_write);
+ emit_instruction(ir);
+ }
+ }
+ make_last(ir);
+ return true;
+}
+
+bool EmitAluInstruction::emit_alu_ineg(const nir_alu_instr& instr)
+{
+ AluInstruction *ir = nullptr;
+ for (int i = 0; i < 4 ; ++i) {
+ if (instr.dest.write_mask & (1 << i)){
+ ir = new AluInstruction(op2_sub_int, from_nir(instr.dest, i), Value::zero,
+ m_src[0][i], write);
+ emit_instruction(ir);
+ }
+ }
+ if (ir)
+ ir->set_flag(alu_last_instr);
+
+ return true;
+}
+
+static const char swz[] = "xyzw01?_";
+
+void EmitAluInstruction::split_alu_modifiers(const nir_alu_src& src,
+ const GPRVector::Values& v, GPRVector::Values& out, int ncomp)
+{
+
+ AluInstruction *alu = nullptr;
+ for (int i = 0; i < ncomp; ++i) {
+ alu = new AluInstruction(op1_mov, out[i], v[i], {alu_write});
+ if (src.abs)
+ alu->set_flag(alu_src0_abs);
+ if (src.negate)
+ alu->set_flag(alu_src0_neg);
+ emit_instruction(alu);
+ }
+ make_last(alu);
+}
+
+bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op,
+ bool fine)
+{
+
+ GPRVector::Values v;
+ std::array<int, 4> writemask = {0,1,2,3};
+
+ int ncomp = nir_dest_num_components(instr.dest.dest);
+ GPRVector::Swizzle src_swz = {7,7,7,7};
+ for (auto i = 0; i < ncomp; ++i)
+ src_swz[i] = instr.src[0].swizzle[i];
+
+ auto src = vec_from_nir_with_fetch_constant(instr.src[0].src, (1 << ncomp) - 1, src_swz);
+
+ if (instr.src[0].abs || instr.src[0].negate) {
+ GPRVector tmp = get_temp_vec4();
+ split_alu_modifiers(instr.src[0], src.values(), tmp.values(), ncomp);
+ src = tmp;
+ }
+
+ for (int i = 0; i < 4; ++i) {
+ writemask[i] = (instr.dest.write_mask & (1 << i)) ? i : 7;
+ v[i] = from_nir(instr.dest, (i < ncomp) ? i : 0);
+ }
+
+ /* This is querying the derivatives of the output fb, so we would either need
+ * access to the neighboring pixels or to the framebuffer.
Neither is currently + * implemented */ + GPRVector dst(v); + + auto tex = new TexInstruction(op, dst, src, 0, R600_MAX_CONST_BUFFERS, PValue()); + tex->set_dest_swizzle(writemask); + + if (fine) + tex->set_flag(TexInstruction::grad_fine); + + emit_instruction(tex); + + return true; +} + +bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr) +{ + auto tmp = get_temp_register(); + emit_instruction(op2_lshr_int, tmp, + {m_src[0][0], PValue(new LiteralValue(16))}, + {alu_write, alu_last_instr}); + + emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), + {tmp}, {alu_write, alu_last_instr}); + + return true; +} + +bool EmitAluInstruction::emit_unpack_32_2x16_split_x(const nir_alu_instr& instr) +{ + emit_instruction(op1_flt16_to_flt32, from_nir(instr.dest, 0), + {m_src[0][0]},{alu_write, alu_last_instr}); + return true; +} + +bool EmitAluInstruction::emit_pack_32_2x16_split(const nir_alu_instr& instr) +{ + PValue x = get_temp_register(); + PValue y = get_temp_register(); + + emit_instruction(op1_flt32_to_flt16, x,{m_src[0][0]},{alu_write}); + emit_instruction(op1_flt32_to_flt16, y,{m_src[1][0]},{alu_write, alu_last_instr}); + + emit_instruction(op2_lshl_int, y, {y, PValue(new LiteralValue(16))},{alu_write, alu_last_instr}); + + emit_instruction(op2_or_int, {from_nir(instr.dest, 0)} , {x, y},{alu_write, alu_last_instr}); + + return true; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h new file mode 100644 index 000000000..f56352f4e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.h @@ -0,0 +1,115 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EMITALUINSTRUCTION_H +#define SFN_EMITALUINSTRUCTION_H + +#include "sfn_emitinstruction.h" + +#include "sfn_alu_defines.h" +#include "sfn_instruction_alu.h" +#include "sfn_instruction_tex.h" + +namespace r600 { + + +class EmitAluInstruction : public EmitInstruction +{ +public: + EmitAluInstruction(ShaderFromNirProcessor& processor); + +private: + + enum AluOp2Opts { + op2_opt_none = 0, + op2_opt_reverse = 1, + op2_opt_neg_src1 = 1 << 1 + }; + + bool do_emit(nir_instr* instr) override; + + void split_constants(const nir_alu_instr& instr, unsigned nsrc_comp); + + bool emit_mov(const nir_alu_instr& instr); + bool emit_alu_op1(const nir_alu_instr& instr, EAluOp opcode, const AluOpFlags &flags = 0); + bool emit_alu_op2(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + + bool emit_alu_trans_op2(const nir_alu_instr& instr, EAluOp opcode); + + bool emit_alu_inot(const nir_alu_instr& instr); + bool emit_alu_ineg(const nir_alu_instr& instr); + bool emit_alu_op2_int(const nir_alu_instr& instr, EAluOp opcode, AluOp2Opts ops = op2_opt_none); + + bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2}); + bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false); + + bool emit_alu_b2f(const nir_alu_instr& instr); + bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op); + bool emit_dot(const nir_alu_instr& instr, int n); + bool emit_create_vec(const nir_alu_instr& instr, unsigned nc); + bool emit_any_all_icomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); + bool emit_any_iequal(const nir_alu_instr& instr, unsigned nc); + + bool emit_any_all_fcomp(const nir_alu_instr& instr, EAluOp op, unsigned nc, bool all); + bool emit_any_all_fcomp2(const nir_alu_instr& instr, EAluOp op, bool all); + + bool emit_fdph(const nir_alu_instr &instr); + bool emit_discard_if(const nir_intrinsic_instr *instr); + + bool emit_alu_f2b32(const nir_alu_instr& instr); + bool emit_b2i32(const nir_alu_instr& instr); + bool emit_alu_f2i32_or_u32(const nir_alu_instr& instr, EAluOp op); + bool emit_pack_64_2x32_split(const nir_alu_instr& instr); + bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp); + + bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine); + bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr); + bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr); + bool emit_pack_32_2x16_split(const nir_alu_instr& instr); + + bool emit_cube(const nir_alu_instr& instr); +private: + void make_last(AluInstruction *ir) const; + void split_alu_modifiers(const nir_alu_src &src, const GPRVector::Values& v, + GPRVector::Values& out, int ncomp); + + void preload_src(const nir_alu_instr& instr); + unsigned num_src_comp(const nir_alu_instr& instr); + + using vreg = std::array<PValue, 4>; + + std::array<PValue, 4> m_src[4]; +}; + +inline void EmitAluInstruction::make_last(AluInstruction *ir) const +{ + if (ir) + ir->set_flag(alu_last_instr); +} + +} + +#endif // SFN_EMITALUINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp new file mode 100644 index 000000000..9a75cd18b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.cpp @@ -0,0 +1,164 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_emitinstruction.h" + +#include "sfn_shader_base.h" + +namespace r600 { + +EmitInstruction::EmitInstruction(ShaderFromNirProcessor& processor): + m_proc(processor) +{ + +} + +EmitInstruction::~EmitInstruction() +{ +} + +bool EmitInstruction::emit(nir_instr* instr) +{ + return do_emit(instr); +} + +PValue EmitInstruction::from_nir(const nir_src& v, unsigned component, unsigned swizzled) +{ + return m_proc.from_nir(v, component, swizzled); +} + +PValue EmitInstruction::from_nir(const nir_alu_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_tex_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_alu_dest& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_dest& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +PValue EmitInstruction::from_nir(const nir_src& v, unsigned component) +{ + return m_proc.from_nir(v, component); +} + +void EmitInstruction::emit_instruction(Instruction *ir) +{ + return m_proc.emit_instruction(ir); +} + +void EmitInstruction::emit_instruction(AluInstruction *ir) +{ + return m_proc.emit_instruction(ir); +} + +bool EmitInstruction::emit_instruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags) +{ + return m_proc.emit_instruction(opcode, dest,src0, m_flags); +} + +const nir_variable * +EmitInstruction::get_deref_location(const nir_src& v) const +{ + return m_proc.get_deref_location(v); +} + +PValue EmitInstruction::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) +{ + return m_proc.from_nir_with_fetch_constant(src, component, channel); +} + +GPRVector EmitInstruction::vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle, bool match) +{ + return m_proc.vec_from_nir_with_fetch_constant(src, mask, swizzle, match); +} + +PGPRValue EmitInstruction::get_temp_register(int channel) +{ + return m_proc.get_temp_register(channel); +} + +GPRVector EmitInstruction::get_temp_vec4(const GPRVector::Swizzle& swizzle) +{ + return m_proc.get_temp_vec4(swizzle); +} + +PValue EmitInstruction::create_register_from_nir_src(const nir_src& src, unsigned swizzle) +{ + return m_proc.create_register_from_nir_src(src, swizzle); +} + +enum chip_class 
EmitInstruction::get_chip_class(void) const +{ + return m_proc.get_chip_class(); +} + +PValue EmitInstruction::literal(uint32_t value) +{ + return m_proc.literal(value); +} + +GPRVector EmitInstruction::vec_from_nir(const nir_dest& dst, int num_components) +{ + return m_proc.vec_from_nir(dst, num_components); +} + +bool EmitInstruction::inject_register(unsigned sel, unsigned swizzle, + const PValue& reg, bool map) +{ + return m_proc.inject_register(sel, swizzle, reg, map); +} + +int EmitInstruction::remap_atomic_base(int base) +{ + return m_proc.remap_atomic_base(base); +} + +void EmitInstruction::set_has_txs_cube_array_comp() +{ + m_proc.sh_info().has_txq_cube_array_z_comp = 1; +} + +const std::set<AluModifiers> EmitInstruction::empty = {}; +const std::set<AluModifiers> EmitInstruction::write = {alu_write}; +const std::set<AluModifiers> EmitInstruction::last_write = {alu_write, alu_last_instr}; +const std::set<AluModifiers> EmitInstruction::last = {alu_last_instr}; + +} + diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h new file mode 100644 index 000000000..09a6489b0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitinstruction.h @@ -0,0 +1,101 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef EMITINSTRUCTION_H +#define EMITINSTRUCTION_H + +#include "compiler/nir/nir.h" +#include "sfn_defines.h" +#include "sfn_value.h" +#include "sfn_instruction_alu.h" + +namespace r600 { + +class ShaderFromNirProcessor; + +class EmitInstruction +{ +public: + EmitInstruction(ShaderFromNirProcessor& processor); + virtual ~EmitInstruction(); + bool emit(nir_instr* instr); + + static const std::set<AluModifiers> empty; + static const std::set<AluModifiers> write; + static const std::set<AluModifiers> last_write; + static const std::set<AluModifiers> last; + +protected: + virtual bool do_emit(nir_instr* instr) = 0; + + // forwards from ValuePool + PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled); + PValue from_nir(const nir_src& v, unsigned component); + PValue from_nir(const nir_alu_src& v, unsigned component); + PValue from_nir(const nir_tex_src& v, unsigned component); + PValue from_nir(const nir_alu_dest& v, unsigned component); + PValue from_nir(const nir_dest& v, unsigned component); + + PValue create_register_from_nir_src(const nir_src& src, unsigned comp); + + PGPRValue get_temp_register(int channel = -1); + GPRVector get_temp_vec4(const GPRVector::Swizzle& swizzle = {0,1,2,3}); + + // forwards from ShaderFromNirProcessor + void emit_instruction(Instruction *ir); + void emit_instruction(AluInstruction *ir); + bool emit_instruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags); + + PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1); + GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle, bool match = false); + + const nir_variable *get_deref_location(const nir_src& v) const; + + enum chip_class get_chip_class(void) const; + + PValue literal(uint32_t value); + + GPRVector vec_from_nir(const nir_dest& dst, int num_components); + + bool inject_register(unsigned sel, unsigned swizzle, + const PValue& reg, bool map); + + int remap_atomic_base(int base); + + void set_has_txs_cube_array_comp(); +private: + + ShaderFromNirProcessor& m_proc; +}; + +} + + + +#endif // EMITINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp new file mode 100644 index 000000000..9f0d0b605 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp @@ -0,0 +1,644 @@ +#include "sfn_emitssboinstruction.h" + +#include "sfn_instruction_fetch.h" +#include "sfn_instruction_gds.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_tex.h" +#include "../r600_pipe.h" +#include "../r600_asm.h" + +namespace r600 { + +#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) + +EmitSSBOInstruction::EmitSSBOInstruction(ShaderFromNirProcessor& processor): + EmitInstruction(processor), + m_require_rat_return_address(false), + m_ssbo_image_offset(0) +{ +} + +void EmitSSBOInstruction::set_ssbo_offset(int offset) +{ + m_ssbo_image_offset = offset; +} + + +void EmitSSBOInstruction::set_require_rat_return_address() +{ + m_require_rat_return_address = true; +} + +bool +EmitSSBOInstruction::load_rat_return_address() +{ + if (m_require_rat_return_address) { + m_rat_return_address = get_temp_vec4(); + emit_instruction(new AluInstruction(op1_mbcnt_32lo_accum_prev_int, m_rat_return_address.reg_i(0), literal(-1), {alu_write})); + emit_instruction(new AluInstruction(op1_mbcnt_32hi_int, 
m_rat_return_address.reg_i(1), literal(-1), {alu_write})); + emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(2), PValue(new InlineConstValue(ALU_SRC_SE_ID, 0)), + literal(256), PValue(new InlineConstValue(ALU_SRC_HW_WAVE_ID, 0)), {alu_write, alu_last_instr})); + emit_instruction(new AluInstruction(op3_muladd_uint24, m_rat_return_address.reg_i(1), + m_rat_return_address.reg_i(2), literal(0x40), m_rat_return_address.reg_i(0), + {alu_write, alu_last_instr})); + m_require_rat_return_address = false; + } + return true; +} + + +bool EmitSSBOInstruction::do_emit(nir_instr* instr) +{ + const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_atomic_counter_add: + case nir_intrinsic_atomic_counter_and: + case nir_intrinsic_atomic_counter_exchange: + case nir_intrinsic_atomic_counter_max: + case nir_intrinsic_atomic_counter_min: + case nir_intrinsic_atomic_counter_or: + case nir_intrinsic_atomic_counter_xor: + case nir_intrinsic_atomic_counter_comp_swap: + return emit_atomic(intr); + case nir_intrinsic_atomic_counter_read: + case nir_intrinsic_atomic_counter_post_dec: + return emit_unary_atomic(intr); + case nir_intrinsic_atomic_counter_inc: + return emit_atomic_inc(intr); + case nir_intrinsic_atomic_counter_pre_dec: + return emit_atomic_pre_dec(intr); + case nir_intrinsic_load_ssbo: + return emit_load_ssbo(intr); + case nir_intrinsic_store_ssbo: + return emit_store_ssbo(intr); + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_exchange: + return emit_ssbo_atomic_op(intr); + case nir_intrinsic_image_store: + return emit_image_store(intr); + case nir_intrinsic_image_load: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_image_atomic_imax: + return emit_image_load(intr); + case nir_intrinsic_image_size: + return emit_image_size(intr); + case nir_intrinsic_get_ssbo_size: + return emit_buffer_size(intr); + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_group_memory_barrier: + return make_stores_ack_and_waitack(); + default: + return false; + } +} + +bool EmitSSBOInstruction::emit_atomic(const nir_intrinsic_instr* instr) +{ + ESDOp op = get_opcode(instr->intrinsic); + + if (DS_OP_INVALID == op) + return false; + + GPRVector dest = make_dest(instr); + + int base = remap_atomic_base(nir_intrinsic_base(instr)); + + PValue uav_id = from_nir(instr->src[0], 0); + + PValue value = from_nir_with_fetch_constant(instr->src[1], 0); + + GDSInstr *ir = nullptr; + if (instr->intrinsic == nir_intrinsic_atomic_counter_comp_swap) { + PValue value2 = from_nir_with_fetch_constant(instr->src[2], 0); + ir = new GDSInstr(op, dest, value, value2, uav_id, base); + } else { + ir = new GDSInstr(op, dest, value, uav_id, base); + } + + emit_instruction(ir); + return true; +} + +bool 
EmitSSBOInstruction::emit_unary_atomic(const nir_intrinsic_instr* instr) +{ + ESDOp op = get_opcode(instr->intrinsic); + + if (DS_OP_INVALID == op) + return false; + + GPRVector dest = make_dest(instr); + + PValue uav_id = from_nir(instr->src[0], 0); + + auto ir = new GDSInstr(op, dest, uav_id, remap_atomic_base(nir_intrinsic_base(instr))); + + emit_instruction(ir); + return true; +} + +ESDOp EmitSSBOInstruction::get_opcode(const nir_intrinsic_op opcode) +{ + switch (opcode) { + case nir_intrinsic_atomic_counter_add: + return DS_OP_ADD_RET; + case nir_intrinsic_atomic_counter_and: + return DS_OP_AND_RET; + case nir_intrinsic_atomic_counter_exchange: + return DS_OP_XCHG_RET; + case nir_intrinsic_atomic_counter_inc: + return DS_OP_INC_RET; + case nir_intrinsic_atomic_counter_max: + return DS_OP_MAX_UINT_RET; + case nir_intrinsic_atomic_counter_min: + return DS_OP_MIN_UINT_RET; + case nir_intrinsic_atomic_counter_or: + return DS_OP_OR_RET; + case nir_intrinsic_atomic_counter_read: + return DS_OP_READ_RET; + case nir_intrinsic_atomic_counter_xor: + return DS_OP_XOR_RET; + case nir_intrinsic_atomic_counter_post_dec: + return DS_OP_DEC_RET; + case nir_intrinsic_atomic_counter_comp_swap: + return DS_OP_CMP_XCHG_RET; + case nir_intrinsic_atomic_counter_pre_dec: + default: + return DS_OP_INVALID; + } +} + +RatInstruction::ERatOp +EmitSSBOInstruction::get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const +{ + switch (opcode) { + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_image_atomic_add: + return RatInstruction::ADD_RTN; + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_image_atomic_and: + return RatInstruction::AND_RTN; + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: + return RatInstruction::XCHG_RTN; + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_image_atomic_or: + return RatInstruction::OR_RTN; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_image_atomic_imin: + return RatInstruction::MIN_INT_RTN; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_image_atomic_imax: + return RatInstruction::MAX_INT_RTN; + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_image_atomic_umin: + return RatInstruction::MIN_UINT_RTN; + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_image_atomic_umax: + return RatInstruction::MAX_UINT_RTN; + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_image_atomic_xor: + return RatInstruction::XOR_RTN; + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_image_atomic_comp_swap: + if (util_format_is_float(format)) + return RatInstruction::CMPXCHG_FLT_RTN; + else + return RatInstruction::CMPXCHG_INT_RTN; + case nir_intrinsic_image_load: + return RatInstruction::NOP_RTN; + default: + unreachable("Unsupported RAT instruction"); + } +} + + +bool EmitSSBOInstruction::emit_atomic_add(const nir_intrinsic_instr* instr) +{ + GPRVector dest = make_dest(instr); + + PValue value = from_nir_with_fetch_constant(instr->src[1], 0); + + PValue uav_id = from_nir(instr->src[0], 0); + + auto ir = new GDSInstr(DS_OP_ADD_RET, dest, value, uav_id, + remap_atomic_base(nir_intrinsic_base(instr))); + + emit_instruction(ir); + return true; +} + +bool EmitSSBOInstruction::load_atomic_inc_limits() +{ + m_atomic_update = get_temp_register(); + m_atomic_update->set_keep_alive(); + emit_instruction(new AluInstruction(op1_mov, m_atomic_update, literal(1), + {alu_write, alu_last_instr})); + return true; +} + +bool EmitSSBOInstruction::emit_atomic_inc(const 
nir_intrinsic_instr* instr) +{ + PValue uav_id = from_nir(instr->src[0], 0); + GPRVector dest = make_dest(instr); + auto ir = new GDSInstr(DS_OP_ADD_RET, dest, m_atomic_update, uav_id, + remap_atomic_base(nir_intrinsic_base(instr))); + emit_instruction(ir); + return true; +} + +bool EmitSSBOInstruction::emit_atomic_pre_dec(const nir_intrinsic_instr *instr) +{ + GPRVector dest = make_dest(instr); + + PValue uav_id = from_nir(instr->src[0], 0); + + auto ir = new GDSInstr(DS_OP_SUB_RET, dest, m_atomic_update, uav_id, + remap_atomic_base(nir_intrinsic_base(instr))); + emit_instruction(ir); + + emit_instruction(new AluInstruction(op2_sub_int, dest.x(), dest.x(), literal(1), last_write)); + + return true; +} + +bool EmitSSBOInstruction::emit_load_ssbo(const nir_intrinsic_instr* instr) +{ + GPRVector dest = make_dest(instr); + + /* src[1] holds the byte offset into the buffer; src[0] (the buffer index) feeds the fetch below */ + auto addr = from_nir(instr->src[1], 0); + PValue addr_temp = create_register_from_nir_src(instr->src[1], 1); + + /* convert the byte offset to a dword index; ideally this shift would be lowered in NIR */ + emit_instruction(new AluInstruction(op2_lshr_int, addr_temp, {addr, PValue(new LiteralValue(2))}, + {alu_write, alu_last_instr})); + + const EVTXDataFormat formats[4] = { + fmt_32, + fmt_32_32, + fmt_32_32_32, + fmt_32_32_32_32 + }; + + const std::array<int,4> dest_swt[4] = { + {0,7,7,7}, + {0,1,7,7}, + {0,1,2,7}, + {0,1,2,3} + }; + + /* TODO fix resource index */ + auto ir = new FetchInstruction(dest, addr_temp, + R600_IMAGE_REAL_RESOURCE_OFFSET + m_ssbo_image_offset + , from_nir(instr->src[0], 0), + formats[nir_dest_num_components(instr->dest) - 1], vtx_nf_int); + ir->set_dest_swizzle(dest_swt[nir_dest_num_components(instr->dest) - 1]); + ir->set_flag(vtx_use_tc); + + emit_instruction(ir); + return true; +} + +bool EmitSSBOInstruction::emit_store_ssbo(const nir_intrinsic_instr* instr) +{ + + GPRVector::Swizzle swz = {7,7,7,7}; + for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) + swz[i] = i; + + auto orig_addr = from_nir(instr->src[2], 0); + + GPRVector addr_vec = get_temp_vec4({0,1,2,7}); + + auto temp2 = get_temp_vec4(); + + auto rat_id = from_nir(instr->src[1], 0); + + emit_instruction(new AluInstruction(op2_lshr_int, addr_vec.reg_i(0), orig_addr, + PValue(new LiteralValue(2)), write)); + emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(1), Value::zero, write)); + emit_instruction(new AluInstruction(op1_mov, addr_vec.reg_i(2), Value::zero, last_write)); + + + auto values = vec_from_nir_with_fetch_constant(instr->src[0], + (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true); + + auto cf_op = cf_mem_rat; + //auto cf_op = nir_intrinsic_access(instr) & ACCESS_COHERENT ? 
cf_mem_rat_cacheless : cf_mem_rat; + auto store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, + values, addr_vec, m_ssbo_image_offset, rat_id, 1, + 1, 0, false); + emit_instruction(store); + m_store_ops.push_back(store); + + for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) { + emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write)); + emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0), + {addr_vec.reg_i(0), Value::one_i}, last_write)); + store = new RatInstruction(cf_op, RatInstruction::STORE_TYPED, + temp2, addr_vec, m_ssbo_image_offset, rat_id, 1, + 1, 0, false); + emit_instruction(store); + if (!(nir_intrinsic_access(instr) & ACCESS_COHERENT)) + m_store_ops.push_back(store); + } + + return true; +} + +bool +EmitSSBOInstruction::emit_image_store(const nir_intrinsic_instr *intrin) +{ + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, {0,1,2,3}); + auto undef = from_nir(intrin->src[2], 0); + auto value = vec_from_nir_with_fetch_constant(intrin->src[3], 0xf, {0,1,2,3}); + auto unknown = from_nir(intrin->src[4], 0); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && + nir_intrinsic_image_array(intrin)) { + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); + } + + auto op = cf_mem_rat; //nir_intrinsic_access(intrin) & ACCESS_COHERENT ? cf_mem_rat_cacheless : cf_mem_rat; + auto store = new RatInstruction(op, RatInstruction::STORE_TYPED, value, coord, imageid, + image_offset, 1, 0xf, 0, false); + + //if (!(nir_intrinsic_access(intrin) & ACCESS_COHERENT)) + m_store_ops.push_back(store); + + emit_instruction(store); + return true; +} + +bool +EmitSSBOInstruction::emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin) +{ + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + auto opcode = EmitSSBOInstruction::get_rat_opcode(intrin->intrinsic, PIPE_FORMAT_R32_UINT); + + + auto coord_orig = from_nir(intrin->src[1], 0, 0); + auto coord = get_temp_register(0); + + emit_instruction(new AluInstruction(op2_lshr_int, coord, coord_orig, literal(2), last_write)); + + if (intrin->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[3], 0), {alu_write})); + // TODO: cayman wants channel 2 here + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3), + from_nir(intrin->src[2], 0), {alu_last_instr, alu_write})); + } else { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[2], 0), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(2), Value::zero, last_write)); + } + + + GPRVector out_vec({coord, coord, coord, coord}); + + auto atomic = new RatInstruction(cf_mem_rat, opcode, m_rat_return_address, out_vec, imageid + m_ssbo_image_offset, + image_offset, 1, 0xf, 0, true); + emit_instruction(atomic); + emit_instruction(new WaitAck(0)); + + GPRVector dest = vec_from_nir(intrin->dest, intrin->dest.ssa.num_components); + auto fetch = new 
FetchInstruction(vc_fetch, + no_index_offset, + fmt_32, + vtx_nf_int, + vtx_es_none, + m_rat_return_address.reg_i(1), + dest, + 0, + false, + 0xf, + R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + image_offset, + {0,7,7,7}); + fetch->set_flag(vtx_srf_mode); + fetch->set_flag(vtx_use_tc); + emit_instruction(fetch); + return true; + +} + +bool +EmitSSBOInstruction::emit_image_load(const nir_intrinsic_instr *intrin) +{ + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + auto rat_op = get_rat_opcode(intrin->intrinsic, nir_intrinsic_format(intrin)); + + GPRVector::Swizzle swz = {0,1,2,3}; + auto coord = vec_from_nir_with_fetch_constant(intrin->src[1], 0xf, swz); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_1D && + nir_intrinsic_image_array(intrin)) { + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(2), coord.reg_i(1), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, coord.reg_i(1), coord.reg_i(2), {alu_last_instr, alu_write})); + } + + if (intrin->intrinsic != nir_intrinsic_image_load) { + if (intrin->intrinsic == nir_intrinsic_image_atomic_comp_swap) { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[4], 0), {alu_write})); + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(3), + from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); + } else { + emit_instruction(new AluInstruction(op1_mov, m_rat_return_address.reg_i(0), + from_nir(intrin->src[3], 0), {alu_last_instr, alu_write})); + } + } + auto cf_op = cf_mem_rat;// nir_intrinsic_access(intrin) & ACCESS_COHERENT ? 
cf_mem_rat_cacheless : cf_mem_rat; + + auto store = new RatInstruction(cf_op, rat_op, m_rat_return_address, coord, imageid, + image_offset, 1, 0xf, 0, true); + emit_instruction(store); + return fetch_return_value(intrin); +} + +bool EmitSSBOInstruction::fetch_return_value(const nir_intrinsic_instr *intrin) +{ + emit_instruction(new WaitAck(0)); + + pipe_format format = nir_intrinsic_format(intrin); + unsigned fmt = fmt_32; + unsigned num_format = 0; + unsigned format_comp = 0; + unsigned endian = 0; + + int imageid = 0; + PValue image_offset; + + if (nir_src_is_const(intrin->src[0])) + imageid = nir_src_as_int(intrin->src[0]); + else + image_offset = from_nir(intrin->src[0], 0); + + r600_vertex_data_type(format, &fmt, &num_format, &format_comp, &endian); + + GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); + + auto fetch = new FetchInstruction(vc_fetch, + no_index_offset, + (EVTXDataFormat)fmt, + (EVFetchNumFormat)num_format, + (EVFetchEndianSwap)endian, + m_rat_return_address.reg_i(1), + dest, + 0, + false, + 0x3, + R600_IMAGE_IMMED_RESOURCE_OFFSET + imageid, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + image_offset, {0,1,2,3}); + fetch->set_flag(vtx_srf_mode); + fetch->set_flag(vtx_use_tc); + if (format_comp) + fetch->set_flag(vtx_format_comp_signed); + + emit_instruction(fetch); + return true; +} + +bool EmitSSBOInstruction::emit_image_size(const nir_intrinsic_instr *intrin) +{ + GPRVector dest = vec_from_nir(intrin->dest, nir_dest_num_components(intrin->dest)); + GPRVector src{0,{4,4,4,4}}; + + assert(nir_src_as_uint(intrin->src[1]) == 0); + + auto const_offset = nir_src_as_const_value(intrin->src[0]); + auto dyn_offset = PValue(); + int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; + if (const_offset) + res_id += const_offset[0].u32; + else + dyn_offset = from_nir(intrin->src[0], 0); + + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_BUF) { + emit_instruction(new FetchInstruction(dest, PValue(new GPRValue(0, 7)), + res_id, + bim_none)); + return true; + } else { + emit_instruction(new TexInstruction(TexInstruction::get_resinfo, dest, src, + 0/* ?? */, + res_id, dyn_offset)); + if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE && + nir_intrinsic_image_array(intrin) && nir_dest_num_components(intrin->dest) > 2) { + /* Need to load the layers from a const buffer */ + + unsigned lookup_resid = const_offset[0].u32; + emit_instruction(new AluInstruction(op1_mov, dest.reg_i(2), + PValue(new UniformValue(lookup_resid/4 + R600_SHADER_BUFFER_INFO_SEL, lookup_resid % 4, + R600_BUFFER_INFO_CONST_BUFFER)), + EmitInstruction::last_write)); + } + } + return true; +} + +bool EmitSSBOInstruction::emit_buffer_size(const nir_intrinsic_instr *intr) +{ + std::array<PValue,4> dst_elms; + + + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = from_nir(intr->dest, (i < intr->dest.ssa.num_components) ? 
i : 7); + } + + GPRVector dst(dst_elms); + GPRVector src(0,{4,4,4,4}); + + auto const_offset = nir_src_as_const_value(intr->src[0]); + auto dyn_offset = PValue(); + int res_id = R600_IMAGE_REAL_RESOURCE_OFFSET; + if (const_offset) + res_id += const_offset[0].u32; + else + assert(0 && "dynamic buffer offset not supported in buffer_size"); + + emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), + res_id, bim_none)); + + return true; +} + +bool EmitSSBOInstruction::make_stores_ack_and_waitack() +{ + for (auto&& store: m_store_ops) + store->set_ack(); + + if (!m_store_ops.empty()) + emit_instruction(new WaitAck(0)); + + m_store_ops.clear(); + + return true; +} + +GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir) +{ + GPRVector::Values v; + int i; + for (i = 0; i < 4; ++i) + v[i] = from_nir(ir->dest, i); + return GPRVector(v); +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h new file mode 100644 index 000000000..56e0e31f1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h @@ -0,0 +1,57 @@ +#ifndef SFN_EMITSSBOINSTRUCTION_H +#define SFN_EMITSSBOINSTRUCTION_H + +#include "sfn_emitinstruction.h" +#include "sfn_instruction_gds.h" +#include "sfn_value_gpr.h" + +namespace r600 { + +class EmitSSBOInstruction: public EmitInstruction { +public: + EmitSSBOInstruction(ShaderFromNirProcessor& processor); + + void set_ssbo_offset(int offset); + + void set_require_rat_return_address(); + bool load_rat_return_address(); + bool load_atomic_inc_limits(); + +private: + bool do_emit(nir_instr *instr); + + bool emit_atomic(const nir_intrinsic_instr* instr); + bool emit_unary_atomic(const nir_intrinsic_instr* instr); + bool emit_atomic_add(const nir_intrinsic_instr* instr); + bool emit_atomic_inc(const nir_intrinsic_instr* instr); + bool emit_atomic_pre_dec(const nir_intrinsic_instr* instr); + + bool emit_load_ssbo(const nir_intrinsic_instr* instr); + bool emit_store_ssbo(const nir_intrinsic_instr* instr); + + bool emit_image_size(const nir_intrinsic_instr *intrin); + bool emit_image_load(const nir_intrinsic_instr *intrin); + bool emit_image_store(const nir_intrinsic_instr *intrin); + bool emit_ssbo_atomic_op(const nir_intrinsic_instr *intrin); + bool emit_buffer_size(const nir_intrinsic_instr *intrin); + + bool fetch_return_value(const nir_intrinsic_instr *intrin); + + bool make_stores_ack_and_waitack(); + + ESDOp get_opcode(nir_intrinsic_op opcode); + RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const; + + GPRVector make_dest(const nir_intrinsic_instr* instr); + + PGPRValue m_atomic_update; + + bool m_require_rat_return_address; + GPRVector m_rat_return_address; + int m_ssbo_image_offset; + std::vector<RatInstruction *> m_store_ops; +}; + +} + +#endif // SFN_EMITSSBOINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp new file mode 100644 index 000000000..c31bee43d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.cpp @@ -0,0 +1,671 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_emittexinstruction.h" +#include "sfn_shader_base.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +EmitTexInstruction::EmitTexInstruction(ShaderFromNirProcessor &processor): + EmitInstruction (processor) +{ +} + +bool EmitTexInstruction::do_emit(nir_instr* instr) +{ + nir_tex_instr* ir = nir_instr_as_tex(instr); + + TexInputs src; + if (!get_inputs(*ir, src)) + return false; + + if (ir->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + switch (ir->op) { + case nir_texop_txf: + return emit_buf_txf(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + default: + return false; + } + } else { + switch (ir->op) { + case nir_texop_tex: + return emit_tex_tex(ir, src); + case nir_texop_txf: + return emit_tex_txf(ir, src); + case nir_texop_txb: + return emit_tex_txb(ir, src); + case nir_texop_txl: + return emit_tex_txl(ir, src); + case nir_texop_txd: + return emit_tex_txd(ir, src); + case nir_texop_txs: + return emit_tex_txs(ir, src, {0,1,2,3}); + case nir_texop_lod: + return emit_tex_lod(ir, src); + case nir_texop_tg4: + return emit_tex_tg4(ir, src); + case nir_texop_txf_ms: + return emit_tex_txf_ms(ir, src); + case nir_texop_query_levels: + return emit_tex_txs(ir, src, {3,7,7,7}); + case nir_texop_texture_samples: + return emit_tex_texture_samples(ir, src, {3,7,7,7}); + default: + + return false; + } + } +} + +bool EmitTexInstruction::emit_buf_txf(nir_tex_instr* instr, TexInputs &src) +{ + auto dst = make_dest(*instr); + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, dst, src.coord.reg_i(0), 0, + instr->texture_index + R600_MAX_CONST_BUFFERS, + src.texture_offset, bim_none); + ir->set_flag(vtx_use_const_field); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_tex(nir_tex_instr* instr, TexInputs& src) +{ + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample; + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c; + } + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + 
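A side note on the destination-swizzle convention used by make_dest() here and by the dest_swt tables in emit_load_ssbo(): channels that the NIR destination does not populate are given swizzle index 7, which r600 treats as the masked (unwritten) channel, while indices 0-3 select x through w. A minimal standalone sketch of that rule, using a hypothetical helper name rather than driver code:

// Illustrative only: mirrors the channel-masking pattern of make_dest()
// and the dest_swt tables; dest_swizzle() is a hypothetical name.
#include <array>
#include <cstdio>

static std::array<int, 4> dest_swizzle(unsigned num_components)
{
   std::array<int, 4> swz;
   for (unsigned i = 0; i < 4; ++i)
      swz[i] = i < num_components ? (int)i : 7; /* 7 = masked channel */
   return swz;
}

int main()
{
   /* n = 1..4 reproduces the four dest_swt rows: {0,7,7,7} ... {0,1,2,3} */
   for (unsigned n = 1; n <= 4; ++n) {
      auto swz = dest_swizzle(n);
      std::printf("%u -> {%d,%d,%d,%d}\n", n, swz[0], swz[1], swz[2], swz[3]);
   }
   return 0;
}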
+bool EmitTexInstruction::emit_tex_txd(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_g; + auto dst = make_dest(*instr); + + GPRVector empty_dst(0,{7,7,7,7}); + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::sample_c_g; + } + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + TexInstruction *irgh = new TexInstruction(TexInstruction::set_gradient_h, empty_dst, src.ddx, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irgh->set_dest_swizzle({7,7,7,7}); + + TexInstruction *irgv = new TexInstruction(TexInstruction::set_gradient_v, empty_dst, src.ddy, + sampler.id, sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irgv->set_dest_swizzle({7,7,7,7}); + + TexInstruction *ir = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, src.coord, ir); + + set_rect_coordinate_flags(instr, ir); + set_offsets(ir, src.offset); + + emit_instruction(irgh); + emit_instruction(irgv); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_txf(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto dst = make_dest(*instr); + + if (*src.coord.reg_i(3) != *src.lod) { + if (src.coord.sel() != src.lod->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {alu_write, alu_last_instr})); + else + src.coord.set_reg_i(3, src.lod); + } + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + src.coord.set_reg_i(2, src.coord.reg_i(1)); + + auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + + if (src.offset) { + assert(src.offset->is_ssa); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { + ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), + {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + if (instr->is_array) + tex_ir->set_flag(TexInstruction::z_unnormalized); + + emit_instruction(tex_ir); + return true; +} + +bool EmitTexInstruction::emit_tex_lod(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::get_tex_lod; + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + irt->set_dest_swizzle({1,0,7,7}); + emit_instruction(irt); + + return true; + +} + +bool EmitTexInstruction::emit_tex_txl(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << 
*reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto tex_op = TexInstruction::sample_l; + if (instr->is_shadow) { + if (src.coord.sel() != src.comperator->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write})); + else + src.coord.set_reg_i(2, src.comperator); + tex_op = TexInstruction::sample_c_l; + } + + if (src.coord.sel() != src.lod->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.lod, {last_write})); + else + src.coord.set_reg_i(3, src.lod); + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txb(nir_tex_instr* instr, TexInputs& src) +{ + auto tex_op = TexInstruction::sample_lb; + + std::array<uint8_t, 4> in_swizzle = {0,1,2,3}; + + if (instr->is_shadow) { + if (src.coord.sel() != src.comperator->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(2), src.comperator, {alu_write})); + else + src.coord.set_reg_i(2, src.comperator); + tex_op = TexInstruction::sample_c_lb; + } + + if (src.coord.sel() != src.bias->sel()) + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.bias, {last_write})); + else + src.coord.set_reg_i(3, src.bias); + + GPRVector tex_src(src.coord, in_swizzle); + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, tex_src, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + if (instr->is_array) + handle_array_index(*instr, tex_src, irt); + + set_rect_coordinate_flags(instr, irt); + set_offsets(irt, src.offset); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txs(nir_tex_instr* instr, TexInputs& tex_src, + const std::array<int,4>& dest_swz) +{ + std::array<PValue,4> dst_elms; + std::array<PValue,4> src_elms; + + for (uint16_t i = 0; i < 4; ++i) { + dst_elms[i] = from_nir(instr->dest, (i < instr->dest.ssa.num_components) ? 
i : 7); + } + + GPRVector dst(dst_elms); + + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + emit_instruction(new FetchInstruction(dst, PValue(new GPRValue(0, 7)), + instr->sampler_index + R600_MAX_CONST_BUFFERS, + bim_none)); + } else { + for (uint16_t i = 0; i < 4; ++i) + src_elms[i] = tex_src.lod; + GPRVector src(src_elms); + + auto sampler = get_sampler_id(instr->sampler_index, tex_src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + auto ir = new TexInstruction(TexInstruction::get_resinfo, dst, src, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, tex_src.sampler_offset); + ir->set_dest_swizzle(dest_swz); + emit_instruction(ir); + + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + PValue src(new UniformValue(512 + R600_BUFFER_INFO_OFFSET / 16 + (sampler.id >> 2), + sampler.id & 3, R600_BUFFER_INFO_CONST_BUFFER)); + + auto alu = new AluInstruction(op1_mov, dst[2], src, {last_write}); + emit_instruction(alu); + set_has_txs_cube_array_comp(); + } + } + + return true; + +} + +bool EmitTexInstruction::emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src, + const std::array<int, 4> &dest_swz) +{ + GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); + GPRVector help{0,{4,4,4,4}}; + + auto dyn_offset = PValue(); + int res_id = R600_MAX_CONST_BUFFERS + instr->sampler_index; + + auto ir = new TexInstruction(TexInstruction::get_nsampled, dest, help, + 0, res_id, src.sampler_offset); + ir->set_dest_swizzle(dest_swz); + emit_instruction(ir); + return true; +} + +bool EmitTexInstruction::emit_tex_tg4(nir_tex_instr* instr, TexInputs& src) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + TexInstruction *set_ofs = nullptr; + + auto tex_op = TexInstruction::gather4; + + if (instr->is_shadow) { + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), src.comperator, + {alu_last_instr, alu_write})); + tex_op = TexInstruction::gather4_c; + } + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + bool literal_offset = false; + if (src.offset) { + literal_offset = nir_src_as_const_value(*src.offset) != 0; + r600::sfn_log << SfnLog::tex << " really have offsets and they are " << + (literal_offset ? "literal" : "varying") << + "\n"; + + if (!literal_offset) { + GPRVector::Swizzle swizzle = {4,4,4,4}; + for (unsigned i = 0; i < instr->coord_components; ++i) + swizzle[i] = i; + + int noffsets = instr->coord_components; + if (instr->is_array) + --noffsets; + + auto ofs = vec_from_nir_with_fetch_constant(*src.offset, + ( 1 << noffsets) - 1, + swizzle); + GPRVector dummy(0, {7,7,7,7}); + tex_op = (tex_op == TexInstruction::gather4_c) ? 
+ TexInstruction::gather4_c_o : TexInstruction::gather4_o; + + set_ofs = new TexInstruction(TexInstruction::set_offsets, dummy, + ofs, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + set_ofs->set_dest_swizzle({7,7,7,7}); + } + } + + + /* pre CAYMAN needs swizzle */ + auto dst = make_dest(*instr); + auto irt = new TexInstruction(tex_op, dst, src.coord, sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + irt->set_dest_swizzle({1,2,0,3}); + irt->set_gather_comp(instr->component); + + if (instr->is_array) + handle_array_index(*instr, src.coord, irt); + + if (literal_offset) { + r600::sfn_log << SfnLog::tex << "emit literal offsets\n"; + set_offsets(irt, src.offset); + } + + set_rect_coordinate_flags(instr, irt); + + if (set_ofs) + emit_instruction(set_ofs); + + emit_instruction(irt); + return true; +} + +bool EmitTexInstruction::emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src) +{ + assert(instr->src[0].src.is_ssa); + + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + auto sampler = get_sampler_id(instr->sampler_index, src.sampler_deref); + assert(!sampler.indirect && "Indirect sampler selection not yet supported"); + + PGPRValue sample_id_dest_reg = get_temp_register(); + GPRVector sample_id_dest(sample_id_dest_reg->sel(), {7,7,7,7}); + sample_id_dest.set_reg_i(sample_id_dest_reg->chan(), sample_id_dest_reg); + std::array<int,4> dest_swz = {7,7,7,7}; + dest_swz[sample_id_dest_reg->chan()] = 0; + + emit_instruction(new AluInstruction(op1_mov, src.coord.reg_i(3), + src.ms_index, + {alu_write, alu_last_instr})); + + auto tex_sample_id_ir = new TexInstruction(TexInstruction::ld, sample_id_dest, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + tex_sample_id_ir->set_flag(TexInstruction::x_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::y_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::z_unnormalized); + tex_sample_id_ir->set_flag(TexInstruction::w_unnormalized); + tex_sample_id_ir->set_inst_mode(1); + + tex_sample_id_ir->set_dest_swizzle(dest_swz); + + emit_instruction(tex_sample_id_ir); + + if (src.ms_index->type() != Value::literal || + static_cast<const LiteralValue&>(*src.ms_index).value() != 0) { + PValue help = get_temp_register(); + + emit_instruction(new AluInstruction(op2_lshl_int, help, + src.ms_index, literal(2), + {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op2_lshr_int, sample_id_dest_reg, + {sample_id_dest_reg, help}, + {alu_write, alu_last_instr})); + } + + emit_instruction(new AluInstruction(op2_and_int, src.coord.reg_i(3), + {sample_id_dest_reg, PValue(new LiteralValue(15))}, + {alu_write, alu_last_instr})); + + auto dst = make_dest(*instr); + + /* txf doesn't need rounding for the array index, but 1D has the array index + * in the z component */ + if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) + src.coord.set_reg_i(2, src.coord.reg_i(1)); + + auto tex_ir = new TexInstruction(TexInstruction::ld, dst, src.coord, + sampler.id, + sampler.id + R600_MAX_CONST_BUFFERS, src.sampler_offset); + + + if (src.offset) { + assert(src.offset->is_ssa); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < src.offset->ssa->num_components; ++i) { + ir = new AluInstruction(op2_add_int, src.coord.reg_i(i), + {src.coord.reg_i(i), from_nir(*src.offset, i, i)}, {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + 
emit_instruction(tex_ir); + return true; +} + +bool EmitTexInstruction::get_inputs(const nir_tex_instr& instr, TexInputs &src) +{ + sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components << " components\n"; + + unsigned grad_components = instr.coord_components; + if (instr.is_array && !instr.array_is_lowered_cube) + --grad_components; + + + src.offset = nullptr; + bool retval = true; + for (unsigned i = 0; i < instr.num_srcs; ++i) { + switch (instr.src[i].src_type) { + case nir_tex_src_bias: + src.bias = from_nir(instr.src[i], 0); + break; + + case nir_tex_src_coord: { + src.coord = vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << instr.coord_components) - 1, + {0,1,2,3}); + } break; + case nir_tex_src_comparator: + src.comperator = from_nir(instr.src[i], 0); + break; + case nir_tex_src_ddx: { + sfn_log << SfnLog::tex << "Get DDX "; + src.ddx = vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << grad_components) - 1, + swizzle_from_comps(grad_components)); + sfn_log << SfnLog::tex << src.ddx << "\n"; + } break; + case nir_tex_src_ddy:{ + sfn_log << SfnLog::tex << "Get DDY "; + src.ddy = vec_from_nir_with_fetch_constant(instr.src[i].src, + (1 << grad_components) - 1, + swizzle_from_comps(grad_components)); + sfn_log << SfnLog::tex << src.ddy << "\n"; + } break; + case nir_tex_src_lod: + src.lod = from_nir_with_fetch_constant(instr.src[i].src, 0); + break; + case nir_tex_src_offset: + sfn_log << SfnLog::tex << " -- Find offset\n"; + src.offset = &instr.src[i].src; + break; + case nir_tex_src_sampler_deref: + src.sampler_deref = get_deref_location(instr.src[i].src); + break; + case nir_tex_src_texture_deref: + src.texture_deref = get_deref_location(instr.src[i].src); + break; + case nir_tex_src_ms_index: + src.ms_index = from_nir(instr.src[i], 0); + break; + case nir_tex_src_texture_offset: + src.texture_offset = from_nir(instr.src[i], 0); + break; + case nir_tex_src_sampler_offset: + src.sampler_offset = from_nir(instr.src[i], 0); + break; + case nir_tex_src_plane: + case nir_tex_src_projector: + case nir_tex_src_min_lod: + case nir_tex_src_ms_mcs: + default: + sfn_log << SfnLog::tex << "Texture source type " << instr.src[i].src_type << " not supported\n"; + retval = false; + } + } + return retval; +} + +GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr) +{ + int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : + instr.dest.reg.reg->num_components; + std::array<PValue,4> dst_elms; + for (uint16_t i = 0; i < 4; ++i) + dst_elms[i] = from_nir(instr.dest, (i < num_dest_components) ? i : 7); + return GPRVector(dst_elms); +} + + +GPRVector EmitTexInstruction::make_dest(nir_tex_instr& instr, + const std::array<int, 4>& swizzle) +{ + int num_dest_components = instr.dest.is_ssa ? instr.dest.ssa.num_components : + instr.dest.reg.reg->num_components; + std::array<PValue,4> dst_elms; + for (uint16_t i = 0; i < 4; ++i) { + int k = swizzle[i]; + dst_elms[i] = from_nir(instr.dest, (k < num_dest_components) ? 
k : 7); + } + return GPRVector(dst_elms); +} + +void EmitTexInstruction::set_rect_coordinate_flags(nir_tex_instr* instr, + TexInstruction* ir) const +{ + if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + ir->set_flag(TexInstruction::x_unnormalized); + ir->set_flag(TexInstruction::y_unnormalized); + } +} + +void EmitTexInstruction::set_offsets(TexInstruction* ir, nir_src *offset) +{ + if (!offset) + return; + + assert(offset->is_ssa); + auto literal = nir_src_as_const_value(*offset); + assert(literal); + + for (int i = 0; i < offset->ssa->num_components; ++i) { + ir->set_offset(i, literal[i].i32); + } +} + +void EmitTexInstruction::handle_array_index(const nir_tex_instr& instr, const GPRVector& src, TexInstruction *ir) +{ + int src_idx = instr.sampler_dim == GLSL_SAMPLER_DIM_1D ? 1 : 2; + emit_instruction(new AluInstruction(op1_rndne, src.reg_i(2), src.reg_i(src_idx), + {alu_last_instr, alu_write})); + ir->set_flag(TexInstruction::z_unnormalized); +} + +EmitTexInstruction::SamplerId +EmitTexInstruction::get_sampler_id(int sampler_id, const nir_variable *deref) +{ + EmitTexInstruction::SamplerId result = {sampler_id, false}; + + if (deref) { + assert(glsl_type_is_sampler(deref->type)); + result.id = deref->data.binding; + } + return result; +} + +EmitTexInstruction::TexInputs::TexInputs(): + sampler_deref(nullptr), + texture_deref(nullptr), + offset(nullptr) +{ +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h new file mode 100644 index 000000000..e11ebda1c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_emittexinstruction.h @@ -0,0 +1,96 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EMITTEXINSTRUCTION_H +#define SFN_EMITTEXINSTRUCTION_H + +#include "sfn_emitinstruction.h" +#include "sfn_instruction_tex.h" + +namespace r600 { + +class EmitTexInstruction : public EmitInstruction +{ +public: + EmitTexInstruction(ShaderFromNirProcessor& processor); + +private: + struct TexInputs { + TexInputs(); + const nir_variable *sampler_deref; + const nir_variable *texture_deref; + GPRVector coord; + PValue bias; + PValue comperator; + PValue lod; + GPRVector ddx; + GPRVector ddy; + nir_src *offset; + PValue gather_comp; + PValue ms_index; + PValue sampler_offset; + PValue texture_offset; + }; + + bool emit_tex_tex(nir_tex_instr* instr, TexInputs& src); + + bool emit_tex_txf(nir_tex_instr* instr, TexInputs &src); + bool emit_tex_txb(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txd(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txl(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txs(nir_tex_instr* instr, TexInputs& src, + const std::array<int, 4> &dest_swz); + bool emit_tex_texture_samples(nir_tex_instr* instr, TexInputs& src, + const std::array<int, 4> &dest_swz); + bool emit_tex_lod(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_tg4(nir_tex_instr* instr, TexInputs& src); + bool emit_tex_txf_ms(nir_tex_instr* instr, TexInputs& src); + bool emit_buf_txf(nir_tex_instr* instr, TexInputs& src); + + bool get_inputs(const nir_tex_instr& instr, TexInputs &src); + + void set_rect_coordinate_flags(nir_tex_instr* instr, TexInstruction* ir) const; + + bool do_emit(nir_instr* instr) override; + + GPRVector make_dest(nir_tex_instr& instr); + GPRVector make_dest(nir_tex_instr &instr, const std::array<int, 4> &swizzle); + + void set_offsets(TexInstruction* ir, nir_src *offset); + void handle_array_index(const nir_tex_instr& instr, const GPRVector &src, TexInstruction* ir); + + struct SamplerId { + int id; + bool indirect; + }; + + SamplerId get_sampler_id(int sampler_id, const nir_variable *deref); + +}; + +} + +#endif // SFN_EMITTEXINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp new file mode 100644 index 000000000..72cf23172 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.cpp @@ -0,0 +1,183 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_alu.h" +#include "sfn_valuepool.h" + +namespace r600 { + +const AluModifiers AluInstruction::src_abs_flags[2] = + {alu_src0_abs, alu_src1_abs}; +const AluModifiers AluInstruction::src_neg_flags[3] = + {alu_src0_neg, alu_src1_neg, alu_src2_neg}; +const AluModifiers AluInstruction::src_rel_flags[3] = + {alu_src0_rel, alu_src1_rel, alu_src2_rel}; + +AluInstruction::AluInstruction(EAluOp opcode): + Instruction (Instruction::alu), + m_opcode(opcode), + m_src(alu_ops.at(opcode).nsrc), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu) +{ + if (alu_ops.at(opcode).nsrc == 3) + m_flags.set(alu_op3); +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, + std::vector<PValue> src, + const std::set<AluModifiers>& flags): + Instruction (Instruction::alu), + m_opcode(opcode), + m_dest(dest), + m_bank_swizzle(alu_vec_unknown), + m_cf_type(cf_alu) +{ + assert(dest); + m_src.swap(src); + for (auto f : flags) + m_flags.set(f); + + if (alu_ops.at(opcode).nsrc == 3) + m_flags.set(alu_op3); + + for (auto &s: m_src) + add_remappable_src_value(&s); + + add_remappable_dst_value(&m_dest); +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, + const std::set<AluModifiers>& flags): + AluInstruction(opcode, dest, std::vector<PValue>{src0}, flags) +{ +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, + PValue src0, PValue src1, + const std::set<AluModifiers> &m_flags): + AluInstruction(opcode, dest, {src0, src1}, m_flags) +{ +} + +AluInstruction::AluInstruction(EAluOp opcode, PValue dest, PValue src0, + PValue src1, PValue src2, + const std::set<AluModifiers> &flags): + AluInstruction(opcode, dest, {src0, src1, src2}, flags) +{ +} + +bool AluInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == alu); + const auto& oth = static_cast<const AluInstruction&>(lhs); + + if (m_opcode != oth.m_opcode) { + return false; + } + + if (*m_dest != *oth.m_dest) + return false; + + if (m_src.size() != oth.m_src.size()) + return false; + + for (unsigned i = 0; i < m_src.size(); ++i) + if (*m_src[i] != *oth.m_src[i]) { + return false; + } + return (m_flags == oth.m_flags && m_cf_type == oth.m_cf_type); +} + +void AluInstruction::replace_values(const ValueSet& candidates, PValue new_value) +{ + for (auto c: candidates) { + if (*c == *m_dest) + m_dest = new_value; + + for (auto& s: m_src) { + if (*c == *s) + s = new_value; + } + } +} + +PValue AluInstruction::remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map, + ValueMap &values) +{ + auto new_index = map[reg->sel()]; + if (new_index.valid) + reg = values.get_or_inject(new_index.new_reg, reg->chan()); + map[reg->sel()].used = true; + return reg; +} + + +void AluInstruction::set_flag(AluModifiers flag) +{ + m_flags.set(flag); +} + +void AluInstruction::set_bank_swizzle(AluBankSwizzle bswz) +{ + m_bank_swizzle = bswz; +} + +unsigned AluInstruction::n_sources() const +{ + return m_src.size(); +} + +void AluInstruction::do_print(std::ostream& os) const +{ + os << "ALU " << alu_ops.at(m_opcode).name; + if (m_flags.test(alu_dst_clamp)) + os << "_CLAMP"; + if (m_dest) + os << ' ' << *m_dest << " : " ; + + for (unsigned i = 0; i < m_src.size(); ++i) { + int pflags = 0; + if (i) + os << ' '; + 
if (m_flags.test(src_neg_flags[i])) pflags |= Value::PrintFlags::has_neg; + if (m_flags.test(src_rel_flags[i])) pflags |= Value::PrintFlags::is_rel; + if (i < 2) + if (m_flags.test(src_abs_flags[i])) pflags |= Value::PrintFlags::has_abs; + m_src[i]->print(os, Value::PrintFlags(0, pflags)); + } + os << " {"; + os << (m_flags.test(alu_write) ? 'W' : ' '); + os << (m_flags.test(alu_last_instr) ? 'L' : ' '); + os << (m_flags.test(alu_update_exec) ? 'E' : ' '); + os << (m_flags.test(alu_update_pred) ? 'P' : ' '); + os << "}"; + + os << " BS:" << m_bank_swizzle; + os << " CF:" << m_cf_type; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h new file mode 100644 index 000000000..383fa3baf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_alu.h @@ -0,0 +1,144 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_r600_instruction_alu_h +#define sfn_r600_instruction_alu_h + +#include "sfn_instruction_base.h" +#include "sfn_alu_defines.h" + +namespace r600 { + +enum AluModifiers { + alu_src0_neg, + alu_src0_abs, + alu_src0_rel, + alu_src1_neg, + alu_src1_abs, + alu_src1_rel, + alu_src2_neg, + alu_src2_rel, + alu_dst_clamp, + alu_dst_rel, + alu_last_instr, + alu_update_exec, + alu_update_pred, + alu_write, + alu_op3 +}; + +enum AluDstModifiers { + omod_off = 0, + omod_mul2 = 1, + omod_mul4 = 2, + omod_divl2 = 3 +}; + +enum AluPredSel { + pred_off = 0, + pred_zero = 2, + pred_one = 3 +}; + +enum AluBankSwizzle { + alu_vec_012 = 0, + sq_alu_scl_201 = 0, + alu_vec_021 = 1, + sq_alu_scl_122 = 1, + alu_vec_120 = 2, + sq_alu_scl_212 = 2, + alu_vec_102 = 3, + sq_alu_scl_221 = 3, + alu_vec_201 = 4, + alu_vec_210 = 5, + alu_vec_unknown = 6 +}; + +class AluInstruction : public Instruction { +public: + + static const AluModifiers src_abs_flags[2]; + static const AluModifiers src_neg_flags[3]; + static const AluModifiers src_rel_flags[3]; + + AluInstruction(EAluOp opcode); + AluInstruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, PValue src0, + const std::set<AluModifiers>& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, + PValue src0, PValue src1, + const std::set<AluModifiers>& m_flags); + + AluInstruction(EAluOp opcode, PValue dest, PValue src0, PValue src1, + PValue src2, + const std::set<AluModifiers>& m_flags); + + void set_flag(AluModifiers flag); + unsigned n_sources() const; + + PValue dest() {return m_dest;} + EAluOp opcode() const {return m_opcode;} + const Value *dest() const {return m_dest.get();} + Value& src(unsigned i) const {assert(i < m_src.size() && m_src[i]); return *m_src[i];} + PValue *psrc(unsigned i) {assert(i < m_src.size()); return &m_src[i];} + bool is_last() const {return m_flags.test(alu_last_instr);} + bool write() const {return m_flags.test(alu_write);} + bool flag(AluModifiers f) const {return m_flags.test(f);} + void set_bank_swizzle(AluBankSwizzle swz); + int bank_swizzle() const {return m_bank_swizzle;} + ECFAluOpCode cf_type() const {return m_cf_type;} + void set_cf_type(ECFAluOpCode cf_type){ m_cf_type = cf_type; } + + void replace_values(const ValueSet& candidates, PValue new_value) override; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + PValue remap_one_registers(PValue reg, std::vector<rename_reg_pair>& map, + ValueMap &values); + + + EAluOp m_opcode; + PValue m_dest; + std::vector<PValue> m_src; + AluOpFlags m_flags; + AluDstModifiers m_omod; + AluPredSel m_pred_sel; + AluBankSwizzle m_bank_swizzle; + ECFAluOpCode m_cf_type; +}; + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp new file mode 100644 index 000000000..116bfaca5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.cpp @@ -0,0 +1,187 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include <algorithm> +#include <cassert> + +#include "sfn_instruction_base.h" +#include "sfn_liverange.h" +#include "sfn_valuepool.h" + +namespace r600 { + +ValueRemapper::ValueRemapper(std::vector<rename_reg_pair>& m, + ValueMap& values): + m_map(m), + m_values(values) +{ +} + +void ValueRemapper::remap(PValue& v) +{ + if (!v) + return; + if (v->type() == Value::gpr) { + v = remap_one_registers(v); + } else if (v->type() == Value::gpr_array_value) { + GPRArrayValue& val = static_cast<GPRArrayValue&>(*v); + auto value = val.value(); + auto addr = val.indirect(); + val.reset_value(remap_one_registers(value)); + if (addr) { + if (addr->type() == Value::gpr) + val.reset_addr(remap_one_registers(addr)); + } + size_t range_start = val.sel(); + size_t range_end = range_start + val.array_size(); + while (range_start < range_end) + m_map[range_start++].used = true; + } else if (v->type() == Value::kconst) { + auto& val = static_cast<UniformValue&>(*v); + auto addr = val.addr(); + if (addr && addr->type() == Value::gpr) + val.reset_addr(remap_one_registers(addr)); + } + +} + +void ValueRemapper::remap(GPRVector& v) +{ + for (int i = 0; i < 4; ++i) { + if (v.reg_i(i)) { + auto& ns_idx = m_map[v.reg_i(i)->sel()]; + if (ns_idx.valid) + v.set_reg_i(i,m_values.get_or_inject(ns_idx.new_reg, v.reg_i(i)->chan())); + m_map[v.reg_i(i)->sel()].used = true; + } + } +} + +PValue ValueRemapper::remap_one_registers(PValue& reg) +{ + auto new_index = m_map[reg->sel()]; + if (new_index.valid) + reg = m_values.get_or_inject(new_index.new_reg, reg->chan()); + m_map[reg->sel()].used = true; + return reg; +} + + +Instruction::Instruction(instr_type t): + m_type(t) +{ +} + +Instruction::~Instruction() +{ +} + +void Instruction::print(std::ostream& os) const +{ + os << "OP:"; + do_print(os); +} + + +void Instruction::remap_registers(ValueRemapper& map) +{ + sfn_log << SfnLog::merge << "REMAP " << *this << "\n"; + for (auto& v: m_mappable_src_registers) + map.remap(*v); + + for (auto& v: m_mappable_src_vectors) + map.remap(*v); + + for (auto& v: m_mappable_dst_registers) + map.remap(*v); + + for (auto& v: m_mappable_dst_vectors) + map.remap(*v); + sfn_log << SfnLog::merge << "TO " << *this << "\n\n"; +} + +void Instruction::add_remappable_src_value(PValue *v) +{ + if (*v) + m_mappable_src_registers.push_back(v); +} + +void Instruction::add_remappable_src_value(GPRVector *v) +{ + m_mappable_src_vectors.push_back(v); +} + +void Instruction::add_remappable_dst_value(PValue *v) +{ + if (v) + m_mappable_dst_registers.push_back(v); +} + +void 
Instruction::add_remappable_dst_value(GPRVector *v) +{ + m_mappable_dst_vectors.push_back(v); +} + +void Instruction::replace_values(UNUSED const ValueSet& candidates, UNUSED PValue new_value) +{ + +} + +void Instruction::evalue_liveness(LiverangeEvaluator& eval) const +{ + sfn_log << SfnLog::merge << "Scan " << *this << "\n"; + for (const auto& s: m_mappable_src_registers) + if (*s) + eval.record_read(**s); + + for (const auto& s: m_mappable_src_vectors) + eval.record_read(*s); + + for (const auto& s: m_mappable_dst_registers) + if (*s) + eval.record_write(**s); + + for (const auto& s: m_mappable_dst_vectors) + eval.record_write(*s); + + do_evalue_liveness(eval); +} + +void Instruction::do_evalue_liveness(UNUSED LiverangeEvaluator& eval) const +{ + +} + +bool operator == (const Instruction& lhs, const Instruction& rhs) +{ + if (rhs.m_type != lhs.m_type) + return false; + + return lhs.is_equal_to(rhs); +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h new file mode 100644 index 000000000..0689a473a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_base.h @@ -0,0 +1,155 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_r600_instr_h +#define sfn_r600_instr_h + +#include "sfn_instructionvisitor.h" +#include "sfn_value_gpr.h" +#include "sfn_defines.h" + +#include "gallium/drivers/r600/r600_isa.h" +#include <iostream> +#include <memory> +#include <vector> +#include <set> + +namespace r600 { + +struct rename_reg_pair { + bool valid; + bool used; + int new_reg; +}; + +class LiverangeEvaluator; +class ValueMap; + + +class ValueRemapper { +public: + ValueRemapper(std::vector<rename_reg_pair>& m, + ValueMap& values); + + void remap(PValue& v); + void remap(GPRVector& v); +private: + PValue remap_one_registers(PValue& reg); + + std::vector<rename_reg_pair>& m_map; + ValueMap& m_values; +}; + + +using OutputRegisterMap = std::map<unsigned, const GPRVector *>; + +class Instruction { +public: + enum instr_type { + alu, + exprt, + tex, + vtx, + wait_ack, + cond_if, + cond_else, + cond_endif, + lds_atomic, + lds_read, + lds_write, + loop_begin, + loop_end, + loop_break, + loop_continue, + phi, + streamout, + ring, + emit_vtx, + mem_wr_scratch, + gds, + rat, + tf_write, + block, + unknown + }; + + typedef std::shared_ptr<Instruction> Pointer; + + friend bool operator == (const Instruction& lhs, const Instruction& rhs); + + Instruction(instr_type t); + + virtual ~Instruction(); + + instr_type type() const { return m_type;} + + void print(std::ostream& os) const; + + virtual void replace_values(const ValueSet& candidates, PValue new_value); + + void evalue_liveness(LiverangeEvaluator& eval) const; + + void remap_registers(ValueRemapper& map); + + virtual bool accept(InstructionVisitor& visitor) = 0; + virtual bool accept(ConstInstructionVisitor& visitor) const = 0; + +protected: + + void add_remappable_src_value(PValue *v); + void add_remappable_src_value(GPRVector *v); + void add_remappable_dst_value(PValue *v); + void add_remappable_dst_value(GPRVector *v); + +private: + + virtual void do_evalue_liveness(LiverangeEvaluator& eval) const; + + virtual bool is_equal_to(const Instruction& lhs) const = 0; + + instr_type m_type; + + virtual void do_print(std::ostream& os) const = 0; + + std::vector<PValue*> m_mappable_src_registers; + std::vector<GPRVector*> m_mappable_src_vectors; + std::vector<PValue*> m_mappable_dst_registers; + std::vector<GPRVector*> m_mappable_dst_vectors; +}; + +using PInstruction=Instruction::Pointer; + +inline std::ostream& operator << (std::ostream& os, const Instruction& instr) +{ + instr.print(os); + return os; +} + +bool operator == (const Instruction& lhs, const Instruction& rhs); + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp new file mode 100644 index 000000000..212499faf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.cpp @@ -0,0 +1,57 @@ +#include "sfn_instruction_block.h" + +namespace r600 { + + +InstructionBlock::InstructionBlock(unsigned nesting_depth, unsigned block_number): + Instruction(block), + m_block_number(block_number), + m_nesting_depth(nesting_depth) +{ +} + +void InstructionBlock::emit(PInstruction instr) +{ + m_block.push_back(instr); +} + +void InstructionBlock::remap_registers(ValueRemapper& map) +{ + for(auto& i: m_block) + i->remap_registers(map); +} + +void InstructionBlock::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + for(auto& i: m_block) + i->evalue_liveness(eval); +} + +bool InstructionBlock::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == block); + auto& l = static_cast<const 
InstructionBlock&>(lhs);
+
+   if (m_block.size() != l.m_block.size())
+      return false;
+
+   if (m_block_number != l.m_block_number)
+      return false;
+
+   return std::equal(m_block.begin(), m_block.end(), l.m_block.begin(),
+                     [](PInstruction ri, PInstruction li) {return *ri == *li;});
+}
+
+PInstruction InstructionBlock::last_instruction()
+{
+   return m_block.size() ? *m_block.rbegin() : nullptr;
+}
+
+void InstructionBlock::do_print(std::ostream& os) const
+{
+   /* indent by two spaces per nesting level */
+   std::string space(2 * m_nesting_depth, ' ');
+   for(auto& i: m_block)
+      os << space << *i << "\n";
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
new file mode 100644
index 000000000..fe40cc10c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_block.h
@@ -0,0 +1,82 @@
+/* -*- mesa-c++ -*-
+ *
+ * Copyright (c) 2018-2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */ + + +#ifndef sfn_instruction_block_h +#define sfn_instruction_block_h + +#include "sfn_instruction_base.h" + +namespace r600 { + +class InstructionBlock : public Instruction +{ +public: + InstructionBlock(unsigned nesting_depth, unsigned block_number); + + void emit(PInstruction instr); + + + std::vector<PInstruction>::const_iterator begin() const { + return m_block.begin(); + } + std::vector<PInstruction>::const_iterator end() const { + return m_block.end(); + } + + void remap_registers(ValueRemapper& map); + + size_t size() const { + return m_block.size(); + } + + const PInstruction& operator [] (int i) const { + return m_block[i]; + } + + unsigned number() const { + return m_block_number; + } + + PInstruction last_instruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + std::vector<PInstruction> m_block; + + unsigned m_block_number; + unsigned m_nesting_depth; +}; + +} + +#endif // INSTRUCTIONBLOCK_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp new file mode 100644 index 000000000..455d6d630 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.cpp @@ -0,0 +1,195 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_instruction_cf.h" +#include "sfn_liverange.h" + +namespace r600 { + +CFInstruction::CFInstruction(instr_type type):Instruction(type) +{ + +} + +IfElseInstruction::IfElseInstruction(instr_type type): + CFInstruction (type) +{ + +} + +IfInstruction::IfInstruction(AluInstruction *pred): + IfElseInstruction(cond_if), + m_pred(pred) +{ + PValue *v = m_pred->psrc(0); + add_remappable_src_value(v); + pred->set_cf_type(cf_alu_push_before); +} + +void IfInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_if(); +} + +bool IfInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == cond_if); + const IfInstruction& l = static_cast<const IfInstruction&>(lhs); + return *l.m_pred == *m_pred; +} + +void IfInstruction::do_print(std::ostream& os) const +{ + os << "PRED = " << *m_pred << "\n"; + os << "IF (PRED)"; +} + +ElseInstruction::ElseInstruction(IfInstruction *jump_src): + IfElseInstruction(cond_else), + m_jump_src(jump_src) +{ +} + +void ElseInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_else(); +} + + +bool ElseInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != cond_else) + return false; + auto& l = static_cast<const ElseInstruction&>(lhs); + return (*m_jump_src == *l.m_jump_src); +} + +void ElseInstruction::do_print(std::ostream& os) const +{ + os << "ELSE"; +} + +IfElseEndInstruction::IfElseEndInstruction(): + IfElseInstruction(cond_endif) +{ +} + +void IfElseEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_endif(); +} + +bool IfElseEndInstruction::is_equal_to(const Instruction& lhs) const +{ + if (lhs.type() != cond_endif) + return false; + return true; +} + +void IfElseEndInstruction::do_print(std::ostream& os) const +{ + os << "ENDIF"; +} + +LoopBeginInstruction::LoopBeginInstruction(): + CFInstruction(loop_begin) +{ +} + +void LoopBeginInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_begin(); +} + +bool LoopBeginInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == loop_begin); + return true; +} + +void LoopBeginInstruction::do_print(std::ostream& os) const +{ + os << "BGNLOOP"; +} + +LoopEndInstruction::LoopEndInstruction(LoopBeginInstruction *start): + CFInstruction (loop_end), + m_start(start) +{ +} + +void LoopEndInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_end(); +} + +bool LoopEndInstruction::is_equal_to(const Instruction& lhs) const +{ + assert(lhs.type() == loop_end); + const auto& other = static_cast<const LoopEndInstruction&>(lhs); + return *m_start == *other.m_start; +} + +void LoopEndInstruction::do_print(std::ostream& os) const +{ + os << "ENDLOOP"; +} + +LoopBreakInstruction::LoopBreakInstruction(): + CFInstruction (loop_break) +{ +} + +void LoopBreakInstruction::do_evalue_liveness(LiverangeEvaluator& eval) const +{ + eval.scope_loop_break(); +} + +bool LoopBreakInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return true; +} + +void LoopBreakInstruction::do_print(std::ostream& os) const +{ + os << "BREAK"; +} + +LoopContInstruction::LoopContInstruction(): + CFInstruction (loop_continue) +{ +} + +bool LoopContInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return true; +} +void LoopContInstruction::do_print(std::ostream& os) const +{ + os << "CONTINUE"; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h 
b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h new file mode 100644 index 000000000..a13794803 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_cf.h @@ -0,0 +1,142 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_IFELSEINSTRUCTION_H +#define SFN_IFELSEINSTRUCTION_H + +#include "sfn_instruction_alu.h" + +namespace r600 { + +class CFInstruction : public Instruction { +protected: + CFInstruction(instr_type type); +}; + +class IfElseInstruction : public CFInstruction { +public: + IfElseInstruction(instr_type type); + +}; + +class IfInstruction : public IfElseInstruction { +public: + IfInstruction(AluInstruction *pred); + const AluInstruction& pred() const {return *m_pred;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + std::shared_ptr<AluInstruction> m_pred; +}; + +class ElseInstruction : public IfElseInstruction { +public: + ElseInstruction(IfInstruction *jump_src); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + IfElseInstruction *m_jump_src; +}; + +class IfElseEndInstruction : public IfElseInstruction { +public: + IfElseEndInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopBeginInstruction: public CFInstruction { +public: + LoopBeginInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool 
accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopEndInstruction: public CFInstruction { +public: + LoopEndInstruction(LoopBeginInstruction *start); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + LoopBeginInstruction *m_start; +}; + +class LoopBreakInstruction: public CFInstruction { +public: + LoopBreakInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_evalue_liveness(LiverangeEvaluator& eval) const override; + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +class LoopContInstruction: public CFInstruction { +public: + LoopContInstruction(); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; +}; + +} + +#endif // SFN_IFELSEINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp new file mode 100644 index 000000000..7d1d948a1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.cpp @@ -0,0 +1,341 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#include "sfn_instruction_export.h"
+#include "sfn_liverange.h"
+#include "sfn_valuepool.h"
+
+namespace r600 {
+
+WriteoutInstruction::WriteoutInstruction(instr_type t, const GPRVector& value):
+   Instruction(t),
+   m_value(value)
+{
+   add_remappable_src_value(&m_value);
+}
+
+void WriteoutInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+   // I wonder whether we can actually end up here ...
+   for (auto c: candidates) {
+      if (*c == *m_value.reg_i(c->chan()))
+         m_value.set_reg_i(c->chan(), new_value);
+   }
+
+   replace_values_child(candidates, new_value);
+}
+
+void WriteoutInstruction::replace_values_child(UNUSED const ValueSet& candidates,
+                                               UNUSED PValue new_value)
+{
+}
+
+void WriteoutInstruction::remap_registers_child(UNUSED std::vector<rename_reg_pair>& map,
+                                                UNUSED ValueMap& values)
+{
+}
+
+ExportInstruction::ExportInstruction(unsigned loc, const GPRVector &value, ExportType type):
+   WriteoutInstruction(Instruction::exprt, value),
+   m_type(type),
+   m_loc(loc),
+   m_is_last(false)
+{
+}
+
+
+bool ExportInstruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == exprt);
+   const auto& oth = static_cast<const ExportInstruction&>(lhs);
+
+   return (gpr() == oth.gpr()) &&
+          (m_type == oth.m_type) &&
+          (m_loc == oth.m_loc) &&
+          (m_is_last == oth.m_is_last);
+}
+
+void ExportInstruction::do_print(std::ostream& os) const
+{
+   os << (m_is_last ? "EXPORT_DONE ":"EXPORT ");
+   switch (m_type) {
+   case et_pixel: os << "PIXEL "; break;
+   case et_pos: os << "POS "; break;
+   case et_param: os << "PARAM "; break;
+   }
+   os << m_loc << " " << gpr();
+}
+
+void ExportInstruction::update_output_map(OutputRegisterMap& map) const
+{
+   map[m_loc] = gpr_ptr();
+}
+
+void ExportInstruction::set_last()
+{
+   m_is_last = true;
+}
+
+WriteScratchInstruction::WriteScratchInstruction(unsigned loc, const GPRVector& value,
+                                                 int align, int align_offset, int writemask):
+   WriteoutInstruction (Instruction::mem_wr_scratch, value),
+   m_loc(loc),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(0)
+{
+}
+
+WriteScratchInstruction::WriteScratchInstruction(const PValue& address, const GPRVector& value,
+                                                 int align, int align_offset, int writemask, int array_size):
+   WriteoutInstruction (Instruction::mem_wr_scratch, value),
+   m_loc(0),
+   m_address(address),
+   m_align(align),
+   m_align_offset(align_offset),
+   m_writemask(writemask),
+   m_array_size(array_size - 1)
+{
+   add_remappable_src_value(&m_address);
+}
+
+bool WriteScratchInstruction::is_equal_to(const Instruction& lhs) const
+{
+   if (lhs.type() != Instruction::mem_wr_scratch)
+      return false;
+   const auto& other = static_cast<const WriteScratchInstruction&>(lhs);
+
+   if (m_address) {
+      if (!other.m_address)
+         return false;
+      if (*m_address != *other.m_address)
+         return false;
+   } else {
+      if (other.m_address)
+         return false;
+   }
+
+   return gpr() == other.gpr() &&
+         m_loc == other.m_loc &&
+         m_align == other.m_align &&
+         m_align_offset == other.m_align_offset &&
+         m_writemask == other.m_writemask;
+}
+
+static char *writemask_to_swizzle(int writemask, char *buf)
+{
+   const char *swz = "xyzw";
+   for (int i = 0; i < 4; ++i) {
+      buf[i] = (writemask & (1 << i)) ? swz[i] : '_';
+   }
+   /* terminate the caller-provided 5-byte buffer so it can be
+    * streamed as a C string */
+   buf[4] = 0;
+   return buf;
+}
+
+void WriteScratchInstruction::do_print(std::ostream& os) const
+{
+   char buf[5];
+
+   os << "MEM_SCRATCH_WRITE ";
+   if (m_address)
+      os << "@" << *m_address << "+";
+
+   os << m_loc << "."
<< writemask_to_swizzle(m_writemask, buf)
+      << " " << gpr() << " AL:" << m_align << " ALO:" << m_align_offset;
+}
+
+void WriteScratchInstruction::replace_values_child(const ValueSet& candidates, PValue new_value)
+{
+   if (!m_address)
+      return;
+
+   for (auto c: candidates) {
+      if (*c == *m_address)
+         m_address = new_value;
+   }
+}
+
+void WriteScratchInstruction::remap_registers_child(std::vector<rename_reg_pair>& map,
+                                                    ValueMap& values)
+{
+   if (!m_address)
+      return;
+   sfn_log << SfnLog::merge << "Remap " << *m_address << " of type " << m_address->type() << "\n";
+   assert(m_address->type() == Value::gpr);
+   auto new_index = map[m_address->sel()];
+   if (new_index.valid)
+      m_address = values.get_or_inject(new_index.new_reg, m_address->chan());
+   map[m_address->sel()].used = true;
+}
+
+StreamOutIntruction::StreamOutIntruction(const GPRVector& value, int num_components,
+                                         int array_base, int comp_mask, int out_buffer,
+                                         int stream):
+   WriteoutInstruction(Instruction::streamout, value),
+   m_element_size(num_components == 3 ? 3 : num_components - 1),
+   m_burst_count(1),
+   m_array_base(array_base),
+   m_array_size(0xfff),
+   m_writemask(comp_mask),
+   m_output_buffer(out_buffer),
+   m_stream(stream)
+{
+}
+
+unsigned StreamOutIntruction::op() const
+{
+   int op = 0;
+   switch (m_output_buffer) {
+   case 0: op = CF_OP_MEM_STREAM0_BUF0; break;
+   case 1: op = CF_OP_MEM_STREAM0_BUF1; break;
+   case 2: op = CF_OP_MEM_STREAM0_BUF2; break;
+   case 3: op = CF_OP_MEM_STREAM0_BUF3; break;
+   }
+   return 4 * m_stream + op;
+}
+
+bool StreamOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == streamout);
+   const auto& oth = static_cast<const StreamOutIntruction&>(lhs);
+
+   return gpr() == oth.gpr() &&
+      m_element_size == oth.m_element_size &&
+      m_burst_count == oth.m_burst_count &&
+      m_array_base == oth.m_array_base &&
+      m_array_size == oth.m_array_size &&
+      m_writemask == oth.m_writemask &&
+      m_output_buffer == oth.m_output_buffer &&
+      m_stream == oth.m_stream;
+}
+
+void StreamOutIntruction::do_print(std::ostream& os) const
+{
+   os << "WRITE STREAM(" << m_stream << ") " << gpr()
+      << " ES:" << m_element_size
+      << " BC:" << m_burst_count
+      << " BUF:" << m_output_buffer
+      << " ARRAY:" << m_array_base;
+   if (m_array_size != 0xfff)
+      os << "+" << m_array_size;
+}
+
+MemRingOutIntruction::MemRingOutIntruction(ECFOpCode ring, EMemWriteType type,
+                                           const GPRVector& value,
+                                           unsigned base_addr, unsigned ncomp,
+                                           PValue index):
+   WriteoutInstruction(Instruction::ring, value),
+   m_ring_op(ring),
+   m_type(type),
+   m_base_address(base_addr),
+   m_num_comp(ncomp),
+   m_index(index)
+{
+   add_remappable_src_value(&m_index);
+
+   assert(m_ring_op == cf_mem_ring || m_ring_op == cf_mem_ring1 ||
+          m_ring_op == cf_mem_ring2 || m_ring_op == cf_mem_ring3);
+   assert(m_num_comp <= 4);
+}
+
+unsigned MemRingOutIntruction::ncomp() const
+{
+   switch (m_num_comp) {
+   case 1: return 0;
+   case 2: return 1;
+   case 3:
+   case 4: return 3;
+   default:
+      assert(0);
+   }
+   return 3;
+}
+
+bool MemRingOutIntruction::is_equal_to(const Instruction& lhs) const
+{
+   assert(lhs.type() == ring);
+   const auto& oth = static_cast<const MemRingOutIntruction&>(lhs);
+
+   bool equal = gpr() == oth.gpr() &&
+                m_ring_op == oth.m_ring_op &&
+                m_type == oth.m_type &&
+                m_num_comp == oth.m_num_comp &&
+                m_base_address == oth.m_base_address;
+
+   if (m_type == mem_write_ind || m_type == mem_write_ind_ack)
+      equal &= (*m_index == *oth.m_index);
+   return equal;
+}
+
+static const char *write_type_str[4] = {"WRITE", "WRITE_IDX", "WRITE_ACK",
"WRITE_IDX_ACK" }; +void MemRingOutIntruction::do_print(std::ostream& os) const +{ + os << "MEM_RING " << m_ring_op; + os << " " << write_type_str[m_type] << " " << m_base_address; + os << " " << gpr(); + if (m_type == mem_write_ind || m_type == mem_write_ind_ack) + os << " @" << *m_index; + os << " ES:" << m_num_comp; +} + + +void MemRingOutIntruction::replace_values_child(const ValueSet& candidates, + PValue new_value) +{ + if (!m_index) + return; + + for (auto c: candidates) { + if (*c == *m_index) + m_index = new_value; + } +} + +void MemRingOutIntruction::remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values) +{ + if (!m_index) + return; + + assert(m_index->type() == Value::gpr); + auto new_index = map[m_index->sel()]; + if (new_index.valid) + m_index = values.get_or_inject(new_index.new_reg, m_index->chan()); + map[m_index->sel()].used = true; +} + +void MemRingOutIntruction::patch_ring(int stream, PValue index) +{ + const ECFOpCode ring_op[4] = {cf_mem_ring, cf_mem_ring1, cf_mem_ring2, cf_mem_ring3}; + + assert(stream < 4); + m_ring_op = ring_op[stream]; + m_index = index; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h new file mode 100644 index 000000000..6d014082d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_export.h @@ -0,0 +1,185 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_EXPORTINSTRUCTION_H +#define SFN_EXPORTINSTRUCTION_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class WriteoutInstruction: public Instruction { +public: + void replace_values(const ValueSet& candidates, PValue new_value) override; + const GPRVector& gpr() const {return m_value;} + const GPRVector *gpr_ptr() const {return &m_value;} +protected: + WriteoutInstruction(instr_type t, const GPRVector& value); +private: + virtual void replace_values_child(const ValueSet& candidates, PValue new_value); + virtual void remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values); + + GPRVector m_value; +}; + +class ExportInstruction : public WriteoutInstruction { +public: + enum ExportType { + et_pixel, + et_pos, + et_param + }; + + ExportInstruction(unsigned loc, const GPRVector& value, ExportType type); + void set_last(); + + ExportType export_type() const {return m_type;} + + unsigned location() const {return m_loc;} + bool is_last_export() const {return m_is_last;} + + void update_output_map(OutputRegisterMap& map) const; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + ExportType m_type; + unsigned m_loc; + bool m_is_last; +}; + +class WriteScratchInstruction : public WriteoutInstruction { +public: + + WriteScratchInstruction(unsigned loc, const GPRVector& value, int align, + int align_offset, int writemask); + WriteScratchInstruction(const PValue& address, const GPRVector& value, + int align, int align_offset, int writemask, int array_size); + unsigned location() const {return m_loc;} + + int write_mask() const { return m_writemask;} + int address() const { assert(m_address); return m_address->sel();} + bool indirect() const { return !!m_address;} + int array_size() const { return m_array_size;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + void replace_values_child(const ValueSet& candidates, PValue new_value) override; + void remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values)override; + + unsigned m_loc; + PValue m_address; + unsigned m_align; + unsigned m_align_offset; + unsigned m_writemask; + int m_array_size; +}; + + +class StreamOutIntruction: public WriteoutInstruction { +public: + StreamOutIntruction(const GPRVector& value, int num_components, + int array_base, int comp_mask, int out_buffer, + int stream); + int element_size() const { return m_element_size;} + int burst_count() const { return m_burst_count;} + int array_base() const { return m_array_base;} + int array_size() const { return m_array_size;} + int comp_mask() const { return m_writemask;} + unsigned op() const; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + int m_element_size; + int m_burst_count; + int m_array_base; + int m_array_size; + int m_writemask; + int m_output_buffer; + int 
m_stream; +}; + +enum EMemWriteType { + mem_write = 0, + mem_write_ind = 1, + mem_write_ack = 2, + mem_write_ind_ack = 3, +}; + +class MemRingOutIntruction: public WriteoutInstruction { +public: + + MemRingOutIntruction(ECFOpCode ring, EMemWriteType type, + const GPRVector& value, unsigned base_addr, + unsigned ncomp, PValue m_index); + + unsigned op() const{return m_ring_op;} + unsigned ncomp() const; + unsigned addr() const {return m_base_address;} + EMemWriteType type() const {return m_type;} + unsigned index_reg() const {return m_index->sel();} + unsigned array_base() const {return m_base_address; } + void replace_values_child(const ValueSet& candidates, PValue new_value) override; + void remap_registers_child(std::vector<rename_reg_pair>& map, + ValueMap& values) override; + void patch_ring(int stream, PValue index); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + ECFOpCode m_ring_op; + EMemWriteType m_type; + unsigned m_base_address; + unsigned m_num_comp; + PValue m_index; + +}; + +} + + +#endif // SFN_EXPORTINSTRUCTION_H
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp new file mode 100644 index 000000000..ec1a48887 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.cpp @@ -0,0 +1,480 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_fetch.h" + +#include "gallium/drivers/r600/r600_pipe.h" + +namespace r600 { + +/* refactor this to add status create methods for specific tasks */ +FetchInstruction::FetchInstruction(EVFetchInstr op, + EVFetchType type, + GPRVector dst, + PValue src, int offset, + int buffer_id, PValue buffer_offset, + EBufferIndexMode cp_rel, + bool use_const_field): + Instruction(vtx), + m_vc_opcode(op), + m_fetch_type(type), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(offset), + m_is_mega_fetch(1), + m_mega_fetch_count(16), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(cp_rel), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(0), + m_buffer_offset(buffer_offset), + m_dest_swizzle({0,1,2,3}) +{ + if (use_const_field) { + m_flags.set(vtx_use_const_field); + m_data_format = fmt_invalid; + m_num_format = vtx_nf_norm; + } else { + m_flags.set(vtx_format_comp_signed); + m_data_format = fmt_32_32_32_32_float; + m_num_format = vtx_nf_scaled; + } + + add_remappable_src_value(&m_src); + add_remappable_src_value(&m_buffer_offset); + + add_remappable_dst_value(&m_dst); +} + +/* Resource query */ +FetchInstruction::FetchInstruction(EVFetchInstr vc_opcode, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + const PValue src, + const GPRVector dst, + uint32_t offset, + bool is_mega_fetch, + uint32_t mega_fetch_count, + uint32_t buffer_id, + uint32_t semantic_id, + + EBufferIndexMode buffer_index_mode, + bool uncached, + bool indexed, + int array_base, + int array_size, + int elm_size, + PValue buffer_offset, + const std::array<int, 4>& dest_swizzle): + Instruction(vtx), + m_vc_opcode(vc_opcode), + m_fetch_type(fetch_type), + m_data_format(data_format), + m_num_format(num_format), + m_endian_swap(endian_swap), + m_src(src), + m_dst(dst), + m_offset(offset), + 
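/* mega-fetch parameters are forwarded unchanged; their
+    * encoding is defined by the r600 ISA */
+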
m_is_mega_fetch(is_mega_fetch), + m_mega_fetch_count(mega_fetch_count), + m_buffer_id(buffer_id), + m_semantic_id(semantic_id), + m_buffer_index_mode(buffer_index_mode), + m_uncached(uncached), + m_indexed(indexed), + m_array_base(array_base), + m_array_size(array_size), + m_elm_size(elm_size), + m_buffer_offset(buffer_offset), + m_dest_swizzle(dest_swizzle) +{ + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + +FetchInstruction::FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, PValue buffer_offset, + EVTXDataFormat format, + EVFetchNumFormat num_format): + Instruction(vtx), + m_vc_opcode(vc_fetch), + m_fetch_type(no_index_offset), + m_data_format(format), + m_num_format(num_format), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(0), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(bim_none), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(1), + m_buffer_offset(buffer_offset), + m_dest_swizzle({0,1,2,3}) +{ + m_flags.set(vtx_format_comp_signed); + + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + + +/* Resource query */ +FetchInstruction::FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + EBufferIndexMode cp_rel): + Instruction(vtx), + m_vc_opcode(vc_get_buf_resinfo), + m_fetch_type(no_index_offset), + m_data_format(fmt_32_32_32_32), + m_num_format(vtx_nf_norm), + m_endian_swap(vtx_es_none), + m_src(src), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(16), + m_buffer_id(buffer_id), + m_semantic_id(0), + m_buffer_index_mode(cp_rel), + m_flags(0), + m_uncached(false), + m_indexed(false), + m_array_base(0), + m_array_size(0), + m_elm_size(0), + m_dest_swizzle({0,1,2,3}) +{ + m_flags.set(vtx_format_comp_signed); + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + +FetchInstruction::FetchInstruction(GPRVector dst, PValue src, int scratch_size): + Instruction(vtx), + m_vc_opcode(vc_read_scratch), + m_fetch_type(vertex_data), + m_data_format(fmt_32_32_32_32), + m_num_format(vtx_nf_int), + m_endian_swap(vtx_es_none), + m_dst(dst), + m_offset(0), + m_is_mega_fetch(0), + m_mega_fetch_count(16), + m_buffer_id(0), + m_semantic_id(0), + m_buffer_index_mode(bim_none), + m_flags(0), + m_uncached(true), + m_array_base(0), + m_array_size(0), + m_elm_size(3), + m_dest_swizzle({0,1,2,3}) +{ + if (src->type() == Value::literal) { + const auto& lv = static_cast<const LiteralValue&>(*src); + m_array_base = lv.value(); + m_indexed = false; + m_src.reset(new GPRValue(0,0)); + m_array_size = 0; + } else { + m_array_base = 0; + m_src = src; + m_indexed = true; + m_array_size = scratch_size - 1; + } + add_remappable_src_value(&m_src); + add_remappable_dst_value(&m_dst); + add_remappable_src_value(&m_buffer_offset); +} + +void FetchInstruction::replace_values(const ValueSet& candidates, PValue new_value) +{ + if (!m_src) + return; + for (auto c: candidates) { + for (int i = 0; i < 4; ++i) { + if (*c == *m_dst.reg_i(i)) + m_dst.set_reg_i(i, new_value); + } + if (*m_src == *c) + m_src = new_value; + } +} + + +bool FetchInstruction::is_equal_to(const Instruction& lhs) const +{ + auto& l = static_cast<const FetchInstruction&>(lhs); + if (m_src) { + if (!l.m_src) + return false; + if (*m_src != *l.m_src) + return 
false; + } else { + if (l.m_src) + return false; + } + + return m_vc_opcode == l.m_vc_opcode && + m_fetch_type == l.m_fetch_type && + m_data_format == l.m_data_format && + m_num_format == l.m_num_format && + m_endian_swap == l.m_endian_swap && + m_dst == l.m_dst && + m_offset == l.m_offset && + m_buffer_id == l.m_buffer_id && + m_semantic_id == l.m_semantic_id && + m_buffer_index_mode == l.m_buffer_index_mode && + m_flags == l.m_flags && + m_indexed == l.m_indexed && + m_uncached == l.m_uncached; +} + +void FetchInstruction::set_format(EVTXDataFormat fmt) +{ + m_data_format = fmt; +} + + +void FetchInstruction::set_dest_swizzle(const std::array<int,4>& swz) +{ + m_dest_swizzle = swz; +} + +void FetchInstruction::prelude_append(Instruction *instr) +{ + assert(instr); + m_prelude.push_back(PInstruction(instr)); +} + +const std::vector<PInstruction>& FetchInstruction::prelude() const +{ + return m_prelude; +} + +LoadFromScratch::LoadFromScratch(GPRVector dst, PValue src, int scratch_size): + FetchInstruction(dst, src, scratch_size) +{ +} + +FetchGDSOpResult::FetchGDSOpResult(const GPRVector dst, const PValue src): + FetchInstruction(vc_fetch, + no_index_offset, + fmt_32, + vtx_nf_int, + vtx_es_none, + src, + dst, + 0, + false, + 0xf, + R600_IMAGE_IMMED_RESOURCE_OFFSET, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + PValue(), + {0,7,7,7}) +{ + set_flag(vtx_srf_mode); + set_flag(vtx_vpm); +} + +FetchTCSIOParam::FetchTCSIOParam(GPRVector dst, PValue src, int offset): + FetchInstruction(vc_fetch, + no_index_offset, + fmt_32_32_32_32, + vtx_nf_scaled, + vtx_es_none, + src, + dst, + offset, + false, + 16, + R600_LDS_INFO_CONST_BUFFER, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + PValue(), + {0,1,2,3}) +{ + set_flag(vtx_srf_mode); + set_flag(vtx_format_comp_signed); +} + + +static const char *fmt_descr[64] = { + "INVALID", + "8", + "4_4", + "3_3_2", + "RESERVED_4", + "16", + "16F", + "8_8", + "5_6_5", + "6_5_5", + "1_5_5_5", + "4_4_4_4", + "5_5_5_1", + "32", + "32F", + "16_16", + "16_16F", + "8_24", + "8_24F", + "24_8", + "24_8F", + "10_11_11", + "10_11_11F", + "11_11_10", + "11_11_10F", + "2_10_10_10", + "8_8_8_8", + "10_10_10_2", + "X24_8_32F", + "32_32", + "32_32F", + "16_16_16_16", + "16_16_16_16F", + "RESERVED_33", + "32_32_32_32", + "32_32_32_32F", + "RESERVED_36", + "1", + "1_REVERSED", + "GB_GR", + "BG_RG", + "32_AS_8", + "32_AS_8_8", + "5_9_9_9_SHAREDEXP", + "8_8_8", + "16_16_16", + "16_16_16F", + "32_32_32", + "32_32_32F", + "BC1", + "BC2", + "BC3", + "BC4", + "BC5", + "APC0", + "APC1", + "APC2", + "APC3", + "APC4", + "APC5", + "APC6", + "APC7", + "CTX1", + "RESERVED_63" +}; + + +void FetchInstruction::do_print(std::ostream& os) const +{ + static const std::string num_format_char[] = {"norm", "int", "scaled"}; + static const std::string endian_swap_code[] = { + "noswap", "8in16", "8in32" + }; + static const char buffer_index_mode_char[] = "_01E"; + static const char *flag_string[] = {"WQM", "CF", "signed", "no_zero", + "nostride", "AC", "TC", "VPM"}; + switch (m_vc_opcode) { + case vc_fetch: + os << "Fetch " << m_dst; + break; + case vc_semantic: + os << "Fetch Semantic ID:" << m_semantic_id; + break; + case vc_get_buf_resinfo: + os << "Fetch BufResinfo:" << m_dst; + break; + case vc_read_scratch: + os << "MEM_READ_SCRATCH:" << m_dst; + break; + default: + os << "Fetch ERROR"; + return; + } + + os << ", " << *m_src; + + if (m_offset) + os << "+" << m_offset; + + os << " BUFID:" << m_buffer_id + << " FMT:(" << fmt_descr[m_data_format] + << " " << num_format_char[m_num_format] + 
<< " " << endian_swap_code[m_endian_swap] + << ")"; + if (m_buffer_index_mode > 0) + os << " IndexMode:" << buffer_index_mode_char[m_buffer_index_mode]; + + + if (m_is_mega_fetch) + os << " MFC:" << m_mega_fetch_count; + else + os << " mfc*:" << m_mega_fetch_count; + + if (m_flags.any()) { + os << " Flags:"; + for( int i = 0; i < vtx_unknown; ++i) { + if (m_flags.test(i)) + os << ' ' << flag_string[i]; + } + } +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h new file mode 100644 index 000000000..71a3f69f3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_fetch.h @@ -0,0 +1,187 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_INSTRUCTION_FETCH_H +#define SFN_INSTRUCTION_FETCH_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class FetchInstruction : public Instruction { +public: + + FetchInstruction(EVFetchInstr vc_opcode, + EVFetchType fetch_type, + EVTXDataFormat data_format, + EVFetchNumFormat num_format, + EVFetchEndianSwap endian_swap, + const PValue src, + const GPRVector dst, + uint32_t offset, + bool is_mega_fetch, + uint32_t mega_fetch_count, + uint32_t buffer_id, + uint32_t semantic_id, + + EBufferIndexMode buffer_index_mode, + bool uncached, + bool indexed, + int array_base, + int array_size, + int elm_size, + PValue buffer_offset, + const std::array<int, 4>& dest_swizzle); + + FetchInstruction(EVFetchInstr op, + EVFetchType type, + GPRVector dst, + PValue src, int offset, + int buffer_id, PValue buffer_offset, + EBufferIndexMode cp_rel, + bool use_const_field = false); + + FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + PValue buffer_offset, + EVTXDataFormat format, + EVFetchNumFormat num_format); + + FetchInstruction(GPRVector dst, + PValue src, + int buffer_id, + EBufferIndexMode cp_rel); + + FetchInstruction(GPRVector dst, PValue src, int scratch_size); + + void replace_values(const ValueSet& candidates, PValue new_value) override; + EVFetchInstr vc_opcode() const { return m_vc_opcode;} + EVFetchType fetch_type() const { return m_fetch_type;} + + EVTXDataFormat data_format() const { return m_data_format;} + EVFetchNumFormat num_format() const { return m_num_format;} + EVFetchEndianSwap endian_swap() const { return m_endian_swap;} + + const Value& src() const { return *m_src;} + const GPRVector& dst() const { return m_dst;} + uint32_t offset() const { return m_offset;} + + bool is_mega_fetchconst() { return m_is_mega_fetch;} + uint32_t mega_fetch_count() const { return m_mega_fetch_count;} + + uint32_t buffer_id() const { return m_buffer_id;} + uint32_t semantic_id() const { return m_semantic_id;} + EBufferIndexMode buffer_index_mode() const{ return m_buffer_index_mode;} + + bool is_signed() const { return m_flags.test(vtx_format_comp_signed);} + bool use_const_fields() const { return m_flags.test(vtx_use_const_field);} + + bool srf_mode_no_zero() const { return m_flags.test(vtx_srf_mode);} + + void set_flag(EVFetchFlagShift flag) {m_flags.set(flag);} + + bool uncached() const {return m_uncached; } + bool indexed() const {return m_indexed; } + int array_base()const {return m_array_base; } + int array_size() const {return m_array_size; } + int elm_size() const {return m_elm_size; } + + void set_buffer_offset(PValue buffer_offset) { + m_buffer_offset = buffer_offset; + add_remappable_src_value(&m_buffer_offset); + } + PValue buffer_offset() const { return m_buffer_offset; } + + void set_dest_swizzle(const std::array<int,4>& swz); + void set_format(EVTXDataFormat fmt); + + int swz(int idx) const { return m_dest_swizzle[idx];} + + bool use_tc() const {return m_flags.test(vtx_use_tc);} + + bool use_vpm() const {return m_flags.test(vtx_vpm);} + + void prelude_append(Instruction *instr); + + const std::vector<PInstruction>& prelude() const; + + bool has_prelude() const {return !m_prelude.empty();} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + EVFetchInstr m_vc_opcode; + EVFetchType m_fetch_type; + + 
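/* layout of the fetched result: hardware data format, numeric
+    * conversion and endian swap */
+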
EVTXDataFormat m_data_format; + EVFetchNumFormat m_num_format; + EVFetchEndianSwap m_endian_swap; + + PValue m_src; + GPRVector m_dst; + uint32_t m_offset; + + bool m_is_mega_fetch; + uint32_t m_mega_fetch_count; + + uint32_t m_buffer_id; + uint32_t m_semantic_id; + + EBufferIndexMode m_buffer_index_mode; + std::bitset<16> m_flags; + bool m_uncached; + bool m_indexed; + int m_array_base; + int m_array_size; + int m_elm_size; + PValue m_buffer_offset; + std::array<int, 4> m_dest_swizzle; + std::vector<PInstruction> m_prelude; +}; + +class LoadFromScratch: public FetchInstruction { +public: + LoadFromScratch(GPRVector dst, PValue src, int scratch_size); +}; + +class FetchGDSOpResult : public FetchInstruction { +public: + FetchGDSOpResult(const GPRVector dst, const PValue src); +}; + +class FetchTCSIOParam : public FetchInstruction { +public: + FetchTCSIOParam(GPRVector dst, PValue src, int offset); +}; + +} + +#endif // SFN_INSTRUCTION_FETCH_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp new file mode 100644 index 000000000..095cd40d6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.cpp @@ -0,0 +1,180 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_instruction_gds.h" +#include "sfn_liverange.h" + +namespace r600 { + +GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, + const PValue& value2, const PValue& uav_id, int uav_base): + Instruction(gds), + m_op(op), + m_src(value), + m_src2(value2), + m_dest(dest), + m_dest_swizzle({PIPE_SWIZZLE_X,7,7,7}), + m_src_swizzle({PIPE_SWIZZLE_0, PIPE_SWIZZLE_X, PIPE_SWIZZLE_0}), + m_buffer_index_mode(bim_none), + m_uav_id(uav_id), + m_uav_base(uav_base), + m_flags(0) +{ + add_remappable_src_value(&m_src); + add_remappable_src_value(&m_src2); + add_remappable_src_value(&m_uav_id); + add_remappable_dst_value(&m_dest); + m_dest_swizzle[0] = m_dest.chan_i(0); +} + +GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value, + const PValue& uav_id, int uav_base): + GDSInstr(op, dest, value, PValue(), uav_id, uav_base) +{ + assert(value); + m_src_swizzle[1] = value->chan(); + m_src_swizzle[2] = PIPE_SWIZZLE_0; +} + +GDSInstr::GDSInstr(ESDOp op, const GPRVector& dest, + const PValue& uav_id, int uav_base): + GDSInstr(op, dest, PValue(), PValue(), uav_id, uav_base) +{ + m_src_swizzle[1] = PIPE_SWIZZLE_0; +} + +bool GDSInstr::is_equal_to(UNUSED const Instruction& lhs) const +{ + return false; +} + +void GDSInstr::do_print(std::ostream& os) const +{ + const char *swz = "xyzw01?_"; + os << lds_ops.at(m_op).name << " R" << m_dest.sel() << "."; + for (int i = 0; i < 4; ++i) { + os << swz[m_dest_swizzle[i]]; + } + if (m_src) + os << " " << *m_src; + + os << " UAV:" << *m_uav_id; +} + +RatInstruction::RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op, + const GPRVector& data, const GPRVector& index, + int rat_id, const PValue& rat_id_offset, + int burst_count, int comp_mask, int element_size, bool ack): + Instruction(rat), + m_cf_opcode(cf_opcode), + m_rat_op(rat_op), + m_data(data), + m_index(index), + m_rat_id(rat_id), + m_rat_id_offset(rat_id_offset), + m_burst_count(burst_count), + m_comp_mask(comp_mask), + m_element_size(element_size), + m_need_ack(ack) +{ + add_remappable_src_value(&m_data); + add_remappable_src_value(&m_rat_id_offset); + add_remappable_src_value(&m_index); +} + +bool RatInstruction::is_equal_to(UNUSED const Instruction& lhs) const +{ + return false; +} + +void RatInstruction::do_print(std::ostream& os) const +{ + os << "MEM_RAT RAT(" << m_rat_id; + if (m_rat_id_offset) + os << "+" << *m_rat_id_offset; + os << ") @" << m_index; + os << " OP:" << m_rat_op << " " << m_data; + os << " BC:" << m_burst_count + << " MASK:" << m_comp_mask + << " ES:" << m_element_size; + if (m_need_ack) + os << " ACK"; +} + +RatInstruction::ERatOp RatInstruction::opcode(nir_intrinsic_op opcode) +{ + switch (opcode) { + case nir_intrinsic_ssbo_atomic_add: + return ADD_RTN; + case nir_intrinsic_ssbo_atomic_and: + return AND_RTN; + case nir_intrinsic_ssbo_atomic_exchange: + return XCHG_RTN; + case nir_intrinsic_ssbo_atomic_umax: + return MAX_UINT_RTN; + case nir_intrinsic_ssbo_atomic_umin: + return MIN_UINT_RTN; + case nir_intrinsic_ssbo_atomic_imax: + return MAX_INT_RTN; + case nir_intrinsic_ssbo_atomic_imin: + return MIN_INT_RTN; + case nir_intrinsic_ssbo_atomic_xor: + return XOR_RTN; + default: + return UNSUPPORTED; + } +} + +GDSStoreTessFactor::GDSStoreTessFactor(GPRVector& value): + Instruction(tf_write), + m_value(value) +{ + add_remappable_src_value(&m_value); +} + +void GDSStoreTessFactor::replace_values(const ValueSet& candidates, PValue new_value) +{ + for (auto& c: candidates) { + for (int i = 0; i < 4; ++i) { + if (*c == *m_value[i]) + m_value[i] = 
new_value; + } + } +} + + +bool GDSStoreTessFactor::is_equal_to(const Instruction& lhs) const +{ + auto& other = static_cast<const GDSStoreTessFactor&>(lhs); + return m_value == other.m_value; +} + +void GDSStoreTessFactor::do_print(std::ostream& os) const +{ + os << "TF_WRITE " << m_value; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h new file mode 100644 index 000000000..6f8e0f200 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h @@ -0,0 +1,225 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef SFN_GDSINSTR_H
+#define SFN_GDSINSTR_H
+
+#include "sfn_instruction_base.h"
+
+#include <bitset>
+
+namespace r600 {
+
+class GDSInstr : public Instruction
+{
+public:
+   GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+            const PValue &uav_id, int uav_base);
+   GDSInstr(ESDOp op, const GPRVector& dest, const PValue& value,
+            const PValue& value2, const PValue &uav_id, int uav_base);
+   GDSInstr(ESDOp op, const GPRVector& dest, const PValue &uav_id, int uav_base);
+
+   ESDOp op() const {return m_op;}
+
+   int src_sel() const {
+      if (!m_src)
+         return 0;
+
+      assert(m_src->type() == Value::gpr);
+      return m_src->sel();
+   }
+
+   int src2_chan() const {
+      if (!m_src2)
+         return 0;
+
+      assert(m_src2->type() == Value::gpr);
+      return m_src2->chan();
+   }
+
+   int src_swizzle(int idx) const {assert(idx < 3); return m_src_swizzle[idx];}
+
+   int dest_sel() const {
+      return m_dest.sel();
+   }
+
+   int dest_swizzle(int i) const {
+      if (i < 4)
+         return m_dest_swizzle[i];
+      return 7;
+   }
+
+   void set_dest_swizzle(const std::array<int,4>& swz) {
+      m_dest_swizzle = swz;
+   }
+
+   PValue uav_id() const {return m_uav_id;}
+   int uav_base() const {return m_uav_base;}
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+
+   bool is_equal_to(const Instruction& lhs) const override;
+   void do_print(std::ostream& os) const override;
+
+   ESDOp m_op;
+
+   PValue m_src;
+   PValue m_src2;
+   GPRVector m_dest;
+   std::array <int, 4> m_dest_swizzle;
+   std::array <int, 3> m_src_swizzle;
+
+   EBufferIndexMode m_buffer_index_mode;
+   PValue m_uav_id;
+   int m_uav_base;
+   std::bitset<8> m_flags;
+
+};
+
+class RatInstruction : public Instruction {
+
+public:
+   enum ERatOp {
+      NOP,
+      STORE_TYPED,
+      STORE_RAW,
+      STORE_RAW_FDENORM,
+      CMPXCHG_INT,
+      CMPXCHG_FLT,
+      CMPXCHG_FDENORM,
+      ADD,
+      SUB,
+      RSUB,
+      MIN_INT,
+      MIN_UINT,
+      MAX_INT,
+      MAX_UINT,
+      AND,
+      OR,
+      XOR,
+      MSKOR,
+      INC_UINT,
+      DEC_UINT,
+      NOP_RTN = 32,
+      XCHG_RTN = 34,
+      XCHG_FDENORM_RTN,
+      CMPXCHG_INT_RTN,
+      CMPXCHG_FLT_RTN,
+      CMPXCHG_FDENORM_RTN,
+      ADD_RTN,
+      SUB_RTN,
+      RSUB_RTN,
+      MIN_INT_RTN,
+      MIN_UINT_RTN,
+      MAX_INT_RTN,
+      MAX_UINT_RTN,
+      AND_RTN,
+      OR_RTN,
+      XOR_RTN,
+      MSKOR_RTN,
+      UINT_RTN,
+      UNSUPPORTED
+   };
+
+   RatInstruction(ECFOpCode cf_opcode, ERatOp rat_op,
+                  const GPRVector& data, const GPRVector& index,
+                  int rat_id, const PValue& rat_id_offset,
+                  int burst_count, int comp_mask, int element_size,
+                  bool ack);
+
+   PValue rat_id_offset() const { return m_rat_id_offset;}
+   int rat_id() const { return m_rat_id;}
+
+   ERatOp rat_op() const {return m_rat_op;}
+
+   int data_gpr() const {return m_data.sel();}
+   int index_gpr() const {return m_index.sel();}
+   int elm_size() const {return m_element_size;}
+
+   int comp_mask() const {return m_comp_mask;}
+
+   bool need_ack() const {return m_need_ack;}
+   int burst_count() const {return m_burst_count;}
+
+   static ERatOp opcode(nir_intrinsic_op opcode);
+
+   int data_swz(int chan) const {return m_data.chan_i(chan);}
+
+   ECFOpCode cf_opcode() const { return m_cf_opcode;}
+
+   void set_ack() {m_need_ack = true; }
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+
+private:
+
+   bool is_equal_to(const Instruction& lhs) const override;
+   void do_print(std::ostream& os) const override;
+
+   ECFOpCode m_cf_opcode;
+   ERatOp m_rat_op;
+
+   GPRVector m_data;
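+   /* index operand: GPR vector holding the address/element index the
+    * RAT operation targets */
+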
+   GPRVector m_index;
+
+   int m_rat_id;
+   PValue m_rat_id_offset;
+   int m_burst_count;
+   int m_comp_mask;
+   int m_element_size;
+
+   std::bitset<8> m_flags;
+
+   bool m_need_ack;
+
+};
+
+class GDSStoreTessFactor : public Instruction {
+public:
+   GDSStoreTessFactor(GPRVector& value);
+   int sel() const {return m_value.sel();}
+   int chan(int i) const {return m_value.chan_i(i);}
+
+   void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+   bool is_equal_to(const Instruction& lhs) const override;
+   void do_print(std::ostream& os) const override;
+
+   GPRVector m_value;
+};
+
+}
+
+#endif // SFN_GDSINSTR_H
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
new file mode 100644
index 000000000..b77461abc
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.cpp
@@ -0,0 +1,151 @@
+#include "sfn_instruction_lds.h"
+
+namespace r600 {
+
+void LDSReadInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS Read [";
+   for (auto& v : m_dest_value)
+      os << *v << " ";
+   os << "], ";
+   for (auto& a : m_address)
+      os << *a << " ";
+}
+
+LDSReadInstruction::LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value):
+   Instruction(lds_read),
+   m_address(address),
+   m_dest_value(value)
+{
+   assert(address.size() == value.size());
+
+   for (unsigned i = 0; i < address.size(); ++i) {
+      add_remappable_src_value(&m_address[i]);
+      add_remappable_dst_value(&m_dest_value[i]);
+   }
+}
+
+void LDSReadInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+   for (auto& c : candidates) {
+      for (auto& d: m_dest_value) {
+         if (*c == *d)
+            d = new_value;
+      }
+
+      for (auto& a: m_address) {
+         if (*c == *a)
+            a = new_value;
+      }
+   }
+}
+
+bool LDSReadInstruction::is_equal_to(const Instruction& lhs) const
+{
+   auto& other = static_cast<const LDSReadInstruction&>(lhs);
+   return m_address == other.m_address &&
+          m_dest_value == other.m_dest_value;
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op):
+   Instruction(lds_atomic),
+   m_address(address),
+   m_dest_value(dest),
+   m_src0_value(src0),
+   m_src1_value(src1),
+   m_opcode(op)
+{
+   add_remappable_src_value(&m_src0_value);
+   add_remappable_src_value(&m_src1_value);
+   add_remappable_src_value(&m_address);
+   add_remappable_dst_value(&m_dest_value);
+}
+
+LDSAtomicInstruction::LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op):
+   LDSAtomicInstruction(dest, src0, PValue(), address, op)
+{
+}
+
+void LDSAtomicInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS " << m_opcode << " " << *m_dest_value << " ";
+   os << "[" << *m_address << "] " << *m_src0_value;
+   if (m_src1_value)
+      os << ", " << *m_src1_value;
+}
+
+bool LDSAtomicInstruction::is_equal_to(const Instruction& lhs) const
+{
+   auto& other = static_cast<const LDSAtomicInstruction&>(lhs);
+
+   return m_opcode == other.m_opcode &&
+          *m_dest_value == *other.m_dest_value &&
+          *m_src0_value == *other.m_src0_value &&
+          *m_address == *other.m_address &&
+          ((m_src1_value && other.m_src1_value && (*m_src1_value == *other.m_src1_value)) ||
+           (!m_src1_value && !other.m_src1_value));
+}
+
+LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0):
+   LDSWriteInstruction(address, idx_offset, value0, PValue())
+{
+}
+
+LDSWriteInstruction::LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1):
+   Instruction(lds_write),
+   m_address(address),
+   m_value0(value0),
+   m_value1(value1),
+   m_idx_offset(idx_offset)
+{
+   add_remappable_src_value(&m_address);
+   add_remappable_src_value(&m_value0);
+   if (m_value1)
+      add_remappable_src_value(&m_value1);
+}
+
+void LDSWriteInstruction::do_print(std::ostream& os) const
+{
+   os << "LDS Write" << num_components()
+      << " " << address() << ", " << value0();
+   if (num_components() > 1)
+      os << ", " << value1();
+}
+
+void LDSWriteInstruction::replace_values(const ValueSet& candidates, PValue new_value)
+{
+   for (auto c: candidates) {
+      if (*c == *m_address)
+         m_address = new_value;
+
+      if (*c == *m_value0)
+         m_value0 = new_value;
+
+      if (*c == *m_value1)
+         m_value1 = new_value;
+   }
+}
+
+bool LDSWriteInstruction::is_equal_to(const Instruction& lhs) const
+{
+   auto& other = static_cast<const LDSWriteInstruction&>(lhs);
+
+   if (m_value1) {
+      if (!other.m_value1)
+         return false;
+      if (*m_value1 != *other.m_value1)
+         return false;
+   } else {
+      if (other.m_value1)
+         return false;
+   }
+
+   return (*m_value0 == *other.m_value0 &&
+           *m_address == *other.m_address);
+}
+
+} // namespace r600
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
new file mode 100644
index 000000000..96439a7c3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_lds.h
@@ -0,0 +1,82 @@
+#ifndef LDSINSTRUCTION_H
+#define LDSINSTRUCTION_H
+
+#include "sfn_instruction_base.h"
+
+namespace r600 {
+
+class LDSReadInstruction : public Instruction {
+public:
+   LDSReadInstruction(std::vector<PValue>& address, std::vector<PValue>& value);
+   void replace_values(const ValueSet& candidates, PValue new_value) override;
+
+   unsigned num_values() const { return m_dest_value.size();}
+   const Value& address(unsigned i) const { return *m_address[i];}
+   const Value& dest(unsigned i) const { return *m_dest_value[i];}
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+   void do_print(std::ostream& os) const override;
+   bool is_equal_to(const Instruction& lhs) const override;
+
+   std::vector<PValue> m_address;
+   std::vector<PValue> m_dest_value;
+};
+
+class LDSAtomicInstruction : public Instruction {
+public:
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue src1, PValue& address, unsigned op);
+   LDSAtomicInstruction(PValue& dest, PValue& src0, PValue& address, unsigned op);
+
+   const Value& address() const { return *m_address;}
+   const Value& dest() const { return *m_dest_value;}
+   const Value& src0() const { return *m_src0_value;}
+   const PValue& src1() const { return m_src1_value;}
+   unsigned op() const {return m_opcode;}
+
+   bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);}
+   bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);}
+
+private:
+   void do_print(std::ostream& os) const override;
+   bool is_equal_to(const Instruction& lhs) const override;
+
+   PValue m_address;
+   PValue m_dest_value;
+   PValue m_src0_value;
+   PValue m_src1_value;
+   unsigned m_opcode;
+};
+
+class LDSWriteInstruction : public Instruction {
+public:
+   LDSWriteInstruction(PValue address, unsigned 
idx_offset, PValue value0); + LDSWriteInstruction(PValue address, unsigned idx_offset, PValue value0, PValue value1); + + const Value& address() const {return *m_address;}; + const Value& value0() const { return *m_value0;} + const Value& value1() const { return *m_value1;} + unsigned num_components() const { return m_value1 ? 2 : 1;} + unsigned idx_offset() const {return m_idx_offset;}; + + void replace_values(const ValueSet& candidates, PValue new_value) override; + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Instruction& lhs) const override; + + PValue m_address; + PValue m_value0; + PValue m_value1; + unsigned m_idx_offset; + +}; + +} + +#endif // LDSINSTRUCTION_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp new file mode 100644 index 000000000..1c1a98c40 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.cpp @@ -0,0 +1,68 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_misc.h" + +namespace r600 { +EmitVertex::EmitVertex(int stream, bool cut): + Instruction (emit_vtx), + m_stream(stream), + m_cut(cut) +{ + +} + +bool EmitVertex::is_equal_to(const Instruction& lhs) const +{ + auto& oth = static_cast<const EmitVertex&>(lhs); + return oth.m_stream == m_stream && + oth.m_cut == m_cut; +} + +void EmitVertex::do_print(std::ostream& os) const +{ + os << (m_cut ? 
"EMIT_CUT_VERTEX @" : "EMIT_VERTEX @") << m_stream; +} + +WaitAck::WaitAck(int nack): + Instruction (wait_ack), + m_nack(nack) +{ + +} + +bool WaitAck::is_equal_to(const Instruction& lhs) const +{ + const auto& l = static_cast<const WaitAck&>(lhs); + return m_nack == l.m_nack; +} + +void WaitAck::do_print(std::ostream& os) const +{ + os << "WAIT_ACK @" << m_nack; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h new file mode 100644 index 000000000..d322b4aa8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_misc.h @@ -0,0 +1,69 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_INSTRUCTION_MISC_H +#define SFN_INSTRUCTION_MISC_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class EmitVertex : public Instruction { +public: + EmitVertex(int stream, bool cut); + ECFOpCode op() const {return m_cut ? 
cf_cut_vertex: cf_emit_vertex;} + int stream() const { return m_stream;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + int m_stream; + bool m_cut; +}; + +class WaitAck : public Instruction { +public: + WaitAck(int nack); + ECFOpCode op() const {return cf_wait_ack;} + int n_ack() const {return m_nack;} + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + int m_nack; +}; + +} + +#endif // SFN_INSTRUCTION_MISC_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp new file mode 100644 index 000000000..8fc5469f3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.cpp @@ -0,0 +1,414 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_instruction_tex.h" +#include "nir_builder.h" +#include "nir_builtin_builder.h" + +namespace r600 { + +TexInstruction::TexInstruction(Opcode op, const GPRVector &dest, const GPRVector &src, + unsigned sid, unsigned rid, PValue sampler_offset): + Instruction(tex), + m_opcode(op), + m_dst(dest), + m_src(src), + m_sampler_id(sid), + m_resource_id(rid), + m_flags(0), + m_inst_mode(0), + m_dest_swizzle{0,1,2,3}, + m_sampler_offset(sampler_offset) + +{ + memset(m_offset, 0, sizeof (m_offset)); + + add_remappable_src_value(&m_src); + add_remappable_src_value(&m_sampler_offset); + add_remappable_dst_value(&m_dst); +} + +void TexInstruction::set_gather_comp(int cmp) +{ + m_inst_mode = cmp; +} + +void TexInstruction::replace_values(const ValueSet& candidates, PValue new_value) +{ + // I wonder whether we can actually end up here ... 
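+   // (The substitution below works per channel: each candidate is compared
+   // against the value occupying its channel slot in the source and
+   // destination vectors, i.e. a candidate with chan() == 1 is tested
+   // against reg_i(1) of m_src and m_dst.)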
+ for (auto c: candidates) { + if (*c == *m_src.reg_i(c->chan())) + m_src.set_reg_i(c->chan(), new_value); + if (*c == *m_dst.reg_i(c->chan())) + m_dst.set_reg_i(c->chan(), new_value); + } +} + +void TexInstruction::set_offset(unsigned index, int32_t val) +{ + assert(index < 3); + m_offset[index] = val; +} + +int TexInstruction::get_offset(unsigned index) const +{ + assert(index < 3); + return (m_offset[index] << 1 & 0x1f); +} + +bool TexInstruction::is_equal_to(const Instruction& rhs) const +{ + assert(rhs.type() == tex); + const auto& r = static_cast<const TexInstruction&>(rhs); + return (m_opcode == r.m_opcode && + m_dst == r.m_dst && + m_src == r.m_src && + m_sampler_id == r.m_sampler_id && + m_resource_id == r.m_resource_id); +} + +void TexInstruction::do_print(std::ostream& os) const +{ + const char *map_swz = "xyzw01?_"; + os << opname(m_opcode) << " R" << m_dst.sel() << "."; + for (int i = 0; i < 4; ++i) + os << map_swz[m_dest_swizzle[i]]; + + os << " " << m_src + << " RESID:" << m_resource_id << " SAMPLER:" + << m_sampler_id; +} + +const char *TexInstruction::opname(Opcode op) +{ + switch (op) { + case ld: return "LD"; + case get_resinfo: return "GET_TEXTURE_RESINFO"; + case get_nsampled: return "GET_NUMBER_OF_SAMPLES"; + case get_tex_lod: return "GET_LOD"; + case get_gradient_h: return "GET_GRADIENTS_H"; + case get_gradient_v: return "GET_GRADIENTS_V"; + case set_offsets: return "SET_TEXTURE_OFFSETS"; + case keep_gradients: return "KEEP_GRADIENTS"; + case set_gradient_h: return "SET_GRADIENTS_H"; + case set_gradient_v: return "SET_GRADIENTS_V"; + case sample: return "SAMPLE"; + case sample_l: return "SAMPLE_L"; + case sample_lb: return "SAMPLE_LB"; + case sample_lz: return "SAMPLE_LZ"; + case sample_g: return "SAMPLE_G"; + case sample_g_lb: return "SAMPLE_G_L"; + case gather4: return "GATHER4"; + case gather4_o: return "GATHER4_O"; + case sample_c: return "SAMPLE_C"; + case sample_c_l: return "SAMPLE_C_L"; + case sample_c_lb: return "SAMPLE_C_LB"; + case sample_c_lz: return "SAMPLE_C_LZ"; + case sample_c_g: return "SAMPLE_C_G"; + case sample_c_g_lb: return "SAMPLE_C_G_L"; + case gather4_c: return "GATHER4_C"; + case gather4_c_o: return "OP_GATHER4_C_O"; + } + return "ERROR"; +} + + + +static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_before_instr(&tex->instr); + + nir_ssa_def * size = nir_i2f32(b, nir_get_texture_size(b, tex)); + nir_ssa_def *scale = nir_frcp(b, size); + + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *corr = nullptr; + if (unlikely(tex->array_is_lowered_cube)) { + auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3), + nir_fmul(b, nir_imm_float(b, -0.5f), scale)); + corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1), + nir_channel( + b, tex->src[coord_index].src.ssa, 2)); + } else { + corr = nir_fadd(b, + nir_fmul(b, nir_imm_float(b, -0.5f), scale), + tex->src[coord_index].src.ssa); + } + + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, + nir_src_for_ssa(corr)); + return true; +} + +static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_before_instr(&tex->instr); + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *corr = nullptr; + if (unlikely(tex->array_is_lowered_cube)) { + auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3), + nir_imm_float(b, -0.5f)); + corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 
1), + nir_channel(b, tex->src[coord_index].src.ssa, 2)); + } else { + corr = nir_fadd(b, tex->src[coord_index].src.ssa, + nir_imm_float(b, -0.5f)); + } + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, + nir_src_for_ssa(corr)); + return true; +} + +static bool +r600_nir_lower_int_tg4_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->op == nir_texop_tg4 && + tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) { + if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) { + if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT) + lower_coord_shift_normalized(&b, tex); + else + lower_coord_shift_unnormalized(&b, tex); + progress = true; + } + } + } + } + } + return progress; +} + +/* + * This lowering pass works around a bug in r600 when doing TG4 from + * integral valued samplers. + + * Gather4 should follow the same rules as bilinear filtering, but the hardware + * incorrectly forces nearest filtering if the texture format is integer. + * The only effect it has on Gather4, which always returns 4 texels for + * bilinear filtering, is that the final coordinates are off by 0.5 of + * the texel size. +*/ + +bool r600_nir_lower_int_tg4(nir_shader *shader) +{ + bool progress = false; + bool need_lowering = false; + + nir_foreach_uniform_variable(var, shader) { + if (var->type->is_sampler()) { + if (glsl_base_type_is_integer(var->type->sampled_type)) { + need_lowering = true; + } + } + } + + if (need_lowering) { + nir_foreach_function(function, shader) { + if (function->impl && r600_nir_lower_int_tg4_impl(function->impl)) + progress = true; + } + } + + return progress; +} + +static +bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); + assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); + + b->cursor = nir_before_instr(&tex->instr); + + int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); + int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); + int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); + assert (lod_idx >= 0 || bias_idx >= 0); + + nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); + nir_ssa_def *lod = (lod_idx >= 0) ? + nir_ssa_for_src(b, tex->src[lod_idx].src, 1) : + nir_get_texture_lod(b, tex); + + if (bias_idx >= 0) + lod = nir_fadd(b, lod,nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); + + if (min_lod_idx >= 0) + lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); + + /* max lod? 
*/ + + nir_ssa_def *lambda_exp = nir_fexp2(b, lod); + nir_ssa_def *scale = NULL; + + if (tex->is_array) { + int cmp_mask = (1 << (size->num_components - 1)) - 1; + scale = nir_frcp(b, nir_channels(b, size, + (nir_component_mask_t)cmp_mask)); + } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0,0,0,0}; + scale = nir_frcp(b, nir_channels(b, size, 1)); + scale = nir_swizzle(b, scale, swizzle, 3); + } + + nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale); + + if (lod_idx >= 0) + nir_tex_instr_remove_src(tex, lod_idx); + if (bias_idx >= 0) + nir_tex_instr_remove_src(tex, bias_idx); + if (min_lod_idx >= 0) + nir_tex_instr_remove_src(tex, min_lod_idx); + nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad)); + nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad)); + + tex->op = nir_texop_txd; + return true; +} + + +static bool +r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + if (tex->is_shadow && + (tex->op == nir_texop_txl || tex->op == nir_texop_txb) && + (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)) + progress |= lower_txl_txf_array_or_cube(&b, tex); + } + } + } + return progress; +} + +bool +r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader) +{ + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl)) + progress = true; + } + return progress; +} + +static bool +r600_nir_lower_cube_to_2darray_filer(const nir_instr *instr, const void *_options) +{ + if (instr->type != nir_instr_type_tex) + return false; + + auto tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + return false; + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txf: + case nir_texop_txl: + case nir_texop_lod: + case nir_texop_tg4: + case nir_texop_txd: + return true; + default: + return false; + } +} + +static nir_ssa_def * +r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + b->cursor = nir_before_instr(instr); + + auto tex = nir_instr_as_tex(instr); + int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord); + assert(coord_idx >= 0); + + auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7)); + auto xy = nir_fmad(b, + nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)), + nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))), + nir_imm_float(b, 1.5)); + + nir_ssa_def *z = nir_channel(b, cubed, 3); + if (tex->is_array) { + auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3)); + z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0), + z); + } + + if (tex->op == nir_texop_txd) { + int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx); + auto zero_dot_5 = nir_imm_float(b, 0.5); + nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src, + nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3), zero_dot_5))); + + int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy); + nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src, + nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3), zero_dot_5))); + } + + auto new_coord = 
nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z); + nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src, + nir_src_for_ssa(new_coord)); + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + tex->is_array = true; + tex->array_is_lowered_cube = true; + + tex->coord_components = 3; + + return NIR_LOWER_INSTR_PROGRESS; +} + +bool +r600_nir_lower_cube_to_2darray(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_nir_lower_cube_to_2darray_filer, + r600_nir_lower_cube_to_2darray_impl, nullptr); +} + + + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h new file mode 100644 index 000000000..2fe7cbad7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_instruction_tex.h @@ -0,0 +1,143 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef INSTRUCTION_TEX_H +#define INSTRUCTION_TEX_H + +#include "sfn_instruction_base.h" + +namespace r600 { + +class TexInstruction : public Instruction { +public: + enum Opcode { + ld = FETCH_OP_LD, + get_resinfo = FETCH_OP_GET_TEXTURE_RESINFO, + get_nsampled = FETCH_OP_GET_NUMBER_OF_SAMPLES, + get_tex_lod = FETCH_OP_GET_LOD, + get_gradient_h = FETCH_OP_GET_GRADIENTS_H, + get_gradient_v = FETCH_OP_GET_GRADIENTS_V, + set_offsets = FETCH_OP_SET_TEXTURE_OFFSETS, + keep_gradients = FETCH_OP_KEEP_GRADIENTS, + set_gradient_h = FETCH_OP_SET_GRADIENTS_H, + set_gradient_v = FETCH_OP_SET_GRADIENTS_V, + sample = FETCH_OP_SAMPLE, + sample_l = FETCH_OP_SAMPLE_L, + sample_lb = FETCH_OP_SAMPLE_LB, + sample_lz = FETCH_OP_SAMPLE_LZ, + sample_g = FETCH_OP_SAMPLE_G, + sample_g_lb = FETCH_OP_SAMPLE_G_L, + gather4 = FETCH_OP_GATHER4, + gather4_o = FETCH_OP_GATHER4_O, + + sample_c = FETCH_OP_SAMPLE_C, + sample_c_l = FETCH_OP_SAMPLE_C_L, + sample_c_lb = FETCH_OP_SAMPLE_C_LB, + sample_c_lz = FETCH_OP_SAMPLE_C_LZ, + sample_c_g = FETCH_OP_SAMPLE_C_G, + sample_c_g_lb = FETCH_OP_SAMPLE_C_G_L, + gather4_c = FETCH_OP_GATHER4_C, + gather4_c_o = FETCH_OP_GATHER4_C_O, + + }; + + enum Flags { + x_unnormalized, + y_unnormalized, + z_unnormalized, + w_unnormalized, + grad_fine + }; + + TexInstruction(Opcode op, const GPRVector& dest, const GPRVector& src, unsigned sid, + unsigned rid, PValue sampler_offset); + + const GPRVector& src() const {return m_src;} + const GPRVector& dst() const {return m_dst;} + unsigned opcode() const {return m_opcode;} + unsigned sampler_id() const {return m_sampler_id;} + unsigned resource_id() const {return m_resource_id;} + + void replace_values(const ValueSet& candidates, PValue new_value) override; + + void set_offset(unsigned index, int32_t val); + int get_offset(unsigned index) const; + + void set_inst_mode(int inst_mode) { m_inst_mode = inst_mode;} + + int inst_mode() const { return m_inst_mode;} + + void set_flag(Flags flag) { + m_flags.set(flag); + } + + PValue sampler_offset() const { + return m_sampler_offset; + } + + bool has_flag(Flags flag) const { + return m_flags.test(flag); + } + + int dest_swizzle(int i) const { + assert(i < 4); + return m_dest_swizzle[i]; + } + + void set_dest_swizzle(const std::array<int,4>& swz) { + m_dest_swizzle = swz; + } + + void set_gather_comp(int cmp); + + bool accept(InstructionVisitor& visitor) override {return visitor.visit(*this);} + bool accept(ConstInstructionVisitor& visitor) const override {return visitor.visit(*this);} + +private: + bool is_equal_to(const Instruction& lhs) const override; + void do_print(std::ostream& os) const override; + + static const char *opname(Opcode code); + + Opcode m_opcode; + GPRVector m_dst; + GPRVector m_src; + unsigned m_sampler_id; + unsigned m_resource_id; + std::bitset<8> m_flags; + int m_offset[3]; + int m_inst_mode; + std::array<int,4> m_dest_swizzle; + PValue m_sampler_offset; +}; + +bool r600_nir_lower_int_tg4(nir_shader *nir); +bool r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader); +bool r600_nir_lower_cube_to_2darray(nir_shader *shader); + +} + +#endif // INSTRUCTION_TEX_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp new file mode 100644 index 000000000..e47a46b88 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.cpp @@ -0,0 +1,1450 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_ir_to_assembly.h" +#include "sfn_conditionaljumptracker.h" +#include "sfn_callstack.h" +#include "sfn_instruction_gds.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_fetch.h" +#include "sfn_instruction_lds.h" + +#include "../r600_shader.h" +#include "../r600_sq.h" + +namespace r600 { + +using std::vector; + + + +struct AssemblyFromShaderLegacyImpl : public ConstInstructionVisitor { + + AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key); + + + bool emit(const Instruction::Pointer i); + void reset_addr_register() {m_last_addr.reset();} + +public: + bool visit(const AluInstruction& i) override; + bool visit(const ExportInstruction& i) override; + bool visit(const TexInstruction& i) override; + bool visit(const FetchInstruction& i) override; + bool visit(const IfInstruction& i) override; + bool visit(const ElseInstruction& i) override; + bool visit(const IfElseEndInstruction& i) override; + bool visit(const LoopBeginInstruction& i) override; + bool visit(const LoopEndInstruction& i) override; + bool visit(const LoopBreakInstruction& i) override; + bool visit(const LoopContInstruction& i) override; + bool visit(const StreamOutIntruction& i) override; + bool visit(const MemRingOutIntruction& i) override; + bool visit(const EmitVertex& i) override; + bool visit(const WaitAck& i) override; + bool visit(const WriteScratchInstruction& i) override; + bool visit(const GDSInstr& i) override; + bool visit(const RatInstruction& i) override; + bool visit(const LDSWriteInstruction& i) override; + bool visit(const LDSReadInstruction& i) override; + bool visit(const LDSAtomicInstruction& i) override; + bool visit(const GDSStoreTessFactor& i) override; + bool visit(const InstructionBlock& i) override; + + bool emit_load_addr(PValue addr); + bool emit_fs_pixel_export(const ExportInstruction & exi); + bool emit_vs_pos_export(const ExportInstruction & exi); + bool emit_vs_param_export(const ExportInstruction & exi); + bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src); + bool copy_src(r600_bytecode_alu_src& src, const Value& s); + + EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx); + + ConditionalJumpTracker m_jump_tracker; + CallStack m_callstack; + +public: + r600_bytecode *m_bc; + r600_shader *m_shader; + r600_shader_key *m_key; + r600_bytecode_output m_output; + unsigned m_max_color_exports; + bool has_pos_output; + bool 
has_param_output; + PValue m_last_addr; + int m_loop_nesting; + int m_nliterals_in_group; + std::set<int> vtx_fetch_results; + bool m_last_op_was_barrier; +}; + + +AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh, + r600_shader_key *key) +{ + impl = new AssemblyFromShaderLegacyImpl(sh, key); +} + +AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy() +{ + delete impl; +} + +bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir) +{ + if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX && + impl->m_shader->ninput > 0) + r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS); + + + std::vector<Instruction::Pointer> exports; + + for (const auto& block : ir) { + if (!impl->visit(block)) + return false; + } /* + for (const auto& i : exports) { + if (!impl->emit_export(static_cast<const ExportInstruction&>(*i))) + return false; + }*/ + + + const struct cf_op_info *last = nullptr; + if (impl->m_bc->cf_last) + last = r600_isa_cf(impl->m_bc->cf_last->op); + + /* alu clause instructions don't have EOP bit, so add NOP */ + if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END + || impl->m_bc->cf_last->op == CF_OP_POP) + r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP); + + /* A fetch shader only can't be EOP (results in hang), but we can replace it + * by a NOP */ + else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS) + impl->m_bc->cf_last->op = CF_OP_NOP; + + if (impl->m_shader->bc.chip_class != CAYMAN) + impl->m_bc->cf_last->end_of_program = 1; + else + cm_bytecode_add_cf_end(impl->m_bc); + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const InstructionBlock& block) +{ + for (const auto& i : block) { + + if (i->type() != Instruction::vtx) + vtx_fetch_results.clear(); + + m_last_op_was_barrier &= i->type() == Instruction::alu; + + sfn_log << SfnLog::assembly << "Emit from '" << *i << "\n"; + + if (!i->accept(*this)) + return false; + + if (i->type() != Instruction::alu) + reset_addr_register(); + } + + return true; +} + +AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh, + r600_shader_key *key): + m_callstack(sh->bc), + m_bc(&sh->bc), + m_shader(sh), + m_key(key), + has_pos_output(false), + has_param_output(false), + m_loop_nesting(0), + m_nliterals_in_group(0), + m_last_op_was_barrier(false) +{ + m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1); + +} + +extern const std::map<EAluOp, int> opcode_map; + +bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr) +{ + m_bc->ar_reg = addr->sel(); + m_bc->ar_chan = addr->chan(); + m_bc->ar_loaded = 0; + m_last_addr = addr; + + sfn_log << SfnLog::assembly << " Prepare " << *addr << " to address register\n"; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const AluInstruction& ai) +{ + + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + PValue addr_in_use; + + if (opcode_map.find(ai.opcode()) == opcode_map.end()) { + std::cerr << "Opcode not handled for " << ai <<"\n"; + return false; + } + + if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier) + return true; + + m_last_op_was_barrier = ai.opcode() == op0_group_barrier; + + unsigned old_nliterals_in_group = m_nliterals_in_group; + for (unsigned i = 0; i < ai.n_sources(); ++i) { + auto& s = ai.src(i); + if (s.type() == Value::literal) + ++m_nliterals_in_group; + } + + /* This instruction group would exceed the limit of literals, so + * force a new instruction group by adding a NOP as last + * instruction. 
This will no longer be needed with a real
+    * scheduler */
+   if (m_nliterals_in_group > 4) {
+      sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group << " inject a last op (nop)\n";
+      alu.op = ALU_OP0_NOP;
+      alu.last = 1;
+      alu.dst.chan = 3;
+      int retval = r600_bytecode_add_alu(m_bc, &alu);
+      if (retval)
+         return false;
+      memset(&alu, 0, sizeof(alu));
+      m_nliterals_in_group -= old_nliterals_in_group;
+   }
+
+   alu.op = opcode_map.at(ai.opcode());
+
+   /* Missing test whether ai actually has a dest */
+   auto dst = ai.dest();
+
+   if (dst) {
+      if (!copy_dst(alu.dst, *dst))
+         return false;
+
+      alu.dst.write = ai.flag(alu_write);
+      alu.dst.clamp = ai.flag(alu_dst_clamp);
+
+      if (dst->type() == Value::gpr_array_value) {
+         auto& v = static_cast<const GPRArrayValue&>(*dst);
+         PValue addr = v.indirect();
+         if (addr) {
+            if (!m_last_addr || *addr != *m_last_addr) {
+               emit_load_addr(addr);
+               addr_in_use = addr;
+            }
+            alu.dst.rel = addr ? 1 : 0;
+         }
+      }
+   }
+
+   alu.is_op3 = ai.n_sources() == 3;
+
+   for (unsigned i = 0; i < ai.n_sources(); ++i) {
+      auto& s = ai.src(i);
+
+      if (!copy_src(alu.src[i], s))
+         return false;
+      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);
+
+      if (s.type() == Value::gpr_array_value) {
+         auto& v = static_cast<const GPRArrayValue&>(s);
+         PValue addr = v.indirect();
+         if (addr) {
+            assert(!addr_in_use || (*addr_in_use == *addr));
+            if (!m_last_addr || *addr != *m_last_addr) {
+               emit_load_addr(addr);
+               addr_in_use = addr;
+            }
+            alu.src[i].rel = addr ? 1 : 0;
+         }
+      }
+      if (!alu.is_op3)
+         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
+   }
+
+   if (ai.bank_swizzle() != alu_vec_unknown)
+      alu.bank_swizzle_force = ai.bank_swizzle();
+
+   alu.last = ai.flag(alu_last_instr);
+   alu.update_pred = ai.flag(alu_update_pred);
+   alu.execute_mask = ai.flag(alu_update_exec);
+
+   /* If the destination register is equal to the last loaded address register
+    * then clear the latter one, because the values will no longer be identical */
+   if (m_last_addr)
+      sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";
+
+   if (dst)
+      sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
+
+   if (dst && m_last_addr)
+      if (*dst == *m_last_addr) {
+         sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << ")\n";
+         m_last_addr.reset();
+      }
+
+   auto cf_op = ai.cf_type();
+
+   unsigned type = 0;
+   switch (cf_op) {
+   case cf_alu: type = CF_OP_ALU; break;
+   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
+   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
+   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
+   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
+   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
+   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
+   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
+   default:
+      assert(0 && "cf_alu_undefined should have been replaced");
+   }
+
+   if (alu.last)
+      m_nliterals_in_group = 0;
+
+   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);
+
+   if (ai.opcode() == op1_mova_int)
+      m_bc->ar_loaded = 0;
+
+   if (ai.opcode() == op1_set_cf_idx0)
+      m_bc->index_loaded[0] = 1;
+
+   if (ai.opcode() == op1_set_cf_idx1)
+      m_bc->index_loaded[1] = 1;
+
+   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
+                          ai.opcode() == op2_killne_int ||
+                          ai.opcode() == op1_set_cf_idx0 ||
+                          ai.opcode() == op1_set_cf_idx1);
+   return retval;
+}
+
+bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
+{
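+   /* (Position exports go to the dedicated SX position slots: array_base
+    * 60 + location selects POS0..POS3 on Evergreen, and the final export
+    * of the program is tagged CF_OP_EXPORT_DONE below.) */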
r600_bytecode_output output; + memset(&output, 0, sizeof(output)); + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = gpr.chan_i(3); + output.burst_count = 1; + output.array_base = 60 + exi.location(); + output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi) +{ + r600_bytecode_output output; + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + + memset(&output, 0, sizeof(output)); + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = gpr.chan_i(3); + output.burst_count = 1; + output.array_base = exi.location(); + output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi) +{ + if (exi.location() >= m_max_color_exports && exi.location() < 60) { + R600_ERR("shader_from_nir: ignore pixel export %u, because supported max is %u\n", + exi.location(), m_max_color_exports); + return true; + } + + assert(exi.gpr().type() == Value::gpr_vector); + const auto& gpr = exi.gpr(); + + r600_bytecode_output output; + memset(&output, 0, sizeof(output)); + + output.gpr = gpr.sel(); + output.elem_size = 3; + output.swizzle_x = gpr.chan_i(0); + output.swizzle_y = gpr.chan_i(1); + output.swizzle_z = gpr.chan_i(2); + output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3); ; + output.burst_count = 1; + output.array_base = exi.location(); + output.op = exi.is_last_export() ? 
CF_OP_EXPORT_DONE: CF_OP_EXPORT; + output.type = exi.export_type(); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("Error adding pixel export at location %d\n", exi.location()); + return false; + } + + return true; +} + + +bool AssemblyFromShaderLegacyImpl::visit(const ExportInstruction & exi) +{ + switch (exi.export_type()) { + case ExportInstruction::et_pixel: + return emit_fs_pixel_export(exi); + case ExportInstruction::et_pos: + return emit_vs_pos_export(exi); + case ExportInstruction::et_param: + return emit_vs_param_export(exi); + default: + R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type()); + return false; + } +} + +bool AssemblyFromShaderLegacyImpl::visit(const IfInstruction & if_instr) +{ + int elems = m_callstack.push(FC_PUSH_VPM); + bool needs_workaround = false; + + if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1) + needs_workaround = true; + + if (m_bc->family != CHIP_HEMLOCK && + m_bc->family != CHIP_CYPRESS && + m_bc->family != CHIP_JUNIPER) { + unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size; + unsigned dmod2 = (elems) % m_bc->stack.entry_size; + + if (elems && (!dmod1 || !dmod2)) + needs_workaround = true; + } + + auto& pred = if_instr.pred(); + + if (needs_workaround) { + r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH); + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + auto new_pred = pred; + new_pred.set_cf_type(cf_alu); + visit(new_pred); + } else + visit(pred); + + r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); + + m_jump_tracker.push(m_bc->cf_last, jt_if); + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const ElseInstruction & else_instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE); + m_bc->cf_last->pop_count = 1; + return m_jump_tracker.add_mid(m_bc->cf_last, jt_if); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const IfElseEndInstruction & endif_instr) +{ + m_callstack.pop(FC_PUSH_VPM); + + unsigned force_pop = m_bc->force_add_cf; + if (!force_pop) { + int alu_pop = 3; + if (m_bc->cf_last) { + if (m_bc->cf_last->op == CF_OP_ALU) + alu_pop = 0; + else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER) + alu_pop = 1; + } + alu_pop += 1; + if (alu_pop == 1) { + m_bc->cf_last->op = CF_OP_ALU_POP_AFTER; + m_bc->force_add_cf = 1; + } else if (alu_pop == 2) { + m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER; + m_bc->force_add_cf = 1; + } else { + force_pop = 1; + } + } + + if (force_pop) { + r600_bytecode_add_cfinst(m_bc, CF_OP_POP); + m_bc->cf_last->pop_count = 1; + m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2; + } + + return m_jump_tracker.pop(m_bc->cf_last, jt_if); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBeginInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10); + m_jump_tracker.push(m_bc->cf_last, jt_loop); + m_callstack.push(FC_LOOP); + ++m_loop_nesting; + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopEndInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END); + m_callstack.pop(FC_LOOP); + assert(m_loop_nesting); + --m_loop_nesting; + return m_jump_tracker.pop(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopBreakInstruction& instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK); + return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +bool AssemblyFromShaderLegacyImpl::visit(UNUSED const LoopContInstruction &instr) +{ + r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE); + return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop); +} + +bool 
AssemblyFromShaderLegacyImpl::visit(const StreamOutIntruction& so_instr) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + + output.gpr = so_instr.gpr().sel(); + output.elem_size = so_instr.element_size(); + output.array_base = so_instr.array_base(); + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; + output.burst_count = so_instr.burst_count(); + output.array_size = so_instr.array_size(); + output.comp_mask = so_instr.comp_mask(); + output.op = so_instr.op(); + + assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); + + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("shader_from_nir: Error creating stream output instruction\n"); + return false; + } + return true; +} + + +bool AssemblyFromShaderLegacyImpl::visit(const MemRingOutIntruction& instr) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + + output.gpr = instr.gpr().sel(); + output.type = instr.type(); + output.elem_size = 3; + output.comp_mask = 0xf; + output.burst_count = 1; + output.op = instr.op(); + if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) { + output.index_gpr = instr.index_reg(); + output.array_size = 0xfff; + } + output.array_base = instr.array_base(); + + if (r600_bytecode_add_output(m_bc, &output)) { + R600_ERR("shader_from_nir: Error creating mem ring write instruction\n"); + return false; + } + return true; +} + + +bool AssemblyFromShaderLegacyImpl::visit(const TexInstruction & tex_instr) +{ + auto addr = tex_instr.sampler_offset(); + if (addr && (!m_bc->index_loaded[1] || m_loop_nesting + || m_bc->index_reg[1] != addr->sel() + || m_bc->index_reg_chan[1] != addr->chan())) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(alu)); + alu.op = opcode_map.at(op1_mova_int); + alu.dst.chan = 0; + alu.src[0].sel = addr->sel(); + alu.src[0].chan = addr->chan(); + alu.last = 1; + int r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->ar_loaded = 0; + + alu.op = opcode_map.at(op1_set_cf_idx1); + alu.dst.chan = 0; + alu.src[0].sel = 0; + alu.src[0].chan = 0; + alu.last = 1; + + r = r600_bytecode_add_alu(m_bc, &alu); + if (r) + return false; + + m_bc->index_reg[1] = addr->sel(); + m_bc->index_reg_chan[1] = addr->chan(); + m_bc->index_loaded[1] = true; + } + + r600_bytecode_tex tex; + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = tex_instr.opcode(); + tex.sampler_id = tex_instr.sampler_id(); + tex.sampler_index_mode = 0; + tex.resource_id = tex_instr.resource_id();; + tex.resource_index_mode = 0; + tex.src_gpr = tex_instr.src().sel(); + tex.dst_gpr = tex_instr.dst().sel(); + tex.dst_sel_x = tex_instr.dest_swizzle(0); + tex.dst_sel_y = tex_instr.dest_swizzle(1); + tex.dst_sel_z = tex_instr.dest_swizzle(2); + tex.dst_sel_w = tex_instr.dest_swizzle(3); + tex.src_sel_x = tex_instr.src().chan_i(0); + tex.src_sel_y = tex_instr.src().chan_i(1); + tex.src_sel_z = tex_instr.src().chan_i(2); + tex.src_sel_w = tex_instr.src().chan_i(3); + tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized); + tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized); + tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized); + tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized); + tex.offset_x = tex_instr.get_offset(0); + tex.offset_y = tex_instr.get_offset(1); + tex.offset_z = tex_instr.get_offset(2); + tex.resource_index_mode = (!!addr) ? 
2 : 0; + tex.sampler_index_mode = tex.resource_index_mode; + + if (tex_instr.opcode() == TexInstruction::get_gradient_h || + tex_instr.opcode() == TexInstruction::get_gradient_v) + tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0; + else + tex.inst_mod = tex_instr.inst_mode(); + if (r600_bytecode_add_tex(m_bc, &tex)) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const FetchInstruction& fetch_instr) +{ + int buffer_offset = 0; + auto addr = fetch_instr.buffer_offset(); + auto index_mode = fetch_instr.buffer_index_mode(); + + if (addr) { + if (addr->type() == Value::literal) { + const auto& boffs = static_cast<const LiteralValue&>(*addr); + buffer_offset = boffs.value(); + } else { + index_mode = emit_index_reg(*addr, 0); + } + } + + if (fetch_instr.has_prelude()) { + for(auto &i : fetch_instr.prelude()) { + if (!i->accept(*this)) + return false; + } + } + + if (vtx_fetch_results.find(fetch_instr.src().sel()) != + vtx_fetch_results.end()) { + m_bc->force_add_cf = 1; + vtx_fetch_results.clear(); + } + vtx_fetch_results.insert(fetch_instr.dst().sel()); + + struct r600_bytecode_vtx vtx; + memset(&vtx, 0, sizeof(vtx)); + vtx.op = fetch_instr.vc_opcode(); + vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset; + vtx.fetch_type = fetch_instr.fetch_type(); + vtx.src_gpr = fetch_instr.src().sel(); + vtx.src_sel_x = fetch_instr.src().chan(); + vtx.mega_fetch_count = fetch_instr.mega_fetch_count(); + vtx.dst_gpr = fetch_instr.dst().sel(); + vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */ + vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */ + vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */ + vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */ + vtx.use_const_fields = fetch_instr.use_const_fields(); + vtx.data_format = fetch_instr.data_format(); + vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */ + vtx.endian = fetch_instr.endian_swap(); + vtx.buffer_index_mode = index_mode; + vtx.offset = fetch_instr.offset(); + vtx.indexed = fetch_instr.indexed(); + vtx.uncached = fetch_instr.uncached(); + vtx.elem_size = fetch_instr.elm_size(); + vtx.array_base = fetch_instr.array_base(); + vtx.array_size = fetch_instr.array_size(); + vtx.srf_mode_all = fetch_instr.srf_mode_no_zero(); + + if (fetch_instr.use_tc()) { + if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + + } else { + if ((r600_bytecode_add_vtx(m_bc, &vtx))) { + R600_ERR("shader_from_nir: Error creating tex assembly instruction\n"); + return false; + } + } + + m_bc->cf_last->vpm = fetch_instr.use_vpm(); + m_bc->cf_last->barrier = 1; + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const EmitVertex &instr) +{ + int r = r600_bytecode_add_cfinst(m_bc, instr.op()); + if (!r) + m_bc->cf_last->count = instr.stream(); + assert(m_bc->cf_last->count < 4); + + return r == 0; +} + +bool AssemblyFromShaderLegacyImpl::visit(const WaitAck& instr) +{ + int r = r600_bytecode_add_cfinst(m_bc, instr.op()); + if (!r) + m_bc->cf_last->cf_addr = instr.n_ack(); + + return r == 0; +} + +bool AssemblyFromShaderLegacyImpl::visit(const WriteScratchInstruction& instr) +{ + struct r600_bytecode_output cf; + + memset(&cf, 0, sizeof(struct r600_bytecode_output)); + + cf.op = CF_OP_MEM_SCRATCH; + cf.elem_size = 3; + cf.gpr = instr.gpr().sel(); + cf.mark = 1; + 
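+   /* (The scratch payload leaves the GPR unswizzled, x,y,z,w in order;
+    * the component mask set below decides which of them are stored.) */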
cf.comp_mask = instr.write_mask(); + cf.swizzle_x = 0; + cf.swizzle_y = 1; + cf.swizzle_z = 2; + cf.swizzle_w = 3; + cf.burst_count = 1; + + if (instr.indirect()) { + cf.type = 3; + cf.index_gpr = instr.address(); + + /* The docu seems to be wrong here: In indirect addressing the + * address_base seems to be the array_size */ + cf.array_size = instr.array_size(); + } else { + cf.type = 2; + cf.array_base = instr.location(); + } + /* This should be 0, but the address calculation is apparently wrong */ + + + if (r600_bytecode_add_output(m_bc, &cf)){ + R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n"); + return false; + } + + return true; +} + +extern const std::map<ESDOp, int> ds_opcode_map; + +bool AssemblyFromShaderLegacyImpl::visit(const GDSInstr& instr) +{ + struct r600_bytecode_gds gds; + + int uav_idx = -1; + auto addr = instr.uav_id(); + if (addr->type() != Value::literal) { + emit_index_reg(*addr, 1); + } else { + const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr); + uav_idx = addr_reg.value(); + } + + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + + gds.op = ds_opcode_map.at(instr.op()); + gds.dst_gpr = instr.dest_sel(); + gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base(); + gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one; + gds.src_gpr = instr.src_sel(); + + gds.src_sel_x = instr.src_swizzle(0); + gds.src_sel_y = instr.src_swizzle(1); + gds.src_sel_z = instr.src_swizzle(2); + + gds.dst_sel_x = instr.dest_swizzle(0); + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + gds.src_gpr2 = 0; + gds.alloc_consume = 1; // Not Cayman + + int r = r600_bytecode_add_gds(m_bc, &gds); + if (r) + return false; + m_bc->cf_last->vpm = 1; + m_bc->cf_last->barrier = 1; + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const GDSStoreTessFactor& instr) +{ + struct r600_bytecode_gds gds; + + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + gds.src_gpr = instr.sel(); + gds.src_sel_x = instr.chan(0); + gds.src_sel_y = instr.chan(1); + gds.src_sel_z = 4; + gds.dst_sel_x = 7; + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + gds.op = FETCH_OP_TF_WRITE; + + if (r600_bytecode_add_gds(m_bc, &gds) != 0) + return false; + + if (instr.chan(2) != 7) { + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + gds.src_gpr = instr.sel(); + gds.src_sel_x = instr.chan(2); + gds.src_sel_y = instr.chan(3); + gds.src_sel_z = 4; + gds.dst_sel_x = 7; + gds.dst_sel_y = 7; + gds.dst_sel_z = 7; + gds.dst_sel_w = 7; + gds.op = FETCH_OP_TF_WRITE; + + if (r600_bytecode_add_gds(m_bc, &gds)) + return false; + } + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const LDSWriteInstruction& instr) +{ + r600_bytecode_alu alu; + memset(&alu, 0, sizeof(r600_bytecode_alu)); + + alu.last = true; + alu.is_lds_idx_op = true; + copy_src(alu.src[0], instr.address()); + copy_src(alu.src[1], instr.value0()); + + if (instr.num_components() == 1) { + alu.op = LDS_OP2_LDS_WRITE; + } else { + alu.op = LDS_OP3_LDS_WRITE_REL; + alu.lds_idx = 1; + copy_src(alu.src[2], instr.value1()); + } + + return r600_bytecode_add_alu(m_bc, &alu) == 0; +} + +bool AssemblyFromShaderLegacyImpl::visit(const LDSReadInstruction& instr) +{ + int r; + unsigned nread = 0; + unsigned nfetch = 0; + unsigned n_values = instr.num_values(); + + r600_bytecode_alu alu_fetch; + r600_bytecode_alu alu_read; + + /* We must add a new ALU clause if the fetch and read op would be split otherwise + * r600_asm limits at 120 slots = 240 dwords */ + if (m_bc->cf_last->ndw 
> 240 - 4 * n_values) + m_bc->force_add_cf = 1; + + while (nread < n_values) { + if (nfetch < n_values) { + memset(&alu_fetch, 0, sizeof(r600_bytecode_alu)); + alu_fetch.is_lds_idx_op = true; + alu_fetch.op = LDS_OP1_LDS_READ_RET; + + copy_src(alu_fetch.src[0], instr.address(nfetch)); + alu_fetch.src[1].sel = V_SQ_ALU_SRC_0; + alu_fetch.src[2].sel = V_SQ_ALU_SRC_0; + alu_fetch.last = 1; + r = r600_bytecode_add_alu(m_bc, &alu_fetch); + m_bc->cf_last->nlds_read++; + if (r) + return false; + } + + if (nfetch >= n_values) { + memset(&alu_read, 0, sizeof(r600_bytecode_alu)); + copy_dst(alu_read.dst, instr.dest(nread)); + alu_read.op = ALU_OP1_MOV; + alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; + alu_read.last = 1; + alu_read.dst.write = 1; + r = r600_bytecode_add_alu(m_bc, &alu_read); + m_bc->cf_last->nqueue_read++; + if (r) + return false; + ++nread; + } + ++nfetch; + } + assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read); + + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const LDSAtomicInstruction& instr) +{ + if (m_bc->cf_last->ndw > 240 - 4) + m_bc->force_add_cf = 1; + + r600_bytecode_alu alu_fetch; + r600_bytecode_alu alu_read; + + memset(&alu_fetch, 0, sizeof(r600_bytecode_alu)); + alu_fetch.is_lds_idx_op = true; + alu_fetch.op = instr.op(); + + copy_src(alu_fetch.src[0], instr.address()); + copy_src(alu_fetch.src[1], instr.src0()); + + if (instr.src1()) + copy_src(alu_fetch.src[2], *instr.src1()); + alu_fetch.last = 1; + int r = r600_bytecode_add_alu(m_bc, &alu_fetch); + if (r) + return false; + + memset(&alu_read, 0, sizeof(r600_bytecode_alu)); + copy_dst(alu_read.dst, instr.dest()); + alu_read.op = ALU_OP1_MOV; + alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; + alu_read.last = 1; + alu_read.dst.write = 1; + r = r600_bytecode_add_alu(m_bc, &alu_read); + if (r) + return false; + return true; +} + +bool AssemblyFromShaderLegacyImpl::visit(const RatInstruction& instr) +{ + struct r600_bytecode_gds gds; + + int rat_idx = instr.rat_id(); + EBufferIndexMode rat_index_mode = bim_none; + auto addr = instr.rat_id_offset(); + + if (addr) { + if (addr->type() != Value::literal) { + rat_index_mode = emit_index_reg(*addr, 1); + } else { + const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr); + rat_idx += addr_reg.value(); + } + } + memset(&gds, 0, sizeof(struct r600_bytecode_gds)); + + r600_bytecode_add_cfinst(m_bc, instr.cf_opcode()); + auto cf = m_bc->cf_last; + cf->rat.id = rat_idx + m_shader->rat_base; + cf->rat.inst = instr.rat_op(); + cf->rat.index_mode = rat_index_mode; + cf->output.type = instr.need_ack() ? 3 : 1; + cf->output.gpr = instr.data_gpr(); + cf->output.index_gpr = instr.index_gpr(); + cf->output.comp_mask = instr.comp_mask(); + cf->output.burst_count = instr.burst_count(); + assert(instr.data_swz(0) == PIPE_SWIZZLE_X); + if (cf->rat.inst != RatInstruction::STORE_TYPED) { + assert(instr.data_swz(1) == PIPE_SWIZZLE_Y || + instr.data_swz(1) == PIPE_SWIZZLE_MAX) ; + assert(instr.data_swz(2) == PIPE_SWIZZLE_Z || + instr.data_swz(2) == PIPE_SWIZZLE_MAX) ; + } + + cf->vpm = 1; + cf->barrier = 1; + cf->mark = instr.need_ack(); + cf->output.elem_size = instr.elm_size(); + return true; +} + +EBufferIndexMode +AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx) +{ + assert(idx < 2); + + EAluOp idxop = idx ? 
op1_set_cf_idx1 : op1_set_cf_idx0;
+
+   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
+       m_bc->index_reg[idx] != addr.sel() ||
+       m_bc->index_reg_chan[idx] != addr.chan()) {
+      struct r600_bytecode_alu alu;
+
+      // Make sure MOVA is not the last instruction in the clause
+      if ((m_bc->cf_last->ndw>>1) >= 110)
+         m_bc->force_add_cf = 1;
+
+      memset(&alu, 0, sizeof(alu));
+      alu.op = opcode_map.at(op1_mova_int);
+      alu.dst.chan = 0;
+      alu.src[0].sel = addr.sel();
+      alu.src[0].chan = addr.chan();
+      alu.last = 1;
+      sfn_log << SfnLog::assembly << " mova_int, ";
+      int r = r600_bytecode_add_alu(m_bc, &alu);
+      if (r)
+         return bim_invalid;
+
+      m_bc->ar_loaded = 0;
+
+      alu.op = opcode_map.at(idxop);
+      alu.dst.chan = 0;
+      alu.src[0].sel = 0;
+      alu.src[0].chan = 0;
+      alu.last = 1;
+      sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
+      r = r600_bytecode_add_alu(m_bc, &alu);
+      if (r)
+         return bim_invalid;
+
+      m_bc->index_reg[idx] = addr.sel();
+      m_bc->index_reg_chan[idx] = addr.chan();
+      m_bc->index_loaded[idx] = true;
+      sfn_log << SfnLog::assembly << "\n";
+   }
+   return idx == 0 ? bim_zero : bim_one;
+}
+
+bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
+                                            const Value& d)
+{
+   assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);
+
+   if (d.sel() > 124) {
+      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, but trying to use %d\n", d.sel());
+      return false;
+   }
+
+   dst.sel = d.sel();
+   dst.chan = d.chan();
+
+   if (m_bc->index_reg[1] == dst.sel &&
+       m_bc->index_reg_chan[1] == dst.chan)
+      m_bc->index_loaded[1] = false;
+
+   if (m_bc->index_reg[0] == dst.sel &&
+       m_bc->index_reg_chan[0] == dst.chan)
+      m_bc->index_loaded[0] = false;
+
+   return true;
+}
+
+bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
+{
+   if (s.type() == Value::gpr && s.sel() > 124) {
+      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, but trying to use %d\n", s.sel());
+      return false;
+   }
+
+   if (s.type() == Value::lds_direct) {
+      R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
+      return false;
+   }
+
+   if (s.type() == Value::kconst && s.sel() < 512) {
+      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d\n", s.sel());
+      return false;
+   }
+
+   if (s.type() == Value::literal) {
+      auto& v = static_cast<const LiteralValue&>(s);
+      if (v.value() == 0) {
+         src.sel = ALU_SRC_0;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value() == 1) {
+         src.sel = ALU_SRC_1_INT;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value_float() == 1.0f) {
+         src.sel = ALU_SRC_1;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value_float() == 0.5f) {
+         src.sel = ALU_SRC_0_5;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      if (v.value() == 0xffffffff) {
+         src.sel = ALU_SRC_M_1_INT;
+         src.chan = 0;
+         --m_nliterals_in_group;
+         return true;
+      }
+      src.value = v.value();
+   }
+
+   src.sel = s.sel();
+   src.chan = s.chan();
+   if (s.type() == Value::kconst) {
+      const UniformValue& cv = static_cast<const UniformValue&>(s);
+      src.kc_bank = cv.kcache_bank();
+      auto addr = cv.addr();
+      if (addr) {
+         src.kc_rel = 1;
+         emit_index_reg(*addr, 0);
+         auto type = m_bc->cf_last->op;
+         if (r600_bytecode_add_cf(m_bc)) {
+            return false;
+         }
+         m_bc->cf_last->op = type;
+      }
+   }
+
+   return true;
+}
+
+const std::map<EAluOp, int> opcode_map = {
+
+      {op2_add, ALU_OP2_ADD},
+      {op2_mul, ALU_OP2_MUL},
+      {op2_mul_ieee, ALU_OP2_MUL_IEEE},
+      {op2_max, ALU_OP2_MAX},
+      {op2_min, ALU_OP2_MIN},
+      {op2_max_dx10,
ALU_OP2_MAX_DX10}, + {op2_min_dx10, ALU_OP2_MIN_DX10}, + {op2_sete, ALU_OP2_SETE}, + {op2_setgt, ALU_OP2_SETGT}, + {op2_setge, ALU_OP2_SETGE}, + {op2_setne, ALU_OP2_SETNE}, + {op2_sete_dx10, ALU_OP2_SETE_DX10}, + {op2_setgt_dx10, ALU_OP2_SETGT_DX10}, + {op2_setge_dx10, ALU_OP2_SETGE_DX10}, + {op2_setne_dx10, ALU_OP2_SETNE_DX10}, + {op1_fract, ALU_OP1_FRACT}, + {op1_trunc, ALU_OP1_TRUNC}, + {op1_ceil, ALU_OP1_CEIL}, + {op1_rndne, ALU_OP1_RNDNE}, + {op1_floor, ALU_OP1_FLOOR}, + {op2_ashr_int, ALU_OP2_ASHR_INT}, + {op2_lshr_int, ALU_OP2_LSHR_INT}, + {op2_lshl_int, ALU_OP2_LSHL_INT}, + {op1_mov, ALU_OP1_MOV}, + {op0_nop, ALU_OP0_NOP}, + {op2_mul_64, ALU_OP2_MUL_64}, + {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, + {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, + {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT}, + {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT}, + {op2_pred_sete, ALU_OP2_PRED_SETE}, + {op2_pred_setgt, ALU_OP2_PRED_SETGT}, + {op2_pred_setge, ALU_OP2_PRED_SETGE}, + {op2_pred_setne, ALU_OP2_PRED_SETNE}, + //{op2_pred_set_inv, ALU_OP2_PRED_SET}, + //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL}, + //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE}, + {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH}, + {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH}, + {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH}, + {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH}, + {op2_kille, ALU_OP2_KILLE}, + {op2_killgt, ALU_OP2_KILLGT}, + {op2_killge, ALU_OP2_KILLGE}, + {op2_killne, ALU_OP2_KILLNE}, + {op2_and_int, ALU_OP2_AND_INT}, + {op2_or_int, ALU_OP2_OR_INT}, + {op2_xor_int, ALU_OP2_XOR_INT}, + {op1_not_int, ALU_OP1_NOT_INT}, + {op2_add_int, ALU_OP2_ADD_INT}, + {op2_sub_int, ALU_OP2_SUB_INT}, + {op2_max_int, ALU_OP2_MAX_INT}, + {op2_min_int, ALU_OP2_MIN_INT}, + {op2_max_uint, ALU_OP2_MAX_UINT}, + {op2_min_uint, ALU_OP2_MIN_UINT}, + {op2_sete_int, ALU_OP2_SETE_INT}, + {op2_setgt_int, ALU_OP2_SETGT_INT}, + {op2_setge_int, ALU_OP2_SETGE_INT}, + {op2_setne_int, ALU_OP2_SETNE_INT}, + {op2_setgt_uint, ALU_OP2_SETGT_UINT}, + {op2_setge_uint, ALU_OP2_SETGE_UINT}, + {op2_killgt_uint, ALU_OP2_KILLGT_UINT}, + {op2_killge_uint, ALU_OP2_KILLGE_UINT}, + //p2_prede_int, ALU_OP2_PREDE_INT}, + {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT}, + {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT}, + {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT}, + {op2_kille_int, ALU_OP2_KILLE_INT}, + {op2_killgt_int, ALU_OP2_KILLGT_INT}, + {op2_killge_int, ALU_OP2_KILLGE_INT}, + {op2_killne_int, ALU_OP2_KILLNE_INT}, + {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT}, + {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT}, + {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT}, + {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT}, + {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT}, + {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT}, + {op1_flt_to_int, ALU_OP1_FLT_TO_INT}, + {op1_bfrev_int, ALU_OP1_BFREV_INT}, + {op2_addc_uint, ALU_OP2_ADDC_UINT}, + {op2_subb_uint, ALU_OP2_SUBB_UINT}, + {op0_group_barrier, ALU_OP0_GROUP_BARRIER}, + {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN}, + {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END}, + {op2_set_mode, ALU_OP2_SET_MODE}, + {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0}, + {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1}, + {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE}, + {op1_exp_ieee, ALU_OP1_EXP_IEEE}, + {op1_log_clamped, ALU_OP1_LOG_CLAMPED}, + {op1_log_ieee, ALU_OP1_LOG_IEEE}, + {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED}, + {op1_recip_ff, ALU_OP1_RECIP_FF}, + {op1_recip_ieee, ALU_OP1_RECIP_IEEE}, + 
{op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED}, + {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF}, + {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE}, + {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE}, + {op1_sin, ALU_OP1_SIN}, + {op1_cos, ALU_OP1_COS}, + {op2_mullo_int, ALU_OP2_MULLO_INT}, + {op2_mulhi_int, ALU_OP2_MULHI_INT}, + {op2_mullo_uint, ALU_OP2_MULLO_UINT}, + {op2_mulhi_uint, ALU_OP2_MULHI_UINT}, + {op1_recip_int, ALU_OP1_RECIP_INT}, + {op1_recip_uint, ALU_OP1_RECIP_UINT}, + {op1_recip_64, ALU_OP2_RECIP_64}, + {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64}, + {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64}, + {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64}, + {op1_sqrt_64, ALU_OP2_SQRT_64}, + {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT}, + {op1_int_to_flt, ALU_OP1_INT_TO_FLT}, + {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT}, + {op2_bfm_int, ALU_OP2_BFM_INT}, + {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16}, + {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32}, + {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT}, + {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT}, + {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT}, + {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT}, + {op1_bcnt_int, ALU_OP1_BCNT_INT}, + {op1_ffbh_uint, ALU_OP1_FFBH_UINT}, + {op1_ffbl_int, ALU_OP1_FFBL_INT}, + {op1_ffbh_int, ALU_OP1_FFBH_INT}, + {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4}, + {op2_dot_ieee, ALU_OP2_DOT_IEEE}, + {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI}, + {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR}, + {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24}, + {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT}, + {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT}, + {op2_mul_uint24, ALU_OP2_MUL_UINT24}, + {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT}, + {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT}, + {op2_sete_64, ALU_OP2_SETE_64}, + {op2_setne_64, ALU_OP2_SETNE_64}, + {op2_setgt_64, ALU_OP2_SETGT_64}, + {op2_setge_64, ALU_OP2_SETGE_64}, + {op2_min_64, ALU_OP2_MIN_64}, + {op2_max_64, ALU_OP2_MAX_64}, + {op2_dot4, ALU_OP2_DOT4}, + {op2_dot4_ieee, ALU_OP2_DOT4_IEEE}, + {op2_cube, ALU_OP2_CUBE}, + {op1_max4, ALU_OP1_MAX4}, + {op1_frexp_64, ALU_OP1_FREXP_64}, + {op1_ldexp_64, ALU_OP2_LDEXP_64}, + {op1_fract_64, ALU_OP1_FRACT_64}, + {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64}, + {op2_pred_sete_64, ALU_OP2_PRED_SETE_64}, + {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64}, + {op2_add_64, ALU_OP2_ADD_64}, + {op1_mova_int, ALU_OP1_MOVA_INT}, + {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32}, + {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64}, + {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT}, + {op2_dot, ALU_OP2_DOT}, + //p2_mul_prev, ALU_OP2_MUL_PREV}, + //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV}, + //p2_add_prev, ALU_OP2_ADD_PREV}, + {op2_muladd_prev, ALU_OP2_MULADD_PREV}, + {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV}, + {op2_interp_xy, ALU_OP2_INTERP_XY}, + {op2_interp_zw, ALU_OP2_INTERP_ZW}, + {op2_interp_x, ALU_OP2_INTERP_X}, + {op2_interp_z, ALU_OP2_INTERP_Z}, + {op0_store_flags, ALU_OP1_STORE_FLAGS}, + {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS}, + {op0_lds_1a, ALU_OP2_LDS_1A}, + {op0_lds_1a1d, ALU_OP2_LDS_1A1D}, + {op0_lds_2a, ALU_OP2_LDS_2A}, + {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0}, + {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10}, + {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20}, + // {op 3 all left shift 6 + {op3_bfe_uint, ALU_OP3_BFE_UINT}, + {op3_bfe_int, ALU_OP3_BFE_INT}, + {op3_bfi_int, ALU_OP3_BFI_INT}, + {op3_fma, ALU_OP3_FMA}, + {op3_cndne_64, ALU_OP3_CNDNE_64}, + {op3_fma_64, ALU_OP3_FMA_64}, + {op3_lerp_uint, ALU_OP3_LERP_UINT}, + {op3_bit_align_int, 
ALU_OP3_BIT_ALIGN_INT}, + {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT}, + {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT}, + {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT}, + {op3_muladd_uint24, ALU_OP3_MULADD_UINT24}, + {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP}, + {op3_muladd, ALU_OP3_MULADD}, + {op3_muladd_m2, ALU_OP3_MULADD_M2}, + {op3_muladd_m4, ALU_OP3_MULADD_M4}, + {op3_muladd_d2, ALU_OP3_MULADD_D2}, + {op3_muladd_ieee, ALU_OP3_MULADD_IEEE}, + {op3_cnde, ALU_OP3_CNDE}, + {op3_cndgt, ALU_OP3_CNDGT}, + {op3_cndge, ALU_OP3_CNDGE}, + {op3_cnde_int, ALU_OP3_CNDE_INT}, + {op3_cndgt_int, ALU_OP3_CNDGT_INT}, + {op3_cndge_int, ALU_OP3_CNDGE_INT}, + {op3_mul_lit, ALU_OP3_MUL_LIT}, +}; + +const std::map<ESDOp, int> ds_opcode_map = { + {DS_OP_ADD, FETCH_OP_GDS_ADD}, + {DS_OP_SUB, FETCH_OP_GDS_SUB}, + {DS_OP_RSUB, FETCH_OP_GDS_RSUB}, + {DS_OP_INC, FETCH_OP_GDS_INC}, + {DS_OP_DEC, FETCH_OP_GDS_DEC}, + {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT}, + {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT}, + {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT}, + {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT}, + {DS_OP_AND, FETCH_OP_GDS_AND}, + {DS_OP_OR, FETCH_OP_GDS_OR}, + {DS_OP_XOR, FETCH_OP_GDS_XOR}, + {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR}, + {DS_OP_WRITE, FETCH_OP_GDS_WRITE}, + {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL}, + {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2}, + {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE}, + {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF}, + {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE}, + {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE}, + {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET}, + {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET}, + {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET}, + {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET}, + {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET}, + {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET}, + {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET}, + {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET}, + {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET}, + {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET}, + {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET}, + {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET}, + {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET}, + {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET}, + {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET}, + {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET}, + {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET}, + {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET}, + {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET}, + {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET}, + {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET}, + {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET}, + {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET}, + {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET}, + {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET}, + {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET}, + {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC}, + {DS_OP_INVALID, 0}, +}; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h new file mode 100644 index 000000000..0c82032e6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_ir_to_assembly.h @@ -0,0 +1,45 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, 
distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "sfn_nir.h" + +struct r600_shader; +union r600_shader_key; + +namespace r600 { + +class AssemblyFromShaderLegacy : public AssemblyFromShader { +public: + AssemblyFromShaderLegacy(struct r600_shader *sh, r600_shader_key *key); + ~AssemblyFromShaderLegacy() override; +private: + bool do_lower(const std::vector<InstructionBlock> &ir) override ; + + struct AssemblyFromShaderLegacyImpl *impl; +}; + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp new file mode 100644 index 000000000..28eef0593 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.cpp @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2017-2019 Gert Wollny + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_liverange.h" +#include "sfn_debug.h" +#include "sfn_value.h" +#include "sfn_value_gpr.h" + +#include "program/prog_instruction.h" +#include "util/bitscan.h" +#include "util/u_math.h" + +#include <limits> +#include <cstdlib> +#include <iomanip> + +/* std::sort is significantly faster than qsort */ +#include <algorithm> + +/* If <windows.h> is included this is defined and clashes with + * std::numeric_limits<>::max() + */ +#ifdef max +#undef max +#endif + + +namespace r600 { + +using std::numeric_limits; +using std::unique_ptr; +using std::setw; + +prog_scope_storage::prog_scope_storage(int n): + current_slot(0), + storage(n) +{ +} + +prog_scope_storage::~prog_scope_storage() +{ +} + +prog_scope* +prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id, + int lvl, int s_begin) +{ + storage[current_slot] = prog_scope(p, type, id, lvl, s_begin); + return &storage[current_slot++]; +} + +prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id, + int depth, int scope_begin): + scope_type(type), + scope_id(id), + scope_nesting_depth(depth), + scope_begin(scope_begin), + scope_end(-1), + break_loop_line(numeric_limits<int>::max()), + parent_scope(parent) +{ +} + +prog_scope::prog_scope(): + prog_scope(nullptr, undefined_scope, -1, -1, -1) +{ +} + +prog_scope_type prog_scope::type() const +{ + return scope_type; +} + +prog_scope *prog_scope::parent() const +{ + return parent_scope; +} + +int prog_scope::nesting_depth() const +{ + return scope_nesting_depth; +} + +bool prog_scope::is_loop() const +{ + return (scope_type == loop_body); +} + +bool prog_scope::is_in_loop() const +{ + if (scope_type == loop_body) + return true; + + if (parent_scope) + return parent_scope->is_in_loop(); + + return false; +} + +const prog_scope *prog_scope::innermost_loop() const +{ + if (scope_type == loop_body) + return this; + + if (parent_scope) + return parent_scope->innermost_loop(); + + return nullptr; +} + +const prog_scope *prog_scope::outermost_loop() const +{ + const prog_scope *loop = nullptr; + const prog_scope *p = this; + + do { + if (p->type() == loop_body) + loop = p; + p = p->parent(); + } while (p); + + return loop; +} + +bool prog_scope::is_child_of_ifelse_id_sibling(const prog_scope *scope) const +{ + const prog_scope *my_parent = in_parent_ifelse_scope(); + while (my_parent) { + /* is a direct child? */ + if (my_parent == scope) + return false; + /* is a child of the conditions sibling? 
*/ + if (my_parent->id() == scope->id()) + return true; + my_parent = my_parent->in_parent_ifelse_scope(); + } + return false; +} + +bool prog_scope::is_child_of(const prog_scope *scope) const +{ + const prog_scope *my_parent = parent(); + while (my_parent) { + if (my_parent == scope) + return true; + my_parent = my_parent->parent(); + } + return false; +} + +const prog_scope *prog_scope::enclosing_conditional() const +{ + if (is_conditional()) + return this; + + if (parent_scope) + return parent_scope->enclosing_conditional(); + + return nullptr; +} + +bool prog_scope::contains_range_of(const prog_scope& other) const +{ + return (begin() <= other.begin()) && (end() >= other.end()); +} + +bool prog_scope::is_conditional() const +{ + return scope_type == if_branch || + scope_type == else_branch || + scope_type == switch_case_branch || + scope_type == switch_default_branch; +} + +const prog_scope *prog_scope::in_else_scope() const +{ + if (scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_else_scope(); + + return nullptr; +} + +const prog_scope *prog_scope::in_parent_ifelse_scope() const +{ + if (parent_scope) + return parent_scope->in_ifelse_scope(); + else + return nullptr; +} + +const prog_scope *prog_scope::in_ifelse_scope() const +{ + if (scope_type == if_branch || + scope_type == else_branch) + return this; + + if (parent_scope) + return parent_scope->in_ifelse_scope(); + + return nullptr; +} + +bool prog_scope::is_switchcase_scope_in_loop() const +{ + return (scope_type == switch_case_branch || + scope_type == switch_default_branch) && + is_in_loop(); +} + +bool prog_scope::break_is_for_switchcase() const +{ + if (scope_type == loop_body) + return false; + + if (scope_type == switch_case_branch || + scope_type == switch_default_branch || + scope_type == switch_body) + return true; + + if (parent_scope) + return parent_scope->break_is_for_switchcase(); + + return false; +} + +int prog_scope::id() const +{ + return scope_id; +} + +int prog_scope::begin() const +{ + return scope_begin; +} + +int prog_scope::end() const +{ + return scope_end; +} + +void prog_scope::set_end(int end) +{ + if (scope_end == -1) + scope_end = end; +} + +void prog_scope::set_loop_break_line(int line) +{ + if (scope_type == loop_body) { + break_loop_line = MIN2(break_loop_line, line); + } else { + if (parent_scope) + parent()->set_loop_break_line(line); + } +} + +int prog_scope::loop_break_line() const +{ + return break_loop_line; +} + +temp_access::temp_access(): + access_mask(0), + needs_component_tracking(false), + is_array_element(false) +{ +} + +void temp_access::update_access_mask(int mask) +{ + if (access_mask && access_mask != mask) + needs_component_tracking = true; + access_mask |= mask; +} + +void temp_access::record_write(int line, prog_scope *scope, int writemask, bool is_array_elm) +{ + + + update_access_mask(writemask); + is_array_element |= is_array_elm; + + if (writemask & WRITEMASK_X) + comp[0].record_write(line, scope); + if (writemask & WRITEMASK_Y) + comp[1].record_write(line, scope); + if (writemask & WRITEMASK_Z) + comp[2].record_write(line, scope); + if (writemask & WRITEMASK_W) + comp[3].record_write(line, scope); +} + +void temp_access::record_read(int line, prog_scope *scope, int readmask, bool is_array_elm) +{ + update_access_mask(readmask); + is_array_element |= is_array_elm; + + if (readmask & WRITEMASK_X) + comp[0].record_read(line, scope); + if (readmask & WRITEMASK_Y) + comp[1].record_read(line, scope); + if (readmask & WRITEMASK_Z) + 
comp[2].record_read(line, scope);
+   if (readmask & WRITEMASK_W)
+      comp[3].record_read(line, scope);
+}
+
+inline static register_live_range make_live_range(int b, int e)
+{
+   register_live_range lt;
+   lt.begin = b;
+   lt.end = e;
+   lt.is_array_elm = false;
+   return lt;
+}
+
+register_live_range temp_access::get_required_live_range()
+{
+   register_live_range result = make_live_range(-1, -1);
+
+   unsigned mask = access_mask;
+   while (mask) {
+      unsigned chan = u_bit_scan(&mask);
+      register_live_range lt = comp[chan].get_required_live_range();
+
+      if (lt.begin >= 0) {
+         if ((result.begin < 0) || (result.begin > lt.begin))
+            result.begin = lt.begin;
+      }
+
+      if (lt.end > result.end)
+         result.end = lt.end;
+
+      if (!needs_component_tracking)
+         break;
+   }
+   result.is_array_elm = is_array_element;
+
+   return result;
+}
+
+const int
+temp_comp_access::conditionality_untouched = std::numeric_limits<int>::max();
+
+const int
+temp_comp_access::write_is_unconditional = std::numeric_limits<int>::max() - 1;
+
+temp_comp_access::temp_comp_access():
+   last_read_scope(nullptr),
+   first_read_scope(nullptr),
+   first_write_scope(nullptr),
+   first_write(-1),
+   last_read(-1),
+   last_write(-1),
+   first_read(numeric_limits<int>::max()),
+   conditionality_in_loop_id(conditionality_untouched),
+   if_scope_write_flags(0),
+   next_ifelse_nesting_depth(0),
+   current_unpaired_if_write_scope(nullptr),
+   was_written_in_current_else_scope(false)
+{
+}
+
+void temp_comp_access::record_read(int line, prog_scope *scope)
+{
+   last_read_scope = scope;
+   if (last_read < line)
+      last_read = line;
+
+   if (first_read > line) {
+      first_read = line;
+      first_read_scope = scope;
+   }
+
+   /* If the conditionality of the first write is already resolved then
+    * no further checks are required.
+    */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* Check whether we are in a condition within a loop */
+   const prog_scope *ifelse_scope = scope->in_ifelse_scope();
+   const prog_scope *enclosing_loop;
+   if (ifelse_scope && (enclosing_loop = ifelse_scope->innermost_loop())) {
+
+      /* If we have neither written to this register yet, nor are writes
+       * resolved as unconditional in the enclosing loop, then check whether
+       * we read before write in an IF/ELSE branch.
+       */
+      if ((conditionality_in_loop_id != write_is_conditional) &&
+          (conditionality_in_loop_id != enclosing_loop->id())) {
+
+         if (current_unpaired_if_write_scope) {
+
+            /* Has it been written in this or a parent scope? Then the
+             * temporary is unconditionally set at this point.
+             */
+            if (scope->is_child_of(current_unpaired_if_write_scope))
+               return;
+
+            /* Has it been written in the same scope before it was read? */
+            if (ifelse_scope->type() == if_branch) {
+               if (current_unpaired_if_write_scope->id() == scope->id())
+                  return;
+            } else {
+               if (was_written_in_current_else_scope)
+                  return;
+            }
+         }
+
+         /* The temporary was read (conditionally) before it is written, hence
+          * it should survive a loop. This can be signaled as if it were
+          * conditionally written.
+          */
+         conditionality_in_loop_id = write_is_conditional;
+      }
+   }
+}
+
+void temp_comp_access::record_write(int line, prog_scope *scope)
+{
+   last_write = line;
+
+   if (first_write < 0) {
+      first_write = line;
+      first_write_scope = scope;
+
+      /* If the first write we encounter is not in a conditional branch, or
+       * the conditional write is not within a loop, then this is to be
+       * considered an unconditional dominant write.
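+       *
+       * A hypothetical illustration (not from the sources):
+       *    t = a;           // first write, not in any branch
+       *    if (b) t = c;    // later conditional write
+       * Here the first write dominates, so the value does not need to be
+       * kept alive across a whole loop iteration.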
+       */
+      const prog_scope *conditional = scope->enclosing_conditional();
+      if (!conditional || !conditional->innermost_loop()) {
+         conditionality_in_loop_id = write_is_unconditional;
+      }
+   }
+
+   /* The conditionality of the first write is already resolved. */
+   if (conditionality_in_loop_id == write_is_unconditional ||
+       conditionality_in_loop_id == write_is_conditional)
+      return;
+
+   /* If the nesting depth is larger than the supported level,
+    * then we assume conditional writes.
+    */
+   if (next_ifelse_nesting_depth >= supported_ifelse_nesting_depth) {
+      conditionality_in_loop_id = write_is_conditional;
+      return;
+   }
+
+   /* If we are in an IF/ELSE scope within a loop and the loop has not
+    * been resolved already, then record this write.
+    */
+   const prog_scope *ifelse_scope = scope->in_ifelse_scope();
+   if (ifelse_scope && ifelse_scope->innermost_loop() &&
+       ifelse_scope->innermost_loop()->id() != conditionality_in_loop_id)
+      record_ifelse_write(*ifelse_scope);
+}
+
+void temp_comp_access::record_ifelse_write(const prog_scope& scope)
+{
+   if (scope.type() == if_branch) {
+      /* The first write in an IF branch within a loop implies unresolved
+       * conditionality (if it was untouched or unconditional before).
+       */
+      conditionality_in_loop_id = conditionality_unresolved;
+      was_written_in_current_else_scope = false;
+      record_if_write(scope);
+   } else {
+      was_written_in_current_else_scope = true;
+      record_else_write(scope);
+   }
+}
+
+void temp_comp_access::record_if_write(const prog_scope& scope)
+{
+   /* Don't record the write in this IF scope if it ...
+    * - is not the first write in this IF scope,
+    * - has already been written in a parent IF scope.
+    * In both cases this write is a secondary write that doesn't contribute
+    * to resolve conditionality.
+    *
+    * Record the write if it
+    * - is the first one (obviously),
+    * - happens in an IF branch that is a child of the ELSE branch of the
+    *   last active IF/ELSE pair. In this case recording this write is used
+    *   to establish whether the write is (un-)conditional in the scope
+    *   enclosing this outer IF/ELSE pair.
+    */
+   if (!current_unpaired_if_write_scope ||
+       (current_unpaired_if_write_scope->id() != scope.id() &&
+        scope.is_child_of_ifelse_id_sibling(current_unpaired_if_write_scope))) {
+      if_scope_write_flags |= 1 << next_ifelse_nesting_depth;
+      current_unpaired_if_write_scope = &scope;
+      next_ifelse_nesting_depth++;
+   }
+}
+
+void temp_comp_access::record_else_write(const prog_scope& scope)
+{
+   int mask = 1 << (next_ifelse_nesting_depth - 1);
+
+   /* If the temporary was written in an IF branch on the same scope level
+    * and this branch is the sibling of this ELSE branch, then we have a
+    * pair of writes that makes write access to this temporary unconditional
+    * in the enclosing scope.
+    */
+   if ((if_scope_write_flags & mask) &&
+       (scope.id() == current_unpaired_if_write_scope->id())) {
+      --next_ifelse_nesting_depth;
+      if_scope_write_flags &= ~mask;
+
+      /* The following code deals with propagating unconditionality from
+       * inner levels of nested IF/ELSE to the outer levels like in
+       *
+       * 1: var t;
+       * 2: if (a) {      <- start scope A
+       * 3:    if (b)
+       * 4:       t = ...
+       * 5:    else
+       * 6:       t = ...
+       * 7: } else {      <- start scope B
+       * 8:    if (c)
+       * 9:       t = ...
+       * A:    else       <- start scope C
+       * B:       t = ...
+ * C: } + * + */ + + const prog_scope *parent_ifelse = scope.parent()->in_ifelse_scope(); + + if (1 << (next_ifelse_nesting_depth - 1) & if_scope_write_flags) { + /* We are at the end of scope C and already recorded a write + * within an IF scope (A), the sibling of the parent ELSE scope B, + * and it is not yet resolved. Mark that as the last relevant + * IF scope. Below the write will be resolved for the A/B + * scope pair. + */ + current_unpaired_if_write_scope = parent_ifelse; + } else { + current_unpaired_if_write_scope = nullptr; + } + /* Promote the first write scope to the enclosing scope because + * the current IF/ELSE pair is now irrelevant for the analysis. + * This is also required to evaluate the minimum life time for t in + * { + * var t; + * if (a) + * t = ... + * else + * t = ... + * x = t; + * ... + * } + */ + first_write_scope = scope.parent(); + + /* If some parent is IF/ELSE and in a loop then propagate the + * write to that scope. Otherwise the write is unconditional + * because it happens in both corresponding IF/ELSE branches + * in this loop, and hence, record the loop id to signal the + * resolution. + */ + if (parent_ifelse && parent_ifelse->is_in_loop()) { + record_ifelse_write(*parent_ifelse); + } else { + conditionality_in_loop_id = scope.innermost_loop()->id(); + } + } else { + /* The temporary was not written in the IF branch corresponding + * to this ELSE branch, hence the write is conditional. + */ + conditionality_in_loop_id = write_is_conditional; + } +} + +bool temp_comp_access::conditional_ifelse_write_in_loop() const +{ + return conditionality_in_loop_id <= conditionality_unresolved; +} + +void temp_comp_access::propagate_live_range_to_dominant_write_scope() +{ + first_write = first_write_scope->begin(); + int lr = first_write_scope->end(); + + if (last_read < lr) + last_read = lr; +} + +register_live_range temp_comp_access::get_required_live_range() +{ + bool keep_for_full_loop = false; + + /* This register component is not used at all, or only read, + * mark it as unused and ignore it when renaming. + * glsl_to_tgsi_visitor::renumber_registers will take care of + * eliminating registers that are not written to. + */ + if (last_write < 0) + return make_live_range(-1, -1); + + assert(first_write_scope); + + /* Only written to, just make sure the register component is not + * reused in the range it is used to write to + */ + if (!last_read_scope) + return make_live_range(first_write, last_write + 1); + + const prog_scope *enclosing_scope_first_read = first_read_scope; + const prog_scope *enclosing_scope_first_write = first_write_scope; + + /* We read before writing in a loop + * hence the value must survive the loops + */ + if ((first_read <= first_write) && + first_read_scope->is_in_loop()) { + keep_for_full_loop = true; + enclosing_scope_first_read = first_read_scope->outermost_loop(); + } + + /* A conditional write within a (nested) loop must survive the outermost + * loop if the last read was not within the same scope. + */ + const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional(); + if (conditional && !conditional->contains_range_of(*last_read_scope) && + (conditional->is_switchcase_scope_in_loop() || + conditional_ifelse_write_in_loop())) { + keep_for_full_loop = true; + enclosing_scope_first_write = conditional->outermost_loop(); + } + + /* Evaluate the scope that is shared by all: required first write scope, + * required first read before write scope, and last read scope. 
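+    * (For example, if the value is written inside a loop but read after
+    * it, the shared scope must enclose both the loop and the read; the
+    * loop below walks up the parent scopes until this holds.)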
+ */ + const prog_scope *enclosing_scope = enclosing_scope_first_read; + if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) + enclosing_scope = enclosing_scope_first_write; + + if (last_read_scope->contains_range_of(*enclosing_scope)) + enclosing_scope = last_read_scope; + + while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || + !enclosing_scope->contains_range_of(*last_read_scope)) { + enclosing_scope = enclosing_scope->parent(); + assert(enclosing_scope); + } + + /* Propagate the last read scope to the target scope */ + while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { + /* If the read is in a loop and we have to move up the scope we need to + * extend the live range to the end of this current loop because at this + * point we don't know whether the component was written before + * un-conditionally in the same loop. + */ + if (last_read_scope->is_loop()) + last_read = last_read_scope->end(); + + last_read_scope = last_read_scope->parent(); + } + + /* If the variable has to be kept for the whole loop, and we + * are currently in a loop, then propagate the live range. + */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + + /* Propagate the first_dominant_write scope to the target scope */ + while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { + /* Propagate live_range if there was a break in a loop and the write was + * after the break inside that loop. Note, that this is only needed if + * we move up in the scopes. + */ + if (first_write_scope->loop_break_line() < first_write) { + keep_for_full_loop = true; + propagate_live_range_to_dominant_write_scope(); + } + + first_write_scope = first_write_scope->parent(); + + /* Propagate live_range if we are now in a loop */ + if (keep_for_full_loop && first_write_scope->is_loop()) + propagate_live_range_to_dominant_write_scope(); + } + + /* The last write past the last read is dead code, but we have to + * ensure that the component is not reused too early, hence extend the + * live_range past the last write. + */ + if (last_write >= last_read) + last_read = last_write + 1; + + /* Here we are at the same scope, all is resolved */ + return make_live_range(first_write, last_read); +} + +/* Helper class for sorting and searching the registers based + * on live ranges. 
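+ * Records compare by the first line of the live range, so a sorted
+ * vector of them can be searched directly with std::upper_bound (see
+ * get_temp_registers_remapping below).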
*/ +class register_merge_record { +public: + int begin; + int end; + int reg; + bool erase; + bool is_array_elm; + + bool operator < (const register_merge_record& rhs) const { + return begin < rhs.begin; + } +}; + +LiverangeEvaluator::LiverangeEvaluator(): + line(0), + loop_id(1), + if_id(1), + switch_id(0), + is_at_end(false), + n_scopes(1), + cur_scope(nullptr) +{ +} + +void LiverangeEvaluator::run(const Shader& shader, + std::vector<register_live_range>& register_live_ranges) +{ + temp_acc.resize(register_live_ranges.size()); + fill(temp_acc.begin(), temp_acc.end(), temp_access()); + + sfn_log << SfnLog::merge << "have " << temp_acc.size() << " temps\n"; + + for (const auto& block: shader.m_ir) { + for (const auto& ir: block) { + switch (ir->type()) { + case Instruction::cond_if: + case Instruction::cond_else: + case Instruction::loop_begin: + ++n_scopes; + default: + ; + } + } + } + + scopes.reset(new prog_scope_storage(n_scopes)); + + cur_scope = scopes->create(nullptr, outer_scope, 0, 0, line); + + line = 0; + + for (auto& v: shader.m_temp) { + if (v.second->type() == Value::gpr) { + sfn_log << SfnLog::merge << "Record " << *v.second << "\n"; + const auto& g = static_cast<const GPRValue&>(*v.second); + if (g.is_input()) { + sfn_log << SfnLog::merge << "Record INPUT write for " + << g << " in " << temp_acc.size() << " temps\n"; + temp_acc[g.sel()].record_write(line, cur_scope, 1 << g.chan(), false); + temp_acc[g.sel()].record_read(line, cur_scope, 1 << g.chan(), false); + } + if (g.keep_alive()) { + sfn_log << SfnLog::merge << "Record KEEP ALIVE for " + << g << " in " << temp_acc.size() << " temps\n"; + temp_acc[g.sel()].record_read(0x7fffff, cur_scope, 1 << g.chan(), false); + } + } + } + + for (const auto& block: shader.m_ir) + for (const auto& ir: block) { + ir->evalue_liveness(*this); + if (ir->type() != Instruction::alu || + static_cast<const AluInstruction&>(*ir).flag(alu_last_instr)) + ++line; + } + + assert(cur_scope->type() == outer_scope); + cur_scope->set_end(line); + is_at_end = true; + + get_required_live_ranges(register_live_ranges); +} + + +void LiverangeEvaluator::record_read(const Value& src, bool is_array_elm) +{ + sfn_log << SfnLog::merge << "Record read l:" << line << " reg:" << src << "\n"; + if (src.type() == Value::gpr) { + const GPRValue& v = static_cast<const GPRValue&>(src); + if (v.chan() < 4) + temp_acc[v.sel()].record_read(v.keep_alive() ? 
0x7fffff: line, cur_scope, 1 << v.chan(), is_array_elm);
+      return;
+   } else if (src.type() == Value::gpr_array_value) {
+      const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
+      v.record_read(*this);
+   } else if (src.type() == Value::kconst) {
+      const UniformValue& v = static_cast<const UniformValue&>(src);
+      if (v.addr())
+         record_read(*v.addr(), is_array_elm);
+   }
+}
+
+void LiverangeEvaluator::record_write(const Value& src, bool is_array_elm)
+{
+   sfn_log << SfnLog::merge << "Record write for "
+           << src << " in " << temp_acc.size() << " temps\n";
+
+   if (src.type() == Value::gpr) {
+      const GPRValue& v = static_cast<const GPRValue&>(src);
+      assert(v.sel() < temp_acc.size());
+      if (v.chan() < 4)
+         temp_acc[v.sel()].record_write(line, cur_scope, 1 << v.chan(), is_array_elm);
+      return;
+   } else if (src.type() == Value::gpr_array_value) {
+      const GPRArrayValue& v = static_cast<const GPRArrayValue&>(src);
+      v.record_write(*this);
+   } else if (src.type() == Value::kconst) {
+      const UniformValue& v = static_cast<const UniformValue&>(src);
+      if (v.addr())
+         record_write(*v.addr(), is_array_elm);
+   }
+}
+
+void LiverangeEvaluator::record_read(const GPRVector& src)
+{
+   for (int i = 0; i < 4; ++i)
+      if (src.reg_i(i))
+         record_read(*src.reg_i(i));
+}
+
+void LiverangeEvaluator::record_write(const GPRVector& dst)
+{
+   for (int i = 0; i < 4; ++i)
+      if (dst.reg_i(i))
+         record_write(*dst.reg_i(i));
+}
+
+void LiverangeEvaluator::get_required_live_ranges(std::vector<register_live_range>& register_live_ranges)
+{
+   sfn_log << SfnLog::merge << "== register live ranges ==========\n";
+   for (unsigned i = 0; i < register_live_ranges.size(); ++i) {
+      sfn_log << SfnLog::merge << setw(4) << i;
+      register_live_ranges[i] = temp_acc[i].get_required_live_range();
+      sfn_log << SfnLog::merge << ": [" << register_live_ranges[i].begin << ", "
+              << register_live_ranges[i].end << "]\n";
+   }
+   sfn_log << SfnLog::merge << "==================================\n\n";
+}
+
+void LiverangeEvaluator::scope_if()
+{
+   cur_scope = scopes->create(cur_scope, if_branch, if_id++,
+                              cur_scope->nesting_depth() + 1, line + 1);
+}
+
+void LiverangeEvaluator::scope_else()
+{
+   assert(cur_scope->type() == if_branch);
+   cur_scope->set_end(line - 1);
+   cur_scope = scopes->create(cur_scope->parent(), else_branch,
+                              cur_scope->id(), cur_scope->nesting_depth(),
+                              line + 1);
+}
+
+void LiverangeEvaluator::scope_endif()
+{
+   cur_scope->set_end(line - 1);
+   cur_scope = cur_scope->parent();
+   assert(cur_scope);
+}
+
+void LiverangeEvaluator::scope_loop_begin()
+{
+   cur_scope = scopes->create(cur_scope, loop_body, loop_id++,
+                              cur_scope->nesting_depth() + 1, line);
+}
+
+void LiverangeEvaluator::scope_loop_end()
+{
+   assert(cur_scope->type() == loop_body);
+   cur_scope->set_end(line);
+   cur_scope = cur_scope->parent();
+   assert(cur_scope);
+}
+
+void LiverangeEvaluator::scope_loop_break()
+{
+   cur_scope->set_loop_break_line(line);
+}
+
+/* This function evaluates the register merges by using a binary
+ * search to find suitable merge candidates.
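+ *
+ * A hypothetical example: given live ranges R1:[0,10], R3:[5,15] and
+ * R2:[12,20], R2 is the first record whose begin lies past R1's end,
+ * so R2 is renamed to R1 and R1's range grows to [0,20], while R3
+ * overlaps R1 and keeps its own register.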
*/ + +std::vector<rename_reg_pair> +get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges) +{ + + std::vector<rename_reg_pair> result(live_ranges.size(), rename_reg_pair{false, false, 0}); + std::vector<register_merge_record> reg_access; + + for (unsigned i = 0; i < live_ranges.size(); ++i) { + if (live_ranges[i].begin >= 0) { + register_merge_record r; + r.begin = live_ranges[i].begin; + r.end = live_ranges[i].end; + r.is_array_elm = live_ranges[i].is_array_elm; + r.reg = i; + r.erase = false; + reg_access.push_back(r); + } + } + + std::sort(reg_access.begin(), reg_access.end()); + + for (auto& r : reg_access) + sfn_log << SfnLog::merge << "Use Range " <<r.reg << " [" + << r.begin << ", " << r.end << "]\n"; + + auto trgt = reg_access.begin(); + auto reg_access_end = reg_access.end(); + auto first_erase = reg_access_end; + auto search_start = trgt + 1; + + while (trgt != reg_access_end) { + /* Find the next register that has a live-range starting past the + * search start and that is not an array element. Array elements can't + * be moved (Moving the whole array could be an option to be implemented later)*/ + + sfn_log << SfnLog::merge << "Next target is " + << trgt->reg << "[" << trgt->begin << ", " << trgt->end << "]\n"; + + + auto src = upper_bound(search_start, reg_access_end, trgt->end, + [](int bound, const register_merge_record& m){ + return bound < m.begin && !m.is_array_elm;} + ); + + if (src != reg_access_end) { + result[src->reg].new_reg = trgt->reg; + result[src->reg].valid = true; + + sfn_log << SfnLog::merge << "Map " + << src->reg << "[" << src->begin << ", " << src->end << "] to " + << trgt->reg << "[" << trgt->begin << ", " << trgt->end << ":"; + trgt->end = src->end; + sfn_log << SfnLog::merge << trgt->end << "]\n"; + + /* Since we only search forward, don't remove the renamed + * register just now, only mark it. */ + src->erase = true; + + if (first_erase == reg_access_end) + first_erase = src; + + search_start = src + 1; + } else { + /* Moving to the next target register it is time to remove + * the already merged registers from the search range */ + if (first_erase != reg_access_end) { + auto outp = first_erase; + auto inp = first_erase + 1; + + while (inp != reg_access_end) { + if (!inp->erase) + *outp++ = *inp; + ++inp; + } + + reg_access_end = outp; + first_erase = reg_access_end; + } + ++trgt; + search_start = trgt + 1; + } + } + return result; +} + +} // end ns r600 diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h new file mode 100644 index 000000000..8b9ed2ef2 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_liverange.h @@ -0,0 +1,314 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_LIVERANGE_H +#define SFN_LIVERANGE_H + +#include <cstdint> +#include <ostream> +#include <vector> +#include <limits> + +#include "sfn_instruction_base.h" +#include "sfn_nir.h" + +namespace r600 { + +/** Storage to record the required live range of a temporary register + * begin == end == -1 indicates that the register can be reused without + * limitations. Otherwise, "begin" indicates the first instruction in which + * a write operation may target this temporary, and end indicates the + * last instruction in which a value can be read from this temporary. + * Hence, a register R2 can be merged with a register R1 if R1.end <= R2.begin. + */ +struct register_live_range { + int begin; + int end; + bool is_array_elm; +}; + +enum prog_scope_type { + outer_scope, /* Outer program scope */ + loop_body, /* Inside a loop */ + if_branch, /* Inside if branch */ + else_branch, /* Inside else branch */ + switch_body, /* Inside switch statement */ + switch_case_branch, /* Inside switch case statement */ + switch_default_branch, /* Inside switch default statement */ + undefined_scope +}; + +class prog_scope { +public: + prog_scope(); + prog_scope(prog_scope *parent, prog_scope_type type, int id, + int depth, int begin); + + prog_scope_type type() const; + prog_scope *parent() const; + int nesting_depth() const; + int id() const; + int end() const; + int begin() const; + int loop_break_line() const; + + const prog_scope *in_else_scope() const; + const prog_scope *in_ifelse_scope() const; + const prog_scope *in_parent_ifelse_scope() const; + const prog_scope *innermost_loop() const; + const prog_scope *outermost_loop() const; + const prog_scope *enclosing_conditional() const; + + bool is_loop() const; + bool is_in_loop() const; + bool is_switchcase_scope_in_loop() const; + bool is_conditional() const; + bool is_child_of(const prog_scope *scope) const; + bool is_child_of_ifelse_id_sibling(const prog_scope *scope) const; + + bool break_is_for_switchcase() const; + bool contains_range_of(const prog_scope& other) const; + + void set_end(int end); + void set_loop_break_line(int line); + +private: + prog_scope_type scope_type; + int scope_id; + int scope_nesting_depth; + int scope_begin; + int scope_end; + int break_loop_line; + prog_scope *parent_scope; +}; + +/* Some storage class to encapsulate the prog_scope (de-)allocations */ +class prog_scope_storage { +public: + prog_scope_storage(int n); + ~prog_scope_storage(); + prog_scope * create(prog_scope *p, prog_scope_type type, int id, + int lvl, int s_begin); +private: + int current_slot; + std::vector<prog_scope> storage; +}; + +/* Class to track the access to a component of a temporary register. 
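+ * One instance tracks a single channel; temp_access below aggregates
+ * four of these, one per component x, y, z, and w.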
 */
+
+class temp_comp_access {
+public:
+   temp_comp_access();
+
+   void record_read(int line, prog_scope *scope);
+   void record_write(int line, prog_scope *scope);
+   register_live_range get_required_live_range();
+private:
+   void propagate_live_range_to_dominant_write_scope();
+   bool conditional_ifelse_write_in_loop() const;
+
+   void record_ifelse_write(const prog_scope& scope);
+   void record_if_write(const prog_scope& scope);
+   void record_else_write(const prog_scope& scope);
+
+   prog_scope *last_read_scope;
+   prog_scope *first_read_scope;
+   prog_scope *first_write_scope;
+
+   int first_write;
+   int last_read;
+   int last_write;
+   int first_read;
+
+   /* This member variable tracks the current resolution of conditional writing
+    * to this temporary in IF/ELSE clauses.
+    *
+    * The initial value "conditionality_untouched" indicates that this
+    * temporary has not yet been written to within an if clause.
+    *
+    * A positive (other than "conditionality_untouched") number refers to the
+    * last loop id for which the write was resolved as unconditional. With each
+    * new loop this value will be overwritten by "conditionality_unresolved"
+    * on entering the first IF clause writing this temporary.
+    *
+    * The value "conditionality_unresolved" indicates that no resolution has
+    * been achieved so far. If the variable is set to this value at the end of
+    * the processing of the whole shader it also indicates a conditional write.
+    *
+    * The value "write_is_conditional" marks that the variable is written
+    * conditionally (i.e. not in all relevant IF/ELSE code path pairs) in at
+    * least one loop.
+    */
+   int conditionality_in_loop_id;
+
+   /* Helper constants to make the tracking code more readable. */
+   static const int write_is_conditional = -1;
+   static const int conditionality_unresolved = 0;
+   static const int conditionality_untouched;
+   static const int write_is_unconditional;
+
+   /* A bit field tracking the nesting levels of if-else clauses where the
+    * temporary has (so far) been written to in the if branch, but not in the
+    * else branch.
+    */
+   unsigned int if_scope_write_flags;
+
+   int next_ifelse_nesting_depth;
+   static const int supported_ifelse_nesting_depth = 32;
+
+   /* Tracks the last if scope in which the temporary was written to
+    * without a write in the corresponding else branch. Is also used
+    * to track read-before-write in the corresponding scope.
+    */
+   const prog_scope *current_unpaired_if_write_scope;
+
+   /* Flag to resolve read-before-write in the else scope. */
+   bool was_written_in_current_else_scope;
+};
+
+/* Class to track the access to all components of a temporary register. */
+class temp_access {
+public:
+   temp_access();
+   void record_read(int line, prog_scope *scope, int swizzle, bool is_array_elm);
+   void record_write(int line, prog_scope *scope, int writemask, bool is_array_elm);
+   register_live_range get_required_live_range();
+private:
+   void update_access_mask(int mask);
+
+   temp_comp_access comp[4];
+   int access_mask;
+   bool needs_component_tracking;
+   bool is_array_element;
+};
+
+/* Helper class to merge the live ranges of arrays.
+ *
+ * For arrays the array length, live range, and component access need to
+ * be kept, because when live ranges are merged or arrays are interleaved
+ * one can only merge or interleave an array into another with equal or more
+ * elements. For interleaving it is also required that the sum of used
+ * swizzles is at most four.
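+ *
+ * For instance, an array that only accesses .xy can be interleaved with
+ * one that only accesses .z or .zw, since together they still fit into
+ * the four available components.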
+ */ + +class array_live_range { +public: + array_live_range(); + array_live_range(unsigned aid, unsigned alength); + array_live_range(unsigned aid, unsigned alength, int first_access, + int last_access, int mask); + + void set_live_range(int first_access, int last_access); + void set_begin(int _begin){first_access = _begin;} + void set_end(int _end){last_access = _end;} + void set_access_mask(int s); + + static void merge(array_live_range *a, array_live_range *b); + static void interleave(array_live_range *a, array_live_range *b); + + int array_id() const {return id;} + int target_array_id() const {return target_array ? target_array->id : 0;} + const array_live_range *final_target() const {return target_array ? + target_array->final_target() : this;} + unsigned array_length() const { return length;} + int begin() const { return first_access;} + int end() const { return last_access;} + int access_mask() const { return component_access_mask;} + int used_components() const {return used_component_count;} + + bool time_doesnt_overlap(const array_live_range& other) const; + + void print(std::ostream& os) const; + + bool is_mapped() const { return target_array != nullptr;} + + int8_t remap_one_swizzle(int8_t idx) const; + +private: + void init_swizzles(); + void set_target(array_live_range *target); + void merge_live_range_from(array_live_range *other); + void interleave_into(array_live_range *other); + + unsigned id; + unsigned length; + int first_access; + int last_access; + uint8_t component_access_mask; + uint8_t used_component_count; + array_live_range *target_array; + int8_t swizzle_map[4]; +}; + + + +class LiverangeEvaluator { +public: + LiverangeEvaluator(); + + void run(const Shader& shader, + std::vector<register_live_range> ®ister_live_ranges); + + void scope_if(); + void scope_else(); + void scope_endif(); + void scope_loop_begin(); + void scope_loop_end(); + void scope_loop_break(); + + void record_read(const Value& src, bool is_array_elm = false); + void record_write(const Value& dst, bool is_array_elm = false); + + void record_read(const GPRVector& src); + void record_write(const GPRVector& dst); + +private: + + prog_scope *create_scope(prog_scope *parent, prog_scope_type type, int id, + int lvl, int s_begin); + + + void get_required_live_ranges(std::vector<register_live_range>& register_live_ranges); + + int line; + int loop_id; + int if_id; + int switch_id; + bool is_at_end; + int n_scopes; + std::unique_ptr<prog_scope_storage> scopes; + prog_scope *cur_scope; + + std::vector<temp_access> temp_acc; + +}; + +std::vector<rename_reg_pair> +get_temp_registers_remapping(const std::vector<register_live_range>& live_ranges); + +} // end namespace r600 + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp new file mode 100644 index 000000000..b421f838c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -0,0 +1,1076 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + 
* The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_nir.h" +#include "nir_builder.h" + +#include "../r600_pipe.h" +#include "../r600_shader.h" + +#include "sfn_instruction_tex.h" + +#include "sfn_shader_vertex.h" +#include "sfn_shader_fragment.h" +#include "sfn_shader_geometry.h" +#include "sfn_shader_compute.h" +#include "sfn_shader_tcs.h" +#include "sfn_shader_tess_eval.h" +#include "sfn_nir_lower_fs_out_to_vector.h" +#include "sfn_ir_to_assembly.h" + +#include <vector> + +namespace r600 { + +using std::vector; + + +NirLowerInstruction::NirLowerInstruction(): + b(nullptr) +{ + +} + +bool NirLowerInstruction::filter_instr(const nir_instr *instr, const void *data) +{ + auto me = reinterpret_cast<const NirLowerInstruction*>(data); + return me->filter(instr); +} + +nir_ssa_def *NirLowerInstruction::lower_instr(nir_builder *b, nir_instr *instr, void *data) +{ + auto me = reinterpret_cast<NirLowerInstruction*>(data); + me->set_builder(b); + return me->lower(instr); +} + +bool NirLowerInstruction::run(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + filter_instr, + lower_instr, + (void *)this); +} + + +ShaderFromNir::ShaderFromNir():sh(nullptr), + chip_class(CLASS_UNKNOWN), + m_current_if_id(0), + m_current_loop_id(0), + scratch_size(0) +{ +} + +bool ShaderFromNir::lower(const nir_shader *shader, r600_pipe_shader *pipe_shader, + r600_pipe_shader_selector *sel, r600_shader_key& key, + struct r600_shader* gs_shader, enum chip_class _chip_class) +{ + sh = shader; + chip_class = _chip_class; + assert(sh); + + switch (shader->info.stage) { + case MESA_SHADER_VERTEX: + impl.reset(new VertexShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class)); + break; + case MESA_SHADER_TESS_CTRL: + sfn_log << SfnLog::trans << "Start TCS\n"; + impl.reset(new TcsShaderFromNir(pipe_shader, *sel, key, chip_class)); + break; + case MESA_SHADER_TESS_EVAL: + sfn_log << SfnLog::trans << "Start TESS_EVAL\n"; + impl.reset(new TEvalShaderFromNir(pipe_shader, *sel, key, gs_shader, chip_class)); + break; + case MESA_SHADER_GEOMETRY: + sfn_log << SfnLog::trans << "Start GS\n"; + impl.reset(new GeometryShaderFromNir(pipe_shader, *sel, key, chip_class)); + break; + case MESA_SHADER_FRAGMENT: + sfn_log << SfnLog::trans << "Start FS\n"; + impl.reset(new FragmentShaderFromNir(*shader, pipe_shader->shader, *sel, key, chip_class)); + break; + case MESA_SHADER_COMPUTE: + sfn_log << SfnLog::trans << "Start CS\n"; + impl.reset(new ComputeShaderFromNir(pipe_shader, *sel, key, chip_class)); + break; + default: + return false; + } + + sfn_log << SfnLog::trans << "Process declarations\n"; + if (!process_declaration()) + return false; + + // at this point all functions should be inlined + const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&sh->functions)); + + sfn_log << SfnLog::trans << "Scan shader\n"; + + if (sfn_log.has_debug_flag(SfnLog::instr)) 
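+      /* dump the incoming NIR when the "instr" debug flag is set */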
+ nir_print_shader(const_cast<nir_shader *>(shader), stderr); + + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (!impl->scan_instruction(instr)) { + fprintf(stderr, "Unhandled sysvalue access "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + return false; + } + } + } + + sfn_log << SfnLog::trans << "Reserve registers\n"; + if (!impl->allocate_reserved_registers()) { + return false; + } + + ValuePool::array_list arrays; + sfn_log << SfnLog::trans << "Allocate local registers\n"; + foreach_list_typed(nir_register, reg, node, &func->impl->registers) { + impl->allocate_local_register(*reg, arrays); + } + + sfn_log << SfnLog::trans << "Emit shader start\n"; + impl->allocate_arrays(arrays); + + impl->emit_shader_start(); + + sfn_log << SfnLog::trans << "Process shader \n"; + foreach_list_typed(nir_cf_node, node, node, &func->impl->body) { + if (!process_cf_node(node)) + return false; + } + + // Add optimizations here + sfn_log << SfnLog::trans << "Finalize\n"; + impl->finalize(); + + impl->get_array_info(pipe_shader->shader); + + if (!sfn_log.has_debug_flag(SfnLog::nomerge)) { + sfn_log << SfnLog::trans << "Merge registers\n"; + impl->remap_registers(); + } + + sfn_log << SfnLog::trans << "Finished translating to R600 IR\n"; + return true; +} + +Shader ShaderFromNir::shader() const +{ + return Shader{impl->m_output, impl->get_temp_registers()}; +} + + +bool ShaderFromNir::process_cf_node(nir_cf_node *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "CF"); + switch (node->type) { + case nir_cf_node_block: + return process_block(nir_cf_node_as_block(node)); + case nir_cf_node_if: + return process_if(nir_cf_node_as_if(node)); + case nir_cf_node_loop: + return process_loop(nir_cf_node_as_loop(node)); + default: + return false; + } +} + +bool ShaderFromNir::process_if(nir_if *if_stmt) +{ + SFN_TRACE_FUNC(SfnLog::flow, "IF"); + + if (!impl->emit_if_start(m_current_if_id, if_stmt)) + return false; + + int if_id = m_current_if_id++; + m_if_stack.push(if_id); + + foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) + if (!process_cf_node(n)) return false; + + if (!if_stmt->then_list.is_empty()) { + if (!impl->emit_else_start(if_id)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list) + if (!process_cf_node(n)) return false; + } + + if (!impl->emit_ifelse_end(if_id)) + return false; + + m_if_stack.pop(); + return true; +} + +bool ShaderFromNir::process_loop(nir_loop *node) +{ + SFN_TRACE_FUNC(SfnLog::flow, "LOOP"); + int loop_id = m_current_loop_id++; + + if (!impl->emit_loop_start(loop_id)) + return false; + + foreach_list_typed(nir_cf_node, n, node, &node->body) + if (!process_cf_node(n)) return false; + + if (!impl->emit_loop_end(loop_id)) + return false; + + return true; +} + +bool ShaderFromNir::process_block(nir_block *block) +{ + SFN_TRACE_FUNC(SfnLog::flow, "BLOCK"); + nir_foreach_instr(instr, block) { + int r = emit_instruction(instr); + if (!r) { + sfn_log << SfnLog::err << "R600: Unsupported instruction: " + << *instr << "\n"; + return false; + } + } + return true; +} + + +ShaderFromNir::~ShaderFromNir() +{ +} + +pipe_shader_type ShaderFromNir::processor_type() const +{ + return impl->m_processor_type; +} + + +bool ShaderFromNir::emit_instruction(nir_instr *instr) +{ + assert(impl); + + sfn_log << SfnLog::instr << "Read instruction " << *instr << "\n"; + + switch (instr->type) { + case nir_instr_type_alu: + return impl->emit_alu_instruction(instr); + case nir_instr_type_deref: + return 
impl->emit_deref_instruction(nir_instr_as_deref(instr)); + case nir_instr_type_intrinsic: + return impl->emit_intrinsic_instruction(nir_instr_as_intrinsic(instr)); + case nir_instr_type_load_const: /* const values are loaded when needed */ + return true; + case nir_instr_type_tex: + return impl->emit_tex_instruction(instr); + case nir_instr_type_jump: + return impl->emit_jump_instruction(nir_instr_as_jump(instr)); + default: + fprintf(stderr, "R600: %s: ShaderFromNir Unsupported instruction: type %d:'", __func__, instr->type); + nir_print_instr(instr, stderr); + fprintf(stderr, "'\n"); + return false; + case nir_instr_type_ssa_undef: + return impl->create_undef(nir_instr_as_ssa_undef(instr)); + return true; + } +} + +bool ShaderFromNir::process_declaration() +{ + + if (!impl->scan_inputs_read(sh)) + return false; + + // scan declarations + nir_foreach_variable_with_modes(variable, sh, nir_var_uniform | + nir_var_mem_ubo | + nir_var_mem_ssbo) { + if (!impl->process_uniforms(variable)) { + fprintf(stderr, "R600: error parsing outputs variable %s\n", variable->name); + return false; + } + } + + return true; +} + +const std::vector<InstructionBlock>& ShaderFromNir::shader_ir() const +{ + assert(impl); + return impl->m_output; +} + + +AssemblyFromShader::~AssemblyFromShader() +{ +} + +bool AssemblyFromShader::lower(const std::vector<InstructionBlock>& ir) +{ + return do_lower(ir); +} + +static nir_ssa_def * +r600_nir_lower_pack_unpack_2x16_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_unpack_half_2x16: { + nir_ssa_def *packed = nir_ssa_for_alu_src(b, alu, 0); + return nir_vec2(b, nir_unpack_half_2x16_split_x(b, packed), + nir_unpack_half_2x16_split_y(b, packed)); + + } + case nir_op_pack_half_2x16: { + nir_ssa_def *src_vec2 = nir_ssa_for_alu_src(b, alu, 0); + return nir_pack_half_2x16_split(b, nir_channel(b, src_vec2, 0), + nir_channel(b, src_vec2, 1)); + } + default: + return nullptr; + } +} + +bool r600_nir_lower_pack_unpack_2x16_filter(const nir_instr *instr, const void *_options) +{ + return instr->type == nir_instr_type_alu; +} + +bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_nir_lower_pack_unpack_2x16_filter, + r600_nir_lower_pack_unpack_2x16_impl, + nullptr); +}; + +static void +r600_nir_lower_scratch_address_impl(nir_builder *b, nir_intrinsic_instr *instr) +{ + b->cursor = nir_before_instr(&instr->instr); + + int address_index = 0; + int align; + + if (instr->intrinsic == nir_intrinsic_store_scratch) { + align = instr->src[0].ssa->num_components; + address_index = 1; + } else{ + align = instr->dest.ssa.num_components; + } + + nir_ssa_def *address = instr->src[address_index].ssa; + nir_ssa_def *new_address = nir_ishr(b, address, nir_imm_int(b, 4 * align)); + + nir_instr_rewrite_src(&instr->instr, &instr->src[address_index], + nir_src_for_ssa(new_address)); +} + +bool r600_lower_scratch_addresses(nir_shader *shader) +{ + bool progress = false; + nir_foreach_function(function, shader) { + nir_builder build; + nir_builder_init(&build, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + if (op->intrinsic != nir_intrinsic_load_scratch && + op->intrinsic != nir_intrinsic_store_scratch) + continue; + r600_nir_lower_scratch_address_impl(&build, op); + progress = true; 
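+            /* the lowering above rewrote the address source in place:
+             * the intrinsic now reads the original address right-shifted
+             * by 4 * align */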
+ } + } + } + return progress; +} + +static void +insert_uniform_sorted(struct exec_list *var_list, nir_variable *new_var) +{ + nir_foreach_variable_in_list(var, var_list) { + if (var->data.binding > new_var->data.binding || + (var->data.binding == new_var->data.binding && + var->data.offset > new_var->data.offset)) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + exec_list_push_tail(var_list, &new_var->node); +} + +void sort_uniforms(nir_shader *shader) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + + nir_foreach_uniform_variable_safe(var, shader) { + exec_node_remove(&var->node); + insert_uniform_sorted(&new_list, var); + } + exec_list_append(&shader->variables, &new_list); +} + +static void +insert_fsoutput_sorted(struct exec_list *var_list, nir_variable *new_var) +{ + + nir_foreach_variable_in_list(var, var_list) { + if (var->data.location > new_var->data.location || + (var->data.location == new_var->data.location && + var->data.index > new_var->data.index)) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + + exec_list_push_tail(var_list, &new_var->node); +} + +void sort_fsoutput(nir_shader *shader) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + + nir_foreach_shader_out_variable_safe(var, shader) { + exec_node_remove(&var->node); + insert_fsoutput_sorted(&new_list, var); + } + + unsigned driver_location = 0; + nir_foreach_variable_in_list(var, &new_list) + var->data.driver_location = driver_location++; + + exec_list_append(&shader->variables, &new_list); +} + +} + +static nir_intrinsic_op +r600_map_atomic(nir_intrinsic_op op) +{ + switch (op) { + case nir_intrinsic_atomic_counter_read_deref: + return nir_intrinsic_atomic_counter_read; + case nir_intrinsic_atomic_counter_inc_deref: + return nir_intrinsic_atomic_counter_inc; + case nir_intrinsic_atomic_counter_pre_dec_deref: + return nir_intrinsic_atomic_counter_pre_dec; + case nir_intrinsic_atomic_counter_post_dec_deref: + return nir_intrinsic_atomic_counter_post_dec; + case nir_intrinsic_atomic_counter_add_deref: + return nir_intrinsic_atomic_counter_add; + case nir_intrinsic_atomic_counter_min_deref: + return nir_intrinsic_atomic_counter_min; + case nir_intrinsic_atomic_counter_max_deref: + return nir_intrinsic_atomic_counter_max; + case nir_intrinsic_atomic_counter_and_deref: + return nir_intrinsic_atomic_counter_and; + case nir_intrinsic_atomic_counter_or_deref: + return nir_intrinsic_atomic_counter_or; + case nir_intrinsic_atomic_counter_xor_deref: + return nir_intrinsic_atomic_counter_xor; + case nir_intrinsic_atomic_counter_exchange_deref: + return nir_intrinsic_atomic_counter_exchange; + case nir_intrinsic_atomic_counter_comp_swap_deref: + return nir_intrinsic_atomic_counter_comp_swap; + default: + return nir_num_intrinsics; + } +} + +static bool +r600_lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr, + nir_shader *shader) +{ + nir_intrinsic_op op = r600_map_atomic(instr->intrinsic); + if (nir_num_intrinsics == op) + return false; + + nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + if (var->data.mode != nir_var_uniform && + var->data.mode != nir_var_mem_ssbo && + var->data.mode != nir_var_mem_shared) + return false; /* atomics passed as function arguments can't be lowered */ + + const unsigned idx = var->data.binding; + + b->cursor = nir_before_instr(&instr->instr); + + nir_ssa_def *offset = nir_imm_int(b, var->data.index); + for 
(nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var; + d = nir_deref_instr_parent(d)) { + assert(d->deref_type == nir_deref_type_array); + assert(d->arr.index.is_ssa); + + unsigned array_stride = 1; + if (glsl_type_is_array(d->type)) + array_stride *= glsl_get_aoa_size(d->type); + + offset = nir_iadd(b, offset, nir_imul(b, d->arr.index.ssa, + nir_imm_int(b, array_stride))); + } + + /* Since the first source is a deref and the first source in the lowered + * instruction is the offset, we can just swap it out and change the + * opcode. + */ + instr->intrinsic = op; + nir_instr_rewrite_src(&instr->instr, &instr->src[0], + nir_src_for_ssa(offset)); + nir_intrinsic_set_base(instr, idx); + + nir_deref_instr_remove_if_unused(deref); + + return true; +} + +static bool +r600_nir_lower_atomics(nir_shader *shader) +{ + bool progress = false; + + /* First re-do the offsets, in Hardware we start at zero for each new + * binding, and we use an offset of one per counter */ + int current_binding = -1; + int current_offset = 0; + nir_foreach_variable_with_modes(var, shader, nir_var_uniform) { + if (!var->type->contains_atomic()) + continue; + + if (current_binding == (int)var->data.binding) { + var->data.index = current_offset; + current_offset += var->type->atomic_size() / ATOMIC_COUNTER_SIZE; + } else { + current_binding = var->data.binding; + var->data.index = 0; + current_offset = var->type->atomic_size() / ATOMIC_COUNTER_SIZE; + } + } + + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + bool impl_progress = false; + + nir_builder build; + nir_builder_init(&build, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + impl_progress |= r600_lower_deref_instr(&build, + nir_instr_as_intrinsic(instr), shader); + } + } + + if (impl_progress) { + nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance); + progress = true; + } + } + + return progress; +} +using r600::r600_nir_lower_int_tg4; +using r600::r600_nir_lower_pack_unpack_2x16; +using r600::r600_lower_scratch_addresses; +using r600::r600_lower_fs_out_to_vector; +using r600::r600_lower_ubo_to_align16; + +int +r600_glsl_type_size(const struct glsl_type *type, bool is_bindless) +{ + return glsl_count_vec4_slots(type, false, is_bindless); +} + +void +r600_get_natural_size_align_bytes(const struct glsl_type *type, + unsigned *size, unsigned *align) +{ + if (type->base_type != GLSL_TYPE_ARRAY) { + *align = 1; + *size = 1; + } else { + unsigned elem_size, elem_align; + glsl_get_natural_size_align_bytes(type->fields.array, + &elem_size, &elem_align); + *align = 1; + *size = type->length; + } +} + +static bool +r600_lower_shared_io_impl(nir_function *func) +{ + nir_builder b; + nir_builder_init(&b, func->impl); + + bool progress = false; + nir_foreach_block(block, func->impl) { + nir_foreach_instr_safe(instr, block) { + + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + if (op->intrinsic != nir_intrinsic_load_shared && + op->intrinsic != nir_intrinsic_store_shared) + continue; + + b.cursor = nir_before_instr(instr); + + if (op->intrinsic == nir_intrinsic_load_shared) { + nir_ssa_def *addr = op->src[0].ssa; + + switch (nir_dest_num_components(op->dest)) { + case 2: { + auto addr2 = nir_iadd_imm(&b, addr, 4); + addr = nir_vec2(&b, addr, addr2); + break; + } + case 3: { + auto addr2 = nir_iadd(&b, addr, 
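+                                 /* byte offsets of the second and third dwords */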
nir_imm_ivec2(&b, 4, 8)); + addr = nir_vec3(&b, addr, + nir_channel(&b, addr2, 0), + nir_channel(&b, addr2, 1)); + break; + } + case 4: { + addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12)); + break; + } + } + + auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600); + load->num_components = nir_dest_num_components(op->dest); + load->src[0] = nir_src_for_ssa(addr); + nir_ssa_dest_init(&load->instr, &load->dest, + load->num_components, 32, NULL); + nir_ssa_def_rewrite_uses(&op->dest.ssa, &load->dest.ssa); + nir_builder_instr_insert(&b, &load->instr); + } else { + nir_ssa_def *addr = op->src[1].ssa; + for (int i = 0; i < 2; ++i) { + unsigned test_mask = (0x3 << 2 * i); + if (!(nir_intrinsic_write_mask(op) & test_mask)) + continue; + + auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600); + unsigned writemask = nir_intrinsic_write_mask(op) & test_mask; + nir_intrinsic_set_write_mask(store, writemask); + store->src[0] = nir_src_for_ssa(op->src[0].ssa); + store->num_components = store->src[0].ssa->num_components; + bool start_even = (writemask & (1u << (2 * i))); + + auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4))); + store->src[1] = nir_src_for_ssa(addr2); + + nir_builder_instr_insert(&b, &store->instr); + } + } + nir_instr_remove(instr); + progress = true; + } + } + return progress; +} + +static bool +r600_lower_shared_io(nir_shader *nir) +{ + bool progress=false; + nir_foreach_function(function, nir) { + if (function->impl && + r600_lower_shared_io_impl(function)) + progress = true; + } + return progress; +} + + +static nir_ssa_def * +r600_lower_fs_pos_input_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + auto old_ir = nir_instr_as_intrinsic(instr); + auto load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); + nir_ssa_dest_init(&load->instr, &load->dest, + old_ir->dest.ssa.num_components, old_ir->dest.ssa.bit_size, NULL); + nir_intrinsic_set_io_semantics(load, nir_intrinsic_io_semantics(old_ir)); + + nir_intrinsic_set_base(load, nir_intrinsic_base(old_ir)); + nir_intrinsic_set_component(load, nir_intrinsic_component(old_ir)); + nir_intrinsic_set_dest_type(load, nir_type_float32); + load->num_components = old_ir->num_components; + load->src[0] = old_ir->src[1]; + nir_builder_instr_insert(b, &load->instr); + return &load->dest.ssa; +} + +bool r600_lower_fs_pos_input_filter(const nir_instr *instr, const void *_options) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + auto ir = nir_instr_as_intrinsic(instr); + if (ir->intrinsic != nir_intrinsic_load_interpolated_input) + return false; + + return nir_intrinsic_io_semantics(ir).location == VARYING_SLOT_POS; +} + +/* Strip the interpolator specification, it is not needed and irritates */ +bool r600_lower_fs_pos_input(nir_shader *shader) +{ + return nir_shader_lower_instructions(shader, + r600_lower_fs_pos_input_filter, + r600_lower_fs_pos_input_impl, + nullptr); +}; + +static bool +optimize_once(nir_shader *shader, bool vectorize) +{ + bool progress = false; + NIR_PASS(progress, shader, nir_lower_vars_to_ssa); + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_dce); + NIR_PASS(progress, shader, nir_opt_algebraic); + NIR_PASS(progress, shader, nir_opt_constant_folding); + NIR_PASS(progress, shader, nir_opt_copy_prop_vars); + if (vectorize) + NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL); + + NIR_PASS(progress, shader, nir_opt_remove_phis); + + if 
(nir_opt_trivial_continues(shader)) { + progress = true; + NIR_PASS(progress, shader, nir_copy_prop); + NIR_PASS(progress, shader, nir_opt_dce); + } + + NIR_PASS(progress, shader, nir_opt_if, false); + NIR_PASS(progress, shader, nir_opt_dead_cf); + NIR_PASS(progress, shader, nir_opt_cse); + NIR_PASS(progress, shader, nir_opt_peephole_select, 200, true, true); + + NIR_PASS(progress, shader, nir_opt_conditional_discard); + NIR_PASS(progress, shader, nir_opt_dce); + NIR_PASS(progress, shader, nir_opt_undef); + return progress; +} + +bool has_saturate(const nir_function *func) +{ + nir_foreach_block(block, func->impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_alu) { + auto alu = nir_instr_as_alu(instr); + if (alu->dest.saturate) + return true; + } + } + } + return false; +} + +bool r600_lower_to_scalar_instr_filter(const nir_instr *instr, const void *) +{ + if (instr->type != nir_instr_type_alu) + return true; + + auto alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_bany_fnequal3: + case nir_op_bany_fnequal4: + case nir_op_ball_fequal3: + case nir_op_ball_fequal4: + case nir_op_bany_inequal3: + case nir_op_bany_inequal4: + case nir_op_ball_iequal3: + case nir_op_ball_iequal4: + case nir_op_fdot2: + case nir_op_fdot3: + case nir_op_fdot4: + case nir_op_cube_r600: + return false; + case nir_op_bany_fnequal2: + case nir_op_ball_fequal2: + case nir_op_bany_inequal2: + case nir_op_ball_iequal2: + return nir_src_bit_size(alu->src[0].src) != 64; + default: + return true; + } +} + +int r600_shader_from_nir(struct r600_context *rctx, + struct r600_pipe_shader *pipeshader, + r600_shader_key *key) +{ + char filename[4000]; + struct r600_pipe_shader_selector *sel = pipeshader->selector; + + bool lower_64bit = ((sel->nir->options->lower_int64_options || + sel->nir->options->lower_doubles_options) && + (sel->nir->info.bit_sizes_float | sel->nir->info.bit_sizes_int) & 64); + + r600::ShaderFromNir convert; + + if (rctx->screen->b.debug_flags & DBG_PREOPT_IR) { + fprintf(stderr, "PRE-OPT-NIR-----------.------------------------------\n"); + nir_print_shader(sel->nir, stderr); + fprintf(stderr, "END PRE-OPT-NIR--------------------------------------\n\n"); + } + + r600::sort_uniforms(sel->nir); + + NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); + nir_lower_idiv_options idiv_options = { + .imprecise_32bit_lowering = sel->nir->info.stage != MESA_SHADER_COMPUTE, + .allow_fp16 = true, + }; + NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options); + NIR_PASS_V(sel->nir, r600_lower_alu); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + + if (lower_64bit) + NIR_PASS_V(sel->nir, nir_lower_int64); + while(optimize_once(sel->nir, false)); + + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); + + NIR_PASS_V(sel->nir, r600_lower_shared_io); + NIR_PASS_V(sel->nir, r600_nir_lower_atomics); + + static const struct nir_lower_tex_options lower_tex_options = { + .lower_txp = ~0u, + }; + NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options); + NIR_PASS_V(sel->nir, r600::r600_nir_lower_txl_txf_array_or_cube); + NIR_PASS_V(sel->nir, r600::r600_nir_lower_cube_to_2darray); + + NIR_PASS_V(sel->nir, r600_nir_lower_pack_unpack_2x16); + + if (sel->nir->info.stage == MESA_SHADER_VERTEX) + NIR_PASS_V(sel->nir, r600_vectorize_vs_inputs); + + if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(sel->nir, r600_lower_fs_out_to_vector); + } + + nir_variable_mode io_modes = nir_var_uniform | 
nir_var_shader_in;
+
+   //if (sel->nir->info.stage != MESA_SHADER_FRAGMENT)
+   io_modes |= nir_var_shader_out;
+
+   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
+
+      /* Lower IO to temporaries late, because otherwise we get into trouble
+       * with the glsl 4.40 interpolateAt swizzle tests. There seems to be a bug
+       * somewhere that results in the input always reading from the same temp
+       * regardless of interpolation when the lowering is done early. */
+      NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(sel->nir),
+                 true, true);
+
+      /* Since we're doing nir_lower_io_to_temporaries late, we need
+       * to lower all the copy_deref's introduced by
+       * lower_io_to_temporaries before calling nir_lower_io.
+       */
+      NIR_PASS_V(sel->nir, nir_split_var_copies);
+      NIR_PASS_V(sel->nir, nir_lower_var_copies);
+      NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
+   }
+
+   NIR_PASS_V(sel->nir, nir_lower_io, io_modes, r600_glsl_type_size,
+              nir_lower_io_lower_64bit_to_32);
+
+   if (sel->nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS_V(sel->nir, r600_lower_fs_pos_input);
+
+   if (lower_64bit)
+      NIR_PASS_V(sel->nir, nir_lower_indirect_derefs, nir_var_function_temp, 10);
+
+   NIR_PASS_V(sel->nir, nir_opt_constant_folding);
+   NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes);
+
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+   if (lower_64bit)
+      NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io);
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
+   NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL);
+   NIR_PASS_V(sel->nir, nir_copy_prop);
+   NIR_PASS_V(sel->nir, nir_opt_dce);
+
+   auto sh = nir_shader_clone(sel->nir, sel->nir);
+
+   if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
+       sh->info.stage == MESA_SHADER_TESS_EVAL ||
+       (sh->info.stage == MESA_SHADER_VERTEX && key->vs.as_ls)) {
+      auto prim_type = sh->info.stage == MESA_SHADER_TESS_EVAL ?
+ sh->info.tess.primitive_mode: key->tcs.prim_mode; + NIR_PASS_V(sh, r600_lower_tess_io, static_cast<pipe_prim_type>(prim_type)); + } + + if (sh->info.stage == MESA_SHADER_TESS_CTRL) + NIR_PASS_V(sh, r600_append_tcs_TF_emission, + (pipe_prim_type)key->tcs.prim_mode); + + if (sh->info.stage == MESA_SHADER_TESS_EVAL) + NIR_PASS_V(sh, r600_lower_tess_coord, + static_cast<pipe_prim_type>(sh->info.tess.primitive_mode)); + + NIR_PASS_V(sh, nir_lower_ubo_vec4); + if (lower_64bit) + NIR_PASS_V(sh, r600::r600_nir_64_to_vec2); + + /* Lower to scalar to let some optimization work out better */ + while(optimize_once(sh, false)); + + NIR_PASS_V(sh, r600::r600_merge_vec2_stores); + + NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_in, NULL); + NIR_PASS_V(sh, nir_remove_dead_variables, nir_var_shader_out, NULL); + + + NIR_PASS_V(sh, nir_lower_vars_to_scratch, + nir_var_function_temp, + 40, + r600_get_natural_size_align_bytes); + + while (optimize_once(sh, true)); + + NIR_PASS_V(sh, nir_lower_bool_to_int32); + NIR_PASS_V(sh, r600_nir_lower_int_tg4); + NIR_PASS_V(sh, nir_opt_algebraic_late); + + if (sh->info.stage == MESA_SHADER_FRAGMENT) + r600::sort_fsoutput(sh); + + NIR_PASS_V(sh, nir_lower_locals_to_regs); + + //NIR_PASS_V(sh, nir_opt_algebraic); + //NIR_PASS_V(sh, nir_copy_prop); + NIR_PASS_V(sh, nir_lower_to_source_mods, + (nir_lower_to_source_mods_flags)(nir_lower_float_source_mods | + nir_lower_64bit_source_mods)); + NIR_PASS_V(sh, nir_convert_from_ssa, true); + NIR_PASS_V(sh, nir_opt_dce); + + if ((rctx->screen->b.debug_flags & DBG_NIR_PREFERRED) && + (rctx->screen->b.debug_flags & DBG_ALL_SHADERS)) { + fprintf(stderr, "-- NIR --------------------------------------------------------\n"); + struct nir_function *func = (struct nir_function *)exec_list_get_head(&sh->functions); + nir_index_ssa_defs(func->impl); + nir_print_shader(sh, stderr); + fprintf(stderr, "-- END --------------------------------------------------------\n"); + } + + memset(&pipeshader->shader, 0, sizeof(r600_shader)); + pipeshader->scratch_space_needed = sh->scratch_size; + + if (sh->info.stage == MESA_SHADER_TESS_EVAL || + sh->info.stage == MESA_SHADER_VERTEX || + sh->info.stage == MESA_SHADER_GEOMETRY) { + pipeshader->shader.clip_dist_write |= ((1 << sh->info.clip_distance_array_size) - 1); + pipeshader->shader.cull_dist_write = ((1 << sh->info.cull_distance_array_size) - 1) + << sh->info.clip_distance_array_size; + pipeshader->shader.cc_dist_mask = (1 << (sh->info.cull_distance_array_size + + sh->info.clip_distance_array_size)) - 1; + } + + struct r600_shader* gs_shader = nullptr; + if (rctx->gs_shader) + gs_shader = &rctx->gs_shader->current->shader; + r600_screen *rscreen = rctx->screen; + + bool r = convert.lower(sh, pipeshader, sel, *key, gs_shader, rscreen->b.chip_class); + if (!r || rctx->screen->b.debug_flags & DBG_ALL_SHADERS) { + static int shnr = 0; + + snprintf(filename, 4000, "nir-%s_%d.inc", sh->info.name, shnr++); + + if (access(filename, F_OK) == -1) { + FILE *f = fopen(filename, "w"); + + if (f) { + fprintf(f, "const char *shader_blob_%s = {\nR\"(", sh->info.name); + nir_print_shader(sh, f); + fprintf(f, ")\";\n"); + fclose(f); + } + } + if (!r) + return -2; + } + + auto shader = convert.shader(); + + r600_bytecode_init(&pipeshader->shader.bc, rscreen->b.chip_class, rscreen->b.family, + rscreen->has_compressed_msaa_texturing); + + r600::sfn_log << r600::SfnLog::shader_info + << "pipeshader->shader.processor_type = " + << pipeshader->shader.processor_type << "\n"; + + pipeshader->shader.bc.type = 
pipeshader->shader.processor_type; + pipeshader->shader.bc.isa = rctx->isa; + + r600::AssemblyFromShaderLegacy afs(&pipeshader->shader, key); + if (!afs.lower(shader.m_ir)) { + R600_ERR("%s: Lowering to assembly failed\n", __func__); + return -1; + } + + if (sh->info.stage == MESA_SHADER_GEOMETRY) { + r600::sfn_log << r600::SfnLog::shader_info << "Geometry shader, create copy shader\n"; + generate_gs_copy_shader(rctx, pipeshader, &sel->so); + assert(pipeshader->gs_copy_shader); + } else { + r600::sfn_log << r600::SfnLog::shader_info << "This is not a Geometry shader\n"; + } + if (pipeshader->shader.bc.ngpr < 6) + pipeshader->shader.bc.ngpr = 6; + + return 0; +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h new file mode 100644 index 000000000..d13accb3b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -0,0 +1,161 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_NIR_H +#define SFN_NIR_H + +#include "nir.h" +#include "nir_builder.h" + +#ifdef __cplusplus +#include "sfn_shader_base.h" +#include <vector> + +namespace r600 { + +class NirLowerInstruction { +public: + NirLowerInstruction(); + + bool run(nir_shader *shader); + +private: + static bool filter_instr(const nir_instr *instr, const void *data); + static nir_ssa_def *lower_instr(nir_builder *b, nir_instr *instr, void *data); + + void set_builder(nir_builder *_b) { b = _b;} + + virtual bool filter(const nir_instr *instr) const = 0; + virtual nir_ssa_def *lower(nir_instr *instr) = 0; +protected: + nir_builder *b; +}; + +bool r600_nir_lower_pack_unpack_2x16(nir_shader *shader); + +bool r600_lower_scratch_addresses(nir_shader *shader); + +bool r600_lower_ubo_to_align16(nir_shader *shader); + +bool r600_nir_split_64bit_io(nir_shader *sh); + +bool r600_nir_64_to_vec2(nir_shader *sh); + +bool r600_merge_vec2_stores(nir_shader *shader); + +class Shader { +public: + std::vector<InstructionBlock>& m_ir; + ValueMap m_temp; +}; + +class ShaderFromNir { +public: + ShaderFromNir(); + ~ShaderFromNir(); + + unsigned ninputs() const; + + bool lower(const nir_shader *shader, r600_pipe_shader *sh, + r600_pipe_shader_selector *sel, r600_shader_key &key, + r600_shader *gs_shader, enum chip_class chip_class); + + bool process_declaration(); + + pipe_shader_type processor_type() const; + + bool emit_instruction(nir_instr *instr); + + const std::vector<InstructionBlock> &shader_ir() const; + + Shader shader() const; +private: + + bool process_block(); + bool process_cf_node(nir_cf_node *node); + bool process_if(nir_if *node); + bool process_loop(nir_loop *node); + bool process_block(nir_block *node); + + std::unique_ptr<ShaderFromNirProcessor> impl; + const nir_shader *sh; + + enum chip_class chip_class; + int m_current_if_id; + int m_current_loop_id; + std::stack<int> m_if_stack; + int scratch_size; +}; + +class AssemblyFromShader { +public: + virtual ~AssemblyFromShader(); + bool lower(const std::vector<InstructionBlock> &ir); +private: + virtual bool do_lower(const std::vector<InstructionBlock>& ir) = 0 ; +}; + +} + +static inline nir_ssa_def * +r600_imm_ivec3(nir_builder *build, int x, int y, int z) +{ + nir_const_value v[3] = { + nir_const_value_for_int(x, 32), + nir_const_value_for_int(y, 32), + nir_const_value_for_int(z, 32), + }; + + return nir_build_imm(build, 3, 32, v); +} + +bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type); +bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type); +bool r600_lower_tess_coord(nir_shader *sh, enum pipe_prim_type prim_type); + +#else +#include "gallium/drivers/r600/r600_shader.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +bool r600_vectorize_vs_inputs(nir_shader *shader); + + +int r600_shader_from_nir(struct r600_context *rctx, + struct r600_pipe_shader *pipeshader, + union r600_shader_key *key); + +bool r600_lower_alu(nir_shader *sh); + +#ifdef __cplusplus +} +#endif + + +#endif // SFN_NIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp new file mode 100644 index 000000000..4a177d15d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.cpp @@ -0,0 +1,462 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_nir_lower_fs_out_to_vector.h" + +#include "nir_builder.h" +#include "nir_deref.h" +#include "util/u_math.h" + +#include <set> +#include <vector> +#include <array> +#include <algorithm> + +namespace r600 { + +using std::multiset; +using std::vector; +using std::array; + +struct nir_intrinsic_instr_less { + bool operator () (const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) const + { + nir_variable *vlhs = nir_deref_instr_get_variable(nir_src_as_deref(lhs->src[0])); + nir_variable *vrhs = nir_deref_instr_get_variable(nir_src_as_deref(rhs->src[0])); + + auto ltype = glsl_get_base_type(vlhs->type); + auto rtype = glsl_get_base_type(vrhs->type); + + if (ltype != rtype) + return ltype < rtype; + return vlhs->data.location < vrhs->data.location; + } +}; + +class NirLowerIOToVector { +public: + NirLowerIOToVector(int base_slot); + bool run(nir_function_impl *shader); + +protected: + bool var_can_merge(const nir_variable *lhs, const nir_variable *rhs); + bool var_can_rewrite(nir_variable *var) const; + void create_new_io_vars(nir_shader *shader); + void create_new_io_var(nir_shader *shader, unsigned location, unsigned comps); + + nir_deref_instr *clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, + const nir_deref_instr *src_head); + + bool vectorize_block(nir_builder *b, nir_block *block); + bool instr_can_rewrite(nir_instr *instr); + bool vec_instr_set_remove(nir_builder *b,nir_instr *instr); + + using InstrSet = multiset<nir_intrinsic_instr *, nir_intrinsic_instr_less>; + using InstrSubSet = std::pair<InstrSet::iterator, InstrSet::iterator>; + + bool vec_instr_stack_pop(nir_builder *b, InstrSubSet& ir_set, + nir_intrinsic_instr *instr); + + array<array<nir_variable *, 4>, 16> m_vars; + InstrSet m_block_io; + int m_next_index; +private: + virtual nir_variable_mode get_io_mode(nir_shader *shader) const = 0; + virtual bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const = 0; + virtual bool var_can_rewrite_slot(nir_variable *var) const = 0; + virtual void create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var, + nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) = 0; + + int m_base_slot; +}; + +class NirLowerFSOutToVector : public NirLowerIOToVector { +public: + NirLowerFSOutToVector(); + +private: + nir_variable_mode get_io_mode(nir_shader *shader) const override; + bool var_can_rewrite_slot(nir_variable *var) const override; + void create_new_io(nir_builder *b, 
nir_intrinsic_instr *intr, nir_variable *var, + nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps) override; + bool instr_can_rewrite_type(nir_intrinsic_instr *intr) const override; + + nir_ssa_def *create_combined_vector(nir_builder *b, nir_ssa_def **srcs, + int first_comp, int num_comp); +}; + +bool r600_lower_fs_out_to_vector(nir_shader *shader) +{ + NirLowerFSOutToVector processor; + + assert(shader->info.stage == MESA_SHADER_FRAGMENT); + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= processor.run(function->impl); + } + return progress; +} + +NirLowerIOToVector::NirLowerIOToVector(int base_slot): + m_next_index(0), + m_base_slot(base_slot) +{ + for(auto& a : m_vars) + for(auto& aa : a) + aa = nullptr; +} + +bool NirLowerIOToVector::run(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_metadata_require(impl, nir_metadata_dominance); + create_new_io_vars(impl->function->shader); + + bool progress = vectorize_block(&b, nir_start_block(impl)); + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); + } + return progress; +} + +void NirLowerIOToVector::create_new_io_vars(nir_shader *shader) +{ + nir_variable_mode mode = get_io_mode(shader); + + bool can_rewrite_vars = false; + nir_foreach_variable_with_modes(var, shader, mode) { + if (var_can_rewrite(var)) { + can_rewrite_vars = true; + unsigned loc = var->data.location - m_base_slot; + m_vars[loc][var->data.location_frac] = var; + } + } + + if (!can_rewrite_vars) + return; + + /* We don't handle combining vars of different type e.g. different array + * lengths. + */ + for (unsigned i = 0; i < 16; i++) { + unsigned comps = 0; + + for (unsigned j = 0; j < 3; j++) { + if (!m_vars[i][j]) + continue; + + for (unsigned k = j + 1; k < 4; k++) { + if (!m_vars[i][k]) + continue; + + if (!var_can_merge(m_vars[i][j], m_vars[i][k])) + continue; + + /* Set comps */ + for (unsigned n = 0; n < glsl_get_components(m_vars[i][j]->type); ++n) + comps |= 1 << (m_vars[i][j]->data.location_frac + n); + + for (unsigned n = 0; n < glsl_get_components(m_vars[i][k]->type); ++n) + comps |= 1 << (m_vars[i][k]->data.location_frac + n); + + } + } + if (comps) + create_new_io_var(shader, i, comps); + } +} + +bool +NirLowerIOToVector::var_can_merge(const nir_variable *lhs, + const nir_variable *rhs) +{ + return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type)); +} + +void +NirLowerIOToVector::create_new_io_var(nir_shader *shader, + unsigned location, unsigned comps) +{ + unsigned num_comps = util_bitcount(comps); + assert(num_comps > 1); + + /* Note: u_bit_scan() strips a component of the comps bitfield here */ + unsigned first_comp = u_bit_scan(&comps); + + nir_variable *var = nir_variable_clone(m_vars[location][first_comp], shader); + var->data.location_frac = first_comp; + var->type = glsl_replace_vector_type(var->type, num_comps); + + nir_shader_add_variable(shader, var); + + m_vars[location][first_comp] = var; + + while (comps) { + const int comp = u_bit_scan(&comps); + if (m_vars[location][comp]) { + m_vars[location][comp] = var; + } + } +} + +bool NirLowerIOToVector::var_can_rewrite(nir_variable *var) const +{ + /* Skip complex types we don't split in the first place */ + if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type))) + return false; + + if (glsl_get_bit_size(glsl_without_array(var->type)) != 32) + return false; + + return var_can_rewrite_slot(var); +} + +bool 
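+/* Index the rewritable stores of this block, recurse into the blocks it
+ * dominates, then walk the block backwards and merge matching stores;
+ * vec_instr_stack_pop() keeps the last store of a set as the anchor. */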
+NirLowerIOToVector::vectorize_block(nir_builder *b, nir_block *block) +{ + bool progress = false; + + nir_foreach_instr_safe(instr, block) { + if (instr_can_rewrite(instr)) { + instr->index = m_next_index++; + nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr); + m_block_io.insert(ir); + } + } + + for (unsigned i = 0; i < block->num_dom_children; i++) { + nir_block *child = block->dom_children[i]; + progress |= vectorize_block(b, child); + } + + nir_foreach_instr_reverse_safe(instr, block) { + progress |= vec_instr_set_remove(b, instr); + } + m_block_io.clear(); + + return progress; +} + +bool NirLowerIOToVector::instr_can_rewrite(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->num_components > 3) + return false; + + return instr_can_rewrite_type(intr); +} + +bool NirLowerIOToVector::vec_instr_set_remove(nir_builder *b,nir_instr *instr) +{ + if (!instr_can_rewrite(instr)) + return false; + + nir_intrinsic_instr *ir = nir_instr_as_intrinsic(instr); + auto entry = m_block_io.equal_range(ir); + if (entry.first != m_block_io.end()) { + vec_instr_stack_pop(b, entry, ir); + } + return true; +} + +nir_deref_instr * +NirLowerIOToVector::clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, + const nir_deref_instr *src_head) +{ + const nir_deref_instr *parent = nir_deref_instr_parent(src_head); + + if (!parent) + return dst_tail; + + assert(src_head->deref_type == nir_deref_type_array); + + dst_tail = clone_deref_array(b, dst_tail, parent); + + return nir_build_deref_array(b, dst_tail, + nir_ssa_for_src(b, src_head->arr.index, 1)); +} + +NirLowerFSOutToVector::NirLowerFSOutToVector(): + NirLowerIOToVector(FRAG_RESULT_COLOR) +{ + +} + +bool NirLowerFSOutToVector::var_can_rewrite_slot(nir_variable *var) const +{ + return ((var->data.mode == nir_var_shader_out) && + ((var->data.location == FRAG_RESULT_COLOR) || + ((var->data.location >= FRAG_RESULT_DATA0) && + (var->data.location <= FRAG_RESULT_DATA7)))); +} + +bool NirLowerIOToVector::vec_instr_stack_pop(nir_builder *b, InstrSubSet &ir_set, + nir_intrinsic_instr *instr) +{ + vector< nir_intrinsic_instr *> ir_sorted_set(ir_set.first, ir_set.second); + std::sort(ir_sorted_set.begin(), ir_sorted_set.end(), + [](const nir_intrinsic_instr *lhs, const nir_intrinsic_instr *rhs) { + return lhs->instr.index > rhs->instr.index; + } + ); + + nir_intrinsic_instr *intr = *ir_sorted_set.begin(); + nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + + unsigned loc = var->data.location - m_base_slot; + + nir_variable *new_var = m_vars[loc][var->data.location_frac]; + unsigned num_comps = glsl_get_vector_elements(glsl_without_array(new_var->type)); + unsigned old_num_comps = glsl_get_vector_elements(glsl_without_array(var->type)); + + /* Don't bother walking the stack if this component can't be vectorised. 
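+    * instr_can_rewrite() already rejected intrinsics with more than three
+    * components, so this check is on the width of the variable itself.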
*/
+   if (old_num_comps > 3) {
+      return false;
+   }
+
+   if (new_var == var) {
+      return false;
+   }
+
+   b->cursor = nir_after_instr(&intr->instr);
+   nir_ssa_undef_instr *instr_undef =
+         nir_ssa_undef_instr_create(b->shader, 1, 32);
+   nir_builder_instr_insert(b, &instr_undef->instr);
+
+   nir_ssa_def *srcs[4];
+   for (int i = 0; i < 4; i++) {
+      srcs[i] = &instr_undef->def;
+   }
+   srcs[var->data.location_frac] = intr->src[1].ssa;
+
+   for (auto k = ir_sorted_set.begin() + 1; k != ir_sorted_set.end(); ++k) {
+      nir_intrinsic_instr *intr2 = *k;
+      nir_variable *var2 =
+            nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
+      unsigned loc2 = var2->data.location - m_base_slot;
+
+      if (m_vars[loc][var->data.location_frac] !=
+          m_vars[loc2][var2->data.location_frac]) {
+         continue;
+      }
+
+      assert(glsl_get_vector_elements(glsl_without_array(var2->type)) < 4);
+
+      if (srcs[var2->data.location_frac] == &instr_undef->def) {
+         assert(intr2->src[1].is_ssa);
+         assert(intr2->src[1].ssa);
+         srcs[var2->data.location_frac] = intr2->src[1].ssa;
+      }
+      nir_instr_remove(&intr2->instr);
+   }
+
+   create_new_io(b, intr, new_var, srcs, new_var->data.location_frac,
+                 num_comps);
+   return true;
+}
+
+nir_variable_mode NirLowerFSOutToVector::get_io_mode(nir_shader *shader) const
+{
+   return nir_var_shader_out;
+}
+
+void
+NirLowerFSOutToVector::create_new_io(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
+                                     nir_ssa_def **srcs, unsigned first_comp, unsigned num_comps)
+{
+   b->cursor = nir_before_instr(&intr->instr);
+
+   nir_intrinsic_instr *new_intr =
+         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+   new_intr->num_components = num_comps;
+
+   nir_intrinsic_set_write_mask(new_intr, (1 << num_comps) - 1);
+
+   nir_deref_instr *deref = nir_build_deref_var(b, var);
+   deref = clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));
+
+   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
+   new_intr->src[1] = nir_src_for_ssa(create_combined_vector(b, srcs, first_comp, num_comps));
+
+   nir_builder_instr_insert(b, &new_intr->instr);
+
+   /* Remove the old store intrinsic */
+   nir_instr_remove(&intr->instr);
+}
+
+bool NirLowerFSOutToVector::instr_can_rewrite_type(nir_intrinsic_instr *intr) const
+{
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return false;
+
+   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+   if (!nir_deref_mode_is(deref, nir_var_shader_out))
+      return false;
+
+   return var_can_rewrite(nir_deref_instr_get_variable(deref));
+}
+
+nir_ssa_def *NirLowerFSOutToVector::create_combined_vector(nir_builder *b, nir_ssa_def **srcs,
+                                                           int first_comp, int num_comp)
+{
+   nir_op op;
+   switch (num_comp) {
+   case 2: op = nir_op_vec2; break;
+   case 3: op = nir_op_vec3; break;
+   case 4: op = nir_op_vec4; break;
+   default:
+      unreachable("combined vector must have 2 to 4 components");
+   }
+   nir_alu_instr * instr = nir_alu_instr_create(b->shader, op);
+   instr->exact = b->exact;
+
+   int i = 0;
+   unsigned k = 0;
+   while (i < num_comp) {
+      nir_ssa_def *s = srcs[first_comp + k];
+      for(uint8_t kk = 0; kk < s->num_components && i < num_comp; ++kk) {
+         instr->src[i].src = nir_src_for_ssa(s);
+         instr->src[i].swizzle[0] = kk;
+         ++i;
+      }
+      k += s->num_components;
+   }
+
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_comp, 32, NULL);
+   instr->dest.write_mask = (1 << num_comp) - 1;
+   nir_builder_instr_insert(b, &instr->instr);
+   return &instr->dest.dest.ssa;
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h
b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h new file mode 100644 index 000000000..016b7a222 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_fs_out_to_vector.h @@ -0,0 +1,38 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H +#define SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H + +#include "nir.h" + +namespace r600 { + +bool r600_lower_fs_out_to_vector(nir_shader *sh); + +} + +#endif // SFN_NIR_LOWER_FS_OUT_TO_VECTOR_H
\ No newline at end of file diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp new file mode 100644 index 000000000..a830d0753 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_lower_tess_io.cpp @@ -0,0 +1,575 @@ +#include "sfn_nir.h" + +bool r600_lower_tess_io_filter(const nir_instr *instr, gl_shader_stage stage) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + switch (op->intrinsic) { + case nir_intrinsic_load_input: + return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL; + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_load_patch_vertices_in: + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_tess_level_inner: + return true; + case nir_intrinsic_store_output: + return stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_VERTEX; + default: + ; + } + return false; +} + +static nir_ssa_def * +emit_load_param_base(nir_builder *b, nir_intrinsic_op op) +{ + nir_intrinsic_instr *result = nir_intrinsic_instr_create(b->shader, op); + nir_ssa_dest_init(&result->instr, &result->dest, + 4, 32, NULL); + nir_builder_instr_insert(b, &result->instr); + return &result->dest.ssa; +} + +static int get_tcs_varying_offset(nir_intrinsic_instr *op) +{ + unsigned location = nir_intrinsic_io_semantics(op).location; + + switch (location) { + case VARYING_SLOT_POS: + return 0; + case VARYING_SLOT_PSIZ: + return 0x10; + case VARYING_SLOT_CLIP_DIST0: + return 0x20; + case VARYING_SLOT_CLIP_DIST1: + return 0x30; + case VARYING_SLOT_TESS_LEVEL_OUTER: + return 0; + case VARYING_SLOT_TESS_LEVEL_INNER: + return 0x10; + default: + if (location >= VARYING_SLOT_VAR0 && + location <= VARYING_SLOT_VAR31) + return 0x10 * (location - VARYING_SLOT_VAR0) + 0x40; + + if (location >= VARYING_SLOT_PATCH0) { + return 0x10 * (location - VARYING_SLOT_PATCH0) + 0x20; + } + } + return 0; +} + +static inline nir_ssa_def * +r600_umad_24(nir_builder *b, nir_ssa_def *op1, nir_ssa_def *op2, nir_ssa_def *op3) +{ + return nir_build_alu(b, nir_op_umad24, op1, op2, op3, NULL); +} + +static inline nir_ssa_def * +r600_tcs_base_address(nir_builder *b, nir_ssa_def *param_base, nir_ssa_def *rel_patch_id) +{ + return r600_umad_24(b, nir_channel(b, param_base, 0), + rel_patch_id, + nir_channel(b, param_base, 3)); +} + + +static nir_ssa_def * +emil_lsd_in_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op) +{ + nir_ssa_def *addr = nir_build_alu(b, nir_op_umul24, + nir_channel(b, base, 0), + patch_id, NULL, NULL); + + auto idx1 = nir_src_as_const_value(op->src[0]); + if (!idx1 || idx1->u32 != 0) + addr = r600_umad_24(b, nir_channel(b, base, 1), + op->src[0].ssa, addr); + + auto offset = nir_imm_int(b, get_tcs_varying_offset(op)); + + auto idx2 = nir_src_as_const_value(op->src[1]); + if (!idx2 || idx2->u32 != 0) + offset = nir_iadd(b, offset, nir_ishl(b, op->src[1].ssa, nir_imm_int(b, 4))); + + return nir_iadd(b, addr, offset); +} + +static nir_ssa_def * +emil_lsd_out_addr(nir_builder *b, nir_ssa_def *base, nir_ssa_def *patch_id, nir_intrinsic_instr *op, nir_variable_mode mode, int src_offset) +{ + + nir_ssa_def *addr1 = r600_umad_24(b, nir_channel(b, base, 0), + patch_id, + nir_channel(b, base, 2)); + nir_ssa_def *addr2 = r600_umad_24(b, nir_channel(b, 
base, 1), + op->src[src_offset].ssa, addr1); + int offset = get_tcs_varying_offset(op); + return nir_iadd(b, nir_iadd(b, addr2, + nir_ishl(b, op->src[src_offset + 1].ssa, nir_imm_int(b,4))), + nir_imm_int(b, offset)); +} + +static nir_ssa_def *load_offset_group(nir_builder *b, int ncomponents) +{ + switch (ncomponents) { + /* tess outer offsets */ + case 1: return nir_imm_int(b, 0); + case 2: return nir_imm_ivec2(b, 0, 4); + case 3: return r600_imm_ivec3(b, 0, 4, 8); + case 4: return nir_imm_ivec4(b, 0, 4, 8, 12); + /* tess inner offsets */ + case 5: return nir_imm_int(b, 16); + case 6: return nir_imm_ivec2(b, 16, 20); + default: + debug_printf("Got %d components\n", ncomponents); + unreachable("Unsupported component count"); + } +} + +static nir_ssa_def *load_offset_group_from_mask(nir_builder *b, uint32_t mask) +{ + auto full_mask = nir_imm_ivec4(b, 0, 4, 8, 12); + return nir_channels(b, full_mask, mask); +} + +struct MaskQuery { + uint32_t mask; + uint32_t ssa_index; + nir_alu_instr *alu; + int index; + uint32_t full_mask; +}; + +static bool update_alu_mask(nir_src *src, void *data) +{ + auto mq = reinterpret_cast<MaskQuery *>(data); + + if (mq->ssa_index == src->ssa->index) { + mq->mask |= nir_alu_instr_src_read_mask(mq->alu, mq->index); + } + ++mq->index; + + return mq->mask != mq->full_mask; +} + +static uint32_t get_dest_usee_mask(nir_intrinsic_instr *op) +{ + assert(op->dest.is_ssa); + + MaskQuery mq = {0}; + mq.full_mask = (1 << nir_dest_num_components(op->dest)) - 1; + + nir_foreach_use(use_src, &op->dest.ssa) { + auto use_instr = use_src->parent_instr; + mq.ssa_index = use_src->ssa->index; + + switch (use_instr->type) { + case nir_instr_type_alu: { + mq.alu = nir_instr_as_alu(use_instr); + mq.index = 0; + if (!nir_foreach_src(use_instr, update_alu_mask, &mq)) + return 0xf; + break; + } + case nir_instr_type_intrinsic: { + auto intr = nir_instr_as_intrinsic(use_instr); + switch (intr->intrinsic) { + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + mq.mask |= nir_intrinsic_write_mask(intr) << nir_intrinsic_component(intr); + break; + case nir_intrinsic_store_scratch: + case nir_intrinsic_store_local_shared_r600: + mq.mask |= nir_intrinsic_write_mask(intr); + break; + default: + return 0xf; + } + break; + } + default: + return 0xf; + } + + } + return mq.mask; +} + +static void replace_load_instr(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) +{ + uint32_t mask = get_dest_usee_mask(op); + if (mask) { + nir_ssa_def *addr_outer = nir_iadd(b, addr, load_offset_group_from_mask(b, mask)); + if (nir_intrinsic_component(op)) + addr_outer = nir_iadd(b, addr_outer, nir_imm_int(b, 4 * nir_intrinsic_component(op))); + + auto new_load = nir_load_local_shared_r600(b, 32, addr_outer); + + auto undef = nir_ssa_undef(b, 1, 32); + int comps = nir_dest_num_components(op->dest); + nir_ssa_def *remix[4] = {undef, undef, undef, undef}; + + int chan = 0; + for (int i = 0; i < comps; ++i) { + if (mask & (1 << i)) { + remix[i] = nir_channel(b, new_load, chan++); + } + } + auto new_load_remixed = nir_vec(b, remix, comps); + nir_ssa_def_rewrite_uses(&op->dest.ssa, new_load_remixed); + } + nir_instr_remove(&op->instr); +} + +static nir_ssa_def * +r600_load_rel_patch_id(nir_builder *b) +{ + auto patch_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_rel_patch_id_r600); + nir_ssa_dest_init(&patch_id->instr, &patch_id->dest, + 1, 32, NULL); + nir_builder_instr_insert(b, &patch_id->instr); + return &patch_id->dest.ssa; +} + +static void 
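+/* Split one masked store into at most two LDS writes covering the
+ * component pairs (x,y) and (z,w); the second pair is written 8 bytes
+ * further on, plus 4 when only the odd component of a pair is written. */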
+emit_store_lds(nir_builder *b, nir_intrinsic_instr *op, nir_ssa_def *addr) +{ + uint32_t orig_writemask = nir_intrinsic_write_mask(op) << nir_intrinsic_component(op); + + for (int i = 0; i < 2; ++i) { + unsigned test_mask = (0x3 << 2 * i); + if (!(orig_writemask & test_mask)) + continue; + + uint32_t writemask = test_mask >> nir_intrinsic_component(op); + + auto store_tcs_out = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_local_shared_r600); + nir_intrinsic_set_write_mask(store_tcs_out, writemask); + store_tcs_out->src[0] = nir_src_for_ssa(op->src[0].ssa); + store_tcs_out->num_components = store_tcs_out->src[0].ssa->num_components; + bool start_even = (orig_writemask & (1u << (2 * i))); + + auto addr2 = nir_iadd(b, addr, nir_imm_int(b, 8 * i + (start_even ? 0 : 4))); + store_tcs_out->src[1] = nir_src_for_ssa(addr2); + + nir_builder_instr_insert(b, &store_tcs_out->instr); + } +} + +static nir_ssa_def * +emil_tcs_io_offset(nir_builder *b, nir_ssa_def *addr, nir_intrinsic_instr *op, int src_offset) +{ + int offset = get_tcs_varying_offset(op); + return nir_iadd(b, nir_iadd(b, addr, + nir_ishl(b, op->src[src_offset].ssa, nir_imm_int(b,4))), + nir_imm_int(b, offset)); +} + + +inline unsigned +outer_tf_components(pipe_prim_type prim_type) +{ + switch (prim_type) { + case PIPE_PRIM_LINES: return 2; + case PIPE_PRIM_TRIANGLES: return 3; + case PIPE_PRIM_QUADS: return 4; + default: + return 0; + } +} + + + +static bool +r600_lower_tess_io_impl(nir_builder *b, nir_instr *instr, enum pipe_prim_type prim_type) +{ + static nir_ssa_def *load_in_param_base = nullptr; + static nir_ssa_def *load_out_param_base = nullptr; + + b->cursor = nir_before_instr(instr); + nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr); + + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) { + load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + } else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) { + load_in_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + } else if (b->shader->info.stage == MESA_SHADER_VERTEX) { + load_out_param_base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + } + + auto rel_patch_id = r600_load_rel_patch_id(b); + + unsigned tf_inner_address_offset = 0; + unsigned ncomps_correct = 0; + + switch (op->intrinsic) { + case nir_intrinsic_load_patch_vertices_in: { + nir_ssa_def *vertices_in; + if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) + vertices_in = nir_channel(b, load_in_param_base, 2); + else { + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_in_param_base_r600); + vertices_in = nir_channel(b, base, 2); + } + nir_ssa_def_rewrite_uses(&op->dest.ssa, vertices_in); + nir_instr_remove(&op->instr); + return true; + } + case nir_intrinsic_load_per_vertex_input: { + nir_ssa_def *addr = + b->shader->info.stage == MESA_SHADER_TESS_CTRL ? 
+ emil_lsd_in_addr(b, load_in_param_base, rel_patch_id, op) : + emil_lsd_out_addr(b, load_in_param_base, rel_patch_id, op, nir_var_shader_in, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_store_per_vertex_output: { + nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 1); + emit_store_lds(b, op, addr); + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_load_per_vertex_output: { + nir_ssa_def *addr = emil_lsd_out_addr(b, load_out_param_base, rel_patch_id, op, nir_var_shader_out, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_store_output: { + nir_ssa_def *addr = (b->shader->info.stage == MESA_SHADER_TESS_CTRL) ? + r600_tcs_base_address(b, load_out_param_base, rel_patch_id): + nir_build_alu(b, nir_op_umul24, + nir_channel(b, load_out_param_base, 1), + rel_patch_id, NULL, NULL); + addr = emil_tcs_io_offset(b, addr, op, 1); + emit_store_lds(b, op, addr); + nir_instr_remove(instr); + return true; + } + case nir_intrinsic_load_output: { + nir_ssa_def *addr = r600_tcs_base_address(b, load_out_param_base, rel_patch_id); + addr = emil_tcs_io_offset(b, addr, op, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_load_input: { + nir_ssa_def *addr = r600_tcs_base_address(b, load_in_param_base, rel_patch_id); + addr = emil_tcs_io_offset(b, addr, op, 0); + replace_load_instr(b, op, addr); + return true; + } + case nir_intrinsic_load_tess_level_inner: + tf_inner_address_offset = 4; + ncomps_correct = 2; + FALLTHROUGH; + case nir_intrinsic_load_tess_level_outer: { + auto ncomps = outer_tf_components(prim_type); + if (!ncomps) + return false; + ncomps -= ncomps_correct; + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + auto rel_patch_id = r600_load_rel_patch_id(b); + nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id); + nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, tf_inner_address_offset + ncomps)); + + auto tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf->num_components = ncomps; + tf->src[0] = nir_src_for_ssa(addr_outer); + nir_ssa_dest_init(&tf->instr, &tf->dest, + tf->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf->instr); + + nir_ssa_def_rewrite_uses(&op->dest.ssa, &tf->dest.ssa); + nir_instr_remove(instr); + return true; + } + default: + ; + } + + return false; +} + +bool r600_lower_tess_io(nir_shader *shader, enum pipe_prim_type prim_type) +{ + bool progress = false; + nir_foreach_function(function, shader) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + + nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + if (r600_lower_tess_io_filter(instr, shader->info.stage)) + progress |= r600_lower_tess_io_impl(&b, instr, prim_type); + } + } + } + } + return progress; +} + +bool r600_emit_tf(nir_builder *b, nir_ssa_def *val) +{ + nir_intrinsic_instr *store_tf = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_tf_r600); + store_tf->num_components = val->num_components; + store_tf->src[0] = nir_src_for_ssa(val); + nir_builder_instr_insert(b, &store_tf->instr); + return true; +} + +bool r600_append_tcs_TF_emission(nir_shader *shader, enum pipe_prim_type prim_type) { + if (shader->info.stage != MESA_SHADER_TESS_CTRL) + return false; + + nir_foreach_function(function, shader) { + 
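/* If a store_tf_r600 intrinsic is already present anywhere in the
+ * shader, the tess factors were emitted earlier, so bail out rather
+ * than appending them a second time. */
+ 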
nir_foreach_block(block, function->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic == nir_intrinsic_store_tf_r600) { + return false; + } + } + } + } + nir_builder builder; + nir_builder *b = &builder; + + assert(exec_list_length(&shader->functions) == 1); + nir_function *f = (nir_function *)shader->functions.get_head(); + nir_builder_init(b, f->impl); + + auto outer_comps = outer_tf_components(prim_type); + if (!outer_comps) + return false; + + unsigned inner_comps = outer_comps - 2; + unsigned stride = (inner_comps + outer_comps) * 4; + + b->cursor = nir_after_cf_list(&f->impl->body); + + auto invocation_id = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_invocation_id); + nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest, + 1, 32, NULL); + nir_builder_instr_insert(b, &invocation_id->instr); + + nir_push_if(b, nir_ieq_imm(b, &invocation_id->dest.ssa, 0)); + auto base = emit_load_param_base(b, nir_intrinsic_load_tcs_out_param_base_r600); + auto rel_patch_id = r600_load_rel_patch_id(b); + + nir_ssa_def *addr0 = r600_tcs_base_address(b, base, rel_patch_id); + + nir_ssa_def *addr_outer = nir_iadd(b, addr0, load_offset_group(b, outer_comps)); + auto tf_outer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf_outer->num_components = outer_comps; + tf_outer->src[0] = nir_src_for_ssa(addr_outer); + nir_ssa_dest_init(&tf_outer->instr, &tf_outer->dest, + tf_outer->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf_outer->instr); + + std::vector<nir_ssa_def *> tf_out; + + + auto tf_out_base = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_tcs_tess_factor_base_r600); + nir_ssa_dest_init(&tf_out_base->instr, &tf_out_base->dest, + 1, 32, NULL); + nir_builder_instr_insert(b, &tf_out_base->instr); + + auto out_addr0 = nir_build_alu(b, nir_op_umad24, + rel_patch_id, + nir_imm_int(b, stride), + &tf_out_base->dest.ssa, + NULL); + int chanx = 0; + int chany = 1; + + if (prim_type == PIPE_PRIM_LINES) + std::swap(chanx, chany); + + + auto v0 = nir_vec4(b, out_addr0, nir_channel(b, &tf_outer->dest.ssa, chanx), + nir_iadd(b, out_addr0, nir_imm_int(b, 4)), + nir_channel(b, &tf_outer->dest.ssa, chany)); + + tf_out.push_back(v0); + if (outer_comps > 2) { + auto v1 = (outer_comps > 3) ? nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)), + nir_channel(b, &tf_outer->dest.ssa, 2), + nir_iadd(b, out_addr0, nir_imm_int(b, 12)), + nir_channel(b, &tf_outer->dest.ssa, 3)) : + nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 8)), + nir_channel(b, &tf_outer->dest.ssa, 2)); + tf_out.push_back(v1); + } + + if (inner_comps) { + nir_ssa_def *addr1 = nir_iadd(b, addr0, load_offset_group(b, 4 + inner_comps)); + auto tf_inner = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_local_shared_r600); + tf_inner->num_components = inner_comps; + tf_inner->src[0] = nir_src_for_ssa(addr1); + nir_ssa_dest_init(&tf_inner->instr, &tf_inner->dest, + tf_inner->num_components, 32, NULL); + nir_builder_instr_insert(b, &tf_inner->instr); + + auto v2 = (inner_comps > 1) ? 
nir_vec4(b, nir_iadd(b, out_addr0, nir_imm_int(b, 16)), + nir_channel(b, &tf_inner->dest.ssa, 0), + nir_iadd(b, out_addr0, nir_imm_int(b, 20)), + nir_channel(b, &tf_inner->dest.ssa, 1)): + nir_vec2(b, nir_iadd(b, out_addr0, nir_imm_int(b, 12)), + nir_channel(b, &tf_inner->dest.ssa, 0)); + tf_out.push_back(v2); + } + + for (auto tf: tf_out) + r600_emit_tf(b, tf); + + nir_pop_if(b, nullptr); + + nir_metadata_preserve(f->impl, nir_metadata_none); + + return true; +} + +static bool +r600_lower_tess_coord_filter(const nir_instr *instr, UNUSED const void *_options) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + auto intr = nir_instr_as_intrinsic(instr); + return intr->intrinsic == nir_intrinsic_load_tess_coord; +} + +static nir_ssa_def * +r600_lower_tess_coord_impl(nir_builder *b, nir_instr *instr, void *_options) +{ + pipe_prim_type prim_type = *(pipe_prim_type *)_options; + + auto tc_xy = nir_load_tess_coord_r600(b); + + auto tc_x = nir_channel(b, tc_xy, 0); + auto tc_y = nir_channel(b, tc_xy, 1); + + if (prim_type == PIPE_PRIM_TRIANGLES) + return nir_vec3(b, tc_x, tc_y, nir_fsub(b, nir_imm_float(b, 1.0), + nir_fadd(b, tc_x, tc_y))); + else + return nir_vec3(b, tc_x, tc_y, nir_imm_float(b, 0.0)); +} + + +bool r600_lower_tess_coord(nir_shader *sh, enum pipe_prim_type prim_type) +{ + return nir_shader_lower_instructions(sh, r600_lower_tess_coord_filter, + r600_lower_tess_coord_impl, &prim_type); +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c new file mode 100644 index 000000000..2ff60cf6a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_nir_vectorize_vs_inputs.c @@ -0,0 +1,466 @@ +/* + * Copyright © 2018 Timothy Arceri + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_deref.h" +#include "util/u_dynarray.h" +#include "util/u_math.h" +#define XXH_INLINE_ALL +#include "util/xxhash.h" + +/** @file nir_opt_vectorize_io.c + * + * Replaces scalar nir_load_input/nir_store_output operations with + * vectorized instructions. 
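+ *
+ * As an illustrative sketch (hypothetical NIR, variable names
+ * invented), two scalar generic-attribute loads such as
+ *
+ *   vec1 32 ssa_1 = load_deref &in_a   (VERT_ATTRIB_GENERIC0, frac 0)
+ *   vec1 32 ssa_2 = load_deref &in_b   (VERT_ATTRIB_GENERIC0, frac 1)
+ *
+ * become one vector load on a merged variable plus swizzles:
+ *
+ *   vec2 32 ssa_3 = load_deref &in_ab
+ *   vec1 32 ssa_1 = mov ssa_3.x
+ *   vec1 32 ssa_2 = mov ssa_3.y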
+ */ +bool +r600_vectorize_vs_inputs(nir_shader *shader); + +static nir_deref_instr * +r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail, + const nir_deref_instr *src_head) +{ + const nir_deref_instr *parent = nir_deref_instr_parent(src_head); + + if (!parent) + return dst_tail; + + assert(src_head->deref_type == nir_deref_type_array); + + dst_tail = r600_clone_deref_array(b, dst_tail, parent); + + return nir_build_deref_array(b, dst_tail, + nir_ssa_for_src(b, src_head->arr.index, 1)); +} + +static bool +r600_variable_can_rewrite(nir_variable *var) +{ + + /* Skip complex types we don't split in the first place */ + if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type))) + return false; + + + /* TODO: add 64/16bit support ? */ + if (glsl_get_bit_size(glsl_without_array(var->type)) != 32) + return false; + + /* We only check VSand attribute imputs */ + return (var->data.location >= VERT_ATTRIB_GENERIC0 && + var->data.location <= VERT_ATTRIB_GENERIC15); +} + +static bool +r600_instr_can_rewrite(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + if (intr->num_components > 3) + return false; + + if (intr->intrinsic != nir_intrinsic_load_deref) + return false; + + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + if (!nir_deref_mode_is(deref, nir_var_shader_in)) + return false; + + return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref)); +} + +static bool +r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2) +{ + assert(instr1->type == nir_instr_type_intrinsic && + instr2->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1); + nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2); + + nir_variable *var1 = + nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0])); + nir_variable *var2 = + nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0])); + + /* We don't handle combining vars of different base types, so skip those */ + if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type)) + return false; + + if (var1->data.location != var2->data.location) + return false; + + return true; +} + +static struct util_dynarray * +r600_vec_instr_stack_create(void *mem_ctx) +{ + struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray); + util_dynarray_init(stack, mem_ctx); + return stack; +} + +static void +r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr) +{ + util_dynarray_append(stack, nir_instr *, instr); +} + +static unsigned r600_correct_location(nir_variable *var) +{ + return var->data.location - VERT_ATTRIB_GENERIC0; +} + +static void +r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var, + unsigned comp, unsigned num_comps, unsigned old_num_comps) +{ + unsigned channels[4]; + + b->cursor = nir_before_instr(&intr->instr); + + assert(intr->dest.is_ssa); + + nir_intrinsic_instr *new_intr = + nir_intrinsic_instr_create(b->shader, intr->intrinsic); + nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps, + intr->dest.ssa.bit_size, NULL); + new_intr->num_components = num_comps; + + nir_deref_instr *deref = nir_build_deref_var(b, var); + deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0])); + + new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa); + + if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset || + intr->intrinsic == nir_intrinsic_interp_deref_at_sample) + 
nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr); + + nir_builder_instr_insert(b, &new_intr->instr); + + for (unsigned i = 0; i < old_num_comps; ++i) + channels[i] = comp - var->data.location_frac + i; + nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); + + /* Remove the old load intrinsic */ + nir_instr_remove(&intr->instr); +} + + +static bool +r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack, + nir_instr *instr, + nir_variable *updated_vars[16][4]) +{ + nir_instr *last = util_dynarray_pop(stack, nir_instr *); + + assert(last == instr); + assert(last->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last); + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + unsigned loc = r600_correct_location(var); + + nir_variable *new_var; + new_var = updated_vars[loc][var->data.location_frac]; + + unsigned num_comps = + glsl_get_vector_elements(glsl_without_array(new_var->type)); + + unsigned old_num_comps = + glsl_get_vector_elements(glsl_without_array(var->type)); + + /* Don't bother walking the stack if this component can't be vectorised. */ + if (old_num_comps > 3) { + return false; + } + + if (new_var == var) { + return false; + } + + r600_create_new_load(b, intr, new_var, var->data.location_frac, + num_comps, old_num_comps); + return true; +} + +static bool +r600_cmp_func(const void *data1, const void *data2) +{ + const struct util_dynarray *arr1 = data1; + const struct util_dynarray *arr2 = data2; + + const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1); + const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2); + + return r600_io_access_same_var(instr1, instr2); +} + +#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash)) + +static uint32_t +r600_hash_instr(const nir_instr *instr) +{ + assert(instr->type == nir_instr_type_intrinsic); + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_variable *var = + nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); + + uint32_t hash = 0; + + hash = HASH(hash, var->type); + return HASH(hash, var->data.location); +} + +static uint32_t +r600_hash_stack(const void *data) +{ + const struct util_dynarray *stack = data; + const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack); + return r600_hash_instr(first); +} + +static struct set * +r600_vec_instr_set_create(void) +{ + return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func); +} + +static void +r600_vec_instr_set_destroy(struct set *instr_set) +{ + _mesa_set_destroy(instr_set, NULL); +} + +static void +r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr) +{ + if (!r600_instr_can_rewrite(instr)) { + return; + } + + struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set); + r600_vec_instr_stack_push(new_stack, instr); + + struct set_entry *entry = _mesa_set_search(instr_set, new_stack); + + if (entry) { + ralloc_free(new_stack); + struct util_dynarray *stack = (struct util_dynarray *) entry->key; + r600_vec_instr_stack_push(stack, instr); + return; + } + + _mesa_set_add(instr_set, new_stack); + + return; +} + +static bool +r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr, + nir_variable *updated_vars[16][4]) +{ + if (!r600_instr_can_rewrite(instr)) { + return false; + } + /* + * It's pretty unfortunate that we have to do this, but it's a side effect + * of the hash set interfaces. 
The hash set assumes that we're only + * interested in storing one equivalent element at a time, and if we try to + * insert a duplicate element it will remove the original. We could hack up + * the comparison function to "know" which input is an instruction we + * passed in and which is an array that's part of the entry, but that + * wouldn't work because we need to pass an array to _mesa_set_add() in + * vec_instr_add() above, and _mesa_set_add() will call our comparison + * function as well. + */ + struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set); + r600_vec_instr_stack_push(temp, instr); + struct set_entry *entry = _mesa_set_search(instr_set, temp); + ralloc_free(temp); + + if (entry) { + struct util_dynarray *stack = (struct util_dynarray *) entry->key; + bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars); + + if (!util_dynarray_num_elements(stack, nir_instr *)) + _mesa_set_remove(instr_set, entry); + + return progress; + } + + return false; +} + +static bool +r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set, + nir_variable *updated_vars[16][4]) +{ + bool progress = false; + + nir_foreach_instr_safe(instr, block) { + r600_vec_instr_set_add(instr_set, instr); + } + + for (unsigned i = 0; i < block->num_dom_children; i++) { + nir_block *child = block->dom_children[i]; + progress |= r600_vectorize_block(b, child, instr_set, updated_vars); + } + + nir_foreach_instr_reverse_safe(instr, block) { + progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars); + } + + return progress; +} + +static void +r600_create_new_io_var(nir_shader *shader, + nir_variable *vars[16][4], + unsigned location, unsigned comps) +{ + unsigned num_comps = util_bitcount(comps); + assert(num_comps > 1); + + /* Note: u_bit_scan() strips a component of the comps bitfield here */ + unsigned first_comp = u_bit_scan(&comps); + + nir_variable *var = nir_variable_clone(vars[location][first_comp], shader); + var->data.location_frac = first_comp; + var->type = glsl_replace_vector_type(var->type, num_comps); + + nir_shader_add_variable(shader, var); + + vars[location][first_comp] = var; + + while (comps) { + const int comp = u_bit_scan(&comps); + if (vars[location][comp]) { + vars[location][comp] = var; + } + } +} + +static inline bool +r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs) +{ + return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type)); +} + +static void +r600_create_new_io_vars(nir_shader *shader, nir_variable_mode mode, + nir_variable *vars[16][4]) +{ + bool can_rewrite_vars = false; + nir_foreach_variable_with_modes(var, shader, mode) { + if (r600_variable_can_rewrite(var)) { + can_rewrite_vars = true; + unsigned loc = r600_correct_location(var); + vars[loc][var->data.location_frac] = var; + } + } + + if (!can_rewrite_vars) + return; + + /* We don't handle combining vars of different type e.g. different array + * lengths. 
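+ *
+ * A worked example of the component mask built below (variables
+ * hypothetical): a float at location_frac 0 and a vec2 at
+ * location_frac 2 on the same location yield comps = 0b1101, so
+ * r600_create_new_io_var() merges them into one 3-component vector
+ * variable covering those slots.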
+ */ + for (unsigned i = 0; i < 16; i++) { + unsigned comps = 0; + + for (unsigned j = 0; j < 3; j++) { + + if (!vars[i][j]) + continue; + + for (unsigned k = j + 1; k < 4; k++) { + if (!vars[i][k]) + continue; + + if (!r600_variables_can_merge(vars[i][j], vars[i][k])) + continue; + + /* Set comps */ + for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n) + comps |= 1 << (vars[i][j]->data.location_frac + n); + + for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n) + comps |= 1 << (vars[i][k]->data.location_frac + n); + + } + } + if (comps) + r600_create_new_io_var(shader, vars, i, comps); + } +} + +static bool +r600_vectorize_io_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_metadata_require(impl, nir_metadata_dominance); + + nir_shader *shader = impl->function->shader; + nir_variable *updated_vars[16][4] = {0}; + + r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars); + + struct set *instr_set = r600_vec_instr_set_create(); + bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set, + updated_vars); + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + r600_vec_instr_set_destroy(instr_set); + return false; +} + +bool +r600_vectorize_vs_inputs(nir_shader *shader) +{ + bool progress = false; + + if (shader->info.stage != MESA_SHADER_VERTEX) + return false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= r600_vectorize_io_impl(function->impl); + } + + return progress; +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp new file mode 100644 index 000000000..e37e2732b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp @@ -0,0 +1,1179 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "../r600_pipe.h" +#include "../r600_shader.h" +#include "sfn_shader_vertex.h" + +#include "sfn_shader_compute.h" +#include "sfn_shader_fragment.h" +#include "sfn_shader_geometry.h" +#include "sfn_liverange.h" +#include "sfn_ir_to_assembly.h" +#include "sfn_nir.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_fetch.h" +#include "sfn_instruction_lds.h" + +#include <iostream> + +#define ENABLE_DEBUG 1 + +#ifdef ENABLE_DEBUG +#define DEBUG_SFN(X) \ + do {\ + X; \ + } while (0) +#else +#define DEBUG_SFN(X) +#endif + +namespace r600 { + +using namespace std; + + +ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype, + r600_pipe_shader_selector& sel, + r600_shader &sh_info, int scratch_size, + enum chip_class chip_class, + int atomic_base): + m_processor_type(ptype), + m_nesting_depth(0), + m_block_number(0), + m_export_output(0, -1), + m_sh_info(sh_info), + m_chip_class(chip_class), + m_tex_instr(*this), + m_alu_instr(*this), + m_ssbo_instr(*this), + m_pending_else(nullptr), + m_scratch_size(scratch_size), + m_next_hwatomic_loc(0), + m_sel(sel), + m_atomic_base(atomic_base), + m_image_count(0), + last_emitted_alu(nullptr) +{ + m_sh_info.processor_type = ptype; + +} + + +ShaderFromNirProcessor::~ShaderFromNirProcessor() +{ +} + +bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_tex: { + nir_tex_instr *t = nir_instr_as_tex(instr); + if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF) + sh_info().uses_tex_buffers = true; + if (t->op == nir_texop_txs && + t->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + t->is_array) + sh_info().has_txq_cube_array_z_comp = true; + break; + } + case nir_instr_type_intrinsic: { + auto *i = nir_instr_as_intrinsic(instr); + switch (i->intrinsic) { + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_image_atomic_imin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_image_atomic_imax: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_image_atomic_umin: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_image_atomic_umax: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_comp_swap: + m_sel.info.writes_memory = 1; + FALLTHROUGH; + case nir_intrinsic_image_load: + m_ssbo_instr.set_require_rat_return_address(); + break; + case nir_intrinsic_image_size: { + if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE && + nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2) + sh_info().has_txq_cube_array_z_comp = true; + } + + + + default: + ; + } + + + } + default: + ; + } + + return scan_sysvalue_access(instr); +} + +enum chip_class ShaderFromNirProcessor::get_chip_class(void) const +{ + return m_chip_class; +} + +bool ShaderFromNirProcessor::allocate_reserved_registers() +{ + bool retval = do_allocate_reserved_registers(); + m_ssbo_instr.load_rat_return_address(); + if (sh_info().uses_atomics) + m_ssbo_instr.load_atomic_inc_limits(); + m_ssbo_instr.set_ssbo_offset(m_image_count); + return retval; +} + +static void remap_shader_info(r600_shader& sh_info, + std::vector<rename_reg_pair>& map, + UNUSED ValueMap& 
values) +{ + for (unsigned i = 0; i < sh_info.num_arrays; ++i) { + auto new_index = map[sh_info.arrays[i].gpr_start]; + if (new_index.valid) + sh_info.arrays[i].gpr_start = new_index.new_reg; + map[sh_info.arrays[i].gpr_start].used = true; + } + + for (unsigned i = 0; i < sh_info.ninput; ++i) { + sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr + << " of map.size()\n"; + + assert(sh_info.input[i].gpr < map.size()); + auto new_index = map[sh_info.input[i].gpr]; + if (new_index.valid) + sh_info.input[i].gpr = new_index.new_reg; + map[sh_info.input[i].gpr].used = true; + } + + for (unsigned i = 0; i < sh_info.noutput; ++i) { + assert(sh_info.output[i].gpr < map.size()); + auto new_index = map[sh_info.output[i].gpr]; + if (new_index.valid) + sh_info.output[i].gpr = new_index.new_reg; + map[sh_info.output[i].gpr].used = true; + } +} + +void ShaderFromNirProcessor::remap_registers() +{ + // register renumbering + auto rc = register_count(); + if (!rc) + return; + + std::vector<register_live_range> register_live_ranges(rc); + + auto temp_register_map = get_temp_registers(); + + Shader sh{m_output, temp_register_map}; + LiverangeEvaluator().run(sh, register_live_ranges); + auto register_map = get_temp_registers_remapping(register_live_ranges); + + sfn_log << SfnLog::merge << "=========Mapping===========\n"; + for (size_t i = 0; i < register_map.size(); ++i) + if (register_map[i].valid) + sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n"; + + ValueRemapper vmap0(register_map, temp_register_map); + for (auto& block: m_output) + block.remap_registers(vmap0); + + remap_shader_info(m_sh_info, register_map, temp_register_map); + + /* Mark inputs as used registers, these registers should no be remapped */ + for (auto& v: sh.m_temp) { + if (v.second->type() == Value::gpr) { + const auto& g = static_cast<const GPRValue&>(*v.second); + if (g.is_input()) + register_map[g.sel()].used = true; + } + } + + int new_index = 0; + for (auto& i : register_map) { + i.valid = i.used; + if (i.used) + i.new_reg = new_index++; + } + + ValueRemapper vmap1(register_map, temp_register_map); + for (auto& ir: m_output) + ir.remap_registers(vmap1); + + remap_shader_info(m_sh_info, register_map, temp_register_map); +} + +bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform) +{ + // m_uniform_type_map + m_uniform_type_map[uniform->data.location] = uniform->type; + + if (uniform->type->contains_atomic()) { + int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE; + sh_info().nhwatomic += natomics; + + if (uniform->type->is_array()) + sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC; + + sh_info().uses_atomics = 1; + + struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges]; + ++sh_info().nhwatomic_ranges; + atom.buffer_id = uniform->data.binding; + atom.hw_idx = m_atomic_base + m_next_hwatomic_loc; + + atom.start = uniform->data.offset >> 2; + atom.end = atom.start + natomics - 1; + + if (m_atomic_base_map.find(uniform->data.binding) == + m_atomic_base_map.end()) + m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc; + + m_next_hwatomic_loc += natomics; + + m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1; + + sfn_log << SfnLog::io << "HW_ATOMIC file count: " + << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n"; + } + + auto type = uniform->type->is_array() ? 
uniform->type->without_array(): uniform->type; + if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) { + sh_info().uses_images = 1; + if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo)) + sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE; + } + + if (uniform->type->is_image()) { + ++m_image_count; + } + + return true; +} + +bool ShaderFromNirProcessor::scan_inputs_read(const nir_shader *sh) +{ + return true; +} + +void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr) +{ + auto& dest = instr->dest; + unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index; + assert(util_bitcount(instr->modes) == 1); + m_var_mode[instr->var] = instr->modes; + m_var_derefs[index] = instr->var; + + sfn_log << SfnLog::io << "Add var deref:" << index + << " with DDL:" << instr->var->data.driver_location << "\n"; +} + +void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io) +{ + switch (io.name) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + case TGSI_SEMANTIC_CLIPVERTEX: + io.spi_sid = 0; + break; + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_PCOORD: + io.spi_sid = io.sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1; + } +} + +const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const +{ + unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index; + + sfn_log << SfnLog::io << "Search for deref:" << index << "\n"; + + auto v = m_var_derefs.find(index); + if (v != m_var_derefs.end()) + return v->second; + + fprintf(stderr, "R600: could not find deref with index %d\n", index); + + return nullptr; + + /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr); + return nir_deref_instr_get_variable(deref); */ +} + +bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr) +{ + return m_tex_instr.emit(instr); +} + +void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir) +{ + if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) { + for (unsigned i = 0; i < ir->n_sources(); ++i) { + auto& s = ir->src(i); + if (s.type() == Value::kconst) { + auto& c = static_cast<UniformValue&>(s); + if (c.addr()) { + last_emitted_alu->set_flag(alu_last_instr); + break; + } + } + } + } + last_emitted_alu = ir; + emit_instruction_internal(ir); +} + + +void ShaderFromNirProcessor::emit_instruction(Instruction *ir) +{ + + emit_instruction_internal(ir); + last_emitted_alu = nullptr; +} + +void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir) +{ + if (m_pending_else) { + append_block(-1); + m_output.back().emit(PInstruction(m_pending_else)); + append_block(1); + m_pending_else = nullptr; + } + + r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n"; + if (m_output.empty()) + append_block(0); + + m_output.back().emit(Instruction::Pointer(ir)); +} + +void ShaderFromNirProcessor::emit_shader_start() +{ + /* placeholder, may become an abstract method */ + m_ssbo_instr.set_ssbo_offset(m_image_count); +} + +bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: { + auto b = new LoopBreakInstruction(); + emit_instruction(b); + return true; + } + case nir_jump_continue: { + auto b = new LoopContInstruction(); + emit_instruction(b); + return true; + } + default: { + nir_instr 
*i = reinterpret_cast<nir_instr*>(instr); + sfn_log << SfnLog::err << "Jump instrunction " << *i << " not supported\n"; + return false; + } + } + return true; +} + +bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr) +{ + return m_alu_instr.emit(instr); +} + +bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr) +{ + return false; +} + +bool ShaderFromNirProcessor::emit_loop_start(int loop_id) +{ + LoopBeginInstruction *loop = new LoopBeginInstruction(); + emit_instruction(loop); + m_loop_begin_block_map[loop_id] = loop; + append_block(1); + return true; +} +bool ShaderFromNirProcessor::emit_loop_end(int loop_id) +{ + auto start = m_loop_begin_block_map.find(loop_id); + if (start == m_loop_begin_block_map.end()) { + sfn_log << SfnLog::err << "End loop: Loop start for " + << loop_id << " not found\n"; + return false; + } + m_nesting_depth--; + m_block_number++; + m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number)); + LoopEndInstruction *loop = new LoopEndInstruction(start->second); + emit_instruction(loop); + + m_loop_begin_block_map.erase(start); + return true; +} + +bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt) +{ + + auto value = from_nir(if_stmt->condition, 0, 0); + AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)), + value, Value::zero, EmitInstruction::last); + pred->set_flag(alu_update_exec); + pred->set_flag(alu_update_pred); + pred->set_cf_type(cf_alu_push_before); + + append_block(1); + + IfInstruction *ir = new IfInstruction(pred); + emit_instruction(ir); + assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end()); + m_if_block_start_map[if_id] = ir; + return true; +} + +bool ShaderFromNirProcessor::emit_else_start(int if_id) +{ + auto iif = m_if_block_start_map.find(if_id); + if (iif == m_if_block_start_map.end()) { + std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n"; + return false; + } + + if (iif->second->type() != Instruction::cond_if) { + std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n"; + return false; + } + IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second); + ElseInstruction *ir = new ElseInstruction(if_instr); + m_if_block_start_map[if_id] = ir; + m_pending_else = ir; + + return true; +} + +bool ShaderFromNirProcessor::emit_ifelse_end(int if_id) +{ + auto ifelse = m_if_block_start_map.find(if_id); + if (ifelse == m_if_block_start_map.end()) { + std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n"; + return false; + } + + if (ifelse->second->type() != Instruction::cond_if && + ifelse->second->type() != Instruction::cond_else) { + std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n"; + return false; + } + /* Clear pending else, if the else branch was empty, non will be emitted */ + + m_pending_else = nullptr; + + append_block(-1); + IfElseEndInstruction *ir = new IfElseEndInstruction(); + emit_instruction(ir); + + return true; +} + +bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset) +{ + PValue src = get_temp_register(); + emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr})); + + GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); + emit_instruction(new FetchTCSIOParam(dest, src, offset)); + + return true; + +} + +bool 
ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr) +{ + auto address = varvec_from_nir(instr->src[0], instr->num_components); + auto dest_value = varvec_from_nir(instr->dest, instr->num_components); + + emit_instruction(new LDSReadInstruction(address, dest_value)); + return true; +} + +static unsigned +lds_op_from_intrinsic(nir_intrinsic_op op) { + switch (op) { + case nir_intrinsic_shared_atomic_add: + return LDS_OP2_LDS_ADD_RET; + case nir_intrinsic_shared_atomic_and: + return LDS_OP2_LDS_AND_RET; + case nir_intrinsic_shared_atomic_or: + return LDS_OP2_LDS_OR_RET; + case nir_intrinsic_shared_atomic_imax: + return LDS_OP2_LDS_MAX_INT_RET; + case nir_intrinsic_shared_atomic_umax: + return LDS_OP2_LDS_MAX_UINT_RET; + case nir_intrinsic_shared_atomic_imin: + return LDS_OP2_LDS_MIN_INT_RET; + case nir_intrinsic_shared_atomic_umin: + return LDS_OP2_LDS_MIN_UINT_RET; + case nir_intrinsic_shared_atomic_xor: + return LDS_OP2_LDS_XOR_RET; + case nir_intrinsic_shared_atomic_exchange: + return LDS_OP2_LDS_XCHG_RET; + case nir_intrinsic_shared_atomic_comp_swap: + return LDS_OP3_LDS_CMP_XCHG_RET; + default: + unreachable("Unsupported shared atomic opcode"); + } +} + +bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr) +{ + auto address = from_nir(instr->src[0], 0); + auto dest_value = from_nir(instr->dest, 0); + auto value = from_nir(instr->src[1], 0); + auto op = lds_op_from_intrinsic(instr->intrinsic); + + if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap)) { + auto value2 = from_nir(instr->src[2], 0); + emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op)); + } else { + emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op)); + } + return true; +} + + +bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr) +{ + unsigned write_mask = nir_intrinsic_write_mask(instr); + + auto address = from_nir(instr->src[1], 0); + int swizzle_base = (write_mask & 0x3) ? 
0 : 2; + write_mask |= write_mask >> 2; + + auto value = from_nir(instr->src[0], swizzle_base); + if (!(write_mask & 2)) { + emit_instruction(new LDSWriteInstruction(address, 0, value)); + } else { + auto value1 = from_nir(instr->src[0], swizzle_base + 1); + emit_instruction(new LDSWriteInstruction(address, 0, value, value1)); + } + + return true; +} + +bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + if (emit_intrinsic_instruction_override(instr)) + return true; + + if (m_ssbo_instr.emit(&instr->instr)) { + m_sel.info.writes_memory = true; + return true; + } + + switch (instr->intrinsic) { + case nir_intrinsic_load_deref: { + auto var = get_deref_location(instr->src[0]); + if (!var) + return false; + auto mode_helper = m_var_mode.find(var); + if (mode_helper == m_var_mode.end()) { + cerr << "r600-nir: variable '" << var->name << "' not found\n"; + return false; + } + switch (mode_helper->second) { + case nir_var_function_temp: + return emit_load_function_temp(var, instr); + default: + cerr << "r600-nir: Unsupported mode" << mode_helper->second + << "for src variable\n"; + return false; + } + } + case nir_intrinsic_store_scratch: + return emit_store_scratch(instr); + case nir_intrinsic_load_scratch: + return emit_load_scratch(instr); + case nir_intrinsic_load_uniform: + return load_uniform(instr); + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + return emit_discard_if(instr); + case nir_intrinsic_load_ubo_vec4: + return emit_load_ubo_vec4(instr); + case nir_intrinsic_load_tcs_in_param_base_r600: + return emit_load_tcs_param_base(instr, 0); + case nir_intrinsic_load_tcs_out_param_base_r600: + return emit_load_tcs_param_base(instr, 16); + case nir_intrinsic_load_local_shared_r600: + case nir_intrinsic_load_shared: + return emit_load_local_shared(instr); + case nir_intrinsic_store_local_shared_r600: + case nir_intrinsic_store_shared: + return emit_store_local_shared(instr); + case nir_intrinsic_control_barrier: + case nir_intrinsic_memory_barrier_tcs_patch: + case nir_intrinsic_memory_barrier_shared: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_group_memory_barrier: + return emit_barrier(instr); + case nir_intrinsic_memory_barrier_atomic_counter: + return true; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: + return emit_atomic_local_shared(instr); + case nir_intrinsic_shader_clock: + return emit_shader_clock(instr); + case nir_intrinsic_copy_deref: + case nir_intrinsic_load_constant: + case nir_intrinsic_load_input: + case nir_intrinsic_store_output: + + default: + fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic); + return false; + } + return false; +} + +bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr) +{ + return false; +} + +bool +ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr) +{ + return false; +} + +bool 
ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr) +{ + AluInstruction *ir = new AluInstruction(op0_group_barrier); + ir->set_flag(alu_last_instr); + emit_instruction(ir); + return true; +} + + +bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last) +{ + if (!dest.is_ssa) { + auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write}); + if (as_last) + ir->set_flag(alu_last_instr); + emit_instruction(ir); + } else { + inject_register(dest.ssa.index, chan, value, true); + } + return true; +} + +bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr) +{ + PValue address = from_nir(instr->src[1], 0, 0); + + auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1, + swizzle_from_comps(instr->num_components)); + + int writemask = nir_intrinsic_write_mask(instr); + int align = nir_intrinsic_align_mul(instr); + int align_offset = nir_intrinsic_align_offset(instr); + + WriteScratchInstruction *ir = nullptr; + if (address->type() == Value::literal) { + const auto& lv = static_cast<const LiteralValue&>(*address); + ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask); + } else { + address = from_nir_with_fetch_constant(instr->src[1], 0); + ir = new WriteScratchInstruction(address, value, align, align_offset, + writemask, m_scratch_size); + } + emit_instruction(ir); + sh_info().needs_scratch_space = 1; + return true; +} + +bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr) +{ + PValue address = from_nir_with_fetch_constant(instr->src[0], 0); + std::array<PValue, 4> dst_val; + for (int i = 0; i < 4; ++i) + dst_val[i] = from_nir(instr->dest, i < instr->num_components ? 
i : 7); + + GPRVector dst(dst_val); + auto ir = new LoadFromScratch(dst, address, m_scratch_size); + ir->prelude_append(new WaitAck(0)); + emit_instruction(ir); + sh_info().needs_scratch_space = 1; + return true; +} + +bool ShaderFromNirProcessor::emit_shader_clock(nir_intrinsic_instr* instr) +{ + emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 0), + PValue(new InlineConstValue(ALU_SRC_TIME_LO, 0)), EmitInstruction::write)); + emit_instruction(new AluInstruction(op1_mov, from_nir(instr->dest, 1), + PValue(new InlineConstValue(ALU_SRC_TIME_HI, 0)), EmitInstruction::last_write)); + return true; +} + +GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src, + unsigned mask, + const GPRVector::Swizzle& swizzle, + bool match) +{ + bool use_same = true; + GPRVector::Values v; + + std::array<bool,4> used_swizzles = {false, false, false, false}; + + /* Check whether all sources come from a GPR, and, + * if requested, whether they are swizzled as expected */ + + for (int i = 0; i < 4 && use_same; ++i) { + if ((1 << i) & mask) { + if (swizzle[i] < 4) { + v[i] = from_nir(src, swizzle[i]); + assert(v[i]); + use_same &= (v[i]->type() == Value::gpr); + if (match) { + use_same &= (v[i]->chan() == swizzle[i]); + } + used_swizzles[v[i]->chan()] = true; + } + } + } + + + /* Now check whether all inputs come from the same GPR, and fill + * empty slots in the vector with unused swizzles, bail out if + * the sources are not from the same GPR + */ + + if (use_same) { + int next_free_swizzle = 0; + while (used_swizzles[next_free_swizzle] && next_free_swizzle < 4) + next_free_swizzle++; + + /* Find the first GPR index used */ + int i = 0; + while (!v[i] && i < 4) ++i; + assert(i < 4); + unsigned sel = v[i]->sel(); + + + for (i = 0; i < 4 && use_same; ++i) { + if (!v[i]) { + if (swizzle[i] >= 4) + v[i] = PValue(new GPRValue(sel, swizzle[i])); + else { + assert(next_free_swizzle < 4); + v[i] = PValue(new GPRValue(sel, next_free_swizzle)); + used_swizzles[next_free_swizzle] = true; + while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle]) + next_free_swizzle++; + } + } + else + use_same &= v[i]->sel() == sel; + } + } + + /* We can't re-use the source data because they either need re-swizzling, or + * they didn't come all from a GPR or the same GPR, so copy to a new vector + */ + if (!use_same) { + AluInstruction *ir = nullptr; + GPRVector result = get_temp_vec4(swizzle); + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 4 && (mask & (1 << i))) { + ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]), + EmitInstruction::write); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return result; + } else + return GPRVector(v);; +} + +bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr) +{ + auto bufid = nir_src_as_const_value(instr->src[0]); + auto buf_offset = nir_src_as_const_value(instr->src[1]); + + if (!buf_offset) { + /* TODO: if buf_offset is constant then this can also be solved by using the CF indes + * on the ALU block, and this would probably make sense when there are more then one + * loads with the same buffer ID. 
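+ *
+ * Rough shape of the fetch emitted below (mnemonics and register
+ * numbers illustrative only): the offset from src[1] is forced into
+ * a GPR and a vertex-cache fetch reads the constant buffer,
+ *
+ *   MOV       R0.x, <offset>
+ *   VTX_FETCH Rdst.xyzw, R0.x, cb[1 + bufid]
+ *
+ * falling back to buffer index mode zero when the buffer id is not a
+ * compile-time constant; the destination swizzle is shifted by
+ * nir_intrinsic_component().
+ 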
*/ + + PValue addr = from_nir_with_fetch_constant(instr->src[1], 0); + GPRVector trgt; + std::array<int, 4> swz = {7,7,7,7}; + for (unsigned i = 0; i < 4; ++i) { + if (i < nir_dest_num_components(instr->dest)) { + trgt.set_reg_i(i, from_nir(instr->dest, i)); + swz[i] = i + nir_intrinsic_component(instr); + } else { + trgt.set_reg_i(i, from_nir(instr->dest, 7)); + } + } + + FetchInstruction *ir; + if (bufid) { + ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + 1 + bufid->u32, nullptr, bim_none); + } else { + PValue bufid = from_nir(instr->src[0], 0, 0); + ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0, + 1, bufid, bim_zero); + } + ir->set_dest_swizzle(swz); + emit_instruction(ir); + m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; + } + + + if (bufid) { + int buf_cmp = nir_intrinsic_component(instr); + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + int cmp = buf_cmp + i; + assert(cmp < 4); + auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1)); + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; + + } else { + int buf_cmp = nir_intrinsic_component(instr); + AluInstruction *ir = nullptr; + auto kc_id = from_nir(instr->src[0], 0); + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + int cmp = buf_cmp + i; + auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id)); + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + return true; + } +} + +bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << "emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "' (" << __func__ << ")\n"; + + if (instr->intrinsic == nir_intrinsic_discard_if) { + emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)), + {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr})); + + } else { + emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)), + {Value::zero, Value::zero}, {alu_last_instr})); + } + m_sh_info.uses_kill = 1; + return true; +} + +bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr) +{ + r600::sfn_log << SfnLog::instr << __func__ << ": emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "'\n"; + + + /* If the target register is a SSA register and the loading is not + * indirect then we can do lazy loading, i.e. the uniform value can + * be used directly. Otherwise we have to load the data for real + * rigt away. 
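+ *
+ * A sketch of the two paths (constant values invented): for a
+ * load_uniform with src[0] = const 2 and base = 1, component i can
+ * alias kcache constant 512 + 2 + 1 directly, while a non-constant
+ * src[0] falls back to load_uniform_indirect() with a byte offset of
+ * 16 * base.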
+ */ + auto literal = nir_src_as_const_value(instr->src[0]); + int base = nir_intrinsic_base(instr); + + if (literal) { + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + PValue u = PValue(new UniformValue(512 + literal->u32 + base, i)); + sfn_log << SfnLog::io << "uniform " + << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n"; + + if (instr->dest.is_ssa) + load_preloaded_value(instr->dest, i, u); + else { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), + u, {alu_write}); + emit_instruction(ir); + } + } + if (ir) + ir->set_flag(alu_last_instr); + } else { + PValue addr = from_nir(instr->src[0], 0, 0); + return load_uniform_indirect(instr, addr, 16 * base, 0); + } + return true; +} + +bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offest, int bufferid) +{ + if (!addr) { + std::cerr << "r600-nir: don't know how uniform is addressed\n"; + return false; + } + + GPRVector trgt; + std::array<int, 4> swz = {7,7,7,7}; + for (int i = 0; i < 4; ++i) { + trgt.set_reg_i(i, from_nir(instr->dest, i)); + swz[i] = i; + } + + if (addr->type() != Value::gpr) { + emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr}); + addr = trgt.reg_i(0); + } + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offest, + bufferid, PValue(), bim_none); + ir->set_dest_swizzle(swz); + emit_instruction(ir); + m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT; + return true; +} + +AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < literal->def.num_components ; ++i) { + if (writemask & (1 << i)){ + PValue lsrc; + switch (literal->def.bit_size) { + + case 1: + sfn_log << SfnLog::reg << "Got literal of bit size 1\n"; + lsrc = literal->value[i].b ? + PValue(new LiteralValue( 0xffffffff, i)) : + Value::zero; + break; + case 32: + sfn_log << SfnLog::reg << "Got literal of bit size 32\n"; + if (literal->value[i].u32 == 0) + lsrc = Value::zero; + else if (literal->value[i].u32 == 1) + lsrc = Value::one_i; + else if (literal->value[i].f32 == 1.0f) + lsrc = Value::one_f; + else if (literal->value[i].f32 == 0.5f) + lsrc = Value::zero_dot_5; + else + lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); + break; + default: + sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size + << " falling back to 32 bit\n"; + lsrc = PValue(new LiteralValue(literal->value[i].u32, i)); + } + ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write); + + emit_instruction(ir); + } + } + return ir; +} + +PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel) +{ + PValue value = from_nir(src, component); + if (value->type() != Value::gpr && + value->type() != Value::gpr_vector && + value->type() != Value::gpr_array_value) { + PValue retval = get_temp_register(channel); + emit_instruction(new AluInstruction(op1_mov, retval, value, + EmitInstruction::last_write)); + value = retval; + } + return value; +} + +bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr) +{ + r600::sfn_log << SfnLog::instr << __func__ << ": emit '" + << *reinterpret_cast<nir_instr*>(instr) + << "'\n"; + + /* Give the specific shader type a chance to process this, i.e. 
+bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
+{
+   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
+                 << *reinterpret_cast<nir_instr*>(instr)
+                 << "'\n";
+
+   /* Give the specific shader type a chance to process this, i.e. geometry
+    * and tessellation shaders need a specialized deref_array; for the other
+    * shaders it is lowered.
+    */
+   if (emit_deref_instruction_override(instr))
+      return true;
+
+   switch (instr->deref_type) {
+   case nir_deref_type_var:
+      set_var_address(instr);
+      return true;
+   case nir_deref_type_array:
+   case nir_deref_type_array_wildcard:
+   case nir_deref_type_struct:
+   case nir_deref_type_cast:
+   default:
+      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
+   }
+   return false;
+}
+
+bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
+                                              std::vector<PValue> srcs,
+                                              const std::set<AluModifiers>& m_flags)
+{
+   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
+   emit_instruction(ir);
+   return true;
+}
+
+void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
+{
+   m_output_register_map[loc] = gpr;
+}
+
+void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
+{
+   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
+   m_export_output.emit(PInstruction(ir));
+}
+
+const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
+{
+   const GPRVector *retval = nullptr;
+   auto val = m_output_register_map.find(location);
+   if (val != m_output_register_map.end())
+      retval = val->second;
+   return retval;
+}
+
+void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
+{
+   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] =" << *var << "\n";
+   m_inputs[pos] = var;
+}
+
+void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
+{
+   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] =" << sel << "\n";
+   m_outputs[pos] = sel;
+}
+
+void ShaderFromNirProcessor::append_block(int nesting_change)
+{
+   m_nesting_depth += nesting_change;
+   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
+}
+
+void ShaderFromNirProcessor::get_array_info(r600_shader& shader) const
+{
+   shader.num_arrays = m_reg_arrays.size();
+   if (shader.num_arrays) {
+      shader.arrays = (r600_shader_array *)calloc(shader.num_arrays, sizeof(r600_shader_array));
+      for (unsigned i = 0; i < shader.num_arrays; ++i) {
+         shader.arrays[i].comp_mask = m_reg_arrays[i]->mask();
+         shader.arrays[i].gpr_start = m_reg_arrays[i]->sel();
+         shader.arrays[i].gpr_count = m_reg_arrays[i]->size();
+      }
+      shader.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
+   }
+}
+
+void ShaderFromNirProcessor::finalize()
+{
+   do_finalize();
+
+   for (auto& i : m_inputs)
+      m_sh_info.input[i.first].gpr = i.second->sel();
+
+   for (auto& i : m_outputs)
+      m_sh_info.output[i.first].gpr = i.second;
+
+   m_output.push_back(m_export_output);
+}
+
+}
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h
new file mode 100644
index 000000000..a48674dab
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_base.h
@@ -0,0 +1,224 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2019 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef sfn_shader_from_nir_h +#define sfn_shader_from_nir_h + + +#include "gallium/drivers/r600/r600_shader.h" + +#include "compiler/nir/nir.h" +#include "compiler/nir_types.h" + +#include "sfn_instruction_block.h" +#include "sfn_instruction_export.h" +#include "sfn_alu_defines.h" +#include "sfn_valuepool.h" +#include "sfn_debug.h" +#include "sfn_instruction_cf.h" +#include "sfn_emittexinstruction.h" +#include "sfn_emitaluinstruction.h" +#include "sfn_emitssboinstruction.h" + +#include <vector> +#include <set> +#include <stack> +#include <unordered_map> + +struct nir_instr; + +namespace r600 { + +extern SfnLog sfn_log; + +class ShaderFromNirProcessor : public ValuePool { +public: + ShaderFromNirProcessor(pipe_shader_type ptype, r600_pipe_shader_selector& sel, + r600_shader& sh_info, int scratch_size, enum chip_class _chip_class, + int atomic_base); + virtual ~ShaderFromNirProcessor(); + + void emit_instruction(Instruction *ir); + + PValue from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel = -1); + GPRVector vec_from_nir_with_fetch_constant(const nir_src& src, unsigned mask, + const GPRVector::Swizzle& swizzle, bool match = false); + + bool emit_instruction(EAluOp opcode, PValue dest, + std::vector<PValue> src0, + const std::set<AluModifiers>& m_flags); + void emit_export_instruction(WriteoutInstruction *ir); + void emit_instruction(AluInstruction *ir); + + void split_constants(nir_alu_instr* instr); + void remap_registers(); + + const nir_variable *get_deref_location(const nir_src& src) const; + + r600_shader& sh_info() {return m_sh_info;} + void add_param_output_reg(int loc, const GPRVector *gpr); + void set_output(unsigned pos, int sel); + const GPRVector *output_register(unsigned location) const; + void evaluate_spi_sid(r600_shader_io &io); + + enum chip_class get_chip_class() const; + + int remap_atomic_base(int base) { + return m_atomic_base_map[base]; + } + + void get_array_info(r600_shader& shader) const; + + virtual bool scan_inputs_read(const nir_shader *sh); + +protected: + + void set_var_address(nir_deref_instr *instr); + void set_input(unsigned pos, PValue var); + + bool scan_instruction(nir_instr *instr); + + virtual bool scan_sysvalue_access(nir_instr *instr) = 0; + + bool emit_if_start(int if_id, nir_if *if_stmt); + bool emit_else_start(int if_id); + bool emit_ifelse_end(int if_id); + + bool emit_loop_start(int loop_id); + bool emit_loop_end(int loop_id); + bool emit_jump_instruction(nir_jump_instr *instr); + + bool emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset); + bool emit_load_local_shared(nir_intrinsic_instr* instr); + bool emit_store_local_shared(nir_intrinsic_instr* instr); + bool emit_atomic_local_shared(nir_intrinsic_instr* instr); + + bool emit_barrier(nir_intrinsic_instr* instr); + + bool load_preloaded_value(const nir_dest& dest, int chan, PValue value, + bool 
as_last = true);
+
+   void inc_atomic_file_count();
+
+   enum ESlots {
+      es_face,
+      es_instanceid,
+      es_invocation_id,
+      es_patch_id,
+      es_pos,
+      es_rel_patch_id,
+      es_sample_mask_in,
+      es_sample_id,
+      es_sample_pos,
+      es_tess_factor_base,
+      es_vertexid,
+      es_tess_coord,
+      es_primitive_id,
+      es_helper_invocation,
+      es_last
+   };
+
+   std::bitset<es_last> m_sv_values;
+
+   bool allocate_reserved_registers();
+
+private:
+   virtual bool do_allocate_reserved_registers() = 0;
+
+   void emit_instruction_internal(Instruction *ir);
+
+   bool emit_alu_instruction(nir_instr *instr);
+   bool emit_deref_instruction(nir_deref_instr* instr);
+   bool emit_intrinsic_instruction(nir_intrinsic_instr* instr);
+   virtual bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr);
+   bool emit_tex_instruction(nir_instr* instr);
+   bool emit_discard_if(nir_intrinsic_instr* instr);
+   bool emit_load_ubo_vec4(nir_intrinsic_instr* instr);
+   bool emit_ssbo_atomic_add(nir_intrinsic_instr* instr);
+   bool load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufid);
+
+   /* Code creating functions */
+   bool emit_load_function_temp(const nir_variable *var, nir_intrinsic_instr *instr);
+   AluInstruction *emit_load_literal(const nir_load_const_instr *literal, const nir_src& src, unsigned writemask);
+
+   bool load_uniform(nir_intrinsic_instr* instr);
+   bool process_uniforms(nir_variable *uniform);
+
+   void append_block(int nesting_change);
+
+   virtual void emit_shader_start();
+   virtual bool emit_deref_instruction_override(nir_deref_instr* instr);
+
+   bool emit_store_scratch(nir_intrinsic_instr* instr);
+   bool emit_load_scratch(nir_intrinsic_instr* instr);
+   bool emit_shader_clock(nir_intrinsic_instr* instr);
+   virtual void do_finalize() = 0;
+
+   void finalize();
+   friend class ShaderFromNir;
+
+   std::set<nir_variable*> m_arrays;
+
+   std::map<unsigned, PValue> m_inputs;
+   std::map<unsigned, int> m_outputs;
+
+   std::map<unsigned, nir_variable*> m_var_derefs;
+   std::map<const nir_variable *, nir_variable_mode> m_var_mode;
+
+   std::map<unsigned, const glsl_type*> m_uniform_type_map;
+   std::map<int, IfElseInstruction *> m_if_block_start_map;
+   std::map<int, LoopBeginInstruction *> m_loop_begin_block_map;
+
+   pipe_shader_type m_processor_type;
+
+   std::vector<InstructionBlock> m_output;
+   unsigned m_nesting_depth;
+   unsigned m_block_number;
+   InstructionBlock m_export_output;
+   r600_shader& m_sh_info;
+   enum chip_class m_chip_class;
+   EmitTexInstruction m_tex_instr;
+   EmitAluInstruction m_alu_instr;
+   EmitSSBOInstruction m_ssbo_instr;
+   OutputRegisterMap m_output_register_map;
+
+   IfElseInstruction *m_pending_else;
+   int m_scratch_size;
+   int m_next_hwatomic_loc;
+
+   r600_pipe_shader_selector& m_sel;
+   int m_atomic_base;
+   int m_image_count;
+
+   std::unordered_map<int, int> m_atomic_base_map;
+   AluInstruction *last_emitted_alu;
+};
+
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
new file mode 100644
index 000000000..26ac54981
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.cpp
@@ -0,0 +1,112 @@
+/* -*- mesa-c++  -*-
+ *
+ * Copyright (c) 2018 Collabora LTD
+ *
+ * Author: Gert Wollny <gert.wollny@collabora.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use,
copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_shader_compute.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +ComputeShaderFromNir::ComputeShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + UNUSED const r600_shader_key& key, + enum chip_class chip_class): + ShaderFromNirProcessor (PIPE_SHADER_COMPUTE, sel, sh->shader, + sh->scratch_space_needed, chip_class, 0), + m_reserved_registers(0) +{ +} + +bool ComputeShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) +{ + return true; +} +bool ComputeShaderFromNir::do_allocate_reserved_registers() +{ + int thread_id_sel = m_reserved_registers++; + int wg_id_sel = m_reserved_registers++; + + for (int i = 0; i < 3; ++i) { + auto tmp = new GPRValue(thread_id_sel, i); + tmp->set_as_input(); + tmp->set_keep_alive(); + m_local_invocation_id[i] = PValue(tmp); + inject_register(tmp->sel(), i, m_local_invocation_id[i], false); + + tmp = new GPRValue(wg_id_sel, i); + tmp->set_as_input(); + tmp->set_keep_alive(); + m_workgroup_id[i] = PValue(tmp); + inject_register(tmp->sel(), i, m_workgroup_id[i], false); + } + return true; +} + +bool ComputeShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_local_invocation_id: + return emit_load_3vec(instr, m_local_invocation_id); + case nir_intrinsic_load_work_group_id: + return emit_load_3vec(instr, m_workgroup_id); + case nir_intrinsic_load_num_work_groups: + return emit_load_num_work_groups(instr); + default: + return false; + } +} + +bool ComputeShaderFromNir::emit_load_3vec(nir_intrinsic_instr* instr, + const std::array<PValue,3>& src) +{ + for (int i = 0; i < 3; ++i) + load_preloaded_value(instr->dest, i, src[i], i == 2); + return true; +} + +bool ComputeShaderFromNir::emit_load_num_work_groups(nir_intrinsic_instr* instr) +{ + PValue a_zero = get_temp_register(1); + emit_instruction(new AluInstruction(op1_mov, a_zero, Value::zero, EmitInstruction::last_write)); + GPRVector dest; + for (int i = 0; i < 3; ++i) + dest.set_reg_i(i, from_nir(instr->dest, i)); + dest.set_reg_i(3, from_nir(instr->dest, 7)); + + auto ir = new FetchInstruction(vc_fetch, no_index_offset, + fmt_32_32_32_32, vtx_nf_int, vtx_es_none, a_zero, dest, 16, + false, 16, R600_BUFFER_INFO_CONST_BUFFER, 0, + bim_none, false, false, 0, 0, 0, PValue(), {0,1,2,7}); + ir->set_flag(vtx_srf_mode); + emit_instruction(ir); + return true; +} + +void ComputeShaderFromNir::do_finalize() +{ + +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h new file mode 100644 index 000000000..fea6f0122 --- /dev/null +++ 
b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_compute.h @@ -0,0 +1,62 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_COMPUTE_SHADER_FROM_NIR_H +#define SFN_COMPUTE_SHADER_FROM_NIR_H + +#include "sfn_shader_base.h" +#include "sfn_shaderio.h" +#include <bitset> + +namespace r600 { + +class ComputeShaderFromNir : public ShaderFromNirProcessor +{ +public: + ComputeShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + const r600_shader_key &key, + enum chip_class chip_class); + + bool scan_sysvalue_access(nir_instr *instr) override; + +private: + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + + bool do_allocate_reserved_registers() override; + void do_finalize() override; + + bool emit_load_3vec(nir_intrinsic_instr* instr, const std::array<PValue,3>& src); + bool emit_load_num_work_groups(nir_intrinsic_instr* instr); + + int m_reserved_registers; + std::array<PValue,3> m_workgroup_id; + std::array<PValue,3> m_local_invocation_id; +}; + +} + +#endif // SFN_COMPUTE_SHADER_FROM_NIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp new file mode 100644 index 000000000..b13cb8a8a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.cpp @@ -0,0 +1,1085 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "pipe/p_defines.h" +#include "tgsi/tgsi_from_mesa.h" +#include "sfn_shader_fragment.h" +#include "sfn_instruction_fetch.h" + +namespace r600 { + +FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir, + r600_shader& sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key, + enum chip_class chip_class): + ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0), + m_max_color_exports(MAX2(key.ps.nr_cbufs,1)), + m_max_counted_color_exports(0), + m_two_sided_color(key.ps.color_two_side), + m_last_pixel_export(nullptr), + m_nir(nir), + m_reserved_registers(0), + m_frag_pos_index(0), + m_need_back_color(false), + m_front_face_loaded(false), + m_depth_exports(0), + m_apply_sample_mask(key.ps.apply_sample_id_mask), + m_dual_source_blend(key.ps.dual_source_blend), + m_pos_input(nullptr) +{ + for (auto& i: m_interpolator) { + i.enabled = false; + i.ij_index= 0; + } + + sh_info().rat_base = key.ps.nr_cbufs; + sh_info().atomic_base = key.ps.first_atomic_counter; +} + +unsigned barycentric_ij_index(nir_intrinsic_instr *instr) +{ + unsigned index = 0; + switch (instr->intrinsic) { + case nir_intrinsic_load_barycentric_sample: + index = 0; + break; + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_pixel: + index = 1; + break; + case nir_intrinsic_load_barycentric_centroid: + index = 2; + break; + default: + unreachable("Unknown interpolator intrinsic"); + } + + switch (nir_intrinsic_interp_mode(instr)) { + case INTERP_MODE_NONE: + case INTERP_MODE_SMOOTH: + case INTERP_MODE_COLOR: + return index; + case INTERP_MODE_NOPERSPECTIVE: + return index + 3; + case INTERP_MODE_FLAT: + case INTERP_MODE_EXPLICIT: + default: + unreachable("unknown/unsupported mode for load_interpolated"); + } + return 0; +} + +bool FragmentShaderFromNir::process_load_input(nir_intrinsic_instr *instr, + bool interpolated) +{ + sfn_log << SfnLog::io << "Parse " << instr->instr + << "\n"; + + auto index = nir_src_as_const_value(instr->src[interpolated ? 
1 : 0]); + assert(index); + + unsigned location = nir_intrinsic_io_semantics(instr).location + index->u32; + auto semantic = r600_get_varying_semantic(location); + tgsi_semantic name = (tgsi_semantic)semantic.first; + unsigned sid = semantic.second; + + + if (location == VARYING_SLOT_POS) { + m_sv_values.set(es_pos); + m_pos_input = new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32, + nir_intrinsic_component(instr), + nir_dest_num_components(instr->dest), + TGSI_INTERPOLATE_LINEAR, TGSI_INTERPOLATE_LOC_CENTER); + m_shaderio.add_input(m_pos_input); + return true; + } + + if (location == VARYING_SLOT_FACE) { + m_sv_values.set(es_face); + return true; + } + + + tgsi_interpolate_mode tgsi_interpolate = TGSI_INTERPOLATE_CONSTANT; + tgsi_interpolate_loc tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; + + bool uses_interpol_at_centroid = false; + + if (interpolated) { + + glsl_interp_mode mode = INTERP_MODE_NONE; + auto parent = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); + mode = (glsl_interp_mode)nir_intrinsic_interp_mode(parent); + switch (parent->intrinsic) { + case nir_intrinsic_load_barycentric_sample: + tgsi_loc = TGSI_INTERPOLATE_LOC_SAMPLE; + break; + case nir_intrinsic_load_barycentric_at_sample: + case nir_intrinsic_load_barycentric_at_offset: + case nir_intrinsic_load_barycentric_pixel: + tgsi_loc = TGSI_INTERPOLATE_LOC_CENTER; + break; + case nir_intrinsic_load_barycentric_centroid: + tgsi_loc = TGSI_INTERPOLATE_LOC_CENTROID; + uses_interpol_at_centroid = true; + break; + default: + std::cerr << "Instruction " << nir_intrinsic_infos[parent->intrinsic].name << " as parent of " + << nir_intrinsic_infos[instr->intrinsic].name + << " interpolator?\n"; + assert(0); + } + + switch (mode) { + case INTERP_MODE_NONE: + if (name == TGSI_SEMANTIC_COLOR) { + tgsi_interpolate = TGSI_INTERPOLATE_COLOR; + break; + } + FALLTHROUGH; + case INTERP_MODE_SMOOTH: + tgsi_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case INTERP_MODE_NOPERSPECTIVE: + tgsi_interpolate = TGSI_INTERPOLATE_LINEAR; + break; + case INTERP_MODE_FLAT: + break; + case INTERP_MODE_COLOR: + tgsi_interpolate = TGSI_INTERPOLATE_COLOR; + break; + case INTERP_MODE_EXPLICIT: + default: + assert(0); + } + + m_interpolators_used.set(barycentric_ij_index(parent)); + + } + + switch (name) { + case TGSI_SEMANTIC_COLOR: { + auto input = m_shaderio.find_varying(name, sid); + if (!input) { + m_shaderio.add_input(new ShaderInputColor(name, sid, + nir_intrinsic_base(instr) + index->u32, + nir_intrinsic_component(instr), + nir_dest_num_components(instr->dest), + tgsi_interpolate, tgsi_loc)); + } else { + if (uses_interpol_at_centroid) + input->set_uses_interpolate_at_centroid(); + + auto varying = static_cast<ShaderInputVarying&>(*input); + varying.update_mask(nir_dest_num_components(instr->dest), + nir_intrinsic_component(instr)); + } + + m_need_back_color = m_two_sided_color; + return true; + } + case TGSI_SEMANTIC_PRIMID: + sh_info().gs_prim_id_input = true; + sh_info().ps_prim_id_input = m_shaderio.inputs().size(); + FALLTHROUGH; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_LAYER: + case TGSI_SEMANTIC_PCOORD: + case TGSI_SEMANTIC_VIEWPORT_INDEX: + case TGSI_SEMANTIC_CLIPDIST: { + auto input = m_shaderio.find_varying(name, sid); + if (!input) { + m_shaderio.add_input(new ShaderInputVarying(name, sid, nir_intrinsic_base(instr) + index->u32, + nir_intrinsic_component(instr), + nir_dest_num_components(instr->dest), + tgsi_interpolate, tgsi_loc)); + } 
else { + if (uses_interpol_at_centroid) + input->set_uses_interpolate_at_centroid(); + + auto varying = static_cast<ShaderInputVarying&>(*input); + varying.update_mask(nir_dest_num_components(instr->dest), + nir_intrinsic_component(instr)); + } + + return true; + } + default: + return false; + } +} + + +bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + + switch (ii->intrinsic) { + case nir_intrinsic_load_front_face: + m_sv_values.set(es_face); + break; + case nir_intrinsic_load_sample_mask_in: + m_sv_values.set(es_sample_mask_in); + break; + case nir_intrinsic_load_sample_pos: + m_sv_values.set(es_sample_pos); + FALLTHROUGH; + case nir_intrinsic_load_sample_id: + m_sv_values.set(es_sample_id); + break; + case nir_intrinsic_load_helper_invocation: + m_sv_values.set(es_helper_invocation); + sh_info().uses_helper_invocation = true; + break; + case nir_intrinsic_load_input: + return process_load_input(ii, false); + case nir_intrinsic_load_interpolated_input: { + return process_load_input(ii, true); + } + case nir_intrinsic_store_output: + return process_store_output(ii); + + default: + ; + } + } + default: + ; + } + return true; +} + +bool FragmentShaderFromNir::do_allocate_reserved_registers() +{ + assert(!m_reserved_registers); + + int face_reg_index = -1; + int sample_id_index = -1; + // enabled interpolators based on inputs + for (unsigned i = 0; i < s_max_interpolators; ++i) { + if (m_interpolators_used.test(i)) { + sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n"; + m_interpolator[i].enabled = true; + } + } + + // sort the varying inputs + m_shaderio.sort_varying_inputs(); + + // handle interpolators + int num_baryc = 0; + for (int i = 0; i < 6; ++i) { + if (m_interpolator[i].enabled) { + sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n"; + + m_interpolator[i].ij_index = num_baryc; + + unsigned sel = num_baryc / 2; + unsigned chan = 2 * (num_baryc % 2); + + auto ip_i = new GPRValue(sel, chan + 1); + ip_i->set_as_input(); + m_interpolator[i].i.reset(ip_i); + inject_register(sel, chan + 1, m_interpolator[i].i, false); + + auto ip_j = new GPRValue(sel, chan); + ip_j->set_as_input(); + m_interpolator[i].j.reset(ip_j); + inject_register(sel, chan, m_interpolator[i].j, false); + + ++num_baryc; + } + } + m_reserved_registers += (num_baryc + 1) >> 1; + + if (m_sv_values.test(es_pos)) { + m_frag_pos_index = m_reserved_registers++; + assert(m_pos_input); + m_pos_input->set_gpr(m_frag_pos_index); + } + + // handle system values + if (m_sv_values.test(es_face) || m_need_back_color) { + face_reg_index = m_reserved_registers++; + m_front_face_reg = std::make_shared<GPRValue>(face_reg_index,0); + m_front_face_reg->set_as_input(); + sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n"; + inject_register(m_front_face_reg->sel(), m_front_face_reg->chan(), m_front_face_reg, false); + + m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index)); + load_front_face(); + } + + if (m_sv_values.test(es_sample_mask_in)) { + if (face_reg_index < 0) + face_reg_index = m_reserved_registers++; + + m_sample_mask_reg = std::make_shared<GPRValue>(face_reg_index,2); + m_sample_mask_reg->set_as_input(); + sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n"; + sh_info().nsys_inputs = 1; + m_shaderio.add_input(new 
ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
+   }
+
+   if (m_sv_values.test(es_sample_id) ||
+       m_sv_values.test(es_sample_mask_in)) {
+      if (sample_id_index < 0)
+         sample_id_index = m_reserved_registers++;
+
+      m_sample_id_reg = std::make_shared<GPRValue>(sample_id_index, 3);
+      m_sample_id_reg->set_as_input();
+      sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
+      sh_info().nsys_inputs++;
+      m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
+   }
+
+   // The back color handling is not emitted in the code, so we have to add
+   // the inputs here, and later we also need to inject the code that sets
+   // the right color.
+   if (m_need_back_color) {
+      size_t ninputs = m_shaderio.inputs().size();
+      for (size_t k = 0; k < ninputs; ++k) {
+         ShaderInput& i = m_shaderio.input(k);
+
+         if (i.name() != TGSI_SEMANTIC_COLOR)
+            continue;
+
+         ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
+
+         size_t next_pos = m_shaderio.size();
+         auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
+         m_shaderio.add_input(bcol);
+         col.set_back_color(next_pos);
+      }
+      m_shaderio.set_two_sided();
+   }
+
+   m_shaderio.update_lds_pos();
+
+   set_reserved_registers(m_reserved_registers);
+
+   return true;
+}
+
+void FragmentShaderFromNir::emit_shader_start()
+{
+   if (m_sv_values.test(es_face))
+      load_front_face();
+
+   if (m_sv_values.test(es_pos)) {
+      for (int i = 0; i < 4; ++i) {
+         auto v = new GPRValue(m_frag_pos_index, i);
+         v->set_as_input();
+         auto reg = PValue(v);
+         if (i == 3)
+            emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
+         m_frag_pos[i] = reg;
+      }
+   }
+
+   if (m_sv_values.test(es_helper_invocation)) {
+      m_helper_invocation = get_temp_register();
+      auto dummy = PValue(new GPRValue(m_helper_invocation->sel(), 7));
+      emit_instruction(new AluInstruction(op1_mov, m_helper_invocation, literal(-1), {alu_write, alu_last_instr}));
+      GPRVector dst({dummy, dummy, dummy, dummy});
+      std::array<int,4> swz = {7,7,7,7};
+      dst.set_reg_i(m_helper_invocation->chan(), m_helper_invocation);
+      swz[m_helper_invocation->chan()] = 4;
+
+      auto vtx = new FetchInstruction(dst, m_helper_invocation,
+                                      R600_BUFFER_INFO_CONST_BUFFER, bim_none);
+      vtx->set_flag(vtx_vpm);
+      vtx->set_flag(vtx_use_tc);
+      vtx->set_dest_swizzle(swz);
+      emit_instruction(vtx);
+   }
+}
+
+bool FragmentShaderFromNir::process_store_output(nir_intrinsic_instr *instr)
+{
+   auto semantic = nir_intrinsic_io_semantics(instr);
+   unsigned driver_loc = nir_intrinsic_base(instr);
+
+   if (sh_info().noutput <= driver_loc)
+      sh_info().noutput = driver_loc + 1;
+
+   r600_shader_io& io = sh_info().output[driver_loc];
+   tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>(semantic.location),
+                                    &io.name, &io.sid);
+
+   unsigned component = nir_intrinsic_component(instr);
+   io.write_mask |= nir_intrinsic_write_mask(instr) << component;
+
+   if (semantic.location == FRAG_RESULT_COLOR && !m_dual_source_blend) {
+      sh_info().fs_write_all = true;
+   }
+
+   if (semantic.location == FRAG_RESULT_COLOR ||
+       (semantic.location >= FRAG_RESULT_DATA0 &&
+        semantic.location <= FRAG_RESULT_DATA7)) {
+      ++m_max_counted_color_exports;
+
+      /* Hack: force dual source output handling if one color output has a
+       * dual_source_blend_index > 0 */
+      if (semantic.location == FRAG_RESULT_COLOR &&
+          semantic.dual_source_blend_index > 0)
+         m_dual_source_blend = true;
+
+      if (m_max_counted_color_exports > 1)
+         sh_info().fs_write_all = false;
+      return true;
+   }
+
+   if (semantic.location == FRAG_RESULT_DEPTH ||
+       semantic.location == FRAG_RESULT_STENCIL ||
+       semantic.location == FRAG_RESULT_SAMPLE_MASK) {
+      io.write_mask = 15;
+      return true;
+   }
+
+   return false;
+}
+
+bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
+{
+   auto dest = from_nir(instr->dest, 0);
+   assert(m_sample_id_reg);
+   assert(m_sample_mask_reg);
+
+   emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
+   emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
+   return true;
+}
+
+bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_sample_mask_in:
+      if (m_apply_sample_mask) {
+         return emit_load_sample_mask_in(instr);
+      } else
+         return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
+   case nir_intrinsic_load_sample_id:
+      return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
+   case nir_intrinsic_load_front_face:
+      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+   case nir_intrinsic_load_sample_pos:
+      return emit_load_sample_pos(instr);
+   case nir_intrinsic_load_helper_invocation:
+      return load_preloaded_value(instr->dest, 0, m_helper_invocation);
+   case nir_intrinsic_load_input:
+      return emit_load_input(instr);
+   case nir_intrinsic_load_barycentric_sample:
+   case nir_intrinsic_load_barycentric_pixel:
+   case nir_intrinsic_load_barycentric_centroid: {
+      unsigned ij = barycentric_ij_index(instr);
+      return load_preloaded_value(instr->dest, 0, m_interpolator[ij].i) &&
+             load_preloaded_value(instr->dest, 1, m_interpolator[ij].j);
+   }
+   case nir_intrinsic_load_barycentric_at_offset:
+      return load_barycentric_at_offset(instr);
+   case nir_intrinsic_load_barycentric_at_sample:
+      return load_barycentric_at_sample(instr);
+   case nir_intrinsic_load_interpolated_input: {
+      return emit_load_interpolated_input(instr);
+   }
+   case nir_intrinsic_store_output:
+      return emit_store_output(instr);
+   default:
+      return false;
+   }
+}
+
+bool FragmentShaderFromNir::emit_store_output(nir_intrinsic_instr* instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+
+   if (location == FRAG_RESULT_COLOR)
+      return emit_export_pixel(instr, m_dual_source_blend ? 1 : m_max_color_exports);
+
+   if ((location >= FRAG_RESULT_DATA0 &&
+        location <= FRAG_RESULT_DATA7) ||
+       location == FRAG_RESULT_DEPTH ||
+       location == FRAG_RESULT_STENCIL ||
+       location == FRAG_RESULT_SAMPLE_MASK)
+      return emit_export_pixel(instr, 1);
+
+   sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_output for " << location << "\n";
+   return false;
+}
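
With m_apply_sample_mask set, emit_load_sample_mask_in() above narrows gl_SampleMaskIn to the single sample this invocation shades, using a shift and an AND (LSHL_INT followed by AND_INT). The equivalent scalar computation, shown only for illustration (the function name is ours):

   #include <cstdint>

   // gl_SampleMaskIn under forced per-sample shading: keep only the bit
   // of this invocation's own sample within the hardware coverage mask.
   static uint32_t narrowed_sample_mask(uint32_t hw_coverage, uint32_t sample_id)
   {
      return (UINT32_C(1) << sample_id) & hw_coverage;
   }
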
+bool FragmentShaderFromNir::emit_load_interpolated_input(nir_intrinsic_instr* instr)
+{
+   unsigned loc = nir_intrinsic_io_semantics(instr).location;
+   switch (loc) {
+   case VARYING_SLOT_POS:
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         load_preloaded_value(instr->dest, i, m_frag_pos[i]);
+      }
+      return true;
+   case VARYING_SLOT_FACE:
+      return load_preloaded_value(instr->dest, 0, m_front_face_reg);
+   default:
+      ;
+   }
+
+   auto param = nir_src_as_const_value(instr->src[1]);
+   assert(param && "Indirect PS inputs not (yet) supported");
+
+   auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr));
+
+   auto dst = nir_intrinsic_component(instr) ? get_temp_vec4() : vec_from_nir(instr->dest, 4);
+
+   io.set_gpr(dst.sel());
+
+   Interpolator ip = {true, 0, from_nir(instr->src[0], 0), from_nir(instr->src[0], 1)};
+
+   if (!load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
+                          nir_intrinsic_component(instr)))
+      return false;
+
+   if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
+
+      auto& color_input = static_cast<ShaderInputColor&>(io);
+      auto& bgio = m_shaderio.input(color_input.back_color_input_index());
+
+      GPRVector bgcol = get_temp_vec4();
+      bgio.set_gpr(bgcol.sel());
+      load_interpolated(bgcol, bgio, ip, nir_dest_num_components(instr->dest), 0);
+
+      load_front_face();
+
+      AluInstruction *ir = nullptr;
+      for (unsigned i = 0; i < 4; ++i) {
+         ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+   }
+
+   AluInstruction *ir = nullptr;
+   if (nir_intrinsic_component(instr) != 0) {
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write});
+         emit_instruction(ir);
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+   }
+
+   return true;
+}
+
+bool FragmentShaderFromNir::load_barycentric_at_offset(nir_intrinsic_instr* instr)
+{
+   auto interpolator = m_interpolator[barycentric_ij_index(instr)];
+   PValue dummy(new GPRValue(interpolator.i->sel(), 0));
+
+   GPRVector help = get_temp_vec4();
+   GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
+
+   auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
+   getgradh->set_dest_swizzle({0,1,7,7});
+   getgradh->set_flag(TexInstruction::x_unnormalized);
+   getgradh->set_flag(TexInstruction::y_unnormalized);
+   getgradh->set_flag(TexInstruction::z_unnormalized);
+   getgradh->set_flag(TexInstruction::w_unnormalized);
+   getgradh->set_flag(TexInstruction::grad_fine);
+   emit_instruction(getgradh);
+
+   auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
+   getgradv->set_dest_swizzle({7,7,0,1});
+   getgradv->set_flag(TexInstruction::x_unnormalized);
+   getgradv->set_flag(TexInstruction::y_unnormalized);
+   getgradv->set_flag(TexInstruction::z_unnormalized);
+   getgradv->set_flag(TexInstruction::w_unnormalized);
+   getgradv->set_flag(TexInstruction::grad_fine);
+   emit_instruction(getgradv);
+
+   PValue ofs_x = from_nir(instr->src[0], 0);
+   PValue ofs_y = from_nir(instr->src[0], 1);
+   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
+   emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write}));
+   emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write, alu_last_instr}));
+
+   return true;
+}
+
+bool FragmentShaderFromNir::load_barycentric_at_sample(nir_intrinsic_instr* instr)
+{
+   GPRVector slope = get_temp_vec4();
+
+   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
+                                     from_nir_with_fetch_constant(instr->src[0], 0),
+                                     0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
+   fetch->set_flag(vtx_srf_mode);
+   emit_instruction(fetch);
+
+   GPRVector grad = get_temp_vec4();
+
+   auto interpolator = 
m_interpolator[barycentric_ij_index(instr)]; + assert(interpolator.enabled); + PValue dummy(new GPRValue(interpolator.i->sel(), 0)); + + GPRVector src({interpolator.j, interpolator.i, dummy, dummy}); + + auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue()); + tex->set_flag(TexInstruction::grad_fine); + tex->set_flag(TexInstruction::x_unnormalized); + tex->set_flag(TexInstruction::y_unnormalized); + tex->set_flag(TexInstruction::z_unnormalized); + tex->set_flag(TexInstruction::w_unnormalized); + tex->set_dest_swizzle({0,1,7,7}); + emit_instruction(tex); + + tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue()); + tex->set_flag(TexInstruction::x_unnormalized); + tex->set_flag(TexInstruction::y_unnormalized); + tex->set_flag(TexInstruction::z_unnormalized); + tex->set_flag(TexInstruction::w_unnormalized); + tex->set_flag(TexInstruction::grad_fine); + tex->set_dest_swizzle({7,7,0,1}); + emit_instruction(tex); + + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr})); + + emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 0), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write})); + emit_instruction(new AluInstruction(op3_muladd, from_nir(instr->dest, 1), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write, alu_last_instr})); + + return true; +} + +bool FragmentShaderFromNir::emit_load_input(nir_intrinsic_instr* instr) +{ + unsigned loc = nir_intrinsic_io_semantics(instr).location; + auto param = nir_src_as_const_value(instr->src[0]); + assert(param && "Indirect PS inputs not (yet) supported"); + + auto& io = m_shaderio.input(param->u32 + nir_intrinsic_base(instr), nir_intrinsic_component(instr)); + + assert(nir_intrinsic_io_semantics(instr).num_slots == 1); + + unsigned num_components = nir_dest_num_components(instr->dest); + + switch (loc) { + case VARYING_SLOT_POS: + for (unsigned i = 0; i < num_components; ++i) { + load_preloaded_value(instr->dest, i, m_frag_pos[i]); + } + return true; + case VARYING_SLOT_FACE: + return load_preloaded_value(instr->dest, 0, m_front_face_reg); + default: + ; + } + + auto dst = nir_intrinsic_component(instr) ? 
get_temp_vec4() : vec_from_nir(instr->dest, 4); + + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op1_interp_load_p0, dst[i], + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + + io.lds_pos(), i)), + EmitInstruction::write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + + /* TODO: back color */ + if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) { + Interpolator ip = {false, 0, NULL, NULL}; + + auto & color_input = static_cast<ShaderInputColor&> (io); + auto& bgio = m_shaderio.input(color_input.back_color_input_index()); + + GPRVector bgcol = get_temp_vec4(); + bgio.set_gpr(bgcol.sel()); + load_interpolated(bgcol, bgio, ip, num_components, 0); + + load_front_face(); + + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + if (nir_intrinsic_component(instr) != 0) { + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), dst[i + nir_intrinsic_component(instr)], {alu_write}); + emit_instruction(ir); + } + if (ir) + ir->set_flag(alu_last_instr); + } + + + return true; +} + +void FragmentShaderFromNir::load_front_face() +{ + assert(m_front_face_reg); + if (m_front_face_loaded) + return; + + auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg, + Value::zero, {alu_write, alu_last_instr}); + m_front_face_loaded = true; + emit_instruction(ir); +} + +bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr) +{ + GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest)); + auto fetch = new FetchInstruction(vc_fetch, + no_index_offset, + fmt_32_32_32_32_float, + vtx_nf_scaled, + vtx_es_none, + m_sample_id_reg, + dest, + 0, + false, + 0xf, + R600_BUFFER_INFO_CONST_BUFFER, + 0, + bim_none, + false, + false, + 0, + 0, + 0, + PValue(), + {0,1,2,3}); + fetch->set_flag(vtx_srf_mode); + emit_instruction(fetch); + return true; +} + +bool FragmentShaderFromNir::load_interpolated(GPRVector &dest, + ShaderInput& io, const Interpolator &ip, + int num_components, int start_comp) +{ + // replace io with ShaderInputVarying + if (io.interpolate() > 0) { + + sfn_log << SfnLog::io << "Using Interpolator (" << *ip.j << ", " << *ip.i << ")" << "\n"; + + if (num_components == 1) { + switch (start_comp) { + case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x); + case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); + case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z); + case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3); + default: + assert(0); + } + } + + if (num_components == 2) { + switch (start_comp) { + case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3); + case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc); + case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) && + load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1); + default: + assert(0); + } + } + + if (num_components == 3 && start_comp == 0) + return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) && + load_interpolated_one_comp(dest, io, ip, op2_interp_z); + + int full_write_mask = ((1 << num_components) - 1) << start_comp; + + bool success = 
load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc); + success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3); + return success; + + } else { + AluInstruction *ir = nullptr; + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op1_interp_load_p0, dest[i], + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + EmitInstruction::write); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + } + return true; +} + +bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, EAluOp op) +{ + for (unsigned i = 0; i < 2 ; ++i) { + int chan = i; + if (op == op2_interp_z) + chan += 2; + + + auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i, + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + i == 0 ? EmitInstruction::write : EmitInstruction::last); + dest.pin_to_channel(chan); + + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + return true; +} + +bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, + const Interpolator& ip, EAluOp op, int writemask) +{ + AluInstruction *ir = nullptr; + assert(ip.j); + assert(ip.i); + for (unsigned i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty); + dest.pin_to_channel(i); + ir->set_bank_swizzle(alu_vec_210); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + +bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, + EAluOp op, UNUSED int start, int comp) +{ + AluInstruction *ir = nullptr; + for (int i = 0; i < 4 ; ++i) { + ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, + PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)), + i == comp ? EmitInstruction::write : EmitInstruction::empty); + ir->set_bank_swizzle(alu_vec_210); + dest.pin_to_channel(i); + emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + return true; +} + + +bool FragmentShaderFromNir::emit_export_pixel(nir_intrinsic_instr* instr, int outputs) +{ + std::array<uint32_t,4> swizzle; + unsigned writemask = nir_intrinsic_write_mask(instr); + auto semantics = nir_intrinsic_io_semantics(instr); + unsigned driver_location = nir_intrinsic_base(instr); + + switch (semantics.location) { + case FRAG_RESULT_DEPTH: + writemask = 1; + swizzle = {0,7,7,7}; + break; + case FRAG_RESULT_STENCIL: + writemask = 2; + swizzle = {7,0,7,7}; + break; + case FRAG_RESULT_SAMPLE_MASK: + writemask = 4; + swizzle = {7,7,0,7}; + break; + default: + for (int i = 0; i < 4; ++i) { + swizzle[i] = (i < instr->num_components) ? i : 7; + } + } + + auto value = vec_from_nir_with_fetch_constant(instr->src[0], writemask, swizzle); + + set_output(driver_location, value.sel()); + + if (semantics.location == FRAG_RESULT_COLOR || + (semantics.location >= FRAG_RESULT_DATA0 && + semantics.location <= FRAG_RESULT_DATA7)) { + for (int k = 0 ; k < outputs; ++k) { + + unsigned location = (m_dual_source_blend && (semantics.location == FRAG_RESULT_COLOR) + ? 
semantics.dual_source_blend_index : driver_location) + k - m_depth_exports; + + sfn_log << SfnLog::io << "Pixel output at loc:" << location << "\n"; + + if (location >= m_max_color_exports) { + sfn_log << SfnLog::io << "Pixel output loc:" << location + << " dl:" << driver_location + << " skipped because we have only " << m_max_color_exports << " CBs\n"; + continue; + } + + m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel); + + if (sh_info().ps_export_highest < location) + sh_info().ps_export_highest = location; + + sh_info().nr_ps_color_exports++; + + unsigned mask = (0xfu << (location * 4)); + sh_info().ps_color_export_mask |= mask; + + emit_export_instruction(m_last_pixel_export); + }; + } else if (semantics.location == FRAG_RESULT_DEPTH || + semantics.location == FRAG_RESULT_STENCIL || + semantics.location == FRAG_RESULT_SAMPLE_MASK) { + m_depth_exports++; + emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel)); + } else { + return false; + } + return true; +} + + +bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, int outputs) +{ + std::array<uint32_t,4> swizzle; + unsigned writemask = nir_intrinsic_write_mask(instr); + switch (out_var->data.location) { + case FRAG_RESULT_DEPTH: + writemask = 1; + swizzle = {0,7,7,7}; + break; + case FRAG_RESULT_STENCIL: + writemask = 2; + swizzle = {7,0,7,7}; + break; + case FRAG_RESULT_SAMPLE_MASK: + writemask = 4; + swizzle = {7,7,0,7}; + break; + default: + for (int i = 0; i < 4; ++i) { + swizzle[i] = (i < instr->num_components) ? i : 7; + } + } + + auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle); + + set_output(out_var->data.driver_location, value.sel()); + + if (out_var->data.location == FRAG_RESULT_COLOR || + (out_var->data.location >= FRAG_RESULT_DATA0 && + out_var->data.location <= FRAG_RESULT_DATA7)) { + for (int k = 0 ; k < outputs; ++k) { + + unsigned location = (m_dual_source_blend && (out_var->data.location == FRAG_RESULT_COLOR) + ? out_var->data.index : out_var->data.driver_location) + k - m_depth_exports; + + sfn_log << SfnLog::io << "Pixel output " << out_var->name << " at loc:" << location << "\n"; + + if (location >= m_max_color_exports) { + sfn_log << SfnLog::io << "Pixel output loc:" << location + << " dl:" << out_var->data.location + << " skipped because we have only " << m_max_color_exports << " CBs\n"; + continue; + } + + m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel); + + if (sh_info().ps_export_highest < location) + sh_info().ps_export_highest = location; + + sh_info().nr_ps_color_exports++; + + unsigned mask = (0xfu << (location * 4)); + sh_info().ps_color_export_mask |= mask; + + emit_export_instruction(m_last_pixel_export); + }; + } else if (out_var->data.location == FRAG_RESULT_DEPTH || + out_var->data.location == FRAG_RESULT_STENCIL || + out_var->data.location == FRAG_RESULT_SAMPLE_MASK) { + m_depth_exports++; + emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel)); + } else { + return false; + } + return true; +} + +void FragmentShaderFromNir::do_finalize() +{ + // update shader io info and set LDS etc. + sh_info().ninput = m_shaderio.inputs().size(); + + sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n"; + for (size_t i = 0; i < sh_info().ninput; ++i) { + ShaderInput& input = m_shaderio.input(i); + int ij_idx = (input.ij_index() < 6 && + input.ij_index() >= 0) ? 
input.ij_index() : 0; + input.set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index); + } + + sh_info().two_side = m_shaderio.two_sided(); + sh_info().nlds = m_shaderio.nlds(); + + sh_info().nr_ps_max_color_exports = m_max_counted_color_exports; + + if (sh_info().fs_write_all) { + sh_info().nr_ps_max_color_exports = m_max_color_exports; + } + + if (!m_last_pixel_export) { + GPRVector v(0, {7,7,7,7}); + m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel); + sh_info().nr_ps_color_exports++; + sh_info().ps_color_export_mask = 0xf; + emit_export_instruction(m_last_pixel_export); + } + + m_last_pixel_export->set_last(); + + if (sh_info().fs_write_all) + sh_info().nr_ps_max_color_exports = 8; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h new file mode 100644 index 000000000..4755afbfe --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_fragment.h @@ -0,0 +1,117 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_fragment_shader_from_nir_h +#define sfn_fragment_shader_from_nir_h + +#include "sfn_shader_base.h" +#include "sfn_shaderio.h" +#include <bitset> + +namespace r600 { + +class FragmentShaderFromNir : public ShaderFromNirProcessor { +public: + FragmentShaderFromNir(const nir_shader& nir, r600_shader& sh_info, + r600_pipe_shader_selector &sel, const r600_shader_key &key, + enum chip_class chip_class); + bool scan_sysvalue_access(nir_instr *instr) override; +private: + + struct Interpolator { + bool enabled; + unsigned ij_index; + PValue i; + PValue j; + }; + + void emit_shader_start() override; + bool do_allocate_reserved_registers() override; + bool process_store_output(nir_intrinsic_instr *instr); + + bool emit_store_output(nir_intrinsic_instr* instr); + + bool emit_export_pixel(const nir_variable *, nir_intrinsic_instr* instr, int outputs); + bool emit_export_pixel(nir_intrinsic_instr* instr, int outputs); + bool load_interpolated(GPRVector &dest, ShaderInput &io, const Interpolator& ip, + int num_components, int start_comp); + bool load_interpolated_one_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip, EAluOp op); + bool load_interpolated_two_comp(GPRVector &dest, ShaderInput& io, const Interpolator& ip,EAluOp op, int writemask); + bool load_interpolated_two_comp_for_one(GPRVector &dest, + ShaderInput& io, const Interpolator& ip, EAluOp op, int start, int comp); + + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + void do_finalize() override; + + void load_front_face(); + + bool emit_load_input(nir_intrinsic_instr* instr); + bool emit_load_front_face(nir_intrinsic_instr* instr); + bool emit_load_sample_mask_in(nir_intrinsic_instr* instr); + bool emit_load_sample_pos(nir_intrinsic_instr* instr); + bool emit_load_sample_id(nir_intrinsic_instr* instr); + + bool process_load_input(nir_intrinsic_instr *instr, bool interpolated); + bool emit_load_interpolated_input(nir_intrinsic_instr* instr); + bool load_barycentric_at_offset(nir_intrinsic_instr* instr); + bool load_barycentric_at_sample(nir_intrinsic_instr* instr); + + + unsigned m_max_color_exports; + unsigned m_max_counted_color_exports; + bool m_two_sided_color; + ExportInstruction *m_last_pixel_export; + const nir_shader& m_nir; + + + std::array<Interpolator, 6> m_interpolator; + unsigned m_reserved_registers; + unsigned m_frag_pos_index; + PGPRValue m_front_face_reg; + PGPRValue m_sample_mask_reg; + PGPRValue m_sample_id_reg; + PGPRValue m_helper_invocation; + GPRVector m_frag_pos; + bool m_need_back_color; + bool m_front_face_loaded; + ShaderIO m_shaderio; + unsigned m_depth_exports; + + std::map<unsigned, PValue> m_input_cache; + + static const int s_max_interpolators = 6; + + std::bitset<s_max_interpolators> m_interpolators_used; + + unsigned m_apply_sample_mask; + bool m_dual_source_blend; + ShaderInput *m_pos_input; + +}; + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp new file mode 100644 index 000000000..0541e0ad0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.cpp @@ -0,0 +1,343 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * 
on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_shader_geometry.h" +#include "sfn_instruction_misc.h" +#include "sfn_instruction_fetch.h" +#include "sfn_shaderio.h" + +namespace r600 { + +GeometryShaderFromNir::GeometryShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key, + enum chip_class chip_class): + VertexStage(PIPE_SHADER_GEOMETRY, sel, sh->shader, + sh->scratch_space_needed, chip_class, key.gs.first_atomic_counter), + m_pipe_shader(sh), + m_so_info(&sel.so), + m_first_vertex_emitted(false), + m_offset(0), + m_next_input_ring_offset(0), + m_key(key), + m_clip_dist_mask(0), + m_cur_ring_output(0), + m_gs_tri_strip_adj_fix(false), + m_input_mask(0) +{ + sh_info().atomic_base = key.gs.first_atomic_counter; +} + +bool GeometryShaderFromNir::emit_store(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + uint32_t write_mask = nir_intrinsic_write_mask(instr); + GPRVector::Swizzle swz = swizzle_from_mask(write_mask); + + auto out_value = vec_from_nir_with_fetch_constant(instr->src[0], write_mask, swz, true); + + sh_info().output[driver_location].write_mask = write_mask; + + auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write_ind, out_value, + 4 * driver_location, + instr->num_components, m_export_base[0]); + streamout_data[location] = ir; + + return true; +} + +bool GeometryShaderFromNir::scan_sysvalue_access(UNUSED nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return true; + + nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr); + + switch (ii->intrinsic) { + case nir_intrinsic_store_output: + return process_store_output(ii); + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + return process_load_input(ii); + default: + return true; + } +} + +bool GeometryShaderFromNir::process_store_output(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + + auto driver_location = nir_intrinsic_base(instr) + index->u32; + + if (location == VARYING_SLOT_COL0 || + location == VARYING_SLOT_COL1 || + (location >= VARYING_SLOT_VAR0 && + location <= VARYING_SLOT_VAR31) || + (location >= VARYING_SLOT_TEX0 && + location <= VARYING_SLOT_TEX7) || + location == VARYING_SLOT_BFC0 || + location == VARYING_SLOT_BFC1 || + location == VARYING_SLOT_PNTC || + location == VARYING_SLOT_CLIP_VERTEX || + location == VARYING_SLOT_CLIP_DIST0 || + location == VARYING_SLOT_CLIP_DIST1 
||
+       location == VARYING_SLOT_PRIMITIVE_ID ||
+       location == VARYING_SLOT_POS ||
+       location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_LAYER ||
+       location == VARYING_SLOT_VIEWPORT ||
+       location == VARYING_SLOT_FOGC) {
+      r600_shader_io& io = sh_info().output[driver_location];
+
+      auto semantic = r600_get_varying_semantic(location);
+      io.name = semantic.first;
+      io.sid = semantic.second;
+
+      evaluate_spi_sid(io);
+
+      if (sh_info().noutput <= driver_location)
+         sh_info().noutput = driver_location + 1;
+
+      if (location == VARYING_SLOT_CLIP_DIST0 ||
+          location == VARYING_SLOT_CLIP_DIST1) {
+         m_clip_dist_mask |= 1 << (location - VARYING_SLOT_CLIP_DIST0);
+      }
+
+      if (location == VARYING_SLOT_VIEWPORT) {
+         sh_info().vs_out_viewport = 1;
+         sh_info().vs_out_misc_write = 1;
+      }
+      return true;
+   }
+   return false;
+}
+
+bool GeometryShaderFromNir::process_load_input(nir_intrinsic_instr* instr)
+{
+   auto location = nir_intrinsic_io_semantics(instr).location;
+   auto index = nir_src_as_const_value(instr->src[1]);
+   assert(index);
+
+   auto driver_location = nir_intrinsic_base(instr) + index->u32;
+
+   if (location == VARYING_SLOT_POS ||
+       location == VARYING_SLOT_PSIZ ||
+       location == VARYING_SLOT_FOGC ||
+       location == VARYING_SLOT_CLIP_VERTEX ||
+       location == VARYING_SLOT_CLIP_DIST0 ||
+       location == VARYING_SLOT_CLIP_DIST1 ||
+       location == VARYING_SLOT_COL0 ||
+       location == VARYING_SLOT_COL1 ||
+       location == VARYING_SLOT_BFC0 ||
+       location == VARYING_SLOT_BFC1 ||
+       location == VARYING_SLOT_PNTC ||
+       (location >= VARYING_SLOT_VAR0 &&
+        location <= VARYING_SLOT_VAR31) ||
+       (location >= VARYING_SLOT_TEX0 &&
+        location <= VARYING_SLOT_TEX7)) {
+
+      uint64_t bit = 1ull << location;
+      if (!(bit & m_input_mask)) {
+         r600_shader_io& io = sh_info().input[driver_location];
+         auto semantic = r600_get_varying_semantic(location);
+         io.name = semantic.first;
+         io.sid = semantic.second;
+
+         io.ring_offset = 16 * driver_location;
+         ++sh_info().ninput;
+         m_next_input_ring_offset += 16;
+         m_input_mask |= bit;
+      }
+      return true;
+   }
+   return false;
+}
+
+bool GeometryShaderFromNir::do_allocate_reserved_registers()
+{
+   const int sel[6] = {0, 0, 0, 1, 1, 1};
+   const int chan[6] = {0, 1, 3, 0, 1, 2};
+
+   increment_reserved_registers();
+   increment_reserved_registers();
+
+   /* Reserve registers used by the shaders (should check how many
+    * components are actually used) */
+   for (int i = 0; i < 6; ++i) {
+      auto reg = new GPRValue(sel[i], chan[i]);
+      reg->set_as_input();
+      m_per_vertex_offsets[i].reset(reg);
+      inject_register(sel[i], chan[i], m_per_vertex_offsets[i], false);
+   }
+   auto reg = new GPRValue(0, 2);
+   reg->set_as_input();
+   m_primitive_id.reset(reg);
+   inject_register(0, 2, m_primitive_id, false);
+
+   reg = new GPRValue(1, 3);
+   reg->set_as_input();
+   m_invocation_id.reset(reg);
+   inject_register(1, 3, m_invocation_id, false);
+
+   m_export_base[0] = get_temp_register(0);
+   m_export_base[1] = get_temp_register(0);
+   m_export_base[2] = get_temp_register(0);
+   m_export_base[3] = get_temp_register(0);
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[0], Value::zero, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[1], Value::zero, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[2], Value::zero, {alu_write, alu_last_instr}));
+   emit_instruction(new AluInstruction(op1_mov, m_export_base[3], Value::zero, {alu_write, alu_last_instr}));
+
+   sh_info().ring_item_sizes[0] = m_next_input_ring_offset;
+
+   if
(m_key.gs.tri_strip_adj_fix)
+      emit_adj_fix();
+
+   return true;
+}
+
+void GeometryShaderFromNir::emit_adj_fix()
+{
+   PValue adjhelp0(new GPRValue(m_export_base[0]->sel(), 1));
+   emit_instruction(op2_and_int, adjhelp0, {m_primitive_id, Value::one_i}, {alu_write, alu_last_instr});
+
+   int reg_indices[6];
+   int reg_channels[6] = {1, 2, 3, 1, 2, 3};
+
+   int rotate_indices[6] = {4, 5, 0, 1, 2, 3};
+
+   reg_indices[0] = reg_indices[1] = reg_indices[2] = m_export_base[1]->sel();
+   reg_indices[3] = reg_indices[4] = reg_indices[5] = m_export_base[2]->sel();
+
+   std::array<PValue, 6> adjhelp;
+
+   AluInstruction *ir = nullptr;
+   for (int i = 0; i < 6; i++) {
+      adjhelp[i].reset(new GPRValue(reg_indices[i], reg_channels[i]));
+      ir = new AluInstruction(op3_cnde_int, adjhelp[i],
+                              {adjhelp0, m_per_vertex_offsets[i],
+                               m_per_vertex_offsets[rotate_indices[i]]},
+                              {alu_write});
+      if (i == 3)
+         ir->set_flag(alu_last_instr);
+      emit_instruction(ir);
+   }
+   ir->set_flag(alu_last_instr);
+
+   for (int i = 0; i < 6; i++)
+      m_per_vertex_offsets[i] = adjhelp[i];
+}
+
+
+bool GeometryShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_emit_vertex:
+      return emit_vertex(instr, false);
+   case nir_intrinsic_end_primitive:
+      return emit_vertex(instr, true);
+   case nir_intrinsic_load_primitive_id:
+      return load_preloaded_value(instr->dest, 0, m_primitive_id);
+   case nir_intrinsic_load_invocation_id:
+      return load_preloaded_value(instr->dest, 0, m_invocation_id);
+   case nir_intrinsic_store_output:
+      return emit_store(instr);
+   case nir_intrinsic_load_per_vertex_input:
+      return emit_load_per_vertex_input(instr);
+   default:
+      ;
+   }
+   return false;
+}
+
+bool GeometryShaderFromNir::emit_vertex(nir_intrinsic_instr* instr, bool cut)
+{
+   int stream = nir_intrinsic_stream_id(instr);
+   assert(stream < 4);
+
+   for (auto v: streamout_data) {
+      if (stream == 0 || v.first != VARYING_SLOT_POS) {
+         v.second->patch_ring(stream, m_export_base[stream]);
+         emit_instruction(v.second);
+      } else
+         delete v.second;
+   }
+   streamout_data.clear();
+   emit_instruction(new EmitVertex(stream, cut));
+
+   if (!cut)
+      emit_instruction(new AluInstruction(op2_add_int, m_export_base[stream], m_export_base[stream],
+                                          PValue(new LiteralValue(sh_info().noutput)),
+                                          {alu_write, alu_last_instr}));
+
+   return true;
+}
+
+bool GeometryShaderFromNir::emit_load_per_vertex_input(nir_intrinsic_instr* instr)
+{
+   auto dest = vec_from_nir(instr->dest, 4);
+
+   std::array<int, 4> swz = {7,7,7,7};
+   for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+      swz[i] = i + nir_intrinsic_component(instr);
+   }
+
+   auto literal_index = nir_src_as_const_value(instr->src[0]);
+
+   if (!literal_index) {
+      sfn_log << SfnLog::err << "GS: Indirect input addressing not (yet) supported\n";
+      return false;
+   }
+   assert(literal_index->u32 < 6);
+   assert(nir_intrinsic_io_semantics(instr).num_slots == 1);
+
+   PValue addr = m_per_vertex_offsets[literal_index->u32];
+   auto fetch = new FetchInstruction(vc_fetch, no_index_offset, dest, addr,
+                                     16 * nir_intrinsic_base(instr),
+                                     R600_GS_RING_CONST_BUFFER, PValue(), bim_none, true);
+   fetch->set_dest_swizzle(swz);
+
+   emit_instruction(fetch);
+   return true;
+}
+
+void GeometryShaderFromNir::do_finalize()
+{
+   if (m_clip_dist_mask) {
+      int num_clip_dist = 4 * util_bitcount(m_clip_dist_mask);
+      sh_info().cc_dist_mask = (1 << num_clip_dist) - 1;
+      sh_info().clip_dist_write = (1 << num_clip_dist) - 1;
+   }
+}
+
+}
diff --git
a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h new file mode 100644 index 000000000..b557b8f58 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_geometry.h @@ -0,0 +1,81 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef SFN_GEOMETRYSHADERFROMNIR_H +#define SFN_GEOMETRYSHADERFROMNIR_H + +#include "sfn_vertexstageexport.h" + +namespace r600 { + +class GeometryShaderFromNir : public VertexStage +{ +public: + GeometryShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum chip_class chip_class); + + bool scan_sysvalue_access(nir_instr *instr) override; + PValue primitive_id() override {return m_primitive_id;} + +private: + + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + + bool emit_vertex(nir_intrinsic_instr* instr, bool cut); + void emit_adj_fix(); + + bool process_store_output(nir_intrinsic_instr* instr); + bool process_load_input(nir_intrinsic_instr* instr); + + bool emit_store(nir_intrinsic_instr* instr); + bool emit_load_per_vertex_input(nir_intrinsic_instr* instr); + + void do_finalize() override; + + r600_pipe_shader *m_pipe_shader; + const pipe_stream_output_info *m_so_info; + + std::array<PValue, 6> m_per_vertex_offsets; + PValue m_primitive_id; + PValue m_invocation_id; + PValue m_export_base[4]; + bool m_first_vertex_emitted; + + int m_offset; + int m_next_input_ring_offset; + r600_shader_key m_key; + int m_clip_dist_mask; + unsigned m_cur_ring_output; + bool m_gs_tri_strip_adj_fix; + uint64_t m_input_mask; + + std::map<int, MemRingOutIntruction *> streamout_data; +}; + +} + +#endif // SFN_GEOMETRYSHADERFROMNIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp new file mode 100644 index 000000000..fb76695c6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.cpp @@ -0,0 +1,108 @@ +#include "sfn_shader_tcs.h" +#include "sfn_instruction_gds.h" +#include "tgsi/tgsi_from_mesa.h" + +namespace r600 { + +TcsShaderFromNir::TcsShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector& sel, + const r600_shader_key& key, + enum chip_class chip_class): + 
ShaderFromNirProcessor (PIPE_SHADER_TESS_CTRL, sel, sh->shader, + sh->scratch_space_needed, chip_class, key.tcs.first_atomic_counter), + m_reserved_registers(0) +{ + sh_info().tcs_prim_mode = key.tcs.prim_mode; +} + +bool TcsShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return true; + + auto intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_primitive_id: + m_sv_values.set(es_primitive_id); + break; + case nir_intrinsic_load_invocation_id: + m_sv_values.set(es_invocation_id); + break; + case nir_intrinsic_load_tcs_rel_patch_id_r600: + m_sv_values.set(es_rel_patch_id); + break; + case nir_intrinsic_load_tcs_tess_factor_base_r600: + m_sv_values.set(es_tess_factor_base); + break; + default: + + ; + } + return true; +} + +bool TcsShaderFromNir::do_allocate_reserved_registers() +{ + if (m_sv_values.test(es_primitive_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,0); + gpr->set_as_input(); + m_primitive_id.reset(gpr); + } + + if (m_sv_values.test(es_invocation_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,2); + gpr->set_as_input(); + m_invocation_id.reset(gpr); + } + + if (m_sv_values.test(es_rel_patch_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,1); + gpr->set_as_input(); + m_rel_patch_id.reset(gpr); + } + + if (m_sv_values.test(es_tess_factor_base)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,3); + gpr->set_as_input(); + m_tess_factor_base.reset(gpr); + } + + set_reserved_registers(m_reserved_registers); + + return true; +} + +bool TcsShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_tcs_rel_patch_id_r600: + return load_preloaded_value(instr->dest, 0, m_rel_patch_id); + case nir_intrinsic_load_invocation_id: + return load_preloaded_value(instr->dest, 0, m_invocation_id); + case nir_intrinsic_load_primitive_id: + return load_preloaded_value(instr->dest, 0, m_primitive_id); + case nir_intrinsic_load_tcs_tess_factor_base_r600: + return load_preloaded_value(instr->dest, 0, m_tess_factor_base); + case nir_intrinsic_store_tf_r600: + return store_tess_factor(instr); + default: + return false; + } +} + +bool TcsShaderFromNir::store_tess_factor(nir_intrinsic_instr* instr) +{ + const GPRVector::Swizzle& swizzle = (instr->src[0].ssa->num_components == 4) ? 
+ GPRVector::Swizzle({0, 1, 2, 3}) : GPRVector::Swizzle({0, 1, 7, 7}); + auto val = vec_from_nir_with_fetch_constant(instr->src[0], + (1 << instr->src[0].ssa->num_components) - 1, swizzle); + emit_instruction(new GDSStoreTessFactor(val)); + return true; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h new file mode 100644 index 000000000..051078104 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tcs.h @@ -0,0 +1,33 @@ +#ifndef TCSSHADERFROMNIR_H +#define TCSSHADERFROMNIR_H + +#include "sfn_shader_base.h" + +namespace r600 { + +class TcsShaderFromNir : public ShaderFromNirProcessor +{ +public: + TcsShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, const r600_shader_key& key, enum chip_class chip_class); + bool scan_sysvalue_access(nir_instr *instr) override; + +private: + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + bool store_tess_factor(nir_intrinsic_instr* instr); + + void do_finalize() override {} + + int m_reserved_registers; + PValue m_patch_id; + PValue m_rel_patch_id; + PValue m_invocation_id; + PValue m_primitive_id; + PValue m_tess_factor_base; + + +}; + +} + +#endif // TCSSHADERFROMNIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp new file mode 100644 index 000000000..d1c75515a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.cpp @@ -0,0 +1,123 @@ +#include "sfn_shader_tess_eval.h" +#include "tgsi/tgsi_from_mesa.h" + +namespace r600 { + +TEvalShaderFromNir::TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, + const r600_shader_key& key, r600_shader *gs_shader, + enum chip_class chip_class): + VertexStage(PIPE_SHADER_TESS_EVAL, sel, sh->shader, + sh->scratch_space_needed, chip_class, key.tes.first_atomic_counter), + m_reserved_registers(0), + m_key(key) + +{ + sh->shader.tes_as_es = key.tes.as_es; + if (key.tes.as_es) + m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader)); + else + m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key)); +} + +bool TEvalShaderFromNir::scan_sysvalue_access(nir_instr *instr) +{ + if (instr->type != nir_instr_type_intrinsic) + return true; + + auto ir = nir_instr_as_intrinsic(instr); + + switch (ir->intrinsic) { + case nir_intrinsic_load_tess_coord_r600: + m_sv_values.set(es_tess_coord); + break; + case nir_intrinsic_load_primitive_id: + m_sv_values.set(es_primitive_id); + break; + case nir_intrinsic_load_tcs_rel_patch_id_r600: + m_sv_values.set(es_rel_patch_id); + break; + case nir_intrinsic_store_output: + m_export_processor->scan_store_output(ir); + break; + default: + ; + } + return true; +} + +void TEvalShaderFromNir::emit_shader_start() +{ + m_export_processor->emit_shader_start(); +} + +bool TEvalShaderFromNir::do_allocate_reserved_registers() +{ + if (m_sv_values.test(es_tess_coord)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,0); + gpr->set_as_input(); + m_tess_coord[0].reset(gpr); + gpr = new GPRValue(0,1); + gpr->set_as_input(); + m_tess_coord[1].reset(gpr); + } + + if (m_sv_values.test(es_rel_patch_id)) { + m_reserved_registers = 1; + auto gpr = new GPRValue(0,2); + gpr->set_as_input(); + m_rel_patch_id.reset(gpr); + } + + if (m_sv_values.test(es_primitive_id) || + m_key.vs.as_gs_a) { + m_reserved_registers = 1; + auto gpr = new 
GPRValue(0,3); + gpr->set_as_input(); + m_primitive_id.reset(gpr); + if (m_key.vs.as_gs_a) + inject_register(0, 3, m_primitive_id, false); + } + set_reserved_registers(m_reserved_registers); + return true; +} + +bool TEvalShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_tess_coord_r600: + return load_preloaded_value(instr->dest, 0, m_tess_coord[0]) && + load_preloaded_value(instr->dest, 1, m_tess_coord[1]); + case nir_intrinsic_load_primitive_id: + return load_preloaded_value(instr->dest, 0, m_primitive_id); + case nir_intrinsic_load_tcs_rel_patch_id_r600: + return load_preloaded_value(instr->dest, 0, m_rel_patch_id); + case nir_intrinsic_store_output: + return m_export_processor->store_output(instr); + default: + return false; + } +} + +void TEvalShaderFromNir::do_finalize() +{ + m_export_processor->finalize_exports(); +} + + +bool TEvalShaderFromNir::emit_load_tess_coord(nir_intrinsic_instr* instr) +{ + bool result = load_preloaded_value(instr->dest, 0, m_tess_coord[0]) && + load_preloaded_value(instr->dest, 1, m_tess_coord[1]); + + m_tess_coord[2] = from_nir(instr->dest, 2); + + + emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2], + m_tess_coord[0], {alu_last_instr, alu_write, alu_src0_neg})); + emit_instruction(new AluInstruction(op2_add, m_tess_coord[2], m_tess_coord[2], + m_tess_coord[1], {alu_last_instr, alu_write, alu_src0_neg})); + return result; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h new file mode 100644 index 000000000..a1b7d3a9c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_tess_eval.h @@ -0,0 +1,39 @@ +#ifndef TEVALSHADERFROMNIR_H +#define TEVALSHADERFROMNIR_H + +#include "sfn_shader_base.h" +#include "sfn_vertexstageexport.h" + +namespace r600 { + +class TEvalShaderFromNir : public VertexStage +{ +public: + TEvalShaderFromNir(r600_pipe_shader *sh, r600_pipe_shader_selector& sel, + const r600_shader_key& key, r600_shader *gs_shader, + enum chip_class chip_class); + bool scan_sysvalue_access(nir_instr *instr) override; + PValue primitive_id() override {return m_primitive_id;} + private: + void emit_shader_start() override; + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + bool emit_load_tess_coord(nir_intrinsic_instr* instr); + bool load_tess_z_coord(nir_intrinsic_instr* instr); + + void do_finalize() override; + + + unsigned m_reserved_registers; + PValue m_tess_coord[3]; + PValue m_rel_patch_id; + PValue m_primitive_id; + + std::unique_ptr<VertexStageExportBase> m_export_processor; + const r600_shader_key& m_key; +}; + + +} + +#endif // TEVALSHADERFROMNIR_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp new file mode 100644 index 000000000..f2c4de3fa --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.cpp @@ -0,0 +1,230 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + 
* license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_from_mesa.h"
+#include "sfn_shader_vertex.h"
+#include "sfn_instruction_lds.h"
+
+#include <queue>
+
+
+namespace r600 {
+
+using std::priority_queue;
+
+VertexShaderFromNir::VertexShaderFromNir(r600_pipe_shader *sh,
+                                         r600_pipe_shader_selector& sel,
+                                         const r600_shader_key& key,
+                                         struct r600_shader* gs_shader,
+                                         enum chip_class chip_class):
+   VertexStage(PIPE_SHADER_VERTEX, sel, sh->shader,
+               sh->scratch_space_needed, chip_class, key.vs.first_atomic_counter),
+   m_num_clip_dist(0),
+   m_last_param_export(nullptr),
+   m_last_pos_export(nullptr),
+   m_pipe_shader(sh),
+   m_enabled_stream_buffers_mask(0),
+   m_so_info(&sel.so),
+   m_vertex_id(),
+   m_key(key),
+   m_max_attrib(0)
+{
+   // reg 0 is used in the fetch shader
+   increment_reserved_registers();
+
+   sh_info().atomic_base = key.vs.first_atomic_counter;
+   sh_info().vs_as_gs_a = m_key.vs.as_gs_a;
+
+   if (key.vs.as_es) {
+      sh->shader.vs_as_es = true;
+      m_export_processor.reset(new VertexStageExportForGS(*this, gs_shader));
+   } else if (key.vs.as_ls) {
+      sh->shader.vs_as_ls = true;
+      sfn_log << SfnLog::trans << "Start VS for LS\n";
+      m_export_processor.reset(new VertexStageExportForES(*this));
+   } else {
+      m_export_processor.reset(new VertexStageExportForFS(*this, &sel.so, sh, key));
+   }
+}
+
+bool VertexShaderFromNir::scan_inputs_read(const nir_shader *sh)
+{
+   uint64_t inputs = sh->info.inputs_read;
+
+   while (inputs) {
+      unsigned i = u_bit_scan64(&inputs);
+      if (i < VERT_ATTRIB_MAX) {
+         ++sh_info().ninput;
+      }
+   }
+   m_max_attrib = sh_info().ninput;
+   return true;
+}
+
+bool VertexShaderFromNir::do_allocate_reserved_registers()
+{
+   /* Since the vertex ID is nearly always used, we add it here as an input so
+    * that the registers used for vertex attributes don't get clobbered by the
+    * register merge step */
+   auto R0x = new GPRValue(0,0);
+   R0x->set_as_input();
+   m_vertex_id.reset(R0x);
+   inject_register(0, 0, m_vertex_id, false);
+
+   if (m_key.vs.as_gs_a || m_sv_values.test(es_primitive_id)) {
+      auto R0z = new GPRValue(0,2);
+      R0z->set_as_input();
+      m_primitive_id.reset(R0z);
+      inject_register(0, 2, m_primitive_id, false);
+   }
+
+   if (m_sv_values.test(es_instanceid)) {
+      auto R0w = new GPRValue(0,3);
+      R0w->set_as_input();
+      m_instance_id.reset(R0w);
+      inject_register(0, 3, m_instance_id, false);
+   }
+
+   if (m_sv_values.test(es_rel_patch_id)) {
+      auto R0y = new GPRValue(0,1);
+      R0y->set_as_input();
+      m_rel_vertex_id.reset(R0y);
+      inject_register(0, 1, m_rel_vertex_id, false);
+   }
+
+   m_attribs.resize(4 * m_max_attrib + 4);
+   for (unsigned i = 0; i < m_max_attrib + 1; ++i) {
+      for (unsigned k = 0; k < 4; ++k) {
+         auto gpr = std::make_shared<GPRValue>(i + 1, k);
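+         /* Register layout sketch (derived from the allocations above, for
+          * illustration only): the fetch shader preloads vertex attribute i
+          * into R(i+1), one channel per component, while R0 carries the
+          * system values injected earlier in this function:
+          *
+          *   R0.x vertex_id      R0.y rel patch id (when used)
+          *   R0.z primitive_id   R0.w instance_id  (when used)
+          *   R1.xyzw attribute 0, R2.xyzw attribute 1, ...
+          */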
gpr->set_as_input();
+         m_attribs[4 * i + k] = gpr;
+         inject_register(i + 1, k, gpr, false);
+      }
+   }
+
+   return true;
+}
+
+void VertexShaderFromNir::emit_shader_start()
+{
+   m_export_processor->emit_shader_start();
+}
+
+bool VertexShaderFromNir::scan_sysvalue_access(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
+      switch (ii->intrinsic) {
+      case nir_intrinsic_load_vertex_id:
+         m_sv_values.set(es_vertexid);
+         break;
+      case nir_intrinsic_load_instance_id:
+         m_sv_values.set(es_instanceid);
+         break;
+      case nir_intrinsic_load_tcs_rel_patch_id_r600:
+         m_sv_values.set(es_rel_patch_id);
+         break;
+      case nir_intrinsic_store_output:
+         m_export_processor->scan_store_output(ii);
+         break;
+      default:
+         ;
+      }
+      break;
+   }
+   default:
+      ;
+   }
+   return true;
+}
+
+bool VertexShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_vertex_id:
+      return load_preloaded_value(instr->dest, 0, m_vertex_id);
+   case nir_intrinsic_load_tcs_rel_patch_id_r600:
+      return load_preloaded_value(instr->dest, 0, m_rel_vertex_id);
+   case nir_intrinsic_load_instance_id:
+      return load_preloaded_value(instr->dest, 0, m_instance_id);
+   case nir_intrinsic_store_local_shared_r600:
+      return emit_store_local_shared(instr);
+   case nir_intrinsic_store_output:
+      return m_export_processor->store_output(instr);
+   case nir_intrinsic_load_input:
+      return load_input(instr);
+   default:
+      return false;
+   }
+}
+
+bool VertexShaderFromNir::load_input(nir_intrinsic_instr* instr)
+{
+   unsigned location = nir_intrinsic_base(instr);
+
+   if (location < VERT_ATTRIB_MAX) {
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         auto src = m_attribs[4 * location + i];
+
+         if (i == 0)
+            set_input(location, src);
+
+         load_preloaded_value(instr->dest, i, src, i == (unsigned)(instr->num_components - 1));
+      }
+      return true;
+   }
+   fprintf(stderr, "r600-NIR: Unimplemented load_input for %u\n", location);
+   return false;
+}
+
+bool VertexShaderFromNir::emit_store_local_shared(nir_intrinsic_instr* instr)
+{
+   unsigned write_mask = nir_intrinsic_write_mask(instr);
+
+   auto address = from_nir(instr->src[1], 0);
+   int swizzle_base = (write_mask & 0x3) ?
0 : 2; + write_mask |= write_mask >> 2; + + auto value = from_nir(instr->src[0], swizzle_base); + if (!(write_mask & 2)) { + emit_instruction(new LDSWriteInstruction(address, 1, value)); + } else { + auto value1 = from_nir(instr->src[0], swizzle_base + 1); + emit_instruction(new LDSWriteInstruction(address, 1, value, value1)); + } + + return true; +} + +void VertexShaderFromNir::do_finalize() +{ + m_export_processor->finalize_exports(); +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h new file mode 100644 index 000000000..c1ba251de --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shader_vertex.h @@ -0,0 +1,83 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef sfn_vertex_shader_from_nir_h +#define sfn_vertex_shader_from_nir_h + +#include "sfn_shader_base.h" +#include "sfn_vertexstageexport.h" + +namespace r600 { + +class VertexShaderFromNir : public VertexStage { +public: + VertexShaderFromNir(r600_pipe_shader *sh, + r600_pipe_shader_selector &sel, + const r600_shader_key &key, r600_shader *gs_shader, + enum chip_class chip_class); + + bool scan_sysvalue_access(nir_instr *instr) override; + + PValue primitive_id() override {return m_primitive_id;} +protected: + + // todo: encapsulate + unsigned m_num_clip_dist; + ExportInstruction *m_last_param_export; + ExportInstruction *m_last_pos_export; + r600_pipe_shader *m_pipe_shader; + unsigned m_enabled_stream_buffers_mask; + const pipe_stream_output_info *m_so_info; + void do_finalize() override; + + std::map<unsigned, unsigned> m_param_map; + + bool scan_inputs_read(const nir_shader *sh) override; + +private: + bool load_input(nir_intrinsic_instr* instr); + + void finalize_exports(); + + void emit_shader_start() override; + bool do_allocate_reserved_registers() override; + bool emit_intrinsic_instruction_override(nir_intrinsic_instr* instr) override; + bool emit_store_local_shared(nir_intrinsic_instr* instr); + + PValue m_vertex_id; + PValue m_instance_id; + PValue m_rel_vertex_id; + PValue m_primitive_id; + std::vector<PGPRValue> m_attribs; + r600_shader_key m_key; + + std::unique_ptr<VertexStageExportBase> m_export_processor; + unsigned m_max_attrib; +}; + +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp new file mode 100644 index 000000000..1ac94ccc7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.cpp @@ -0,0 +1,448 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "sfn_shaderio.h" +#include "sfn_debug.h" +#include "tgsi/tgsi_from_mesa.h" + +#include <queue> + +namespace r600 { + +using std::vector; +using std::priority_queue; + +ShaderIO::ShaderIO(): + m_two_sided(false), + m_lds_pos(0) +{ + +} + +ShaderInput::ShaderInput(tgsi_semantic name): + m_name(name), + m_gpr(0), + m_uses_interpolate_at_centroid(false) +{ +} + +ShaderInput::~ShaderInput() +{ +} + +void ShaderInput::set_lds_pos(UNUSED int lds_pos) +{ +} + +int ShaderInput::ij_index() const +{ + return -1; +} + +bool ShaderInput::interpolate() const +{ + return false; +} + +int ShaderInput::lds_pos() const +{ + return 0; +} + +bool ShaderInput::is_varying() const +{ + return false; +} + +void ShaderInput::set_uses_interpolate_at_centroid() +{ + m_uses_interpolate_at_centroid = true; +} + +void ShaderInput::set_ioinfo(r600_shader_io& io, int translated_ij_index) const +{ + io.name = m_name; + io.gpr = m_gpr; + io.ij_index = translated_ij_index; + io.lds_pos = lds_pos(); + io.uses_interpolate_at_centroid = m_uses_interpolate_at_centroid; + + set_specific_ioinfo(io); +} + +void ShaderInput::set_specific_ioinfo(UNUSED r600_shader_io& io) const +{ +} + +ShaderInputSystemValue::ShaderInputSystemValue(tgsi_semantic name, int gpr): + ShaderInput(name), + m_gpr(gpr) +{ +} + +void ShaderInputSystemValue::set_specific_ioinfo(r600_shader_io& io) const +{ + io.gpr = m_gpr; + io.ij_index = 0; +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location, + unsigned frac, unsigned components, + tgsi_interpolate_mode interpolate, + tgsi_interpolate_loc interp_loc): + ShaderInput(_name), + m_driver_location(driver_location), + m_location_frac(frac), + m_sid(sid), + m_interpolate(interpolate), + m_interpolate_loc(interp_loc), + m_ij_index(-10), + m_lds_pos(0), + m_mask(((1 << components) - 1) << frac) +{ + evaluate_spi_sid(); + + m_ij_index = interpolate == TGSI_INTERPOLATE_LINEAR ? 
3 : 0; + switch (interp_loc) { + case TGSI_INTERPOLATE_LOC_CENTROID: m_ij_index += 2; break; + case TGSI_INTERPOLATE_LOC_CENTER: m_ij_index += 1; break; + default: + ; + } +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic _name, int sid, nir_variable *input): + ShaderInput(_name), + m_driver_location(input->data.driver_location), + m_location_frac(input->data.location_frac), + m_sid(sid), + m_ij_index(-10), + m_lds_pos(0), + m_mask(((1 << input->type->components()) - 1) << input->data.location_frac) +{ + sfn_log << SfnLog::io << __func__ + << "name:" << _name + << " sid: " << sid + << " op: " << input->data.interpolation; + + evaluate_spi_sid(); + + enum glsl_base_type base_type = + glsl_get_base_type(glsl_without_array(input->type)); + + switch (input->data.interpolation) { + case INTERP_MODE_NONE: + if (glsl_base_type_is_integer(base_type)) { + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (name() == TGSI_SEMANTIC_COLOR) { + m_interpolate = TGSI_INTERPOLATE_COLOR; + m_ij_index = 0; + break; + } + FALLTHROUGH; + + case INTERP_MODE_SMOOTH: + assert(!glsl_base_type_is_integer(base_type)); + + m_interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + m_ij_index = 0; + break; + + case INTERP_MODE_NOPERSPECTIVE: + assert(!glsl_base_type_is_integer(base_type)); + + m_interpolate = TGSI_INTERPOLATE_LINEAR; + m_ij_index = 3; + break; + + case INTERP_MODE_FLAT: + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + + default: + m_interpolate = TGSI_INTERPOLATE_CONSTANT; + break; + } + + if (input->data.sample) { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_SAMPLE; + } else if (input->data.centroid) { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTROID; + m_ij_index += 2; + } else { + m_interpolate_loc = TGSI_INTERPOLATE_LOC_CENTER; + m_ij_index += 1; + } + sfn_log << SfnLog::io + << " -> IP:" << m_interpolate + << " IJ:" << m_ij_index + << "\n"; +} + +bool ShaderInputVarying::is_varying() const +{ + return true; +} + +void ShaderInputVarying::update_mask(int additional_comps, int frac) +{ + m_mask |= ((1 << additional_comps) - 1) << frac; +} + +void ShaderInputVarying::evaluate_spi_sid() +{ + switch (name()) { + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_EDGEFLAG: + case TGSI_SEMANTIC_FACE: + case TGSI_SEMANTIC_SAMPLEMASK: + assert(0 && "System value used as varying"); + break; + case TGSI_SEMANTIC_POSITION: + m_spi_sid = 0; + break; + case TGSI_SEMANTIC_GENERIC: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_PCOORD: + m_spi_sid = m_sid + 1; + break; + default: + /* For non-generic params - pack name and sid into 8 bits */ + m_spi_sid = (0x80 | (name() << 3) | m_sid) + 1; + } +} + +ShaderInputVarying::ShaderInputVarying(tgsi_semantic name, + const ShaderInputVarying& orig, size_t location): + ShaderInput(name), + m_driver_location(location), + m_location_frac(orig.location_frac()), + + m_sid(orig.m_sid), + m_spi_sid(orig.m_spi_sid), + m_interpolate(orig.m_interpolate), + m_interpolate_loc(orig.m_interpolate_loc), + m_ij_index(orig.m_ij_index), + m_lds_pos(0), + m_mask(0) +{ + evaluate_spi_sid(); +} + +bool ShaderInputVarying::interpolate() const +{ + return m_interpolate > 0; +} + +int ShaderInputVarying::ij_index() const +{ + return m_ij_index; +} + +void ShaderInputVarying::set_lds_pos(int lds_pos) +{ + m_lds_pos = lds_pos; +} + +int ShaderInputVarying::lds_pos() const +{ + return m_lds_pos; +} + +void ShaderInputVarying::set_specific_ioinfo(r600_shader_io& io) const +{ + io.interpolate = m_interpolate; + io.interpolate_location = m_interpolate_loc; + io.sid = m_sid; + 
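+   /* The spi_sid values assigned in evaluate_spi_sid() above follow a small
+    * fixed scheme; as a minimal standalone sketch (pack_spi_sid is a name
+    * invented only for this illustration):
+    *
+    *   unsigned pack_spi_sid(unsigned name, unsigned sid) {
+    *      if (name == TGSI_SEMANTIC_POSITION)
+    *         return 0;                             // position is not a param
+    *      if (name == TGSI_SEMANTIC_GENERIC ||
+    *          name == TGSI_SEMANTIC_TEXCOORD ||
+    *          name == TGSI_SEMANTIC_PCOORD)
+    *         return sid + 1;                       // generic params: sid + 1
+    *      return (0x80 | (name << 3) | sid) + 1;   // pack name and sid in 8 bits
+    *   }
+    *
+    * e.g. TGSI_SEMANTIC_COLOR (name 1, sid 0) yields (0x80 | 0x08 | 0) + 1 = 0x89.
+    */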
io.spi_sid = m_spi_sid;
+   set_color_ioinfo(io);
+}
+
+void ShaderInputVarying::set_color_ioinfo(UNUSED r600_shader_io& io) const
+{
+   sfn_log << SfnLog::io << __func__ << " Don't set color_ioinfo\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input):
+   ShaderInputVarying(name, sid, input),
+   m_back_color_input_idx(0)
+{
+   sfn_log << SfnLog::io << __func__ << " name: " << name << " sid: " << sid << "\n";
+}
+
+ShaderInputColor::ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location,
+                                   unsigned frac, unsigned components, tgsi_interpolate_mode interpolate,
+                                   tgsi_interpolate_loc interp_loc):
+   ShaderInputVarying(_name, sid, driver_location, frac, components, interpolate, interp_loc),
+   m_back_color_input_idx(0)
+{
+   sfn_log << SfnLog::io << __func__ << " name: " << _name << " sid: " << sid << "\n";
+}
+
+void ShaderInputColor::set_back_color(unsigned back_color_input_idx)
+{
+   sfn_log << SfnLog::io << "Set back color index " << back_color_input_idx << "\n";
+   m_back_color_input_idx = back_color_input_idx;
+}
+
+void ShaderInputColor::set_color_ioinfo(r600_shader_io& io) const
+{
+   sfn_log << SfnLog::io << __func__ << " set color_ioinfo " << m_back_color_input_idx << "\n";
+   io.back_color_input = m_back_color_input_idx;
+}
+
+size_t ShaderIO::add_input(ShaderInput *input)
+{
+   m_inputs.push_back(PShaderInput(input));
+   return m_inputs.size() - 1;
+}
+
+PShaderInput ShaderIO::find_varying(tgsi_semantic name, int sid)
+{
+   for (auto& a : m_inputs) {
+      if (a->name() == name) {
+         assert(a->is_varying());
+         auto& v = static_cast<ShaderInputVarying&>(*a);
+         if (v.sid() == sid)
+            return a;
+      }
+   }
+   return nullptr;
+}
+
+struct VaryingShaderIOLess {
+   bool operator () (PShaderInput lhs, PShaderInput rhs) const
+   {
+      const ShaderInputVarying& l = static_cast<ShaderInputVarying&>(*lhs);
+      const ShaderInputVarying& r = static_cast<ShaderInputVarying&>(*rhs);
+      return l.location() > r.location();
+   }
+};
+
+void ShaderIO::sort_varying_inputs()
+{
+   priority_queue<PShaderInput, vector<PShaderInput>, VaryingShaderIOLess> q;
+
+   vector<int> idx;
+
+   for (auto i = 0u; i < m_inputs.size(); ++i) {
+      if (m_inputs[i]->is_varying()) {
+         q.push(m_inputs[i]);
+         idx.push_back(i);
+      }
+   }
+
+   auto next_index = idx.begin();
+   while (!q.empty()) {
+      auto si = q.top();
+      q.pop();
+      m_inputs[*next_index++] = si;
+   }
+}
+
+void ShaderIO::update_lds_pos()
+{
+   m_lds_pos = -1;
+   m_ldspos.resize(m_inputs.size());
+   for (auto& i : m_inputs) {
+      if (!i->is_varying())
+         continue;
+
+      auto& v = static_cast<ShaderInputVarying&>(*i);
+      /* There are shaders that miss an input ... */
+      if (m_ldspos.size() <= static_cast<unsigned>(v.location()))
+         m_ldspos.resize(v.location() + 1);
+   }
+
+   std::fill(m_ldspos.begin(), m_ldspos.end(), -1);
+   for (auto& i : m_inputs) {
+      if (!i->is_varying())
+         continue;
+
+      auto& v = static_cast<ShaderInputVarying&>(*i);
+      if (v.name() == TGSI_SEMANTIC_POSITION)
+         continue;
+
+      if (m_ldspos[v.location()] < 0) {
+         ++m_lds_pos;
+         m_ldspos[v.location()] = m_lds_pos;
+      }
+      v.set_lds_pos(m_lds_pos);
+   }
+   ++m_lds_pos;
+}
+
+std::vector<PShaderInput> &ShaderIO::inputs()
+{
+   return m_inputs;
+}
+
+ShaderInput& ShaderIO::input(size_t k)
+{
+   assert(k < m_inputs.size());
+   return *m_inputs[k];
+}
+
+ShaderInput& ShaderIO::input(size_t driver_loc, int frac)
+{
+   for (auto& i: m_inputs) {
+      if (!i->is_varying())
+         continue;
+
+      auto& v = static_cast<ShaderInputVarying&>(*i);
+      if (v.location() == driver_loc && v.location_frac() == frac)
+         return v;
+
} + return input(driver_loc); +} + +void ShaderIO::set_two_sided() +{ + m_two_sided = true; +} + +std::pair<unsigned, unsigned> +r600_get_varying_semantic(unsigned varying_location) +{ + std::pair<unsigned, unsigned> result; + tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location), + true, &result.first, &result.second); + + if (result.first == TGSI_SEMANTIC_GENERIC) { + result.second += 9; + } else if (result.first == TGSI_SEMANTIC_PCOORD) { + result.second = 8; + } + return result; +} + + + +} + diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h new file mode 100644 index 000000000..855bbe143 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_shaderio.h @@ -0,0 +1,176 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_SHADERIO_H +#define SFN_SHADERIO_H + +#include "compiler/nir/nir.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallium/drivers/r600/r600_shader.h" + +#include <vector> +#include <memory> + +namespace r600 { + +class ShaderInput { +public: + ShaderInput(); + virtual ~ShaderInput(); + + ShaderInput(tgsi_semantic name); + tgsi_semantic name() const {return m_name;} + + void set_gpr(int gpr) {m_gpr = gpr;} + int gpr() const {return m_gpr;} + void set_ioinfo(r600_shader_io& io, int translated_ij_index) const; + + virtual void set_lds_pos(int lds_pos); + virtual int ij_index() const; + virtual bool interpolate() const; + virtual int lds_pos() const; + void set_uses_interpolate_at_centroid(); + + virtual bool is_varying() const; + +private: + virtual void set_specific_ioinfo(r600_shader_io& io) const; + + tgsi_semantic m_name; + int m_gpr; + bool m_uses_interpolate_at_centroid; +}; + +using PShaderInput = std::shared_ptr<ShaderInput>; + +class ShaderInputSystemValue: public ShaderInput { +public: + ShaderInputSystemValue(tgsi_semantic name, int gpr); + void set_specific_ioinfo(r600_shader_io& io) const; + int m_gpr; +}; + +class ShaderInputVarying : public ShaderInput { +public: + ShaderInputVarying(tgsi_semantic _name, int sid, unsigned driver_location, + unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, + tgsi_interpolate_loc interp_loc); + ShaderInputVarying(tgsi_semantic name, int sid, nir_variable *input); + ShaderInputVarying(tgsi_semantic name, const ShaderInputVarying& orig, + size_t location); + + void set_lds_pos(int lds_pos) override; + + int ij_index() const override; + + bool interpolate() const override; + + int lds_pos() const override; + + int sid() const {return m_sid;} + + void update_mask(int additional_comps, int frac); + + size_t location() const {return m_driver_location;} + int location_frac() const {return m_location_frac;} + + bool is_varying() const override; + +private: + void evaluate_spi_sid(); + + virtual void set_color_ioinfo(r600_shader_io& io) const; + void set_specific_ioinfo(r600_shader_io& io) const override; + size_t m_driver_location; + int m_location_frac; + int m_sid; + int m_spi_sid; + tgsi_interpolate_mode m_interpolate; + tgsi_interpolate_loc m_interpolate_loc; + int m_ij_index; + int m_lds_pos; + int m_mask; +}; + +class ShaderInputColor: public ShaderInputVarying { +public: + ShaderInputColor(tgsi_semantic _name, int sid, unsigned driver_location, + unsigned frac, unsigned components, tgsi_interpolate_mode interpolate, + tgsi_interpolate_loc interp_loc); + ShaderInputColor(tgsi_semantic name, int sid, nir_variable *input); + void set_back_color(unsigned back_color_input_idx); + unsigned back_color_input_index() const { + return m_back_color_input_idx; + } +private: + void set_color_ioinfo(UNUSED r600_shader_io& io) const override; + unsigned m_back_color_input_idx; + +}; + +class ShaderIO +{ +public: + ShaderIO(); + + size_t add_input(ShaderInput *input); + + std::vector<PShaderInput>& inputs(); + ShaderInput& input(size_t k); + + ShaderInput& input(size_t driver_loc, int frac); + + void set_two_sided(); + bool two_sided() {return m_two_sided;} + + int nlds() const { + return m_lds_pos; + } + + void sort_varying_inputs(); + + size_t size() const {return m_inputs.size();} + + PShaderInput find_varying(tgsi_semantic name, int sid); + + void update_lds_pos(); + +private: + std::vector<PShaderInput> m_inputs; + std::vector<int> m_ldspos; + bool m_two_sided; + int m_lds_pos; + +}; + 
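+/* A usage sketch for r600_get_varying_semantic() (declared just below,
+ * defined in sfn_shaderio.cpp earlier in this patch): it wraps
+ * tgsi_get_gl_varying_semantic() and then remaps the sid range, so, for
+ * example (values follow from that definition, shown here for illustration):
+ *
+ *   auto sem = r600_get_varying_semantic(VARYING_SLOT_VAR0);
+ *   // sem.first  == TGSI_SEMANTIC_GENERIC
+ *   // sem.second == 9   (generic sids are shifted up by 9)
+ *
+ *   sem = r600_get_varying_semantic(VARYING_SLOT_PNTC);
+ *   // sem.first  == TGSI_SEMANTIC_PCOORD, sem.second == 8 (fixed slot)
+ */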
+std::pair<unsigned, unsigned> +r600_get_varying_semantic(unsigned varying_location); + + +} + +#endif // SFN_SHADERIO_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp new file mode 100644 index 000000000..3228b75fb --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.cpp @@ -0,0 +1,242 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_value.h" +#include "util/macros.h" + +#include <iostream> +#include <iomanip> +#include <cassert> + +namespace r600 { + +using std::unique_ptr; +using std::make_shared; + +const char *Value::component_names = "xyzw01?_!"; + +Value::Value(): + m_type(gpr), + m_chan(0) +{ +} + +Value::Value(Type type, uint32_t chan): + m_type(type), + m_chan(chan) +{ + +} + + + +Value::Value(Type type): + Value(type, 0) +{ +} + +Value::Type Value::type() const +{ + return m_type; +} + +void Value::set_chan(uint32_t chan) +{ + m_chan = chan; +} + +void Value::print(std::ostream& os) const +{ + do_print(os); +} + +void Value::print(std::ostream& os, const PrintFlags& flags) const +{ + if (flags.flags & PrintFlags::has_neg) os << '-'; + if (flags.flags & PrintFlags::has_abs) os << '|'; + do_print(os, flags); + if (flags.flags & PrintFlags::has_abs) os << '|'; +} + +void Value::do_print(std::ostream& os, const PrintFlags& flags) const +{ + (void)flags; + do_print(os); +} + +bool Value::operator < (const Value& lhs) const +{ + return sel() < lhs.sel() || + (sel() == lhs.sel() && chan() < lhs.chan()); +} + + +LiteralValue::LiteralValue(float value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.f=value; +} + + +LiteralValue::LiteralValue(uint32_t value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.u=value; +} + +LiteralValue::LiteralValue(int value, uint32_t chan): + Value(Value::literal, chan) +{ + m_value.u=value; +} + +uint32_t LiteralValue::sel() const +{ + return ALU_SRC_LITERAL; +} + +uint32_t LiteralValue::value() const +{ + return m_value.u; +} + +float LiteralValue::value_float() const +{ + return m_value.f; +} + +void LiteralValue::do_print(std::ostream& os) const +{ + os << "[0x" << std::setbase(16) << m_value.u << " " << std::setbase(10) + << m_value.f << "]."; + os << component_names[chan()]; +} + +void LiteralValue::do_print(std::ostream& os, UNUSED 
const PrintFlags& flags) const +{ + os << "[0x" << std::setbase(16) << m_value.u << " " + << std::setbase(10); + + os << m_value.f << "f"; + + os<< "]"; +} + +bool LiteralValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::literal); + const auto& rhs = static_cast<const LiteralValue&>(other); + return (sel() == rhs.sel() && + value() == rhs.value()); +} + +InlineConstValue::InlineConstValue(int value, int chan): + Value(Value::cinline, chan), + m_value(static_cast<AluInlineConstants>(value)) +{ +} + +uint32_t InlineConstValue::sel() const +{ + return m_value; +} + +void InlineConstValue::do_print(std::ostream& os) const +{ + auto sv_info = alu_src_const.find(m_value); + if (sv_info != alu_src_const.end()) { + os << sv_info->second.descr; + if (sv_info->second.use_chan) + os << '.' << component_names[chan()]; + else if (chan() > 0) + os << "." << component_names[chan()] + << " (W: Channel ignored)"; + } else { + if (m_value >= ALU_SRC_PARAM_BASE && m_value < ALU_SRC_PARAM_BASE + 32) + os << " Param" << m_value - ALU_SRC_PARAM_BASE; + else + os << " E: unknown inline constant " << m_value; + } +} + +bool InlineConstValue::is_equal_to(const Value& other) const +{ + assert(other.type() == Value::Type::cinline); + const auto& rhs = static_cast<const InlineConstValue&>(other); + return sel() == rhs.sel(); +} + +PValue Value::zero(new InlineConstValue(ALU_SRC_0, 0)); +PValue Value::one_f(new InlineConstValue(ALU_SRC_1, 0)); +PValue Value::one_i(new InlineConstValue(ALU_SRC_1_INT, 0)); +PValue Value::zero_dot_5(new InlineConstValue(ALU_SRC_0_5, 0)); + +UniformValue::UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank): + Value(Value::kconst, chan) +{ + m_index = sel; + m_kcache_bank = kcache_bank; +} + +UniformValue::UniformValue(uint32_t sel, uint32_t chan, PValue addr): + Value(Value::kconst, chan), + m_index(sel), + m_kcache_bank(1), + m_addr(addr) +{ + +} + +uint32_t UniformValue::sel() const +{ + const int bank_base[4] = {128, 160, 256, 288}; + return m_index < 512 ? m_index + bank_base[m_kcache_bank] : m_index; +} + +uint32_t UniformValue::kcache_bank() const +{ + return m_kcache_bank; +} + +bool UniformValue::is_equal_to(const Value& other) const +{ + const UniformValue& o = static_cast<const UniformValue&>(other); + return sel() == o.sel() && + m_kcache_bank == o.kcache_bank(); +} + +void UniformValue::do_print(std::ostream& os) const +{ + if (m_index < 512) + os << "KC" << m_kcache_bank << "[" << m_index; + else if (m_addr) + os << "KC[" << *m_addr << "][" << m_index; + else + os << "KCx[" << m_index; + os << "]." 
<< component_names[chan()]; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h new file mode 100644 index 000000000..7bc4528f9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value.h @@ -0,0 +1,194 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SFN_VALUE_H +#define SFN_VALUE_H + +#include "sfn_alu_defines.h" +#include "nir.h" + +#include <memory> +#include <set> +#include <bitset> +#include <iostream> + +namespace r600 { + +class Value { +public: + using Pointer=std::shared_ptr<Value>; + + struct PrintFlags { + PrintFlags():index_mode(0), + flags(0) + { + } + PrintFlags(int im, int f):index_mode(im), + flags(f) + { + } + int index_mode; + int flags; + static const int is_rel = 1; + static const int has_abs = 2; + static const int has_neg = 4; + static const int literal_is_float = 8; + static const int index_ar = 16; + static const int index_loopidx = 32; + }; + + enum Type { + gpr, + kconst, + literal, + cinline, + lds_direct, + gpr_vector, + gpr_array_value, + unknown + }; + + static const char *component_names; + + using LiteralFlags=std::bitset<4>; + + Value(); + + Value(Type type); + + virtual ~Value(){} + + Type type() const; + virtual uint32_t sel() const = 0; + uint32_t chan() const {return m_chan;} + + void set_chan(uint32_t chan); + virtual void set_pin_to_channel() { assert(0 && "Only GPRs can be pinned to a channel ");} + void print(std::ostream& os, const PrintFlags& flags) const; + + void print(std::ostream& os) const; + + bool operator < (const Value& lhs) const; + + static Value::Pointer zero; + static Value::Pointer one_f; + static Value::Pointer zero_dot_5; + static Value::Pointer one_i; + +protected: + Value(Type type, uint32_t chan); + +private: + virtual void do_print(std::ostream& os) const = 0; + virtual void do_print(std::ostream& os, const PrintFlags& flags) const; + + virtual bool is_equal_to(const Value& other) const = 0; + + Type m_type; + uint32_t m_chan; + + friend bool operator == (const Value& lhs, const Value& rhs); +}; + + +inline std::ostream& operator << (std::ostream& os, const Value& v) +{ + v.print(os); + return os; +} + + +inline bool operator == (const Value& lhs, const Value& rhs) +{ + if (lhs.type() == rhs.type()) + return lhs.is_equal_to(rhs); + return false; 
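+   /* Equality is a two-step dispatch: values of different dynamic type never
+    * compare equal, and same-type values defer to the virtual is_equal_to().
+    * A minimal sketch of the resulting behaviour (GPRValue comes from
+    * sfn_value_gpr.h; operator!= is defined just below):
+    *
+    *   PValue a(new GPRValue(1, 0));   // R1.x
+    *   PValue b(new GPRValue(1, 0));   // R1.x, distinct object
+    *   assert(*a == *b);               // same type, same sel and chan
+    *   assert(*a != *Value::one_i);    // gpr vs. inline constant
+    */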
+} + +inline bool operator != (const Value& lhs, const Value& rhs) +{ + return !(lhs == rhs); +} + +using PValue=Value::Pointer; + +struct value_less { + inline bool operator () (PValue lhs, PValue rhs) const { + return *lhs < *rhs; + } +}; + +using ValueSet = std::set<PValue, value_less>; + + +class LiteralValue: public Value { +public: + LiteralValue(float value, uint32_t chan= 0); + LiteralValue(uint32_t value, uint32_t chan= 0); + LiteralValue(int value, uint32_t chan= 0); + uint32_t sel() const override final; + uint32_t value() const; + float value_float() const; +private: + void do_print(std::ostream& os) const override; + void do_print(std::ostream& os, const PrintFlags& flags) const override; + bool is_equal_to(const Value& other) const override; + union { + uint32_t u; + float f; + } m_value; +}; + +class InlineConstValue: public Value { +public: + InlineConstValue(int value, int chan); + uint32_t sel() const override final; +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + AluInlineConstants m_value; +}; + +class UniformValue: public Value { +public: + UniformValue(uint32_t sel, uint32_t chan, uint32_t kcache_bank = 0); + UniformValue(uint32_t sel, uint32_t chan, PValue addr); + uint32_t sel() const override; + uint32_t kcache_bank() const; + PValue addr() const {return m_addr;} + void reset_addr(PValue v) {m_addr = v;} +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + + uint32_t m_index; + uint32_t m_kcache_bank; + PValue m_addr; +}; + +} // end ns r600 + +#endif diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp new file mode 100644 index 000000000..c53b32527 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.cpp @@ -0,0 +1,380 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "sfn_value_gpr.h"
+#include "sfn_valuepool.h"
+#include "sfn_debug.h"
+#include "sfn_liverange.h"
+
+namespace r600 {
+
+using std::vector;
+using std::array;
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan, int base_offset):
+   Value(Value::gpr, chan),
+   m_sel(sel),
+   m_base_offset(base_offset),
+   m_input(false),
+   m_pin_to_channel(false),
+   m_keep_alive(false)
+{
+}
+
+GPRValue::GPRValue(uint32_t sel, uint32_t chan):
+   Value(Value::gpr, chan),
+   m_sel(sel),
+   m_base_offset(0),
+   m_input(false),
+   m_pin_to_channel(false),
+   m_keep_alive(false)
+{
+}
+
+uint32_t GPRValue::sel() const
+{
+   return m_sel;
+}
+
+void GPRValue::do_print(std::ostream& os) const
+{
+   os << 'R';
+   os << m_sel;
+   os << '.' << component_names[chan()];
+}
+
+bool GPRValue::is_equal_to(const Value& other) const
+{
+   assert(other.type() == Value::Type::gpr);
+   const auto& rhs = static_cast<const GPRValue&>(other);
+   return (sel() == rhs.sel() &&
+           chan() == rhs.chan());
+}
+
+void GPRValue::do_print(std::ostream& os, UNUSED const PrintFlags& flags) const
+{
+   os << 'R';
+   os << m_sel;
+   os << '.' << component_names[chan()];
+}
+
+GPRVector::GPRVector(const GPRVector& orig):
+   Value(gpr_vector),
+   m_elms(orig.m_elms),
+   m_valid(orig.m_valid)
+{
+}
+
+GPRVector::GPRVector(std::array<PValue,4> elms):
+   Value(gpr_vector),
+   m_elms(elms),
+   m_valid(false)
+{
+   for (unsigned i = 0; i < 4; ++i)
+      if (!m_elms[i] || (m_elms[i]->type() != Value::gpr)) {
+         assert(0 && "GPR vector not valid because element missing or not a GPR");
+         return;
+      }
+   unsigned sel = m_elms[0]->sel();
+   for (unsigned i = 1; i < 4; ++i)
+      if (m_elms[i]->sel() != sel) {
+         assert(0 && "GPR vector not valid because sel is not equal for all elements");
+         return;
+      }
+   m_valid = true;
+}
+
+GPRVector::GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle):
+   Value (gpr_vector),
+   m_valid(true)
+{
+   for (int i = 0; i < 4; ++i)
+      m_elms[i] = PValue(new GPRValue(sel, swizzle[i]));
+}
+
+GPRVector::GPRVector(const GPRVector& orig, const std::array<uint8_t,4>& swizzle)
+{
+   for (int i = 0; i < 4; ++i)
+      m_elms[i] = orig.reg_i(swizzle[i]);
+   m_valid = orig.m_valid;
+}
+
+void GPRVector::validate() const
+{
+   assert(m_elms[0]);
+   uint32_t sel = m_elms[0]->sel();
+   if (sel >= 124)
+      return;
+
+   for (unsigned i = 1; i < 4; ++i) {
+      assert(m_elms[i]);
+      if (sel != m_elms[i]->sel())
+         return;
+   }
+
+   m_valid = true;
+}
+
+uint32_t GPRVector::sel() const
+{
+   validate();
+   assert(m_valid);
+   return m_elms[0] ? m_elms[0]->sel() : 999;
+}
+
+void GPRVector::set_reg_i(int i, PValue reg)
+{
+   m_elms[i] = reg;
+}
+
+void GPRVector::pin_to_channel(int i)
+{
+   auto& v = static_cast<GPRValue&>(*m_elms[i]);
+   v.set_pin_to_channel();
+}
+
+void GPRVector::pin_all_to_channel()
+{
+   for (auto& v: m_elms) {
+      auto& c = static_cast<GPRValue&>(*v);
+      c.set_pin_to_channel();
+   }
+}
+
+void GPRVector::do_print(std::ostream& os) const
+{
+   os << "R" << sel() << ".";
+   for (int i = 0; i < 4; ++i)
+      os << (m_elms[i] ? component_names[m_elms[i]->chan() < 8 ? m_elms[i]->chan() : 8] : '?');
+}
+
+void GPRVector::swizzle(const Swizzle& swz)
+{
+   Values v(m_elms);
+   for (uint32_t i = 0; i < 4; ++i)
+      if (i != swz[i]) {
+         assert(swz[i] < 4);
+         m_elms[i] = v[swz[i]];
+      }
+}
+
+bool GPRVector::is_equal_to(const Value& other) const
+{
+   if (other.type() != gpr_vector) {
+      std::cerr << "t";
+      return false;
+   }
+
+   const GPRVector& o = static_cast<const GPRVector&>(other);
+
+   for (int i = 0; i < 4; ++i) {
+      if (*m_elms[i] != *o.m_elms[i]) {
+         std::cerr << "elm" << i;
+         return false;
+      }
+   }
+   return true;
+}
+
+
+GPRArrayValue::GPRArrayValue(PValue value, PValue addr, GPRArray *array):
+   Value(gpr_array_value, value->chan()),
+   m_value(value),
+   m_addr(addr),
+   m_array(array)
+{
+}
+
+GPRArrayValue::GPRArrayValue(PValue value, GPRArray *array):
+   Value(gpr_array_value, value->chan()),
+   m_value(value),
+   m_array(array)
+{
+}
+
+static const char *swz_char = "xyzw01_";
+
+void GPRArrayValue::do_print(std::ostream& os) const
+{
+   assert(m_array);
+   os << "R" << m_value->sel();
+   if (m_addr) {
+      os << "[" << *m_addr << "] ";
+   }
+   os << swz_char[m_value->chan()];
+
+   os << "(" << *m_array << ")";
+}
+
+bool GPRArrayValue::is_equal_to(const Value& other) const
+{
+   const GPRArrayValue& v = static_cast<const GPRArrayValue&>(other);
+
+   return *m_value == *v.m_value &&
+         *m_array == *v.m_array;
+}
+
+void GPRArrayValue::record_read(LiverangeEvaluator& ev) const
+{
+   if (m_addr) {
+      ev.record_read(*m_addr);
+      unsigned chan = m_value->chan();
+      assert(m_array);
+      m_array->record_read(ev, chan);
+   } else
+      ev.record_read(*m_value);
+}
+
+void GPRArrayValue::record_write(LiverangeEvaluator& ev) const
+{
+   if (m_addr) {
+      ev.record_read(*m_addr);
+      unsigned chan = m_value->chan();
+      assert(m_array);
+      m_array->record_write(ev, chan);
+   } else
+      ev.record_write(*m_value);
+}
+
+void GPRArrayValue::reset_value(PValue new_value)
+{
+   m_value = new_value;
+}
+
+void GPRArrayValue::reset_addr(PValue new_addr)
+{
+   m_addr = new_addr;
+}
+
+
+GPRArray::GPRArray(int base, int size, int mask, int frac):
+   Value (gpr_vector),
+   m_base_index(base),
+   m_component_mask(mask),
+   m_frac(frac)
+{
+   m_values.resize(size);
+   for (int i = 0; i < size; ++i) {
+      for (int j = 0; j < 4; ++j) {
+         if (mask & (1 << j)) {
+            auto gpr = new GPRValue(base + i, j);
+            /* If we want to use sb, we have to keep arrays
+             * alive for the whole shader range, otherwise the sb scheduler
+             * thinks it is not able to rename non-array uses of these registers */
+            gpr->set_as_input();
+            gpr->set_keep_alive();
+            m_values[i].set_reg_i(j, PValue(gpr));
+
+         }
+      }
+   }
+}
+
+uint32_t GPRArray::sel() const
+{
+   return m_base_index;
+}
+
+static const char *compchar = "xyzw";
+void GPRArray::do_print(std::ostream& os) const
+{
+   os << "ARRAY[R" << sel() << "..R" << sel() + m_values.size() - 1 << "].";
+   for (int j = 0; j < 4; ++j) {
+      if (m_component_mask & (1 << j))
+         os << compchar[j];
+   }
+}
+
+bool GPRArray::is_equal_to(const Value& other) const
+{
+   const GPRArray& o = static_cast<const GPRArray&>(other);
+   return o.sel() == sel() &&
+         o.m_values.size() == m_values.size() &&
+         o.m_component_mask == m_component_mask;
+}
+
+uint32_t GPRArrayValue::sel() const
+{
+   return m_value->sel();
+}
+
+PValue GPRArray::get_indirect(unsigned index, PValue indirect, unsigned component)
+{
+   assert(index < m_values.size());
+   assert(m_component_mask & (1 << (component + m_frac)));
+
+   sfn_log << SfnLog::reg << "Create indirect register from " << *this;
+
+   PValue v = m_values[index].reg_i(component + m_frac);
+   assert(v);
+
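+   /* The two indirect cases resolved below: a literal index selects the
+    * element directly (the literal value replaces the base index), while
+    * a GPR index wraps the value in a GPRArrayValue so the address
+    * register stays live for run-time relative addressing. */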
+ sfn_log << SfnLog::reg << " -> " << *v; + + if (indirect) { + sfn_log << SfnLog::reg << "[" << *indirect << "]"; + switch (indirect->type()) { + case Value::literal: { + const LiteralValue& lv = static_cast<const LiteralValue&>(*indirect); + v = m_values[lv.value()].reg_i(component + m_frac); + break; + } + case Value::gpr: { + v = PValue(new GPRArrayValue(v, indirect, this)); + sfn_log << SfnLog::reg << "(" << *v << ")"; + break; + } + default: + assert(0 && !"Indirect addressing must be literal value or GPR"); + } + } + sfn_log << SfnLog::reg <<" -> " << *v << "\n"; + return v; +} + +void GPRArray::record_read(LiverangeEvaluator& ev, int chan) const +{ + for (auto& v: m_values) + ev.record_read(*v.reg_i(chan), true); +} + +void GPRArray::record_write(LiverangeEvaluator& ev, int chan) const +{ + for (auto& v: m_values) + ev.record_write(*v.reg_i(chan), true); +} + +void GPRArray::collect_registers(ValueMap& output) const +{ + for (auto& v: m_values) { + for (int i = 0; i < 4; ++i) { + auto vv = v.reg_i(i); + if (vv) + output.insert(vv); + } + } +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h new file mode 100644 index 000000000..789348875 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_value_gpr.h @@ -0,0 +1,208 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef SFN_GPRARRAY_H +#define SFN_GPRARRAY_H + +#include "sfn_value.h" +#include <vector> +#include <array> + +namespace r600 { + +class ValuePool; +class ValueMap; +class LiverangeEvaluator; + +class GPRValue : public Value { +public: + GPRValue() = default; + GPRValue(GPRValue&& orig) = default; + GPRValue(const GPRValue& orig) = default; + + GPRValue(uint32_t sel, uint32_t chan, int base_offset); + + GPRValue(uint32_t sel, uint32_t chan); + + GPRValue& operator = (const GPRValue& orig) = default; + GPRValue& operator = (GPRValue&& orig) = default; + + uint32_t sel() const override final; + + void set_as_input(){ m_input = true; } + bool is_input() const {return m_input; } + void set_keep_alive() { m_keep_alive = true; } + bool keep_alive() const {return m_keep_alive; } + void set_pin_to_channel() override { m_pin_to_channel = true;} + bool pin_to_channel() const { return m_pin_to_channel;} + +private: + void do_print(std::ostream& os) const override; + void do_print(std::ostream& os, const PrintFlags& flags) const override; + bool is_equal_to(const Value& other) const override; + uint32_t m_sel; + bool m_base_offset; + bool m_input; + bool m_pin_to_channel; + bool m_keep_alive; +}; + +using PGPRValue = std::shared_ptr<GPRValue>; + +class GPRVector : public Value { +public: + using Swizzle = std::array<uint32_t,4>; + using Values = std::array<PValue,4>; + GPRVector() = default; + GPRVector(GPRVector&& orig) = default; + GPRVector(const GPRVector& orig); + + GPRVector(const GPRVector& orig, const std::array<uint8_t, 4>& swizzle); + GPRVector(std::array<PValue,4> elms); + GPRVector(uint32_t sel, std::array<uint32_t,4> swizzle); + + GPRVector& operator = (const GPRVector& orig) = default; + GPRVector& operator = (GPRVector&& orig) = default; + + void swizzle(const Swizzle& swz); + + uint32_t sel() const override final; + + void set_reg_i(int i, PValue reg); + + unsigned chan_i(int i) const {return m_elms[i]->chan();} + PValue reg_i(int i) const {return m_elms[i];} + PValue operator [] (int i) const {return m_elms[i];} + PValue& operator [] (int i) {return m_elms[i];} + + void pin_to_channel(int i); + void pin_all_to_channel(); + + PValue x() const {return m_elms[0];} + PValue y() const {return m_elms[1];} + PValue z() const {return m_elms[2];} + PValue w() const {return m_elms[3];} + + Values& values() { return m_elms;} + +private: + void do_print(std::ostream& os) const override; + bool is_equal_to(const Value& other) const override; + void validate() const; + + Values m_elms; + mutable bool m_valid; +}; + + +class GPRArray : public Value +{ +public: + using Pointer = std::shared_ptr<GPRArray>; + + GPRArray(int base, int size, int comp_mask, int frac); + + uint32_t sel() const override; + + uint32_t mask() const { return m_component_mask; }; + + size_t size() const {return m_values.size();} + + PValue get_indirect(unsigned index, PValue indirect, unsigned component); + + void record_read(LiverangeEvaluator& ev, int chan)const; + void record_write(LiverangeEvaluator& ev, int chan)const; + + void collect_registers(ValueMap& output) const; + +private: + void do_print(std::ostream& os) const override; + + bool is_equal_to(const Value& other) const override; + + int m_base_index; + int m_component_mask; + int m_frac; + + std::vector<GPRVector> m_values; +}; + +using PGPRArray = GPRArray::Pointer; + +class GPRArrayValue :public Value { +public: + GPRArrayValue(PValue value, GPRArray *array); + GPRArrayValue(PValue value, PValue index, GPRArray *array); + + void 
record_read(LiverangeEvaluator& ev) const; + void record_write(LiverangeEvaluator& ev) const; + + size_t array_size() const; + uint32_t sel() const override; + + PValue value() {return m_value;} + + void reset_value(PValue new_value); + void reset_addr(PValue new_addr); + + Value::Pointer indirect() const {return m_addr;} + +private: + + void do_print(std::ostream& os) const override; + + bool is_equal_to(const Value& other) const override; + + PValue m_value; + PValue m_addr; + GPRArray *m_array; +}; + +inline size_t GPRArrayValue::array_size() const +{ + return m_array->size(); +} + +inline GPRVector::Swizzle swizzle_from_comps(unsigned ncomp) +{ + GPRVector::Swizzle swz = {0,1,2,3}; + for (int i = ncomp; i < 4; ++i) + swz[i] = 7; + return swz; +} + +inline GPRVector::Swizzle swizzle_from_mask(unsigned mask) +{ + GPRVector::Swizzle swz; + for (int i = 0; i < 4; ++i) + swz[i] = ((1 << i) & mask) ? i : 7; + return swz; +} + + +} + +#endif // SFN_GPRARRAY_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp new file mode 100644 index 000000000..efc9efdca --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.cpp @@ -0,0 +1,526 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018-2019 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sfn_debug.h" +#include "sfn_value_gpr.h" +#include "sfn_valuepool.h" + +#include <iostream> +#include <queue> + +namespace r600 { + +using std::vector; +using std::pair; +using std::make_pair; +using std::queue; + +ValuePool::ValuePool(): + m_next_register_index(0), + current_temp_reg_index(0), + next_temp_reg_comp(4) +{ +} + +PValue ValuePool::m_undef = Value::zero; + +GPRVector ValuePool::vec_from_nir(const nir_dest& dst, int num_components) +{ + std::array<PValue, 4> result; + for (int i = 0; i < 4; ++i) + result[i] = from_nir(dst, i < num_components ? 
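+                        /* components beyond num_components get swizzle 7,
+                           i.e. "unused": a vec2 dest reads as {x, y, 7, 7} */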
i : 7);
+   return GPRVector(result);
+}
+
+std::vector<PValue> ValuePool::varvec_from_nir(const nir_dest& dst, int num_components)
+{
+   std::vector<PValue> result(num_components);
+   for (int i = 0; i < num_components; ++i)
+      result[i] = from_nir(dst, i);
+   return result;
+}
+
+
+std::vector<PValue> ValuePool::varvec_from_nir(const nir_src& src, int num_components)
+{
+   std::vector<PValue> result(num_components);
+   int i;
+   for (i = 0; i < num_components; ++i)
+      result[i] = from_nir(src, i);
+
+   return result;
+}
+
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component, unsigned swizzled)
+{
+   sfn_log << SfnLog::reg << "Search " << (v.is_ssa ? "ssa_reg " : "reg ")
+           << (v.is_ssa ? v.ssa->index : v.reg.reg->index);
+
+   if (!v.is_ssa) {
+      int idx = lookup_register_index(v);
+      sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+      if (idx >= 0) {
+         auto reg = lookup_register(idx, swizzled, false);
+         if (reg) {
+            if (reg->type() == Value::gpr_vector) {
+               auto& array = static_cast<GPRArray&>(*reg);
+               reg = array.get_indirect(v.reg.base_offset,
+                                        v.reg.indirect ?
+                                           from_nir(*v.reg.indirect, 0, 0) : nullptr,
+                                        component);
+            }
+            return reg;
+         }
+      }
+      assert(0 && "local registers should always be found");
+   }
+
+   unsigned index = v.ssa->index;
+   /* For undefs we use zero and let the (yet to be implemented) DCE deal with it */
+   if (m_ssa_undef.find(index) != m_ssa_undef.end())
+      return Value::zero;
+
+
+   int idx = lookup_register_index(v);
+   sfn_log << SfnLog::reg << " -> got index " << idx << "\n";
+   if (idx >= 0) {
+      auto reg = lookup_register(idx, swizzled, false);
+      if (reg)
+         return reg;
+   }
+
+   auto literal_val = nir_src_as_const_value(v);
+   if (literal_val) {
+      assert(v.is_ssa);
+      switch (v.ssa->bit_size) {
+      case 1:
+         return PValue(new LiteralValue(literal_val[swizzled].b ? 0xffffffff : 0, component));
+      case 32:
+         return literal(literal_val[swizzled].u32);
+      default:
+         sfn_log << SfnLog::reg << "Unsupported bit size " << v.ssa->bit_size
+                 << " fall back to 32\n";
+         return PValue(new LiteralValue(literal_val[swizzled].u32, component));
+      }
+   }
+
+   return PValue();
+}
+
+PValue ValuePool::from_nir(const nir_src& v, unsigned component)
+{
+   return from_nir(v, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_tex_src &v, unsigned component)
+{
+   return from_nir(v.src, component, component);
+}
+
+PValue ValuePool::from_nir(const nir_alu_src &v, unsigned component)
+{
+   return from_nir(v.src, component, v.swizzle[component]);
+}
+
+PGPRValue ValuePool::get_temp_register(int channel)
+{
+   /* Skip to next register to get the channel we want */
+   if (channel >= 0) {
+      if (next_temp_reg_comp <= channel)
+         next_temp_reg_comp = channel;
+      else
+         next_temp_reg_comp = 4;
+   }
+
+   if (next_temp_reg_comp > 3) {
+      current_temp_reg_index = allocate_temp_register();
+      next_temp_reg_comp = 0;
+   }
+   return std::make_shared<GPRValue>(current_temp_reg_index, next_temp_reg_comp++);
+}
+
+GPRVector ValuePool::get_temp_vec4(const GPRVector::Swizzle& swizzle)
+{
+   int sel = allocate_temp_register();
+   return GPRVector(sel, swizzle);
+}
+
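+/* Note on the register index space used by the lookups below: SSA values
+ * keep their nir index, while nir local registers are stored with bit 31
+ * set (index | 0x80000000, see get_local_register_index), so both kinds
+ * can share the same lookup maps. */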
+PValue ValuePool::create_register_from_nir_src(const nir_src& src, int comp)
+{
+   int idx = src.is_ssa ? get_dst_ssa_register_index(*src.ssa):
+                          get_local_register_index(*src.reg.reg);
+
+   auto retval = lookup_register(idx, comp, false);
+   if (!retval || (retval->type() != Value::gpr &&
+                   retval->type() != Value::gpr_array_value))
+      retval = create_register(idx, comp);
+   return retval;
+}
+
+PValue ValuePool::from_nir(const nir_alu_dest &v, unsigned component)
+{
+   //assert(v->write_mask & (1 << component));
+   return from_nir(v.dest, component);
+}
+
+int ValuePool::lookup_register_index(const nir_dest& dst)
+{
+   return dst.is_ssa ? get_dst_ssa_register_index(dst.ssa):
+                       get_local_register_index(*dst.reg.reg);
+}
+
+int ValuePool::lookup_register_index(const nir_src& src) const
+{
+   int index = 0;
+
+   index = src.is_ssa ?
+              get_ssa_register_index(*src.ssa) :
+              get_local_register_index(*src.reg.reg);
+
+   sfn_log << SfnLog::reg << " LIDX:" << index;
+
+   auto r = m_register_map.find(index);
+   if (r == m_register_map.end()) {
+      return -1;
+   }
+   return static_cast<int>(r->second.index);
+}
+
+
+int ValuePool::allocate_temp_register()
+{
+   return m_next_register_index++;
+}
+
+
+PValue ValuePool::from_nir(const nir_dest& v, unsigned component)
+{
+   int idx = lookup_register_index(v);
+   sfn_log << SfnLog::reg << __func__ << ": ";
+   if (v.is_ssa)
+      sfn_log << "ssa_" << v.ssa.index;
+   else
+      sfn_log << "r" << v.reg.reg->index;
+   sfn_log << " -> " << idx << "\n";
+
+   auto retval = lookup_register(idx, component, false);
+   if (!retval)
+      retval = create_register(idx, component);
+
+   if (retval->type() == Value::gpr_vector) {
+      assert(!v.is_ssa);
+      auto& array = static_cast<GPRArray&>(*retval);
+      retval = array.get_indirect(v.reg.base_offset,
+                                  v.reg.indirect ?
+                                     from_nir(*v.reg.indirect, 0, 0) : nullptr,
+                                  component);
+   }
+
+   return retval;
+}
+
+ValueMap ValuePool::get_temp_registers() const
+{
+   ValueMap result;
+
+   for (auto& v : m_registers) {
+      if (v.second->type() == Value::gpr)
+         result.insert(v.second);
+      else if (v.second->type() == Value::gpr_vector) {
+         auto& array = static_cast<GPRArray&>(*v.second);
+         array.collect_registers(result);
+      }
+   }
+   return result;
+}
+
+static const char swz[] = "xyzw01?_";
+
+PValue ValuePool::create_register(unsigned sel, unsigned swizzle)
+{
+   sfn_log << SfnLog::reg
+           << "Create register " << sel << '.' << swz[swizzle] << "\n";
+   auto retval = PValue(new GPRValue(sel, swizzle));
+   m_registers[(sel << 3) + swizzle] = retval;
+   return retval;
+}
+
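+/* The map key packs register index and component into one slot:
+ * (sel << 3) + swizzle, so e.g. R5.y is keyed as (5 << 3) + 1 = 41. */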
+bool ValuePool::inject_register(unsigned sel, unsigned swizzle,
+                                const PValue& reg, bool map)
+{
+   uint32_t ssa_index = sel;
+
+   if (map) {
+      auto pos = m_ssa_register_map.find(sel);
+      if (pos == m_ssa_register_map.end())
+         ssa_index = m_next_register_index++;
+      else
+         ssa_index = pos->second;
+   }
+
+   sfn_log << SfnLog::reg
+           << "Inject register " << sel << '.' << swz[swizzle]
+           << " at index " << ssa_index << " ...";
+
+   if (map)
+      m_ssa_register_map[sel] = ssa_index;
+
+   allocate_with_mask(ssa_index, swizzle, true);
+
+   unsigned idx = (ssa_index << 3) + swizzle;
+   auto p = m_registers.find(idx);
+   if ( (p != m_registers.end()) && *p->second != *reg) {
+      std::cerr << "Register location (" << ssa_index << ", " << swizzle << ") was already reserved\n";
+      assert(0);
+      return false;
+   }
+   sfn_log << SfnLog::reg << " at idx:" << idx << " to " << *reg << "\n";
+   m_registers[idx] = reg;
+
+   if (m_next_register_index <= ssa_index)
+      m_next_register_index = ssa_index + 1;
+   return true;
+}
+
+
+PValue ValuePool::lookup_register(unsigned sel, unsigned swizzle,
+                                  bool required)
+{
+
+   PValue retval;
+   sfn_log << SfnLog::reg
+           << "lookup register " << sel << '.' << swz[swizzle] << "("
+           << ((sel << 3) + swizzle) << ")...";
+
+
+   auto reg = m_registers.find((sel << 3) + swizzle);
+   if (reg != m_registers.end()) {
+      sfn_log << SfnLog::reg << " -> Found " << *reg->second << "\n";
+      retval = reg->second;
+   } else if (swizzle == 7) {
+      retval = create_register(sel, swizzle);
+      sfn_log << SfnLog::reg << " -> Created " << *retval << "\n";
+   } else if (required) {
+      sfn_log << SfnLog::reg << "Register (" << sel << ", "
+              << swizzle << ") not found but required\n";
+      assert(0 && "Unallocated register value requested\n");
+   } else {
+      sfn_log << SfnLog::reg << " -> Not required and not allocated\n";
+   }
+   return retval;
+}
+
+unsigned ValuePool::get_dst_ssa_register_index(const nir_ssa_def& ssa)
+{
+   sfn_log << SfnLog::reg << __func__ << ": search dst ssa "
+           << ssa.index;
+
+   auto pos = m_ssa_register_map.find(ssa.index);
+   if (pos == m_ssa_register_map.end()) {
+      sfn_log << SfnLog::reg << " Need to allocate ...";
+      allocate_ssa_register(ssa);
+      pos = m_ssa_register_map.find(ssa.index);
+      assert(pos != m_ssa_register_map.end());
+   }
+   sfn_log << SfnLog::reg << "... got " << pos->second << "\n";
+   return pos->second;
+}
+
got " << pos->second << "\n"; + return pos->second; +} + +unsigned ValuePool::get_ssa_register_index(const nir_ssa_def& ssa) const +{ + sfn_log << SfnLog::reg << __func__ << ": search ssa " + << ssa.index; + + auto pos = m_ssa_register_map.find(ssa.index); + sfn_log << SfnLog::reg << " got " << pos->second<< "\n"; + if (pos == m_ssa_register_map.end()) { + sfn_log << SfnLog::reg << __func__ << ": ssa register " + << ssa.index << " lookup failed\n"; + return -1; + } + return pos->second; +} + +unsigned ValuePool::get_local_register_index(const nir_register& reg) +{ + unsigned index = reg.index | 0x80000000; + + auto pos = m_ssa_register_map.find(index); + if (pos == m_ssa_register_map.end()) { + allocate_local_register(reg); + pos = m_ssa_register_map.find(index); + assert(pos != m_ssa_register_map.end()); + } + return pos->second; +} + +unsigned ValuePool::get_local_register_index(const nir_register& reg) const +{ + unsigned index = reg.index | 0x80000000; + auto pos = m_ssa_register_map.find(index); + if (pos == m_ssa_register_map.end()) { + sfn_log << SfnLog::err << __func__ << ": local register " + << reg.index << " lookup failed"; + return -1; + } + return pos->second; +} + +void ValuePool::allocate_ssa_register(const nir_ssa_def& ssa) +{ + sfn_log << SfnLog::reg << "ValuePool: Allocate ssa register " << ssa.index + << " as " << m_next_register_index << "\n"; + int index = m_next_register_index++; + m_ssa_register_map[ssa.index] = index; + allocate_with_mask(index, 0xf, true); +} + +void ValuePool::allocate_arrays(array_list& arrays) +{ + int ncomponents = 0; + int current_index = m_next_register_index; + unsigned instance = 0; + + while (!arrays.empty()) { + auto a = arrays.top(); + arrays.pop(); + + /* This is a bit hackish, return an id that encodes the array merge. 
+       * that the mapping doesn't go wrong we have to make sure the array is longer than
+       * the number of instances in this array slot */
+      if (a.ncomponents + ncomponents > 4 ||
+          a.length < instance) {
+         current_index = m_next_register_index;
+         ncomponents = 0;
+         instance = 0;
+      }
+
+      if (ncomponents == 0)
+         m_next_register_index += a.length;
+
+      uint32_t mask = ((1 << a.ncomponents) - 1) << ncomponents;
+
+      PGPRArray array = PGPRArray(new GPRArray(current_index, a.length, mask, ncomponents));
+
+      m_reg_arrays.push_back(array);
+
+      sfn_log << SfnLog::reg << "Add array at "<< current_index
+              << " of size " << a.length << " with " << a.ncomponents
+              << " components, mask " << mask << "\n";
+
+      m_ssa_register_map[a.index | 0x80000000] = current_index + instance;
+
+      for (unsigned i = 0; i < a.ncomponents; ++i)
+         m_registers[((current_index + instance) << 3) + i] = array;
+
+      VRec next_reg = {current_index + instance, mask, mask};
+      m_register_map[current_index + instance] = next_reg;
+
+      ncomponents += a.ncomponents;
+      ++instance;
+   }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg)
+{
+   int index = m_next_register_index++;
+   m_ssa_register_map[reg.index | 0x80000000] = index;
+   allocate_with_mask(index, 0xf, true);
+
+   /* Create actual register and map it */
+   for (int i = 0; i < 4; ++i) {
+      int k = (index << 3) + i;
+      m_registers[k] = std::make_shared<GPRValue>(index, i);
+   }
+}
+
+void ValuePool::allocate_local_register(const nir_register& reg, array_list& arrays)
+{
+   sfn_log << SfnLog::reg << "ValuePool: Allocate local register " << reg.index
+           << " as " << m_next_register_index << "\n";
+
+   if (reg.num_array_elems) {
+      array_entry ae = {reg.index, reg.num_array_elems, reg.num_components};
+      arrays.push(ae);
+   }
+   else
+      allocate_local_register(reg);
+}
+
+bool ValuePool::create_undef(nir_ssa_undef_instr* instr)
+{
+   m_ssa_undef.insert(instr->def.index);
+   return true;
+}
+
+int ValuePool::allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc)
+{
+   int retval;
+   VRec next_register = { index, mask };
+
+   sfn_log << SfnLog::reg << (pre_alloc ?
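+                      /* mask bookkeeping below: re-allocating a component is
+                         only accepted if that component was pre-allocated;
+                         e.g. a pre-alloc of mask 0x3 tolerates a later alloc
+                         of 0x1, while two plain allocations of 0x1 collide */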
"Pre-alloc" : "Allocate") + << " register (" << index << ", " << mask << ")\n"; + retval = index; + auto r = m_register_map.find(index); + + if (r != m_register_map.end()) { + if ((r->second.mask & next_register.mask) && + !(r->second.pre_alloc_mask & next_register.mask)) { + std::cerr << "r600 ERR: register (" + << index << ", " << mask + << ") already allocated as (" << r->second.index << ", " + << r->second.mask << ", " << r->second.pre_alloc_mask + << ") \n"; + retval = -1; + } else { + r->second.mask |= next_register.mask; + if (pre_alloc) + r->second.pre_alloc_mask |= next_register.mask; + retval = r->second.index; + } + } else { + if (pre_alloc) + next_register.pre_alloc_mask = mask; + m_register_map[index] = next_register; + retval = next_register.index; + } + + sfn_log << SfnLog::reg << "Allocate register (" << index << "," << mask << ") in R" + << retval << "\n"; + + return retval; +} + +PValue ValuePool::literal(uint32_t value) +{ + auto l = m_literals.find(value); + if (l != m_literals.end()) + return l->second; + + m_literals[value] = PValue(new LiteralValue(value)); + return m_literals[value]; +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h new file mode 100644 index 000000000..fa1e5507f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_valuepool.h @@ -0,0 +1,242 @@ +/* -*- mesa-c++ -*- + * + * Copyright (c) 2018 Collabora LTD + * + * Author: Gert Wollny <gert.wollny@collabora.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#ifndef SFN_VALUEPOOL_H
+#define SFN_VALUEPOOL_H
+
+#include "sfn_value.h"
+#include "sfn_value_gpr.h"
+
+#include <set>
+#include <queue>
+
+namespace r600 {
+
+using LiteralBuffer = std::map<unsigned, const nir_load_const_instr *>;
+
+class ValueMap {
+public:
+   void insert(const PValue& v) {
+      auto idx = index_from(v->sel(), v->chan());
+      m_map[idx] = v;
+   }
+   PValue get_or_inject(uint32_t index, uint32_t chan) {
+      auto idx = index_from(index, chan);
+      auto v = m_map.find(idx);
+      if (v == m_map.end()) {
+         insert(PValue(new GPRValue(index, chan)));
+         v = m_map.find(idx);
+      }
+      return v->second;
+   }
+   std::map<uint32_t, PValue>::const_iterator begin() const {return m_map.begin();}
+   std::map<uint32_t, PValue>::const_iterator end() const {return m_map.end();}
+
+private:
+   uint32_t index_from(uint32_t index, uint32_t chan) {
+      return (index << 3) + chan;
+   }
+   std::map<uint32_t, PValue> m_map;
+};
+
+/** \brief Class to keep track of registers, uniforms, and literals
+ * This class holds the references to the uniforms and the literals
+ * and is responsible for allocating the registers.
+ */
+class ValuePool
+{
+public:
+
+   struct array_entry {
+      unsigned index;
+      unsigned length;
+      unsigned ncomponents;
+
+      bool operator ()(const array_entry& a, const array_entry& b) const {
+         return a.length < b.length || (a.length == b.length && a.ncomponents > b.ncomponents);
+      }
+   };
+
+   using array_list = std::priority_queue<array_entry, std::vector<array_entry>,
+                                          array_entry>;
+
+   ValuePool();
+
+
+   GPRVector vec_from_nir(const nir_dest& dst, int num_components);
+
+   std::vector<PValue> varvec_from_nir(const nir_dest& src, int num_components);
+   std::vector<PValue> varvec_from_nir(const nir_src& src, int num_components);
+
+   PValue from_nir(const nir_src& v, unsigned component, unsigned swizzled);
+
+   PValue from_nir(const nir_src& v, unsigned component);
+   /** Get a register that is used as source register in an ALU instruction.
+    * The PValue holds one component as specified. If the register refers to
+    * a GPR it must already have been allocated; uniforms and literals on
+    * the other hand might be pre-loaded.
+    */
+   PValue from_nir(const nir_alu_src& v, unsigned component);
+
+   /** Get a register that is used as source register in a texture instruction.
+    * The PValue holds one component as specified.
+    */
+   PValue from_nir(const nir_tex_src& v, unsigned component);
+
+   /** Allocate a register that is used as destination register in an ALU
+    * instruction. The PValue holds one component as specified.
+    */
+   PValue from_nir(const nir_alu_dest& v, unsigned component);
+
+   /** Allocate a register that is used as destination register in any
+    * instruction. The PValue holds one component as specified.
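+    * (A usage sketch, assuming an intrinsic instruction ir:
+    *    PValue y = from_nir(ir->dest, 1);
+    * returns the value backing component .y, allocating the backing
+    * register on first use.)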
+    */
+   PValue from_nir(const nir_dest& v, unsigned component);
+
+
+   /** Inject a register into a given ssa index position
+    * This is used to redirect loads from system values and vertex attributes
+    * that are already loaded into registers */
+   bool inject_register(unsigned sel, unsigned swizzle, const PValue &reg, bool map);
+
+   /** Reserve space for a local register */
+   void allocate_local_register(const nir_register& reg);
+   void allocate_local_register(const nir_register &reg, array_list& arrays);
+
+   void allocate_arrays(array_list& arrays);
+
+
+   void increment_reserved_registers() {
+      ++m_next_register_index;
+   }
+
+   void set_reserved_registers(unsigned rr) {
+      m_next_register_index = rr;
+   }
+
+   /** Reserve an undef register, currently it uses (0,7),
+    * \todo should be eliminated in the final pass
+    */
+   bool create_undef(nir_ssa_undef_instr* instr);
+
+   /** Create a new register with the given index and store it in the
+    * lookup map
+    */
+   PValue create_register_from_nir_src(const nir_src& sel, int comp);
+
+   ValueMap get_temp_registers() const;
+
+   PValue lookup_register(unsigned sel, unsigned swizzle, bool required);
+
+   size_t register_count() const {return m_next_register_index;}
+
+   PValue literal(uint32_t value);
+
+   PGPRValue get_temp_register(int channel = -1);
+
+   GPRVector get_temp_vec4(const GPRVector::Swizzle &swizzle = {0,1,2,3});
+
+protected:
+   std::vector<PGPRArray> m_reg_arrays;
+
+private:
+
+   /** Get the register index mapped from the NIR code to the r600 ir
+    * \param index NIR index of register
+    * \returns r600 IR index
+    */
+   int lookup_register_index(const nir_src& src) const;
+
+   /** Get the register index mapped from the NIR code to the r600 ir
+    * \param index NIR index of register
+    * \returns r600 IR index
+    */
+   int lookup_register_index(const nir_dest& dst);
+
+   /** Allocate a register that is needed for lowering an instruction
+    * that requires complex calculations.
+    */
+   int allocate_temp_register();
+
+
+   PValue create_register(unsigned index, unsigned swizzle);
+
+   unsigned get_dst_ssa_register_index(const nir_ssa_def& ssa);
+
+   unsigned get_ssa_register_index(const nir_ssa_def& ssa) const;
+
+   unsigned get_local_register_index(const nir_register& reg);
+
+   unsigned get_local_register_index(const nir_register& reg) const;
+
+   void allocate_ssa_register(const nir_ssa_def& ssa);
+
+   void allocate_array(const nir_register& reg);
+
+
+   /** Allocate a register index with the given component mask.
+    * If one of the components has already been allocated the function
+    * will signal an error by returning -1, otherwise a register index is
+    * returned.
+    */
+   int allocate_with_mask(unsigned index, unsigned mask, bool pre_alloc);
+
+   /** search for a new register with the given index in the
+    * lookup map.
+ * \param sel register sel value + * \param swizzle register component, can also be 4,5, and 7 + * \param required true: in debug mode assert when register doesn't exist + * false: return nullptr on failure + */ + + std::set<unsigned> m_ssa_undef; + + std::map<unsigned, unsigned> m_ssa_register_map; + + std::map<unsigned, PValue> m_registers; + + static PValue m_undef; + + struct VRec { + unsigned index; + unsigned mask; + unsigned pre_alloc_mask; + }; + std::map<unsigned, VRec> m_register_map; + + unsigned m_next_register_index; + + + std::map<uint32_t, PValue> m_literals; + + int current_temp_reg_index; + int next_temp_reg_comp; +}; + +} + +#endif // SFN_VALUEPOOL_H diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp new file mode 100644 index 000000000..ff49216a9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.cpp @@ -0,0 +1,535 @@ +#include "sfn_vertexstageexport.h" + +#include "sfn_shaderio.h" + +namespace r600 { + +using std::priority_queue; + +VertexStageExportBase::VertexStageExportBase(VertexStage& proc): + m_proc(proc), + m_cur_clip_pos(1) +{ + +} + +VertexStageExportBase::~VertexStageExportBase() +{ + +} + +bool VertexStageExportBase::do_process_outputs(nir_variable *output) +{ + return true; +} + +void VertexStageExportBase::emit_shader_start() +{ + +} + +void VertexStageExportBase::scan_store_output(nir_intrinsic_instr* instr) +{ + +} + +bool VertexStageExportBase::store_output(nir_intrinsic_instr* instr) +{ + auto index = nir_src_as_const_value(instr->src[1]); + assert(index && "Indirect outputs not supported"); + + const store_loc store_info = { + nir_intrinsic_component(instr), + nir_intrinsic_io_semantics(instr).location, + (unsigned)nir_intrinsic_base(instr) + index->u32, + 0 + }; + + return do_store_output(store_info, instr); +} + +VertexStageExportForFS::VertexStageExportForFS(VertexStage& proc, + const pipe_stream_output_info *so_info, + r600_pipe_shader *pipe_shader, const r600_shader_key &key): + VertexStageWithOutputInfo(proc), + m_last_param_export(nullptr), + m_last_pos_export(nullptr), + m_num_clip_dist(0), + m_enabled_stream_buffers_mask(0), + m_so_info(so_info), + m_pipe_shader(pipe_shader), + m_key(key) +{ +} + +bool VertexStageWithOutputInfo::do_process_outputs(nir_variable *output) +{ + if (output->data.location == VARYING_SLOT_COL0 || + output->data.location == VARYING_SLOT_COL1 || + (output->data.location >= VARYING_SLOT_VAR0 && + output->data.location <= VARYING_SLOT_VAR31) || + (output->data.location >= VARYING_SLOT_TEX0 && + output->data.location <= VARYING_SLOT_TEX7) || + output->data.location == VARYING_SLOT_BFC0 || + output->data.location == VARYING_SLOT_BFC1 || + output->data.location == VARYING_SLOT_CLIP_VERTEX || + output->data.location == VARYING_SLOT_CLIP_DIST0 || + output->data.location == VARYING_SLOT_CLIP_DIST1 || + output->data.location == VARYING_SLOT_POS || + output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_FOGC || + output->data.location == VARYING_SLOT_LAYER || + output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_VIEWPORT + ) { + + r600_shader_io& io = m_proc.sh_info().output[output->data.driver_location]; + auto semantic = r600_get_varying_semantic(output->data.location); + io.name = semantic.first; + io.sid = semantic.second; + + m_proc.evaluate_spi_sid(io); + io.write_mask = ((1 << glsl_get_components(output->type)) - 1) + << 
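+                        /* e.g. a vec2 output at location_frac 1 yields
+                           write_mask ((1 << 2) - 1) << 1 = 0x6 (y and z) */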
output->data.location_frac; + ++m_proc.sh_info().noutput; + + if (output->data.location == VARYING_SLOT_PSIZ || + output->data.location == VARYING_SLOT_EDGE || + output->data.location == VARYING_SLOT_LAYER) // VIEWPORT? + m_cur_clip_pos = 2; + + if (output->data.location != VARYING_SLOT_POS && + output->data.location != VARYING_SLOT_EDGE && + output->data.location != VARYING_SLOT_PSIZ && + output->data.location != VARYING_SLOT_CLIP_VERTEX) + m_param_driver_locations.push(output->data.driver_location); + + return true; + } + return false; +} + +bool VertexStageExportForFS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) +{ + switch (store_info.location) { + case VARYING_SLOT_PSIZ: + m_proc.sh_info().vs_out_point_size = 1; + m_proc.sh_info().vs_out_misc_write = 1; + FALLTHROUGH; + case VARYING_SLOT_POS: + return emit_varying_pos(store_info, instr); + case VARYING_SLOT_EDGE: { + std::array<uint32_t, 4> swizzle_override = {7 ,0, 7, 7}; + return emit_varying_pos(store_info, instr, &swizzle_override); + } + case VARYING_SLOT_VIEWPORT: { + std::array<uint32_t, 4> swizzle_override = {7, 7, 7, 0}; + return emit_varying_pos(store_info, instr, &swizzle_override) && + emit_varying_param(store_info, instr); + } + case VARYING_SLOT_CLIP_VERTEX: + return emit_clip_vertices(store_info, instr); + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + m_num_clip_dist += 4; + return emit_varying_param(store_info, instr) && emit_varying_pos(store_info, instr); + case VARYING_SLOT_LAYER: { + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_layer = 1; + std::array<uint32_t, 4> swz = {7,7,0,7}; + return emit_varying_pos(store_info, instr, &swz) && + emit_varying_param(store_info, instr); + } + case VARYING_SLOT_VIEW_INDEX: + return emit_varying_pos(store_info, instr) && + emit_varying_param(store_info, instr); + + default: + return emit_varying_param(store_info, instr); + } + + fprintf(stderr, "r600-NIR: Unimplemented store_deref for %d\n", + store_info.location); + return false; +} + +bool VertexStageExportForFS::emit_varying_pos(const store_loc &store_info, nir_intrinsic_instr* instr, + std::array<uint32_t, 4> *swizzle_override) +{ + std::array<uint32_t,4> swizzle; + uint32_t write_mask = 0; + + if (swizzle_override) { + swizzle = *swizzle_override; + for (int i = 0; i < 4; ++i) { + if (swizzle[i] < 6) + write_mask |= 1 << i; + } + } else { + write_mask = nir_intrinsic_write_mask(instr) << store_info.frac; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? 
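+                     /* e.g. write_mask 0b1100 with frac 2 maps to swizzle
+                        {7, 7, 0, 1}; channels not written read as 7 */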
i - store_info.frac : 7; + } + + m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask; + + GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle); + m_proc.set_output(store_info.driver_location, value.sel()); + + int export_slot = 0; + + switch (store_info.location) { + case VARYING_SLOT_EDGE: { + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_edgeflag = 1; + m_proc.emit_instruction(op1_mov, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_dst_clamp, alu_last_instr}); + m_proc.emit_instruction(op1_flt_to_int, value.reg_i(1), {value.reg_i(1)}, {alu_write, alu_last_instr}); + m_proc.sh_info().output[store_info.driver_location].write_mask = 0xf; + } + FALLTHROUGH; + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + export_slot = 1; + break; + case VARYING_SLOT_VIEWPORT: + m_proc.sh_info().vs_out_misc_write = 1; + m_proc.sh_info().vs_out_viewport = 1; + export_slot = 1; + break; + case VARYING_SLOT_POS: + break; + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + export_slot = m_cur_clip_pos++; + break; + default: + sfn_log << SfnLog::err << __func__ << "Unsupported location " + << store_info.location << "\n"; + return false; + } + + m_last_pos_export = new ExportInstruction(export_slot, value, ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + m_proc.add_param_output_reg(store_info.driver_location, m_last_pos_export->gpr_ptr()); + return true; +} + +bool VertexStageExportForFS::emit_varying_param(const store_loc &store_info, nir_intrinsic_instr* instr) +{ + assert(store_info.driver_location < m_proc.sh_info().noutput); + sfn_log << SfnLog::io << __func__ << ": emit DDL: " << store_info.driver_location << "\n"; + + int write_mask = nir_intrinsic_write_mask(instr) << store_info.frac; + std::array<uint32_t,4> swizzle; + for (int i = 0; i < 4; ++i) + swizzle[i] = ((1 << i) & write_mask) ? i - store_info.frac : 7; + + //m_proc.sh_info().output[store_info.driver_location].write_mask = write_mask; + + GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, swizzle, true); + m_proc.sh_info().output[store_info.driver_location].gpr = value.sel(); + + /* This should use the registers!! */ + m_proc.set_output(store_info.driver_location, value.sel()); + + m_last_param_export = new ExportInstruction(param_id(store_info.driver_location), + value, ExportInstruction::et_param); + m_proc.emit_export_instruction(m_last_param_export); + m_proc.add_param_output_reg(store_info.driver_location, m_last_param_export->gpr_ptr()); + return true; +} + +bool VertexStageExportForFS::emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr) +{ + m_proc.sh_info().cc_dist_mask = 0xff; + m_proc.sh_info().clip_dist_write = 0xff; + + m_clip_vertex = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], 0xf, {0,1,2,3}); + m_proc.add_param_output_reg(store_info.driver_location, &m_clip_vertex); + + for (int i = 0; i < 4; ++i) + m_proc.sh_info().output[store_info.driver_location].write_mask |= 1 << i; + + GPRVector clip_dist[2] = { m_proc.get_temp_vec4(), m_proc.get_temp_vec4()}; + + for (int i = 0; i < 8; i++) { + int oreg = i >> 2; + int ochan = i & 3; + AluInstruction *ir = nullptr; + for (int j = 0; j < 4; j++) { + ir = new AluInstruction(op2_dot4_ieee, clip_dist[oreg].reg_i(j), m_clip_vertex.reg_i(j), + PValue(new UniformValue(512 + i, j, R600_BUFFER_INFO_CONST_BUFFER)), + (j == ochan) ? 
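+                                    /* every lane of the DOT4 computes the same
+                                       dot product; only lane ochan (i & 3) is
+                                       written, so clip distance i lands in
+                                       component i & 3 of export reg i >> 2 */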
EmitInstruction::write : EmitInstruction::empty); + m_proc.emit_instruction(ir); + } + ir->set_flag(alu_last_instr); + } + + m_last_pos_export = new ExportInstruction(m_cur_clip_pos++, clip_dist[0], ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + + m_last_pos_export = new ExportInstruction(m_cur_clip_pos, clip_dist[1], ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + + return true; +} + +VertexStageWithOutputInfo::VertexStageWithOutputInfo(VertexStage& proc): + VertexStageExportBase(proc), + m_current_param(0) +{ + +} + +void VertexStageWithOutputInfo::scan_store_output(nir_intrinsic_instr* instr) +{ + auto location = nir_intrinsic_io_semantics(instr).location; + auto driver_location = nir_intrinsic_base(instr); + auto index = nir_src_as_const_value(instr->src[1]); + assert(index); + + unsigned noutputs = driver_location + index->u32 + 1; + if (m_proc.sh_info().noutput < noutputs) + m_proc.sh_info().noutput = noutputs; + + r600_shader_io& io = m_proc.sh_info().output[driver_location + index->u32]; + auto semantic = r600_get_varying_semantic(location + index->u32); + io.name = semantic.first; + io.sid = semantic.second; + m_proc.evaluate_spi_sid(io); + io.write_mask = nir_intrinsic_write_mask(instr); + + if (location == VARYING_SLOT_PSIZ || + location == VARYING_SLOT_EDGE || + location == VARYING_SLOT_LAYER) // VIEWPORT? + m_cur_clip_pos = 2; + + if (location != VARYING_SLOT_POS && + location != VARYING_SLOT_EDGE && + location != VARYING_SLOT_PSIZ && + location != VARYING_SLOT_CLIP_VERTEX) { + m_param_driver_locations.push(driver_location + index->u32); + } +} + +unsigned VertexStageWithOutputInfo::param_id(unsigned driver_location) +{ + auto param_loc = m_param_map.find(driver_location); + assert(param_loc != m_param_map.end()); + return param_loc->second; +} + +void VertexStageWithOutputInfo::emit_shader_start() +{ + while (!m_param_driver_locations.empty()) { + auto loc = m_param_driver_locations.top(); + m_param_driver_locations.pop(); + m_param_map[loc] = m_current_param++; + } +} + +unsigned VertexStageWithOutputInfo::current_param() const +{ + return m_current_param; +} + +void VertexStageExportForFS::finalize_exports() +{ + if (m_key.vs.as_gs_a) { + PValue o(new GPRValue(0,PIPE_SWIZZLE_0)); + GPRVector primid({m_proc.primitive_id(), o,o,o}); + m_last_param_export = new ExportInstruction(current_param(), primid, ExportInstruction::et_param); + m_proc.emit_export_instruction(m_last_param_export); + int i; + i = m_proc.sh_info().noutput++; + auto& io = m_proc.sh_info().output[i]; + io.name = TGSI_SEMANTIC_PRIMID; + io.sid = 0; + io.gpr = 0; + io.interpolate = TGSI_INTERPOLATE_CONSTANT; + io.write_mask = 0x1; + io.spi_sid = m_key.vs.prim_id_out; + m_proc.sh_info().vs_as_gs_a = 1; + } + + if (m_so_info && m_so_info->num_outputs) + emit_stream(-1); + + m_pipe_shader->enabled_stream_buffers_mask = m_enabled_stream_buffers_mask; + + if (!m_last_param_export) { + GPRVector value(0,{7,7,7,7}); + m_last_param_export = new ExportInstruction(0, value, ExportInstruction::et_param); + m_proc.emit_export_instruction(m_last_param_export); + } + m_last_param_export->set_last(); + + if (!m_last_pos_export) { + GPRVector value(0,{7,7,7,7}); + m_last_pos_export = new ExportInstruction(0, value, ExportInstruction::et_pos); + m_proc.emit_export_instruction(m_last_pos_export); + } + m_last_pos_export->set_last(); +} + +bool VertexStageExportForFS::emit_stream(int stream) +{ + assert(m_so_info); + if (m_so_info->num_outputs > 
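+       /* stream == -1 emits the outputs of all streams; each output must
+          target one of the four stream output buffers checked just below */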
PIPE_MAX_SO_OUTPUTS) { + R600_ERR("Too many stream outputs: %d\n", m_so_info->num_outputs); + return false; + } + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + if (m_so_info->output[i].output_buffer >= 4) { + R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", + m_so_info->output[i].output_buffer); + return false; + } + } + const GPRVector *so_gpr[PIPE_MAX_SHADER_OUTPUTS]; + unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; + std::vector<GPRVector> tmp(m_so_info->num_outputs); + + /* Initialize locations where the outputs are stored. */ + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + if (stream != -1 && stream != m_so_info->output[i].stream) + continue; + + sfn_log << SfnLog::instr << "Emit stream " << i + << " with register index " << m_so_info->output[i].register_index << " so_gpr:"; + + + so_gpr[i] = m_proc.output_register(m_so_info->output[i].register_index); + + if (!so_gpr[i]) { + sfn_log << SfnLog::err << "\nERR: register index " + << m_so_info->output[i].register_index + << " doesn't correspond to an output register\n"; + return false; + } + start_comp[i] = m_so_info->output[i].start_component; + /* Lower outputs with dst_offset < start_component. + * + * We can only output 4D vectors with a write mask, e.g. we can + * only output the W component at offset 3, etc. If we want + * to store Y, Z, or W at buffer offset 0, we need to use MOV + * to move it to X and output X. */ + if (m_so_info->output[i].dst_offset < m_so_info->output[i].start_component) { + + GPRVector::Swizzle swizzle = {0,1,2,3}; + for (auto j = m_so_info->output[i].num_components; j < 4; ++j) + swizzle[j] = 7; + tmp[i] = m_proc.get_temp_vec4(swizzle); + + int sc = m_so_info->output[i].start_component; + AluInstruction *alu = nullptr; + for (int j = 0; j < m_so_info->output[i].num_components; j++) { + alu = new AluInstruction(op1_mov, tmp[i][j], so_gpr[i]->reg_i(j + sc), {alu_write}); + m_proc.emit_instruction(alu); + } + if (alu) + alu->set_flag(alu_last_instr); + + start_comp[i] = 0; + so_gpr[i] = &tmp[i]; + } + sfn_log << SfnLog::instr << *so_gpr[i] << "\n"; + } + + /* Write outputs to buffers. 
*/ + for (unsigned i = 0; i < m_so_info->num_outputs; i++) { + sfn_log << SfnLog::instr << "Write output buffer " << i + << " with register index " << m_so_info->output[i].register_index << "\n"; + + StreamOutIntruction *out_stream = + new StreamOutIntruction(*so_gpr[i], + m_so_info->output[i].num_components, + m_so_info->output[i].dst_offset - start_comp[i], + ((1 << m_so_info->output[i].num_components) - 1) << start_comp[i], + m_so_info->output[i].output_buffer, + m_so_info->output[i].stream); + m_proc.emit_export_instruction(out_stream); + m_enabled_stream_buffers_mask |= (1 << m_so_info->output[i].output_buffer) << m_so_info->output[i].stream * 4; + } + return true; +} + + +VertexStageExportForGS::VertexStageExportForGS(VertexStage &proc, + const r600_shader *gs_shader): + VertexStageWithOutputInfo(proc), + m_num_clip_dist(0), + m_gs_shader(gs_shader) +{ + +} + +bool VertexStageExportForGS::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) +{ + int ring_offset = -1; + const r600_shader_io& out_io = m_proc.sh_info().output[store_info.driver_location]; + + sfn_log << SfnLog::io << "check output " << store_info.driver_location + << " name=" << out_io.name<< " sid=" << out_io.sid << "\n"; + for (unsigned k = 0; k < m_gs_shader->ninput; ++k) { + auto& in_io = m_gs_shader->input[k]; + sfn_log << SfnLog::io << " against " << k << " name=" << in_io.name<< " sid=" << in_io.sid << "\n"; + + if (in_io.name == out_io.name && + in_io.sid == out_io.sid) { + ring_offset = in_io.ring_offset; + break; + } + } + + if (store_info.location == VARYING_SLOT_VIEWPORT) { + m_proc.sh_info().vs_out_viewport = 1; + m_proc.sh_info().vs_out_misc_write = 1; + return true; + } + + if (ring_offset == -1) { + sfn_log << SfnLog::err << "VS defines output at " + << store_info.driver_location << "name=" << out_io.name + << " sid=" << out_io.sid << " that is not consumed as GS input\n"; + return true; + } + + uint32_t write_mask = (1 << instr->num_components) - 1; + + GPRVector value = m_proc.vec_from_nir_with_fetch_constant(instr->src[store_info.data_loc], write_mask, + swizzle_from_comps(instr->num_components), true); + + auto ir = new MemRingOutIntruction(cf_mem_ring, mem_write, value, + ring_offset >> 2, 4, PValue()); + m_proc.emit_export_instruction(ir); + + m_proc.sh_info().output[store_info.driver_location].write_mask |= write_mask; + if (store_info.location == VARYING_SLOT_CLIP_DIST0 || + store_info.location == VARYING_SLOT_CLIP_DIST1) + m_num_clip_dist += 4; + + return true; +} + +void VertexStageExportForGS::finalize_exports() +{ + +} + +VertexStageExportForES::VertexStageExportForES(VertexStage& proc): + VertexStageExportBase(proc) +{ +} + +bool VertexStageExportForES::do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) +{ + return true; +} + +void VertexStageExportForES::finalize_exports() +{ + +} + +} diff --git a/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h new file mode 100644 index 000000000..46aee8071 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/r600/sfn/sfn_vertexstageexport.h @@ -0,0 +1,116 @@ +#ifndef VERTEXSTAGEEXPORT_H +#define VERTEXSTAGEEXPORT_H + +#include "sfn_shader_base.h" +#include <queue> + +namespace r600 { + +class VertexStage : public ShaderFromNirProcessor { +public: + using ShaderFromNirProcessor::ShaderFromNirProcessor; + + virtual PValue primitive_id() = 0; +}; + +class VertexStageExportBase +{ +public: + VertexStageExportBase(VertexStage& proc); + virtual 
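+   /* the concrete subclasses below choose the export path: the FS variant
+      emits pos/param exports, the GS variant writes the GS ring, and the
+      ES variant stores nothing at this point */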
~VertexStageExportBase(); + virtual void finalize_exports() = 0; + virtual bool do_process_outputs(nir_variable *output); + + virtual void emit_shader_start(); + + virtual void scan_store_output(nir_intrinsic_instr* instr); + bool store_output(nir_intrinsic_instr* instr); +protected: + + struct store_loc { + unsigned frac; + unsigned location; + unsigned driver_location; + int data_loc; + }; + virtual bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) = 0; + + VertexStage& m_proc; + int m_cur_clip_pos; + GPRVector m_clip_vertex; +}; + + +class VertexStageWithOutputInfo: public VertexStageExportBase +{ +protected: + VertexStageWithOutputInfo(VertexStage& proc); + void scan_store_output(nir_intrinsic_instr* instr) override; + void emit_shader_start() override; + bool do_process_outputs(nir_variable *output) override; +protected: + unsigned param_id(unsigned driver_location); + unsigned current_param() const; +private: + std::priority_queue<unsigned, std::vector<unsigned>, std::greater<unsigned> > m_param_driver_locations; + std::map<unsigned, unsigned> m_param_map; + unsigned m_current_param; +}; + + +class VertexStageExportForFS : public VertexStageWithOutputInfo +{ +public: + VertexStageExportForFS(VertexStage& proc, + const pipe_stream_output_info *so_info, + r600_pipe_shader *pipe_shader, + const r600_shader_key& key); + + void finalize_exports() override; +private: + bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; + + bool emit_varying_param(const store_loc& store_info, nir_intrinsic_instr* instr); + bool emit_varying_pos(const store_loc& store_info, nir_intrinsic_instr* instr, + std::array<uint32_t, 4> *swizzle_override = nullptr); + bool emit_clip_vertices(const store_loc &store_info, nir_intrinsic_instr* instr); + bool emit_stream(int stream); + + ExportInstruction *m_last_param_export; + ExportInstruction *m_last_pos_export; + + int m_num_clip_dist; + int m_enabled_stream_buffers_mask; + const pipe_stream_output_info *m_so_info; + r600_pipe_shader *m_pipe_shader; + const r600_shader_key& m_key; + + +}; + +class VertexStageExportForGS : public VertexStageWithOutputInfo +{ +public: + VertexStageExportForGS(VertexStage& proc, + const r600_shader *gs_shader); + void finalize_exports() override; + +private: + bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; + unsigned m_num_clip_dist; + const r600_shader *m_gs_shader; +}; + +class VertexStageExportForES : public VertexStageExportBase +{ +public: + VertexStageExportForES(VertexStage& proc); + void finalize_exports() override; +private: + bool do_store_output(const store_loc& store_info, nir_intrinsic_instr* instr) override; +}; + + +} + +#endif // VERTEXSTAGEEXPORT_H |