Diffstat (limited to 'lib/mesa/src')
-rw-r--r--  lib/mesa/src/compiler/nir/nir_to_ssa.c                              24
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.c                   98
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.h                  174
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_eu_validate.c               278
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_fs_validate.cpp               6
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c  11
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c      61
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_builder.h                3
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp    18
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp      4
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp                311
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.h                    6
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp                 16
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.h                    3
-rw-r--r--  lib/mesa/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp   14
15 files changed, 565 insertions, 462 deletions
diff --git a/lib/mesa/src/compiler/nir/nir_to_ssa.c b/lib/mesa/src/compiler/nir/nir_to_ssa.c
index 6accdd24b..44a505477 100644
--- a/lib/mesa/src/compiler/nir/nir_to_ssa.c
+++ b/lib/mesa/src/compiler/nir/nir_to_ssa.c
@@ -27,6 +27,7 @@
#include "nir.h"
#include <stdlib.h>
+#include <unistd.h>
/*
* Implements the classic to-SSA algorithm described by Cytron et al. in
@@ -88,7 +89,7 @@ insert_phi_nodes(nir_function_impl *impl)
w_start = w_end = 0;
iter_count++;
- nir_foreach_def(dest, reg) {
+ nir_foreach_def(reg, dest) {
nir_instr *def = dest->reg.parent_instr;
if (work[def->block->index] < iter_count)
W[w_end++] = def->block;
@@ -159,8 +160,7 @@ static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
* to preserve the information that this source is undefined
*/
nir_ssa_undef_instr *instr =
- nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components,
- reg->bit_size);
+ nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components);
/*
* We could just insert the undefined instruction before the instruction
@@ -219,9 +219,7 @@ rewrite_def_forwards(nir_dest *dest, void *_state)
state->states[index].num_defs);
list_del(&dest->reg.def_link);
- nir_ssa_dest_init(state->parent_instr, dest, reg->num_components,
- reg->bit_size, name);
- ralloc_free(name);
+ nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
/* push our SSA destination on the stack */
state->states[index].index++;
@@ -273,9 +271,7 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
instr->dest.write_mask = (1 << num_components) - 1;
list_del(&instr->dest.dest.reg.def_link);
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
- reg->bit_size, name);
- ralloc_free(name);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
if (nir_op_infos[instr->op].output_size == 0) {
/*
@@ -381,7 +377,7 @@ rewrite_instr_forward(nir_instr *instr, rewrite_state *state)
static void
rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state)
{
- nir_foreach_instr(instr, block) {
+ nir_foreach_instr(block, instr) {
if (instr->type != nir_instr_type_phi)
break;
@@ -389,7 +385,7 @@ rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state)
state->parent_instr = instr;
- nir_foreach_phi_src(src, phi_instr) {
+ nir_foreach_phi_src(phi_instr, src) {
if (src->pred == pred) {
rewrite_use(&src->src, state);
break;
@@ -434,7 +430,7 @@ rewrite_block(nir_block *block, rewrite_state *state)
* what we want because those instructions (vector gather, conditional
* select) will already be in SSA form.
*/
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe(block, instr) {
rewrite_instr_forward(instr, state);
}
@@ -455,7 +451,7 @@ rewrite_block(nir_block *block, rewrite_state *state)
for (unsigned i = 0; i < block->num_dom_children; i++)
rewrite_block(block->dom_children[i], state);
- nir_foreach_instr_reverse(instr, block) {
+ nir_foreach_instr_reverse(block, instr) {
rewrite_instr_backwards(instr, state);
}
}
@@ -533,7 +529,7 @@ nir_convert_to_ssa_impl(nir_function_impl *impl)
void
nir_convert_to_ssa(nir_shader *shader)
{
- nir_foreach_function(function, shader) {
+ nir_foreach_function(shader, function) {
if (function->impl)
nir_convert_to_ssa_impl(function->impl);
}
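
The hunks above swap the NIR iteration macros back to the older container-first argument order: the block or shader comes first, the loop cursor second. A rough sketch of that convention, assuming an expansion along the lines of the old nir.h definition (illustrative, not copied from any particular revision):

/* Container first, cursor variable second. */
#define nir_foreach_instr(block, instr) \
   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)

/* Usage matching the rewrite_phi_sources() hunk above: phis lead the
 * block, so the loop stops at the first non-phi instruction. */
nir_foreach_instr(block, instr) {
   if (instr->type != nir_instr_type_phi)
      break;
}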
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.c b/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.c
index 18145beb2..148920756 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -27,21 +27,62 @@
#include "main/errors.h"
#include "util/debug.h"
+static void
+shader_debug_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+ va_list args;
+
+ va_start(args, fmt);
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION, fmt, args);
+ va_end(args);
+}
+
+static void
+shader_perf_log_mesa(void *data, const char *fmt, ...)
+{
+ struct brw_context *brw = (struct brw_context *)data;
+
+ va_list args;
+ va_start(args, fmt);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ va_list args_copy;
+ va_copy(args_copy, args);
+ vfprintf(stderr, fmt, args_copy);
+ va_end(args_copy);
+ }
+
+ if (brw->perf_debug) {
+ GLuint msg_id = 0;
+ _mesa_gl_vdebug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_PERFORMANCE,
+ MESA_DEBUG_SEVERITY_MEDIUM, fmt, args);
+ }
+ va_end(args);
+}
+
#define COMMON_OPTIONS \
+ /* In order to help allow for better CSE at the NIR level we tell NIR to \
+ * split all ffma instructions during opt_algebraic and we then re-combine \
+ * them as a later step. \
+ */ \
+ .lower_ffma = true, \
.lower_sub = true, \
.lower_fdiv = true, \
.lower_scmp = true, \
- .lower_fmod32 = true, \
- .lower_fmod64 = false, \
+ .lower_fmod = true, \
.lower_bitfield_extract = true, \
.lower_bitfield_insert = true, \
.lower_uadd_carry = true, \
.lower_usub_borrow = true, \
.lower_fdiv = true, \
- .lower_flrp64 = true, \
- .native_integers = true, \
- .use_interpolated_input_intrinsics = true, \
- .vertex_id_zero_based = true
+ .native_integers = true
static const struct nir_shader_compiler_options scalar_nir_options = {
COMMON_OPTIONS,
@@ -66,26 +107,6 @@ static const struct nir_shader_compiler_options vector_nir_options = {
*/
.fdot_replicates = true,
- /* Prior to Gen6, there are no three source operations for SIMD4x2. */
- .lower_flrp32 = true,
-
- .lower_pack_snorm_2x16 = true,
- .lower_pack_unorm_2x16 = true,
- .lower_unpack_snorm_2x16 = true,
- .lower_unpack_unorm_2x16 = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
-};
-
-static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
- COMMON_OPTIONS,
-
- /* In the vec4 backend, our dpN instruction replicates its result to all the
- * components of a vec4. We would like NIR to give us replicated fdot
- * instructions because it can optimize better for us.
- */
- .fdot_replicates = true,
-
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
@@ -95,25 +116,24 @@ static const struct nir_shader_compiler_options vector_nir_options_gen6 = {
};
struct brw_compiler *
-brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
+brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler);
compiler->devinfo = devinfo;
+ compiler->shader_debug_log = shader_debug_log_mesa;
+ compiler->shader_perf_log = shader_perf_log_mesa;
brw_fs_alloc_reg_sets(compiler);
brw_vec4_alloc_reg_set(compiler);
- compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);
-
compiler->scalar_stage[MESA_SHADER_VERTEX] =
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
- compiler->scalar_stage[MESA_SHADER_TESS_CTRL] =
- devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TCS", true);
+ compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] =
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true);
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
- devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", true);
+ devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
compiler->scalar_stage[MESA_SHADER_COMPUTE] = true;
@@ -123,10 +143,12 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
compiler->glsl_compiler_options[i].MaxIfDepth =
devinfo->gen < 6 ? 16 : UINT_MAX;
+ compiler->glsl_compiler_options[i].EmitCondCodes = true;
+ compiler->glsl_compiler_options[i].EmitNoNoise = true;
compiler->glsl_compiler_options[i].EmitNoMainReturn = true;
compiler->glsl_compiler_options[i].EmitNoIndirectInput = true;
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
- compiler->glsl_compiler_options[i].LowerCombinedClipCullDistance = true;
+ compiler->glsl_compiler_options[i].LowerClipDistance = true;
bool is_scalar = compiler->scalar_stage[i];
@@ -138,20 +160,14 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
if (devinfo->gen < 7)
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
- if (is_scalar) {
- compiler->glsl_compiler_options[i].NirOptions = &scalar_nir_options;
- } else {
- compiler->glsl_compiler_options[i].NirOptions =
- devinfo->gen < 6 ? &vector_nir_options : &vector_nir_options_gen6;
- }
+ compiler->glsl_compiler_options[i].NirOptions =
+ is_scalar ? &scalar_nir_options : &vector_nir_options;
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
- compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;
}
compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
- compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectOutput = false;
if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
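
shader_perf_log_mesa above consumes its va_list twice, once for the stderr dump and once for _mesa_gl_vdebug. That is only legal via va_copy, because a va_list may be traversed just once. A minimal standalone sketch of the same pattern (the function name is made up for illustration):

#include <stdarg.h>
#include <stdio.h>

/* Toy version of the va_copy pattern used by shader_perf_log_mesa:
 * each consumer of the variadic arguments gets its own va_list. */
static void
log_twice(FILE *a, FILE *b, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   va_list args_copy;
   va_copy(args_copy, args);
   vfprintf(a, fmt, args_copy);   /* first traversal uses the copy */
   va_end(args_copy);

   vfprintf(b, fmt, args);        /* second traversal uses the original */
   va_end(args);
}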
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.h b/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.h
index 447d05b81..27a95a3c6 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -24,9 +24,8 @@
#pragma once
#include <stdio.h>
-#include "common/gen_device_info.h"
+#include "brw_device_info.h"
#include "main/mtypes.h"
-#include "main/macros.h"
#ifdef __cplusplus
extern "C" {
@@ -38,7 +37,7 @@ struct brw_geometry_program;
union gl_constant_value;
struct brw_compiler {
- const struct gen_device_info *devinfo;
+ const struct brw_device_info *devinfo;
struct {
struct ra_regs *regs;
@@ -86,19 +85,13 @@ struct brw_compiler {
* appear in *classes.
*/
int aligned_pairs_class;
- } fs_reg_sets[3];
+ } fs_reg_sets[2];
void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
bool scalar_stage[MESA_SHADER_STAGES];
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
-
- /**
- * Apply workarounds for SIN and COS output range problems.
- * This can negatively impact performance.
- */
- bool precise_trig;
};
@@ -160,13 +153,6 @@ struct brw_sampler_prog_key_data {
* For Sandybridge, which shader w/a we need for gather quirks.
*/
enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
-
- /**
- * Texture units that have a YUV image bound.
- */
- uint32_t y_u_v_image_mask;
- uint32_t y_uv_image_mask;
- uint32_t yx_xuxv_image_mask;
};
@@ -220,8 +206,6 @@ struct brw_tcs_prog_key
/** A bitfield of per-vertex outputs written. */
uint64_t outputs_written;
- bool quads_workaround;
-
struct brw_sampler_prog_key_data tex;
};
@@ -252,15 +236,17 @@ struct brw_wm_prog_key {
uint8_t iz_lookup;
bool stats_wm:1;
bool flat_shade:1;
+ bool persample_shading:1;
+ bool persample_2x:1;
unsigned nr_color_regions:5;
bool replicate_alpha:1;
+ bool render_to_fbo:1;
bool clamp_fragment_color:1;
- bool persample_interp:1;
- bool multisample_fbo:1;
+ bool compute_pos_offset:1;
+ bool compute_sample_id:1;
unsigned line_aa:2;
bool high_quality_derivatives:1;
bool force_dual_color_blend:1;
- bool coherent_fb_fetch:1;
uint16_t drawable_height;
uint64_t input_slots_valid;
@@ -338,7 +324,6 @@ struct brw_stage_prog_data {
uint32_t abo_start;
uint32_t image_start;
uint32_t shader_time_start;
- uint32_t plane_start[3];
/** @} */
} binding_table;
@@ -378,18 +363,15 @@ struct brw_wm_prog_data {
GLuint num_varying_inputs;
- uint8_t reg_blocks_0;
- uint8_t reg_blocks_2;
-
- uint8_t dispatch_grf_start_reg_2;
- uint32_t prog_offset_2;
+ GLuint dispatch_grf_start_reg_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
struct {
/** @{
* surface indices the WM-specific surfaces
*/
uint32_t render_target_start;
- uint32_t render_target_read_start;
/** @} */
} binding_table;
@@ -397,18 +379,16 @@ struct brw_wm_prog_data {
bool computed_stencil;
bool early_fragment_tests;
- bool dispatch_8;
- bool dispatch_16;
+ bool no_8;
bool dual_src_blend;
- bool persample_dispatch;
bool uses_pos_offset;
bool uses_omask;
bool uses_kill;
bool uses_src_depth;
bool uses_src_w;
bool uses_sample_mask;
- bool has_side_effects;
bool pulls_bary;
+ uint32_t prog_offset_16;
/**
* Mask of which interpolation modes are required by the fragment shader.
@@ -417,12 +397,6 @@ struct brw_wm_prog_data {
uint32_t barycentric_interp_modes;
/**
- * Mask of which FS inputs are marked flat by the shader source. This is
- * needed for setting up 3DSTATE_SF/SBE.
- */
- uint32_t flat_inputs;
-
- /**
* Map from gl_varying_slot to the position within the FS setup data
* payload where the varying's attribute vertex deltas should be delivered.
* For varying slots that are not used by the FS, the value is -1.
@@ -430,28 +404,15 @@ struct brw_wm_prog_data {
int urb_setup[VARYING_SLOT_MAX];
};
-struct brw_push_const_block {
- unsigned dwords; /* Dword count, not reg aligned */
- unsigned regs;
- unsigned size; /* Bytes, register aligned */
-};
-
struct brw_cs_prog_data {
struct brw_stage_prog_data base;
GLuint dispatch_grf_start_reg_16;
unsigned local_size[3];
unsigned simd_size;
- unsigned threads;
bool uses_barrier;
bool uses_num_work_groups;
- int thread_local_id_index;
-
- struct {
- struct brw_push_const_block cross_thread;
- struct brw_push_const_block per_thread;
- struct brw_push_const_block total;
- } push;
+ unsigned local_invocation_id_regs;
struct {
/** @{
@@ -566,7 +527,7 @@ GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
}
-void brw_compute_vue_map(const struct gen_device_info *devinfo,
+void brw_compute_vue_map(const struct brw_device_info *devinfo,
struct brw_vue_map *vue_map,
GLbitfield64 slots_valid,
bool separate_shader);
@@ -620,8 +581,6 @@ struct brw_vue_prog_data {
GLuint urb_read_length;
GLuint total_grf;
- uint32_t cull_distance_mask;
-
/* Used for calculating urb partitions. In the VS, this is the size of the
* URB entry used for both input and output to the thread. In the GS, this
* is the size of the URB entry used for output.
@@ -637,7 +596,6 @@ struct brw_vs_prog_data {
GLbitfield64 inputs_read;
unsigned nr_attributes;
- unsigned nr_attribute_slots;
bool uses_vertexid;
bool uses_instanceid;
@@ -731,28 +689,11 @@ struct brw_gs_prog_data
unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */];
};
-#define DEFINE_PROG_DATA_DOWNCAST(stage) \
-static inline struct brw_##stage##_prog_data * \
-brw_##stage##_prog_data(struct brw_stage_prog_data *prog_data) \
-{ \
- return (struct brw_##stage##_prog_data *) prog_data; \
-}
-DEFINE_PROG_DATA_DOWNCAST(vue)
-DEFINE_PROG_DATA_DOWNCAST(vs)
-DEFINE_PROG_DATA_DOWNCAST(tcs)
-DEFINE_PROG_DATA_DOWNCAST(tes)
-DEFINE_PROG_DATA_DOWNCAST(gs)
-DEFINE_PROG_DATA_DOWNCAST(wm)
-DEFINE_PROG_DATA_DOWNCAST(cs)
-DEFINE_PROG_DATA_DOWNCAST(ff_gs)
-DEFINE_PROG_DATA_DOWNCAST(clip)
-DEFINE_PROG_DATA_DOWNCAST(sf)
-#undef DEFINE_PROG_DATA_DOWNCAST
/** @} */
struct brw_compiler *
-brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo);
+brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
/**
* Compile a vertex shader.
@@ -833,7 +774,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
struct gl_program *prog,
int shader_time_index8,
int shader_time_index16,
- bool allow_spilling,
bool use_rep_send,
unsigned *final_assembly_size,
char **error_str);
@@ -853,86 +793,12 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
unsigned *final_assembly_size,
char **error_str);
-static inline uint32_t
-encode_slm_size(unsigned gen, uint32_t bytes)
-{
- uint32_t slm_size = 0;
-
- /* Shared Local Memory is specified as powers of two, and encoded in
- * INTERFACE_DESCRIPTOR_DATA with the following representations:
- *
- * Size | 0 kB | 1 kB | 2 kB | 4 kB | 8 kB | 16 kB | 32 kB | 64 kB |
- * -------------------------------------------------------------------
- * Gen7-8 | 0 | none | none | 1 | 2 | 4 | 8 | 16 |
- * -------------------------------------------------------------------
- * Gen9+ | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
- */
- assert(bytes <= 64 * 1024);
-
- if (bytes > 0) {
- /* Shared Local Memory Size is specified as powers of two. */
- slm_size = util_next_power_of_two(bytes);
-
- if (gen >= 9) {
- /* Use a minimum of 1kB; turn an exponent of 10 (1024 kB) into 1. */
- slm_size = ffs(MAX2(slm_size, 1024)) - 10;
- } else {
- /* Use a minimum of 4kB; convert to the pre-Gen9 representation. */
- slm_size = MAX2(slm_size, 4096) / 4096;
- }
- }
-
- return slm_size;
-}
-
/**
- * Return true if the given shader stage is dispatched contiguously by the
- * relevant fixed function starting from channel 0 of the SIMD thread, which
- * implies that the dispatch mask of a thread can be assumed to have the form
- * '2^n - 1' for some n.
+ * Fill out local id payload for compute shader according to cs_prog_data.
*/
-static inline bool
-brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
- gl_shader_stage stage,
- const struct brw_stage_prog_data *prog_data)
-{
- /* The code below makes assumptions about the hardware's thread dispatch
- * behavior that could be proven wrong in future generations -- Make sure
- * to do a full test run with brw_fs_test_dispatch_packing() hooked up to
- * the NIR front-end before changing this assertion.
- */
- assert(devinfo->gen <= 9);
-
- switch (stage) {
- case MESA_SHADER_FRAGMENT: {
- /* The PSD discards subspans coming in with no lit samples, which in the
- * per-pixel shading case implies that each subspan will either be fully
- * lit (due to the VMask being used to allow derivative computations),
- * or not dispatched at all. In per-sample dispatch mode individual
- * samples from the same subspan have a fixed relative location within
- * the SIMD thread, so dispatch of unlit samples cannot be avoided in
- * general and we should return false.
- */
- const struct brw_wm_prog_data *wm_prog_data =
- (const struct brw_wm_prog_data *)prog_data;
- return !wm_prog_data->persample_dispatch;
- }
- case MESA_SHADER_COMPUTE:
- /* Compute shaders will be spawned with either a fully enabled dispatch
- * mask or with whatever bottom/right execution mask was given to the
- * GPGPU walker command to be used along the workgroup edges -- In both
- * cases the dispatch mask is required to be tightly packed for our
- * invocation index calculations to work.
- */
- return true;
- default:
- /* Most remaining fixed functions are limited to use a packed dispatch
- * mask due to the hardware representation of the dispatch mask as a
- * single counter representing the number of enabled channels.
- */
- return true;
- }
-}
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
+ void *buffer, uint32_t threads, uint32_t stride);
#ifdef __cplusplus
} /* extern "C" */
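
The encode_slm_size() helper deleted above is the table in its comment turned into arithmetic. Two worked cases, assuming the usual util_next_power_of_two() and ffs() semantics:

/* encode_slm_size(gen=9, bytes=5000):
 *   util_next_power_of_two(5000) = 8192            (8 kB)
 *   ffs(MAX2(8192, 1024)) - 10   = 14 - 10 = 4     Gen9+ row: 8 kB -> 4
 *
 * encode_slm_size(gen=7, bytes=5000):
 *   MAX2(8192, 4096) / 4096      = 2               Gen7-8 row: 8 kB -> 2
 */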
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_eu_validate.c b/lib/mesa/src/mesa/drivers/dri/i965/brw_eu_validate.c
index 0e736ed01..2de2ea1ba 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_eu_validate.c
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_eu_validate.c
@@ -55,31 +55,251 @@ cat(struct string *dest, const struct string src)
} while(0)
static bool
-src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
+src0_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
{
return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
}
static bool
-src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
+src1_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
{
return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
}
-static bool
-src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
-{
- return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE;
-}
+enum gen {
+ GEN4 = (1 << 0),
+ GEN45 = (1 << 1),
+ GEN5 = (1 << 2),
+ GEN6 = (1 << 3),
+ GEN7 = (1 << 4),
+ GEN75 = (1 << 5),
+ GEN8 = (1 << 6),
+ GEN9 = (1 << 7),
+ GEN_ALL = ~0
+};
+
+#define GEN_GE(gen) (~((gen) - 1) | gen)
+#define GEN_LE(gen) (((gen) - 1) | gen)
+
+struct inst_info {
+ enum gen gen;
+};
+
+static const struct inst_info inst_info[128] = {
+ [BRW_OPCODE_ILLEGAL] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MOV] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SEL] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MOVI] = {
+ .gen = GEN_GE(GEN45),
+ },
+ [BRW_OPCODE_NOT] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_AND] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_OR] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_XOR] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SHR] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SHL] = {
+ .gen = GEN_ALL,
+ },
+ /* BRW_OPCODE_DIM / BRW_OPCODE_SMOV */
+ /* Reserved - 11 */
+ [BRW_OPCODE_ASR] = {
+ .gen = GEN_ALL,
+ },
+ /* Reserved - 13-15 */
+ [BRW_OPCODE_CMP] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_CMPN] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_CSEL] = {
+ .gen = GEN_GE(GEN8),
+ },
+ [BRW_OPCODE_F32TO16] = {
+ .gen = GEN7 | GEN75,
+ },
+ [BRW_OPCODE_F16TO32] = {
+ .gen = GEN7 | GEN75,
+ },
+ /* Reserved - 21-22 */
+ [BRW_OPCODE_BFREV] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_BFE] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_BFI1] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_BFI2] = {
+ .gen = GEN_GE(GEN7),
+ },
+ /* Reserved - 27-31 */
+ [BRW_OPCODE_JMPI] = {
+ .gen = GEN_ALL,
+ },
+ /* BRW_OPCODE_BRD */
+ [BRW_OPCODE_IF] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_IFF] = { /* also BRW_OPCODE_BRC */
+ .gen = GEN_LE(GEN5),
+ },
+ [BRW_OPCODE_ELSE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_ENDIF] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DO] = { /* also BRW_OPCODE_CASE */
+ .gen = GEN_LE(GEN5),
+ },
+ [BRW_OPCODE_WHILE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_BREAK] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_CONTINUE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_HALT] = {
+ .gen = GEN_ALL,
+ },
+ /* BRW_OPCODE_CALLA */
+ /* BRW_OPCODE_MSAVE / BRW_OPCODE_CALL */
+ /* BRW_OPCODE_MREST / BRW_OPCODE_RET */
+ /* BRW_OPCODE_PUSH / BRW_OPCODE_FORK / BRW_OPCODE_GOTO */
+ /* BRW_OPCODE_POP */
+ [BRW_OPCODE_WAIT] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SEND] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SENDC] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SENDS] = {
+ .gen = GEN_GE(GEN9),
+ },
+ [BRW_OPCODE_SENDSC] = {
+ .gen = GEN_GE(GEN9),
+ },
+ /* Reserved 53-55 */
+ [BRW_OPCODE_MATH] = {
+ .gen = GEN_GE(GEN6),
+ },
+ /* Reserved 57-63 */
+ [BRW_OPCODE_ADD] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MUL] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_AVG] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_FRC] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDU] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDD] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_RNDZ] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MAC] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_MACH] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_LZD] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_FBH] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_FBL] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_CBIT] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_ADDC] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_SUBB] = {
+ .gen = GEN_GE(GEN7),
+ },
+ [BRW_OPCODE_SAD2] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_SADA2] = {
+ .gen = GEN_ALL,
+ },
+ /* Reserved 82-83 */
+ [BRW_OPCODE_DP4] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DPH] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DP3] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_DP2] = {
+ .gen = GEN_ALL,
+ },
+ /* Reserved 88 */
+ [BRW_OPCODE_LINE] = {
+ .gen = GEN_ALL,
+ },
+ [BRW_OPCODE_PLN] = {
+ .gen = GEN_GE(GEN45),
+ },
+ [BRW_OPCODE_MAD] = {
+ .gen = GEN_GE(GEN6),
+ },
+ [BRW_OPCODE_LRP] = {
+ .gen = GEN_GE(GEN6),
+ },
+ /* Reserved 93-124 */
+ /* BRW_OPCODE_NENOP */
+ [BRW_OPCODE_NOP] = {
+ .gen = GEN_ALL,
+ },
+};
static unsigned
-num_sources_from_inst(const struct gen_device_info *devinfo,
+num_sources_from_inst(const struct brw_device_info *devinfo,
const brw_inst *inst)
{
- const struct opcode_desc *desc =
- brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
unsigned math_function;
if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
@@ -94,10 +314,8 @@ num_sources_from_inst(const struct gen_device_info *devinfo,
*/
return 0;
}
- } else if (desc) {
- return desc->nsrc;
} else {
- return 0;
+ return opcode_descs[brw_inst_opcode(devinfo, inst)].nsrc;
}
switch (math_function) {
@@ -123,18 +341,34 @@ num_sources_from_inst(const struct gen_device_info *devinfo,
}
}
+static enum gen
+gen_from_devinfo(const struct brw_device_info *devinfo)
+{
+ switch (devinfo->gen) {
+ case 4: return devinfo->is_g4x ? GEN45 : GEN4;
+ case 5: return GEN5;
+ case 6: return GEN6;
+ case 7: return devinfo->is_haswell ? GEN75 : GEN7;
+ case 8: return GEN8;
+ case 9: return GEN9;
+ default:
+ unreachable("not reached");
+ }
+}
+
static bool
-is_unsupported_inst(const struct gen_device_info *devinfo,
+is_unsupported_inst(const struct brw_device_info *devinfo,
const brw_inst *inst)
{
- return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL;
+ enum gen gen = gen_from_devinfo(devinfo);
+ return (inst_info[brw_inst_opcode(devinfo, inst)].gen & gen) == 0;
}
bool
brw_validate_instructions(const struct brw_codegen *p, int start_offset,
struct annotation_info *annotation)
{
- const struct gen_device_info *devinfo = p->devinfo;
+ const struct brw_device_info *devinfo = p->devinfo;
const void *store = p->store + start_offset / 16;
bool valid = true;
@@ -163,18 +397,6 @@ brw_validate_instructions(const struct brw_codegen *p, int start_offset,
ERROR_IF(is_unsupported_inst(devinfo, inst),
"Instruction not supported on this Gen");
- if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
- ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) !=
- BRW_ADDRESS_DIRECT, "send must use direct addressing");
-
- if (devinfo->gen >= 7) {
- ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF");
- ERROR_IF(brw_inst_eot(devinfo, inst) &&
- brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
- "send with EOT must use g112-g127");
- }
- }
-
if (error_msg.str && annotation) {
annotation_insert_error(annotation, src_offset, error_msg.str);
}
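
The GEN_GE()/GEN_LE() range macros introduced above work because the enum gen values are single ascending bits, so a range mask is plain bit arithmetic. Two worked cases (low byte shown):

/* GEN_GE(GEN7): GEN7 = 1 << 4 = 0x10
 *   ~(0x10 - 1) | 0x10 = ~0x0f | 0x10 = 0xf0   -> GEN7 | GEN75 | GEN8 | GEN9
 *
 * GEN_LE(GEN5): GEN5 = 1 << 2 = 0x04
 *   (0x04 - 1) | 0x04  = 0x03 | 0x04  = 0x07   -> GEN4 | GEN45 | GEN5
 */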
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/lib/mesa/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index 676942c19..90edd023b 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -43,14 +43,14 @@ fs_visitor::validate()
{
foreach_block_and_inst (block, fs_inst, inst, cfg) {
if (inst->dst.file == VGRF) {
- fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <=
+ fsv_assert(inst->dst.reg_offset + inst->regs_written <=
alloc.sizes[inst->dst.nr]);
}
for (unsigned i = 0; i < inst->sources; i++) {
if (inst->src[i].file == VGRF) {
- fsv_assert(inst->src[i].offset / REG_SIZE + regs_read(inst, i) <=
- alloc.sizes[inst->src[i].nr]);
+ fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
+ (int)alloc.sizes[inst->src[i].nr]);
}
}
}
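
This hunk trades the newer byte-based accounting (inst->dst.offset divided by REG_SIZE) for the older whole-register reg_offset field. The two conventions differ by REG_SIZE, the 32-byte width of one GRF on these parts. A conversion sketch (the helper name is made up for illustration):

/* One i965 GRF register is 32 bytes (REG_SIZE). */
static unsigned
bytes_to_regs(unsigned byte_offset)
{
   return byte_offset / 32;   /* e.g. byte offset 64 -> GRF offset 2 */
}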
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c b/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
index 0bb766d70..9c65e540d 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
@@ -39,11 +39,12 @@ struct attr_wa_state {
};
static bool
-apply_attr_wa_block(nir_block *block, struct attr_wa_state *state)
+apply_attr_wa_block(nir_block *block, void *void_state)
{
+ struct attr_wa_state *state = void_state;
nir_builder *b = &state->builder;
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
@@ -155,16 +156,14 @@ brw_nir_apply_attribute_workarounds(nir_shader *shader,
.wa_flags = attrib_wa_flags,
};
- nir_foreach_function(func, shader) {
+ nir_foreach_function(shader, func) {
if (!func->impl)
continue;
nir_builder_init(&state.builder, func->impl);
state.impl_progress = false;
- nir_foreach_block(block, func->impl) {
- apply_attr_wa_block(block, &state);
- }
+ nir_foreach_block(func->impl, apply_attr_wa_block, &state);
if (state.impl_progress) {
nir_metadata_preserve(func->impl, nir_metadata_block_index |
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
index 14a9a0fac..5ff2cba04 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -44,7 +44,7 @@ are_all_uses_fadd(nir_ssa_def *def)
if (!list_empty(&def->if_uses))
return false;
- nir_foreach_use(use_src, def) {
+ nir_foreach_use(def, use_src) {
nir_instr *use_instr = use_src->parent_instr;
if (use_instr->type != nir_instr_type_alu)
@@ -84,17 +84,6 @@ get_mul_for_src(nir_alu_src *src, int num_components,
return NULL;
nir_alu_instr *alu = nir_instr_as_alu(instr);
-
- /* We want to bail if any of the other ALU operations involved is labeled
- * exact. One reason for this is that, while the value that is changing is
- * actually the result of the add and not the multiply, the intention of
- * the user when they specify an exact multiply is that they want *that*
- * value and what they don't care about is the add. Another reason is that
- * SPIR-V explicitly requires this behaviour.
- */
- if (alu->exact)
- return NULL;
-
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
@@ -113,7 +102,7 @@ get_mul_for_src(nir_alu_src *src, int num_components,
break;
case nir_op_fmul:
- /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+ /* Only absorb a fmul into a ffma if the fmul is only used in fadd
* operations. This prevents us from being too aggressive with our
* fusing which can actually lead to more instructions.
*/
@@ -167,11 +156,11 @@ any_alu_src_is_a_constant(nir_alu_src srcs[])
}
static bool
-brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
+brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
{
- bool progress = false;
+ struct peephole_ffma_state *state = void_state;
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_alu)
continue;
@@ -179,9 +168,7 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
if (add->op != nir_op_fadd)
continue;
- assert(add->dest.dest.is_ssa);
- if (add->exact)
- continue;
+ /* TODO: Maybe bail if this expression is considered "precise"? */
assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
@@ -214,8 +201,6 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
if (mul == NULL)
continue;
- unsigned bit_size = add->dest.dest.ssa.bit_size;
-
nir_ssa_def *mul_src[2];
mul_src[0] = mul->src[0].src.ssa;
mul_src[1] = mul->src[1].src.ssa;
@@ -231,10 +216,11 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
if (abs) {
for (unsigned i = 0; i < 2; i++) {
- nir_alu_instr *abs = nir_alu_instr_create(mem_ctx, nir_op_fabs);
+ nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fabs);
abs->src[0].src = nir_src_for_ssa(mul_src[i]);
nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
- mul_src[i]->num_components, bit_size, NULL);
+ mul_src[i]->num_components, NULL);
abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
nir_instr_insert_before(&add->instr, &abs->instr);
mul_src[i] = &abs->dest.dest.ssa;
@@ -242,16 +228,17 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
}
if (negate) {
- nir_alu_instr *neg = nir_alu_instr_create(mem_ctx, nir_op_fneg);
+ nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx,
+ nir_op_fneg);
neg->src[0].src = nir_src_for_ssa(mul_src[0]);
nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
- mul_src[0]->num_components, bit_size, NULL);
+ mul_src[0]->num_components, NULL);
neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
nir_instr_insert_before(&add->instr, &neg->instr);
mul_src[0] = &neg->dest.dest.ssa;
}
- nir_alu_instr *ffma = nir_alu_instr_create(mem_ctx, nir_op_ffma);
+ nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
ffma->dest.saturate = add->dest.saturate;
ffma->dest.write_mask = add->dest.write_mask;
@@ -266,7 +253,6 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
add->dest.dest.ssa.num_components,
- bit_size,
add->dest.dest.ssa.name);
nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
nir_src_for_ssa(&ffma->dest.dest.ssa));
@@ -275,27 +261,28 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *mem_ctx)
assert(list_empty(&add->dest.dest.ssa.uses));
nir_instr_remove(&add->instr);
- progress = true;
+ state->progress = true;
}
- return progress;
+ return true;
}
static bool
brw_nir_opt_peephole_ffma_impl(nir_function_impl *impl)
{
- bool progress = false;
- void *mem_ctx = ralloc_parent(impl);
+ struct peephole_ffma_state state;
- nir_foreach_block(block, impl) {
- progress |= brw_nir_opt_peephole_ffma_block(block, mem_ctx);
- }
+ state.mem_ctx = ralloc_parent(impl);
+ state.impl = impl;
+ state.progress = false;
+
+ nir_foreach_block(impl, brw_nir_opt_peephole_ffma_block, &state);
- if (progress)
+ if (state.progress)
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
- return progress;
+ return state.progress;
}
bool
@@ -303,7 +290,7 @@ brw_nir_opt_peephole_ffma(nir_shader *shader)
{
bool progress = false;
- nir_foreach_function(function, shader) {
+ nir_foreach_function(shader, function) {
if (function->impl)
progress |= brw_nir_opt_peephole_ffma_impl(function->impl);
}
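
The pass above rewrites an fadd fed by an fmul into one fused multiply-add. In scalar terms the peephole is:

/* Shape of the rewrite, as scalar pseudo-C (illustrative only):
 *
 *    t = a * b;          fmul
 *    d = t + c;          fadd, and every use of t is an fadd
 * =>
 *    d = fmaf(a, b, c);  single ffma
 *
 * The are_all_uses_fadd() check above exists because absorbing a multiply
 * that has other users would duplicate the multiply rather than save work.
 */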
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index dab6e0377..3a8617e05 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -95,7 +95,7 @@ namespace brw {
vec4_builder
at_end() const
{
- return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
+ return at(NULL, (exec_node *)&shader->instructions.tail);
}
/**
@@ -373,7 +373,6 @@ namespace brw {
ALU1(CBIT)
ALU2(CMPN)
ALU3(CSEL)
- ALU1(DIM)
ALU2(DP2)
ALU2(DP3)
ALU2(DP4)
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
index c531fba03..0c8224f5f 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
@@ -68,10 +68,10 @@ opt_cmod_propagation_local(bblock_t *block)
bool read_flag = false;
foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
- if (regions_overlap(inst->src[0], inst->size_read(0),
- scan_inst->dst, scan_inst->size_written)) {
+ if (inst->src[0].in_range(scan_inst->dst,
+ scan_inst->regs_written)) {
if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
- scan_inst->dst.offset != inst->src[0].offset ||
+ scan_inst->dst.reg_offset != inst->src[0].reg_offset ||
(scan_inst->dst.writemask != WRITEMASK_X &&
scan_inst->dst.writemask != WRITEMASK_XYZW) ||
(scan_inst->dst.writemask == WRITEMASK_XYZW &&
@@ -115,18 +115,6 @@ opt_cmod_propagation_local(bblock_t *block)
break;
}
- /* The conditional mod of the CMP/CMPN instructions behaves
- * specially because the flag output is not calculated from the
- * result of the instruction, but the other way around, which
- * means that even if the condmod to propagate and the condmod
- * from the CMP instruction are the same they will in general give
- * different results because they are evaluated based on different
- * inputs.
- */
- if (scan_inst->opcode == BRW_OPCODE_CMP ||
- scan_inst->opcode == BRW_OPCODE_CMPN)
- break;
-
/* Otherwise, try propagating the conditional. */
enum brw_conditional_mod cond =
inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
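
For context, conditional-mod propagation folds a flag-setting comparison back onto the instruction that produced the compared value. In i965 pseudo-assembly (illustrative):

/*    add        tmp, a, b
 *    cmp.ge.f0  null, tmp, 0.0f
 * =>
 *    add.ge.f0  tmp, a, b
 *
 * The guard deleted above had excluded CMP/CMPN as propagation targets,
 * since their flag result is not derived from the destination value. */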
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
index 19c685fee..28002c56c 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_surface_builder.cpp
@@ -145,7 +145,7 @@ namespace brw {
vec4_instruction *inst =
bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
inst->mlen = sz;
- inst->size_written = ret_sz * REG_SIZE;
+ inst->regs_written = ret_sz;
inst->header_size = header_sz;
inst->predicate = pred;
@@ -221,7 +221,7 @@ namespace brw {
emit_insert(bld, addr, dims, has_simd4x2),
has_simd4x2 ? 1 : dims,
emit_insert(bld, src_reg(srcs), size, has_simd4x2),
- has_simd4x2 && size ? 1 : size,
+ has_simd4x2 ? 1 : size,
surface, op, rsize, pred);
}
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index 498fb7cfb..9b49b7df8 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -29,7 +29,6 @@
#include "brw_nir.h"
#include "brw_vec4_tcs.h"
-#include "brw_fs.h"
namespace brw {
@@ -49,12 +48,62 @@ vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
void
+vec4_tcs_visitor::emit_nir_code()
+{
+ if (key->program_string_id != 0) {
+ /* We have a real application-supplied TCS, emit real code. */
+ vec4_visitor::emit_nir_code();
+ } else {
+ /* There is no TCS; automatically generate a passthrough shader
+ * that writes the API-specified default tessellation levels and
+ * copies VS outputs to TES inputs.
+ */
+ uniforms = 2;
+ uniform_size[0] = 1;
+ uniform_size[1] = 1;
+
+ uint64_t varyings = key->outputs_written;
+
+ src_reg vertex_offset(this, glsl_type::uint_type);
+ emit(MUL(dst_reg(vertex_offset), invocation_id,
+ brw_imm_ud(prog_data->vue_map.num_per_vertex_slots)));
+
+ while (varyings != 0) {
+ const int varying = ffsll(varyings) - 1;
+
+ unsigned in_offset = input_vue_map->varying_to_slot[varying];
+ unsigned out_offset = prog_data->vue_map.varying_to_slot[varying];
+ assert(out_offset >= 2);
+
+ dst_reg val(this, glsl_type::vec4_type);
+ emit_input_urb_read(val, invocation_id, in_offset, src_reg());
+ emit_urb_write(src_reg(val), WRITEMASK_XYZW, out_offset,
+ vertex_offset);
+
+ varyings &= ~BITFIELD64_BIT(varying);
+ }
+
+ /* Only write the tessellation factors from invocation 0.
+ * There's no point in making other threads do redundant work.
+ */
+ emit(CMP(dst_null_d(), invocation_id, brw_imm_ud(0),
+ BRW_CONDITIONAL_EQ));
+ emit(IF(BRW_PREDICATE_NORMAL));
+ emit_urb_write(src_reg(UNIFORM, 0, glsl_type::vec4_type),
+ WRITEMASK_XYZW, 0, src_reg());
+ emit_urb_write(src_reg(UNIFORM, 1, glsl_type::vec4_type),
+ WRITEMASK_XYZW, 1, src_reg());
+ emit(BRW_OPCODE_ENDIF);
+ }
+}
+
+void
vec4_tcs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
{
}
dst_reg *
-vec4_tcs_visitor::make_reg_for_system_value(int location)
+vec4_tcs_visitor::make_reg_for_system_value(int location, const glsl_type *type)
{
return NULL;
}
@@ -135,9 +184,7 @@ vec4_tcs_visitor::emit_thread_end()
* we don't have stride in the vec4 world, nor UV immediates in
* align16, so we need an opcode to get invocation_id<0,4,0>.
*/
- set_condmod(BRW_CONDITIONAL_Z,
- emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(),
- invocation_id));
+ emit(TCS_OPCODE_SRC0_010_IS_ZERO, dst_null_d(), invocation_id);
emit(IF(BRW_PREDICATE_NORMAL));
for (unsigned i = 0; i < key->input_vertices; i += 2) {
/* If we have an odd number of input vertices, the last will be
@@ -166,7 +213,6 @@ void
vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
const src_reg &vertex_index,
unsigned base_offset,
- unsigned first_component,
const src_reg &indirect_offset)
{
vec4_instruction *inst;
@@ -192,16 +238,13 @@ vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
} else {
- src_reg src = src_reg(temp);
- src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
- emit(MOV(dst, src));
+ emit(MOV(dst, src_reg(temp)));
}
}
void
vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
unsigned base_offset,
- unsigned first_component,
const src_reg &indirect_offset)
{
vec4_instruction *inst;
@@ -217,12 +260,6 @@ vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
read->offset = base_offset;
read->mlen = 1;
read->base_mrf = -1;
-
- if (first_component) {
- src_reg src = src_reg(dst);
- src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
- emit(MOV(dst, src));
- }
}
void
@@ -249,6 +286,53 @@ vec4_tcs_visitor::emit_urb_write(const src_reg &value,
inst->base_mrf = -1;
}
+static unsigned
+tesslevel_outer_components(GLenum tes_primitive_mode)
+{
+ switch (tes_primitive_mode) {
+ case GL_QUADS:
+ return 4;
+ case GL_TRIANGLES:
+ return 3;
+ case GL_ISOLINES:
+ return 2;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+ return 0;
+}
+
+static unsigned
+tesslevel_inner_components(GLenum tes_primitive_mode)
+{
+ switch (tes_primitive_mode) {
+ case GL_QUADS:
+ return 2;
+ case GL_TRIANGLES:
+ return 1;
+ case GL_ISOLINES:
+ return 0;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+ return 0;
+}
+
+/**
+ * Given a normal .xyzw writemask, convert it to a writemask for a vector
+ * that's stored backwards, i.e. .wzyx.
+ */
+static unsigned
+writemask_for_backwards_vector(unsigned mask)
+{
+ unsigned new_mask = 0;
+
+ for (int i = 0; i < 4; i++)
+ new_mask |= ((mask >> i) & 1) << (3 - i);
+
+ return new_mask;
+}
+
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
@@ -271,14 +355,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
src_reg vertex_index =
- vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
+ vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0]))
: get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
- emit_input_urb_read(dst, vertex_index, imm_offset,
- nir_intrinsic_component(instr), indirect_offset);
+ emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset);
break;
}
case nir_intrinsic_load_input:
@@ -287,7 +370,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_load_output:
case nir_intrinsic_load_per_vertex_output: {
src_reg indirect_offset = get_indirect_offset(instr);
- unsigned imm_offset = instr->const_index[0];
+ unsigned imm_offset = instr->const_index[0];
dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
@@ -302,15 +385,14 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case GL_QUADS: {
/* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
dst_reg tmp(this, glsl_type::vec4_type);
- emit_output_urb_read(tmp, 0, 0, src_reg());
+ emit_output_urb_read(tmp, 0, src_reg());
emit(MOV(writemask(dst, WRITEMASK_XY),
swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
break;
}
case GL_TRIANGLES:
/* DWord 4; use offset 1 but normal swizzle/writemask. */
- emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
- src_reg());
+ emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
break;
case GL_ISOLINES:
/* All channels are undefined. */
@@ -342,11 +424,10 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
}
dst_reg tmp(this, glsl_type::vec4_type);
- emit_output_urb_read(tmp, 1, 0, src_reg());
+ emit_output_urb_read(tmp, 1, src_reg());
emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
} else {
- emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
- indirect_offset);
+ emit_output_urb_read(dst, imm_offset, indirect_offset);
}
break;
}
@@ -359,67 +440,53 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
- /* The passthrough shader writes the whole patch header as two vec4s;
- * skip all the gl_TessLevelInner/Outer swizzling.
- */
- if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) {
- if (imm_offset == 0) {
- value.type = BRW_REGISTER_TYPE_F;
+ if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
+ value.type = BRW_REGISTER_TYPE_F;
- mask &=
- (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;
+ mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;
- /* This is a write to gl_TessLevelInner[], which lives in the
- * Patch URB header. The layout depends on the domain.
+ /* This is a write to gl_TessLevelInner[], which lives in the
+ * Patch URB header. The layout depends on the domain.
+ */
+ switch (key->tes_primitive_mode) {
+ case GL_QUADS:
+ /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
+ * We use an XXYX swizzle to reverse put .xy in the .wz
+ * channels, and use a .zw writemask.
*/
- switch (key->tes_primitive_mode) {
- case GL_QUADS:
- /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
- * We use an XXYX swizzle to reverse put .xy in the .wz
- * channels, and use a .zw writemask.
- */
- swiz = BRW_SWIZZLE4(0, 0, 1, 0);
- mask = writemask_for_backwards_vector(mask);
- break;
- case GL_TRIANGLES:
- /* gl_TessLevelInner[].x lives at DWord 4, so we set the
- * writemask to X and bump the URB offset by 1.
- */
- imm_offset = 1;
- break;
- case GL_ISOLINES:
- /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
- return;
- default:
- unreachable("Bogus tessellation domain");
- }
- } else if (imm_offset == 1) {
- value.type = BRW_REGISTER_TYPE_F;
-
- mask &=
- (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;
-
- /* This is a write to gl_TessLevelOuter[] which lives in the
- * Patch URB Header at DWords 4-7. However, it's reversed, so
- * instead of .xyzw we have .wzyx.
+ swiz = BRW_SWIZZLE4(0, 0, 1, 0);
+ mask = writemask_for_backwards_vector(mask);
+ break;
+ case GL_TRIANGLES:
+ /* gl_TessLevelInner[].x lives at DWord 4, so we set the
+ * writemask to X and bump the URB offset by 1.
*/
- if (key->tes_primitive_mode == GL_ISOLINES) {
- /* Isolines .xy should be stored in .zw, in order. */
- swiz = BRW_SWIZZLE4(0, 0, 0, 1);
- mask <<= 2;
- } else {
- /* Other domains are reversed; store .wzyx instead of .xyzw. */
- swiz = BRW_SWIZZLE_WZYX;
- mask = writemask_for_backwards_vector(mask);
- }
+ imm_offset = 1;
+ break;
+ case GL_ISOLINES:
+ /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
+ return;
+ default:
+ unreachable("Bogus tessellation domain");
}
- }
+ } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
+ value.type = BRW_REGISTER_TYPE_F;
- unsigned first_component = nir_intrinsic_component(instr);
- if (first_component) {
- assert(swiz == BRW_SWIZZLE_XYZW);
- swiz = BRW_SWZ_COMP_OUTPUT(first_component);
- mask = mask << first_component;
+ mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;
+
+ /* This is a write to gl_TessLevelOuter[] which lives in the
+ * Patch URB Header at DWords 4-7. However, it's reversed, so
+ * instead of .xyzw we have .wzyx.
+ */
+ if (key->tes_primitive_mode == GL_ISOLINES) {
+ /* Isolines .xy should be stored in .zw, in order. */
+ swiz = BRW_SWIZZLE4(0, 0, 0, 1);
+ mask <<= 2;
+ } else {
+ /* Other domains are reversed; store .wzyx instead of .xyzw. */
+ swiz = BRW_SWIZZLE_WZYX;
+ mask = writemask_for_backwards_vector(mask);
+ }
}
emit_urb_write(swizzle(value, swiz), mask,
@@ -451,36 +518,23 @@ brw_compile_tcs(const struct brw_compiler *compiler,
unsigned *final_assembly_size,
char **error_str)
{
- const struct gen_device_info *devinfo = compiler->devinfo;
+ const struct brw_device_info *devinfo = compiler->devinfo;
struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];
nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
+ nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
nir->info.outputs_written = key->outputs_written;
nir->info.patch_outputs_written = key->patch_outputs_written;
+ nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL);
+ nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
- struct brw_vue_map input_vue_map;
- brw_compute_vue_map(devinfo, &input_vue_map,
- nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
- true);
+ prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2);
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
nir->info.outputs_written,
nir->info.patch_outputs_written);
- nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
- brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
- brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
- if (key->quads_workaround)
- brw_nir_apply_tcs_quads_workaround(nir);
-
- nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
-
- if (is_scalar)
- prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 8);
- else
- prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2);
-
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:
*
@@ -507,6 +561,11 @@ brw_compile_tcs(const struct brw_compiler *compiler,
/* URB entry sizes are stored as a multiple of 64 bytes. */
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+ struct brw_vue_map input_vue_map;
+ brw_compute_vue_map(devinfo, &input_vue_map,
+ nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+ true);
+
/* HS does not use the usual payload pushing from URB to GRFs,
* because we don't have enough registers for a full-size payload, and
* the hardware is broken on Haswell anyway.
@@ -520,50 +579,20 @@ brw_compile_tcs(const struct brw_compiler *compiler,
brw_print_vue_map(stderr, &vue_prog_data->vue_map);
}
- if (is_scalar) {
- fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
- &prog_data->base.base, NULL, nir, 8,
- shader_time_index, &input_vue_map);
- if (!v.run_tcs_single_patch()) {
- if (error_str)
- *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
- return NULL;
- }
-
- prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
- prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
-
- fs_generator g(compiler, log_data, mem_ctx, (void *) key,
- &prog_data->base.base, v.promoted_constants, false,
- MESA_SHADER_TESS_CTRL);
- if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
- g.enable_debug(ralloc_asprintf(mem_ctx,
- "%s tessellation control shader %s",
- nir->info.label ? nir->info.label
- : "unnamed",
- nir->info.name));
- }
-
- g.generate_code(v.cfg, 8);
-
- return g.get_assembly(final_assembly_size);
- } else {
- vec4_tcs_visitor v(compiler, log_data, key, prog_data,
- nir, mem_ctx, shader_time_index, &input_vue_map);
- if (!v.run()) {
- if (error_str)
- *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
- return NULL;
- }
-
- if (unlikely(INTEL_DEBUG & DEBUG_TCS))
- v.dump_instructions();
+ vec4_tcs_visitor v(compiler, log_data, key, prog_data,
+ nir, mem_ctx, shader_time_index, &input_vue_map);
+ if (!v.run()) {
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
+ return NULL;
+ }
+ if (unlikely(INTEL_DEBUG & DEBUG_TCS))
+ v.dump_instructions();
- return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
- &prog_data->base, v.cfg,
- final_assembly_size);
- }
+ return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
+ &prog_data->base, v.cfg,
+ final_assembly_size);
}
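
writemask_for_backwards_vector() added above mirrors a four-bit writemask end for end, matching the reversed DWord layout of the patch URB header. A worked case:

/* Bit i of the mask moves to bit (3 - i):
 *
 *    mask     = 0b0011  (.xy)
 *    new_mask = 0b1100  (.wz)
 */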
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
index 030eb5e66..2c6801b2a 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
@@ -49,7 +49,9 @@ public:
const struct brw_vue_map *input_vue_map);
protected:
- virtual dst_reg *make_reg_for_system_value(int location);
+ virtual void emit_nir_code();
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
virtual void setup_payload();
virtual void emit_prolog();
@@ -60,11 +62,9 @@ protected:
void emit_input_urb_read(const dst_reg &dst,
const src_reg &vertex_index,
unsigned base_offset,
- unsigned first_component,
const src_reg &indirect_offset);
void emit_output_urb_read(const dst_reg &dst,
unsigned base_offset,
- unsigned first_component,
const src_reg &indirect_offset);
void emit_urb_write(const src_reg &value, unsigned writemask,
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
index 226dcb4f6..7ba494fbf 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp
@@ -46,7 +46,7 @@ vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
dst_reg *
-vec4_tes_visitor::make_reg_for_system_value(int location)
+vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type)
{
return NULL;
}
@@ -177,9 +177,7 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input: {
src_reg indirect_offset = get_indirect_offset(instr);
- dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
unsigned imm_offset = instr->const_index[0];
- unsigned first_component = nir_intrinsic_component(instr);
src_reg header = input_read_header;
if (indirect_offset.file != BAD_FILE) {
@@ -192,10 +190,8 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
*/
const unsigned max_push_slots = 24;
if (imm_offset < max_push_slots) {
- src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
- src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
- emit(MOV(dst, src));
+ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
+ src_reg(ATTR, imm_offset, glsl_type::ivec4_type)));
prog_data->urb_read_length =
MAX2(prog_data->urb_read_length,
DIV_ROUND_UP(imm_offset + 1, 2));
@@ -209,14 +205,12 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
read->offset = imm_offset;
read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
- src_reg src = src_reg(temp);
- src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
/* Copy to target. We might end up with some funky writemasks landing
* in here, but we really don't want them in the above pseudo-ops.
*/
+ dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
dst.writemask = brw_writemask_for_size(instr->num_components);
- emit(MOV(dst, src));
+ emit(MOV(dst, src_reg(temp)));
break;
}
default:
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.h b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.h
index 31a28f359..4b697aa59 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.h
+++ b/lib/mesa/src/mesa/drivers/dri/i965/brw_vec4_tes.h
@@ -47,7 +47,8 @@ public:
int shader_time_index);
protected:
- virtual dst_reg *make_reg_for_system_value(int location);
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
diff --git a/lib/mesa/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/lib/mesa/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
index 1323b6507..8d4a447a8 100644
--- a/lib/mesa/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
+++ b/lib/mesa/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -36,9 +36,10 @@ class cmod_propagation_test : public ::testing::Test {
public:
struct brw_compiler *compiler;
- struct gen_device_info *devinfo;
+ struct brw_device_info *devinfo;
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
+ struct brw_vertex_program *vp;
struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
@@ -57,7 +58,8 @@ public:
protected:
/* Dummy implementation for pure virtual methods */
- virtual dst_reg *make_reg_for_system_value(int location)
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type)
{
unreachable("Not reached");
}
@@ -98,14 +100,18 @@ void cmod_propagation_test::SetUp()
{
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
- devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo));
+ devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
compiler->devinfo = devinfo;
+ vp = ralloc(NULL, struct brw_vertex_program);
+
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
v = new cmod_propagation_vec4_visitor(compiler, shader, prog_data);
+ _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
+
devinfo->gen = 4;
}
@@ -370,7 +376,7 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
src_reg zero(brw_imm_f(0.0f));
bld.ADD(offset(dest, 2), src0, src1);
bld.emit(SHADER_OPCODE_TEX, dest, src2)
- ->size_written = 4 * REG_SIZE;
+ ->regs_written = 4;
bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 2), zero, BRW_CONDITIONAL_GE);
/* = Before =