summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h | 5
-rw-r--r-- lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp | 74
-rw-r--r-- lib/mesa/src/compiler/glsl/serialize.cpp | 15
-rw-r--r-- lib/mesa/src/compiler/nir/nir_gather_xfb_info.c | 47
4 files changed, 103 insertions, 38 deletions
diff --git a/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h b/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h
index 38fc43d7c..3a9160ccf 100644
--- a/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h
+++ b/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h
@@ -1,4 +1,4 @@
-/* A Bison parser, made by GNU Bison 3.1. */
+/* A Bison parser, made by GNU Bison 3.2. */
/* Bison interface for Yacc-like parsers in C
@@ -30,6 +30,9 @@
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
+/* Undocumented macros, especially those whose name start with YY_,
+ are private implementation details. Do not rely on them. */
+
#ifndef YY_GLCPP_PARSER_GLSL_GLCPP_GLCPP_PARSE_H_INCLUDED
# define YY_GLCPP_PARSER_GLSL_GLCPP_GLCPP_PARSE_H_INCLUDED
/* Debug traces. */
diff --git a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp
index 6cd9a2d81..2aae30d82 100644
--- a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp
+++ b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp
@@ -32,8 +32,9 @@ namespace {
class vector_deref_visitor : public ir_rvalue_enter_visitor {
public:
- vector_deref_visitor()
- : progress(false)
+ vector_deref_visitor(void *mem_ctx, gl_shader_stage shader_stage)
+ : progress(false), shader_stage(shader_stage),
+ factory(&factory_instructions, mem_ctx)
{
}
@@ -45,6 +46,9 @@ public:
virtual ir_visitor_status visit_enter(ir_assignment *ir);
bool progress;
+ gl_shader_stage shader_stage;
+ exec_list factory_instructions;
+ ir_factory factory;
};
} /* anonymous namespace */
@@ -65,13 +69,63 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
ir_constant *old_index_constant =
deref->array_index->constant_expression_value(mem_ctx);
if (!old_index_constant) {
- ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
- new_lhs->type,
- new_lhs->clone(mem_ctx, NULL),
- ir->rhs,
- deref->array_index);
- ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
- ir->set_lhs(new_lhs);
+ if (shader_stage == MESA_SHADER_TESS_CTRL &&
+ deref->variable_referenced()->data.mode == ir_var_shader_out) {
+ /* Tessellation control shader outputs act as if they have memory
+ * backing them and if we have writes from multiple threads
+ * targeting the same vec4 (this can happen for patch outputs), the
+ * load-vec-store pattern of ir_triop_vector_insert doesn't work.
+ * Instead, we have to lower to a series of conditional write-masked
+ * assignments.
+ */
+ ir_variable *const src_temp =
+ factory.make_temp(ir->rhs->type, "scalar_tmp");
+
+ /* The newly created variable declaration goes before the assignment
+ * because we're going to set it as the new LHS.
+ */
+ ir->insert_before(factory.instructions);
+ ir->set_lhs(new(mem_ctx) ir_dereference_variable(src_temp));
+
+ ir_variable *const arr_index =
+ factory.make_temp(deref->array_index->type, "index_tmp");
+ factory.emit(assign(arr_index, deref->array_index));
+
+ for (unsigned i = 0; i < new_lhs->type->vector_elements; i++) {
+ ir_constant *const cmp_index =
+ ir_constant::zero(factory.mem_ctx, deref->array_index->type);
+ cmp_index->value.u[0] = i;
+
+ ir_rvalue *const lhs_clone = new_lhs->clone(factory.mem_ctx, NULL);
+ ir_dereference_variable *const src_temp_deref =
+ new(mem_ctx) ir_dereference_variable(src_temp);
+
+ if (new_lhs->ir_type != ir_type_swizzle) {
+ assert(lhs_clone->as_dereference());
+ ir_assignment *cond_assign =
+ new(mem_ctx) ir_assignment(lhs_clone->as_dereference(),
+ src_temp_deref,
+ equal(arr_index, cmp_index),
+ WRITEMASK_X << i);
+ factory.emit(cond_assign);
+ } else {
+ ir_assignment *cond_assign =
+ new(mem_ctx) ir_assignment(swizzle(lhs_clone, i, 1),
+ src_temp_deref,
+ equal(arr_index, cmp_index));
+ factory.emit(cond_assign);
+ }
+ }
+ ir->insert_after(factory.instructions);
+ } else {
+ ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+ new_lhs->type,
+ new_lhs->clone(mem_ctx, NULL),
+ ir->rhs,
+ deref->array_index);
+ ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+ ir->set_lhs(new_lhs);
+ }
} else if (new_lhs->ir_type != ir_type_swizzle) {
ir->set_lhs(new_lhs);
ir->write_mask = 1 << old_index_constant->get_uint_component(0);
@@ -105,7 +159,7 @@ vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
bool
lower_vector_derefs(gl_linked_shader *shader)
{
- vector_deref_visitor v;
+ vector_deref_visitor v(shader->ir, shader->Stage);
visit_list_elements(&v, shader->ir);
diff --git a/lib/mesa/src/compiler/glsl/serialize.cpp b/lib/mesa/src/compiler/glsl/serialize.cpp
index fdd99ec59..ad258f8bc 100644
--- a/lib/mesa/src/compiler/glsl/serialize.cpp
+++ b/lib/mesa/src/compiler/glsl/serialize.cpp
@@ -996,15 +996,14 @@ write_shader_parameters(struct blob *metadata,
struct gl_program_parameter_list *params)
{
blob_write_uint32(metadata, params->NumParameters);
- blob_write_uint32(metadata, params->NumParameterValues);
uint32_t i = 0;
while (i < params->NumParameters) {
struct gl_program_parameter *param = &params->Parameters[i];
-
blob_write_uint32(metadata, param->Type);
blob_write_string(metadata, param->Name);
blob_write_uint32(metadata, param->Size);
+ blob_write_uint32(metadata, param->Padded);
blob_write_uint32(metadata, param->DataType);
blob_write_bytes(metadata, param->StateIndexes,
sizeof(param->StateIndexes));
@@ -1015,9 +1014,6 @@ write_shader_parameters(struct blob *metadata,
blob_write_bytes(metadata, params->ParameterValues,
sizeof(gl_constant_value) * params->NumParameterValues);
- blob_write_bytes(metadata, params->ParameterValueOffset,
- sizeof(uint32_t) * params->NumParameters);
-
blob_write_uint32(metadata, params->StateFlags);
}
@@ -1028,28 +1024,25 @@ read_shader_parameters(struct blob_reader *metadata,
gl_state_index16 state_indexes[STATE_LENGTH];
uint32_t i = 0;
uint32_t num_parameters = blob_read_uint32(metadata);
- uint32_t num_parameters_values = blob_read_uint32(metadata);
_mesa_reserve_parameter_storage(params, num_parameters);
while (i < num_parameters) {
gl_register_file type = (gl_register_file) blob_read_uint32(metadata);
const char *name = blob_read_string(metadata);
unsigned size = blob_read_uint32(metadata);
+ bool padded = blob_read_uint32(metadata);
unsigned data_type = blob_read_uint32(metadata);
blob_copy_bytes(metadata, (uint8_t *) state_indexes,
sizeof(state_indexes));
_mesa_add_parameter(params, type, name, size, data_type,
- NULL, state_indexes, false);
+ NULL, state_indexes, padded);
i++;
}
blob_copy_bytes(metadata, (uint8_t *) params->ParameterValues,
- sizeof(gl_constant_value) * num_parameters_values);
-
- blob_copy_bytes(metadata, (uint8_t *) params->ParameterValueOffset,
- sizeof(uint32_t) * num_parameters);
+ sizeof(gl_constant_value) * params->NumParameterValues);
params->StateFlags = blob_read_uint32(metadata);
}
diff --git a/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c b/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c
index 7e441adc0..a5258f79a 100644
--- a/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c
+++ b/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c
@@ -32,7 +32,11 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
unsigned *offset,
const struct glsl_type *type)
{
- if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
+ /* If this type contains a 64-bit value, align to 8 bytes */
+ if (glsl_type_contains_64bit(type))
+ *offset = ALIGN_POT(*offset, 8);
+
+ if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
unsigned length = glsl_get_length(type);
const struct glsl_type *child_type = glsl_get_array_element(type);
for (unsigned i = 0; i < length; i++)
@@ -57,32 +61,43 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
assert(var->data.stream < NIR_MAX_XFB_STREAMS);
xfb->streams_written |= (1 << var->data.stream);
- unsigned comp_slots = glsl_get_component_slots(type);
- unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
- assert(attrib_slots == glsl_count_attribute_slots(type, false));
-
- /* Ensure that we don't have, for instance, a dvec2 with a location_frac
- * of 2 which would make it crass a location boundary even though it
- * fits in a single slot. However, you can have a dvec3 which crosses
- * the slot boundary with a location_frac of 2.
- */
- assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots);
+ unsigned comp_slots;
+ if (var->data.compact) {
+ /* This only happens for clip/cull which are float arrays */
+ assert(glsl_without_array(type) == glsl_float_type());
+ assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
+ var->data.location == VARYING_SLOT_CLIP_DIST1);
+ comp_slots = glsl_get_length(type);
+ } else {
+ comp_slots = glsl_get_component_slots(type);
+
+ unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
+ assert(attrib_slots == glsl_count_attribute_slots(type, false));
+
+ /* Ensure that we don't have, for instance, a dvec2 with a
+ * location_frac of 2 which would make it cross a location boundary
+ * even though it fits in a single slot. However, you can have a
+ * dvec3 which crosses the slot boundary with a location_frac of 2.
+ */
+ assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
+ attrib_slots);
+ }
assert(var->data.location_frac + comp_slots <= 8);
uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
- assert(attrib_slots <= 2);
- for (unsigned s = 0; s < attrib_slots; s++) {
+ while (comp_mask) {
nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
output->buffer = var->data.xfb_buffer;
- output->offset = *offset + s * 16;
+ output->offset = *offset;
output->location = *location;
- output->component_mask = (comp_mask >> (s * 4)) & 0xf;
+ output->component_mask = comp_mask & 0xf;
+ *offset += util_bitcount(output->component_mask) * 4;
(*location)++;
+ comp_mask >>= 4;
}
- *offset += comp_slots * 4;
}
}