diff options
-rw-r--r-- | lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h | 5 | ||||
-rw-r--r-- | lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp | 74 | ||||
-rw-r--r-- | lib/mesa/src/compiler/glsl/serialize.cpp | 15 | ||||
-rw-r--r-- | lib/mesa/src/compiler/nir/nir_gather_xfb_info.c | 47 |
4 files changed, 103 insertions, 38 deletions
diff --git a/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h b/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h index 38fc43d7c..3a9160ccf 100644 --- a/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h +++ b/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 3.1. */ +/* A Bison parser, made by GNU Bison 3.2. */ /* Bison interface for Yacc-like parsers in C @@ -30,6 +30,9 @@ This special exception was added by the Free Software Foundation in version 2.2 of Bison. */ +/* Undocumented macros, especially those whose name start with YY_, + are private implementation details. Do not rely on them. */ + #ifndef YY_GLCPP_PARSER_GLSL_GLCPP_GLCPP_PARSE_H_INCLUDED # define YY_GLCPP_PARSER_GLSL_GLCPP_GLCPP_PARSE_H_INCLUDED /* Debug traces. */ diff --git a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp index 6cd9a2d81..2aae30d82 100644 --- a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp +++ b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp @@ -32,8 +32,9 @@ namespace { class vector_deref_visitor : public ir_rvalue_enter_visitor { public: - vector_deref_visitor() - : progress(false) + vector_deref_visitor(void *mem_ctx, gl_shader_stage shader_stage) + : progress(false), shader_stage(shader_stage), + factory(&factory_instructions, mem_ctx) { } @@ -45,6 +46,9 @@ public: virtual ir_visitor_status visit_enter(ir_assignment *ir); bool progress; + gl_shader_stage shader_stage; + exec_list factory_instructions; + ir_factory factory; }; } /* anonymous namespace */ @@ -65,13 +69,63 @@ vector_deref_visitor::visit_enter(ir_assignment *ir) ir_constant *old_index_constant = deref->array_index->constant_expression_value(mem_ctx); if (!old_index_constant) { - ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, - new_lhs->type, - new_lhs->clone(mem_ctx, NULL), - ir->rhs, - deref->array_index); - ir->write_mask = (1 << new_lhs->type->vector_elements) - 1; - ir->set_lhs(new_lhs); + 
if (shader_stage == MESA_SHADER_TESS_CTRL && + deref->variable_referenced()->data.mode == ir_var_shader_out) { + /* Tessellation control shader outputs act as if they have memory + * backing them and if we have writes from multiple threads + * targeting the same vec4 (this can happen for patch outputs), the + * load-vec-store pattern of ir_triop_vector_insert doesn't work. + * Instead, we have to lower to a series of conditional write-masked + * assignments. + */ + ir_variable *const src_temp = + factory.make_temp(ir->rhs->type, "scalar_tmp"); + + /* The newly created variable declaration goes before the assignment + * because we're going to set it as the new LHS. + */ + ir->insert_before(factory.instructions); + ir->set_lhs(new(mem_ctx) ir_dereference_variable(src_temp)); + + ir_variable *const arr_index = + factory.make_temp(deref->array_index->type, "index_tmp"); + factory.emit(assign(arr_index, deref->array_index)); + + for (unsigned i = 0; i < new_lhs->type->vector_elements; i++) { + ir_constant *const cmp_index = + ir_constant::zero(factory.mem_ctx, deref->array_index->type); + cmp_index->value.u[0] = i; + + ir_rvalue *const lhs_clone = new_lhs->clone(factory.mem_ctx, NULL); + ir_dereference_variable *const src_temp_deref = + new(mem_ctx) ir_dereference_variable(src_temp); + + if (new_lhs->ir_type != ir_type_swizzle) { + assert(lhs_clone->as_dereference()); + ir_assignment *cond_assign = + new(mem_ctx) ir_assignment(lhs_clone->as_dereference(), + src_temp_deref, + equal(arr_index, cmp_index), + WRITEMASK_X << i); + factory.emit(cond_assign); + } else { + ir_assignment *cond_assign = + new(mem_ctx) ir_assignment(swizzle(lhs_clone, i, 1), + src_temp_deref, + equal(arr_index, cmp_index)); + factory.emit(cond_assign); + } + } + ir->insert_after(factory.instructions); + } else { + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + new_lhs->type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + deref->array_index); + ir->write_mask = (1 << 
new_lhs->type->vector_elements) - 1; + ir->set_lhs(new_lhs); + } } else if (new_lhs->ir_type != ir_type_swizzle) { ir->set_lhs(new_lhs); ir->write_mask = 1 << old_index_constant->get_uint_component(0); @@ -105,7 +159,7 @@ vector_deref_visitor::handle_rvalue(ir_rvalue **rv) bool lower_vector_derefs(gl_linked_shader *shader) { - vector_deref_visitor v; + vector_deref_visitor v(shader->ir, shader->Stage); visit_list_elements(&v, shader->ir); diff --git a/lib/mesa/src/compiler/glsl/serialize.cpp b/lib/mesa/src/compiler/glsl/serialize.cpp index fdd99ec59..ad258f8bc 100644 --- a/lib/mesa/src/compiler/glsl/serialize.cpp +++ b/lib/mesa/src/compiler/glsl/serialize.cpp @@ -996,15 +996,14 @@ write_shader_parameters(struct blob *metadata, struct gl_program_parameter_list *params) { blob_write_uint32(metadata, params->NumParameters); - blob_write_uint32(metadata, params->NumParameterValues); uint32_t i = 0; while (i < params->NumParameters) { struct gl_program_parameter *param = &params->Parameters[i]; - blob_write_uint32(metadata, param->Type); blob_write_string(metadata, param->Name); blob_write_uint32(metadata, param->Size); + blob_write_uint32(metadata, param->Padded); blob_write_uint32(metadata, param->DataType); blob_write_bytes(metadata, param->StateIndexes, sizeof(param->StateIndexes)); @@ -1015,9 +1014,6 @@ write_shader_parameters(struct blob *metadata, blob_write_bytes(metadata, params->ParameterValues, sizeof(gl_constant_value) * params->NumParameterValues); - blob_write_bytes(metadata, params->ParameterValueOffset, - sizeof(uint32_t) * params->NumParameters); - blob_write_uint32(metadata, params->StateFlags); } @@ -1028,28 +1024,25 @@ read_shader_parameters(struct blob_reader *metadata, gl_state_index16 state_indexes[STATE_LENGTH]; uint32_t i = 0; uint32_t num_parameters = blob_read_uint32(metadata); - uint32_t num_parameters_values = blob_read_uint32(metadata); _mesa_reserve_parameter_storage(params, num_parameters); while (i < num_parameters) { gl_register_file type = 
(gl_register_file) blob_read_uint32(metadata); const char *name = blob_read_string(metadata); unsigned size = blob_read_uint32(metadata); + bool padded = blob_read_uint32(metadata); unsigned data_type = blob_read_uint32(metadata); blob_copy_bytes(metadata, (uint8_t *) state_indexes, sizeof(state_indexes)); _mesa_add_parameter(params, type, name, size, data_type, - NULL, state_indexes, false); + NULL, state_indexes, padded); i++; } blob_copy_bytes(metadata, (uint8_t *) params->ParameterValues, - sizeof(gl_constant_value) * num_parameters_values); - - blob_copy_bytes(metadata, (uint8_t *) params->ParameterValueOffset, - sizeof(uint32_t) * num_parameters); + sizeof(gl_constant_value) * params->NumParameterValues); params->StateFlags = blob_read_uint32(metadata); } diff --git a/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c b/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c index 7e441adc0..a5258f79a 100644 --- a/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c +++ b/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c @@ -32,7 +32,11 @@ add_var_xfb_outputs(nir_xfb_info *xfb, unsigned *offset, const struct glsl_type *type) { - if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) { + /* If this type contains a 64-bit value, align to 8 bytes */ + if (glsl_type_contains_64bit(type)) + *offset = ALIGN_POT(*offset, 8); + + if (glsl_type_is_array_or_matrix(type) && !var->data.compact) { unsigned length = glsl_get_length(type); const struct glsl_type *child_type = glsl_get_array_element(type); for (unsigned i = 0; i < length; i++) @@ -57,32 +61,43 @@ add_var_xfb_outputs(nir_xfb_info *xfb, assert(var->data.stream < NIR_MAX_XFB_STREAMS); xfb->streams_written |= (1 << var->data.stream); - unsigned comp_slots = glsl_get_component_slots(type); - unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4); - assert(attrib_slots == glsl_count_attribute_slots(type, false)); - - /* Ensure that we don't have, for instance, a dvec2 with a location_frac - * of 2 which would make it crass a 
location boundary even though it - * fits in a single slot. However, you can have a dvec3 which crosses - * the slot boundary with a location_frac of 2. - */ - assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots); + unsigned comp_slots; + if (var->data.compact) { + /* This only happens for clip/cull which are float arrays */ + assert(glsl_without_array(type) == glsl_float_type()); + assert(var->data.location == VARYING_SLOT_CLIP_DIST0 || + var->data.location == VARYING_SLOT_CLIP_DIST1); + comp_slots = glsl_get_length(type); + } else { + comp_slots = glsl_get_component_slots(type); + + unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4); + assert(attrib_slots == glsl_count_attribute_slots(type, false)); + + /* Ensure that we don't have, for instance, a dvec2 with a + * location_frac of 2 which would make it crass a location boundary + * even though it fits in a single slot. However, you can have a + * dvec3 which crosses the slot boundary with a location_frac of 2. + */ + assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == + attrib_slots); + } assert(var->data.location_frac + comp_slots <= 8); uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac; - assert(attrib_slots <= 2); - for (unsigned s = 0; s < attrib_slots; s++) { + while (comp_mask) { nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++]; output->buffer = var->data.xfb_buffer; - output->offset = *offset + s * 16; + output->offset = *offset; output->location = *location; - output->component_mask = (comp_mask >> (s * 4)) & 0xf; + output->component_mask = comp_mask & 0xf; + *offset += util_bitcount(output->component_mask) * 4; (*location)++; + comp_mask >>= 4; } - *offset += comp_slots * 4; } } |