4 files changed, 103 insertions, 38 deletions
diff --git a/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h b/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h
index 38fc43d7c..3a9160ccf 100644
--- a/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h
+++ b/lib/mesa/src/compiler/glsl/glcpp/glcpp-parse.h
@@ -1,4 +1,4 @@
-/* A Bison parser, made by GNU Bison 3.1.  */
+/* A Bison parser, made by GNU Bison 3.2.  */
 
 /* Bison interface for Yacc-like parsers in C
 
@@ -30,6 +30,9 @@
    This special exception was added by the Free Software Foundation in
    version 2.2 of Bison.  */
 
+/* Undocumented macros, especially those whose name start with YY_,
+   are private implementation details.  Do not rely on them.  */
+
 #ifndef YY_GLCPP_PARSER_GLSL_GLCPP_GLCPP_PARSE_H_INCLUDED
 # define YY_GLCPP_PARSER_GLSL_GLCPP_GLCPP_PARSE_H_INCLUDED
 /* Debug traces.  */
diff --git a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp
index 6cd9a2d81..2aae30d82 100644
--- a/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp
+++ b/lib/mesa/src/compiler/glsl/lower_vector_derefs.cpp
@@ -32,8 +32,9 @@ namespace {
 
 class vector_deref_visitor : public ir_rvalue_enter_visitor {
 public:
-   vector_deref_visitor()
-      : progress(false)
+   vector_deref_visitor(void *mem_ctx, gl_shader_stage shader_stage)
+      : progress(false), shader_stage(shader_stage),
+        factory(&factory_instructions, mem_ctx)
    {
    }
 
@@ -45,6 +46,9 @@ public:
    virtual ir_visitor_status visit_enter(ir_assignment *ir);
 
    bool progress;
+   gl_shader_stage shader_stage;
+   exec_list factory_instructions;
+   ir_factory factory;
 };
 
 } /* anonymous namespace */
@@ -65,13 +69,63 @@ vector_deref_visitor::visit_enter(ir_assignment *ir)
    ir_constant *old_index_constant =
       deref->array_index->constant_expression_value(mem_ctx);
    if (!old_index_constant) {
-      ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
-                                           new_lhs->type,
-                                           new_lhs->clone(mem_ctx, NULL),
-                                           ir->rhs,
-                                           deref->array_index);
-      ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
-      ir->set_lhs(new_lhs);
+      if (shader_stage == MESA_SHADER_TESS_CTRL &&
+          deref->variable_referenced()->data.mode == ir_var_shader_out) {
+         /* Tessellation control shader outputs act as if they have memory
+          * backing them and if we have writes from multiple threads
+          * targeting the same vec4 (this can happen for patch outputs), the
+          * load-vec-store pattern of ir_triop_vector_insert doesn't work.
+          * Instead, we have to lower to a series of conditional write-masked
+          * assignments.
+          */
+         ir_variable *const src_temp =
+            factory.make_temp(ir->rhs->type, "scalar_tmp");
+
+         /* The newly created variable declaration goes before the assignment
+          * because we're going to set it as the new LHS.
+          */
+         ir->insert_before(factory.instructions);
+         ir->set_lhs(new(mem_ctx) ir_dereference_variable(src_temp));
+
+         ir_variable *const arr_index =
+            factory.make_temp(deref->array_index->type, "index_tmp");
+         factory.emit(assign(arr_index, deref->array_index));
+
+         for (unsigned i = 0; i < new_lhs->type->vector_elements; i++) {
+            ir_constant *const cmp_index =
+               ir_constant::zero(factory.mem_ctx, deref->array_index->type);
+            cmp_index->value.u[0] = i;
+
+            ir_rvalue *const lhs_clone = new_lhs->clone(factory.mem_ctx, NULL);
+            ir_dereference_variable *const src_temp_deref =
+               new(mem_ctx) ir_dereference_variable(src_temp);
+
+            if (new_lhs->ir_type != ir_type_swizzle) {
+               assert(lhs_clone->as_dereference());
+               ir_assignment *cond_assign =
+                  new(mem_ctx) ir_assignment(lhs_clone->as_dereference(),
+                                             src_temp_deref,
+                                             equal(arr_index, cmp_index),
+                                             WRITEMASK_X << i);
+               factory.emit(cond_assign);
+            } else {
+               ir_assignment *cond_assign =
+                  new(mem_ctx) ir_assignment(swizzle(lhs_clone, i, 1),
+                                             src_temp_deref,
+                                             equal(arr_index, cmp_index));
+               factory.emit(cond_assign);
+            }
+         }
+         ir->insert_after(factory.instructions);
+      } else {
+         ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+                                              new_lhs->type,
+                                              new_lhs->clone(mem_ctx, NULL),
+                                              ir->rhs,
+                                              deref->array_index);
+         ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+         ir->set_lhs(new_lhs);
+      }
    } else if (new_lhs->ir_type != ir_type_swizzle) {
       ir->set_lhs(new_lhs);
       ir->write_mask = 1 << old_index_constant->get_uint_component(0);
@@ -105,7 +159,7 @@ vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
 bool
 lower_vector_derefs(gl_linked_shader *shader)
 {
-   vector_deref_visitor v;
+   vector_deref_visitor v(shader->ir, shader->Stage);
 
    visit_list_elements(&v, shader->ir);
 
diff --git a/lib/mesa/src/compiler/glsl/serialize.cpp b/lib/mesa/src/compiler/glsl/serialize.cpp
index fdd99ec59..ad258f8bc 100644
--- a/lib/mesa/src/compiler/glsl/serialize.cpp
+++ b/lib/mesa/src/compiler/glsl/serialize.cpp
@@ -996,15 +996,14 @@ write_shader_parameters(struct blob *metadata,
                         struct gl_program_parameter_list *params)
 {
    blob_write_uint32(metadata, params->NumParameters);
-   blob_write_uint32(metadata, params->NumParameterValues);
    uint32_t i = 0;
 
    while (i < params->NumParameters) {
       struct gl_program_parameter *param = &params->Parameters[i];
-
       blob_write_uint32(metadata, param->Type);
       blob_write_string(metadata, param->Name);
       blob_write_uint32(metadata, param->Size);
+      blob_write_uint32(metadata, param->Padded);
       blob_write_uint32(metadata, param->DataType);
       blob_write_bytes(metadata, param->StateIndexes,
                        sizeof(param->StateIndexes));
@@ -1015,9 +1014,6 @@ write_shader_parameters(struct blob *metadata,
    blob_write_bytes(metadata, params->ParameterValues,
                     sizeof(gl_constant_value) * params->NumParameterValues);
 
-   blob_write_bytes(metadata, params->ParameterValueOffset,
-                    sizeof(uint32_t) * params->NumParameters);
-
    blob_write_uint32(metadata, params->StateFlags);
 }
 
@@ -1028,28 +1024,25 @@ read_shader_parameters(struct blob_reader *metadata,
    gl_state_index16 state_indexes[STATE_LENGTH];
    uint32_t i = 0;
    uint32_t num_parameters = blob_read_uint32(metadata);
-   uint32_t num_parameters_values = blob_read_uint32(metadata);
 
    _mesa_reserve_parameter_storage(params, num_parameters);
    while (i < num_parameters) {
       gl_register_file type = (gl_register_file) blob_read_uint32(metadata);
       const char *name = blob_read_string(metadata);
       unsigned size = blob_read_uint32(metadata);
+      bool padded = blob_read_uint32(metadata);
       unsigned data_type = blob_read_uint32(metadata);
       blob_copy_bytes(metadata, (uint8_t *) state_indexes,
                       sizeof(state_indexes));
 
       _mesa_add_parameter(params, type, name, size, data_type,
-                          NULL, state_indexes, false);
+                          NULL, state_indexes, padded);
 
       i++;
    }
 
    blob_copy_bytes(metadata, (uint8_t *) params->ParameterValues,
-                   sizeof(gl_constant_value) * num_parameters_values);
-
-   blob_copy_bytes(metadata, (uint8_t *) params->ParameterValueOffset,
-                   sizeof(uint32_t) * num_parameters);
+                   sizeof(gl_constant_value) * params->NumParameterValues);
 
    params->StateFlags = blob_read_uint32(metadata);
 }
diff --git a/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c b/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c
index 7e441adc0..a5258f79a 100644
--- a/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c
+++ b/lib/mesa/src/compiler/nir/nir_gather_xfb_info.c
@@ -32,7 +32,11 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
                     unsigned *offset,
                     const struct glsl_type *type)
 {
-   if (glsl_type_is_array(type) || glsl_type_is_matrix(type)) {
+   /* If this type contains a 64-bit value, align to 8 bytes */
+   if (glsl_type_contains_64bit(type))
+      *offset = ALIGN_POT(*offset, 8);
+
+   if (glsl_type_is_array_or_matrix(type) && !var->data.compact) {
       unsigned length = glsl_get_length(type);
       const struct glsl_type *child_type = glsl_get_array_element(type);
       for (unsigned i = 0; i < length; i++)
@@ -57,32 +61,43 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
       assert(var->data.stream < NIR_MAX_XFB_STREAMS);
       xfb->streams_written |= (1 << var->data.stream);
 
-      unsigned comp_slots = glsl_get_component_slots(type);
-      unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
-      assert(attrib_slots == glsl_count_attribute_slots(type, false));
-
-      /* Ensure that we don't have, for instance, a dvec2 with a location_frac
-       * of 2 which would make it crass a location boundary even though it
-       * fits in a single slot.  However, you can have a dvec3 which crosses
-       * the slot boundary with a location_frac of 2.
-       */
-      assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) == attrib_slots);
+      unsigned comp_slots;
+      if (var->data.compact) {
+         /* This only happens for clip/cull which are float arrays */
+         assert(glsl_without_array(type) == glsl_float_type());
+         assert(var->data.location == VARYING_SLOT_CLIP_DIST0 ||
+                var->data.location == VARYING_SLOT_CLIP_DIST1);
+         comp_slots = glsl_get_length(type);
+      } else {
+         comp_slots = glsl_get_component_slots(type);
+
+         unsigned attrib_slots = DIV_ROUND_UP(comp_slots, 4);
+         assert(attrib_slots == glsl_count_attribute_slots(type, false));
+
+         /* Ensure that we don't have, for instance, a dvec2 with a
+          * location_frac of 2 which would make it crass a location boundary
+          * even though it fits in a single slot.  However, you can have a
+          * dvec3 which crosses the slot boundary with a location_frac of 2.
+          */
+         assert(DIV_ROUND_UP(var->data.location_frac + comp_slots, 4) ==
+                attrib_slots);
+      }
 
       assert(var->data.location_frac + comp_slots <= 8);
       uint8_t comp_mask = ((1 << comp_slots) - 1) << var->data.location_frac;
 
-      assert(attrib_slots <= 2);
-      for (unsigned s = 0; s < attrib_slots; s++) {
+      while (comp_mask) {
          nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
 
          output->buffer = var->data.xfb_buffer;
-         output->offset = *offset + s * 16;
+         output->offset = *offset;
          output->location = *location;
-         output->component_mask = (comp_mask >> (s * 4)) & 0xf;
+         output->component_mask = comp_mask & 0xf;
 
+         *offset += util_bitcount(output->component_mask) * 4;
          (*location)++;
+         comp_mask >>= 4;
       }
-      *offset += comp_slots * 4;
    }
 }