Revert to Mesa 13.0.6 again.

Corruption has again been reported on Intel hardware running Xorg with the modesetting driver (which uses OpenGL-based acceleration instead of the SNA acceleration that the intel driver defaults to). It has been reported in various forms on Sandy Bridge (X220), Ivy Bridge (X230) and Haswell (X240). It is confirmed not to occur with the intel driver, but the xserver was changed to default to the modesetting driver on >= gen4 hardware (except Ironlake). One means of triggering this is to open a large PDF with xpdf on an idle machine and highlight a section of the document. There have also been reports of GPU hangs on gen4 Intel hardware (T500 with GM45, X61 with 965GM) when starting Xorg.
author: Jonathan Gray <jsg@cvs.openbsd.org> 2018-01-08 05:41:34 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2018-01-08 05:41:34 +0000
commit: c00801de923e125863aaf8180439d59d610b2517 (patch)
tree: e2896aa2785f3cf2151aeeb3c95fb5cc09a2fe02 /lib/mesa/src/gallium/auxiliary/draw
parent: be30e6efb92db21299b936c0e068e7088941e9c9 (diff)
20 files changed, 904 insertions, 798 deletions
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_context.c b/lib/mesa/src/gallium/auxiliary/draw/draw_context.c
index 9791ec550..56abcff5a 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_context.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_context.c
@@ -206,8 +206,9 @@ void draw_destroy( struct draw_context *draw )
       }
    }
 
-   for (i = 0; i < draw->pt.nr_vertex_buffers; i++)
-      pipe_vertex_buffer_unreference(&draw->pt.vertex_buffer[i]);
+   for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
+      pipe_resource_reference(&draw->pt.vertex_buffer[i].buffer, NULL);
+   }
 
    /* Not so fast -- we're just borrowing this at the moment.
     * 
@@ -439,7 +440,7 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw,
 
 void
 draw_set_mapped_constant_buffer(struct draw_context *draw,
-                                enum pipe_shader_type shader_type,
+                                unsigned shader_type,
                                 unsigned slot,
                                 const void *buffer,
                                 unsigned size )
@@ -719,7 +720,7 @@ draw_total_gs_outputs(const struct draw_context *draw)
  */
 void
 draw_texture_sampler(struct draw_context *draw,
-                     enum pipe_shader_type shader,
+                     uint shader,
                      struct tgsi_sampler *sampler)
 {
    if (shader == PIPE_SHADER_VERTEX) {
@@ -737,7 +738,7 @@ draw_texture_sampler(struct draw_context *draw,
  */
 void
 draw_image(struct draw_context *draw,
-           enum pipe_shader_type shader,
+           uint shader,
            struct tgsi_image *image)
 {
    if (shader == PIPE_SHADER_VERTEX) {
@@ -755,7 +756,7 @@ draw_image(struct draw_context *draw,
  */
 void
 draw_buffer(struct draw_context *draw,
-            enum pipe_shader_type shader,
+            uint shader,
             struct tgsi_buffer *buffer)
 {
    if (shader == PIPE_SHADER_VERTEX) {
@@ -777,6 +778,9 @@ void draw_set_render( struct draw_context *draw,
 /**
  * Tell the draw module where vertex indexes/elements are located, and
  * their size (in bytes).
+ *
+ * Note: the caller must apply the pipe_index_buffer::offset value to
+ * the address.  The draw module doesn't do that.
  */
 void
 draw_set_indexes(struct draw_context *draw,
@@ -1007,7 +1011,7 @@ draw_set_samplers(struct draw_context *draw,
 
 void
 draw_set_mapped_texture(struct draw_context *draw,
-                        enum pipe_shader_type shader_stage,
+                        unsigned shader_stage,
                         unsigned sview_idx,
                         uint32_t width, uint32_t height, uint32_t depth,
                         uint32_t first_level, uint32_t last_level,
@@ -1032,8 +1036,7 @@ draw_set_mapped_texture(struct draw_context *draw,
  * different ways of setting textures, and drivers typically only support one.
  */
 int
-draw_get_shader_param_no_llvm(enum pipe_shader_type shader,
-                              enum pipe_shader_cap param)
+draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param)
 {
    switch(shader) {
    case PIPE_SHADER_VERTEX:
@@ -1051,7 +1054,7 @@ draw_get_shader_param_no_llvm(enum pipe_shader_type shader,
  * draw_get_shader_param_no_llvm instead.
  */
 int
-draw_get_shader_param(enum pipe_shader_type shader, enum pipe_shader_cap param)
+draw_get_shader_param(unsigned shader, enum pipe_shader_cap param)
 {
 
 #ifdef HAVE_LLVM
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_context.h b/lib/mesa/src/gallium/auxiliary/draw/draw_context.h
index d8a1470e9..145fc2ed4 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_context.h
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_context.h
@@ -152,17 +152,17 @@ draw_total_gs_outputs(const struct draw_context *draw);
 
 void
 draw_texture_sampler(struct draw_context *draw,
-                     enum pipe_shader_type shader_type,
+                     uint shader_type,
                      struct tgsi_sampler *sampler);
 
 void
 draw_image(struct draw_context *draw,
-           enum pipe_shader_type shader_type,
+           uint shader_type,
            struct tgsi_image *image);
 
 void
 draw_buffer(struct draw_context *draw,
-           enum pipe_shader_type shader_type,
+           uint shader_type,
            struct tgsi_buffer *buffer);
 
 void
@@ -178,7 +178,7 @@ draw_set_samplers(struct draw_context *draw,
 
 void
 draw_set_mapped_texture(struct draw_context *draw,
-                        enum pipe_shader_type shader_stage,
+                        unsigned shader_stage,
                         unsigned sview_idx,
                         uint32_t width, uint32_t height, uint32_t depth,
                         uint32_t first_level, uint32_t last_level,
@@ -249,7 +249,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw,
 
 void
 draw_set_mapped_constant_buffer(struct draw_context *draw,
-                                enum pipe_shader_type shader_type,
+                                unsigned shader_type,
                                 unsigned slot,
                                 const void *buffer,
                                 unsigned size);
@@ -299,11 +299,10 @@ boolean draw_need_pipeline(const struct draw_context *draw,
                            unsigned prim );
 
 int
-draw_get_shader_param(enum pipe_shader_type shader, enum pipe_shader_cap param);
+draw_get_shader_param(unsigned shader, enum pipe_shader_cap param);
 
 int
-draw_get_shader_param_no_llvm(enum pipe_shader_type shader,
-                              enum pipe_shader_cap param);
+draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param);
 
 boolean
 draw_get_option_use_llvm(void);
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c
index 203572010..2f82d9dee 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.c
@@ -33,8 +33,6 @@
 
 #include "gallivm/lp_bld_arit.h"
 #include "gallivm/lp_bld_arit_overflow.h"
-#include "gallivm/lp_bld_bitarit.h"
-#include "gallivm/lp_bld_gather.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_const.h"
 #include "gallivm/lp_bld_swizzle.h"
@@ -63,7 +61,8 @@
 
 
 static void
-draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
+                   boolean elts);
 
 
 struct draw_gs_llvm_iface {
@@ -352,9 +351,9 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
    LLVMTypeRef elem_types[4];
    LLVMTypeRef vb_type;
 
-   elem_types[0] = LLVMInt16TypeInContext(gallivm->context);
-   elem_types[1] = LLVMInt8TypeInContext(gallivm->context);
-   elem_types[2] = LLVMInt32TypeInContext(gallivm->context);
+   elem_types[0] =
+   elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
+   elem_types[2] =
    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
 
    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
@@ -363,12 +362,8 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
    (void) target; /* silence unused var warning for non-debug build */
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
                           target, vb_type, 0);
-   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
-                          target, vb_type, 1);
    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
-                          target, vb_type, 2);
-   LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
-                          target, vb_type, 3);
+                          target, vb_type, 1);
 
    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
 
@@ -580,13 +575,17 @@ draw_llvm_create_variant(struct draw_llvm *llvm,
 
    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
 
-   draw_llvm_generate(llvm, variant);
+   draw_llvm_generate(llvm, variant, FALSE);  /* linear */
+   draw_llvm_generate(llvm, variant, TRUE);   /* elts */
 
    gallivm_compile_module(variant->gallivm);
 
    variant->jit_func = (draw_jit_vert_func)
          gallivm_jit_function(variant->gallivm, variant->function);
 
+   variant->jit_func_elts = (draw_jit_vert_func_elts)
+         gallivm_jit_function(variant->gallivm, variant->function_elts);
+
    gallivm_free_ir(variant->gallivm);
 
    variant->list_item_global.base = variant;
@@ -657,134 +656,101 @@ generate_vs(struct draw_llvm_variant *variant,
    }
 }
 
-
 static void
-fetch_instanced(struct gallivm_state *gallivm,
-                const struct util_format_description *format_desc,
-                struct lp_type vs_type,
-                LLVMValueRef vb_stride,
-                LLVMValueRef map_ptr,
-                LLVMValueRef buffer_size_adj,
-                LLVMValueRef *inputs,
-                LLVMValueRef index)
+generate_fetch(struct gallivm_state *gallivm,
+               struct draw_context *draw,
+               const struct util_format_description *format_desc,
+               LLVMValueRef vb_stride,
+               LLVMValueRef stride_fixed,
+               LLVMValueRef map_ptr,
+               LLVMValueRef buffer_size_adj,
+               LLVMValueRef ofbit,
+               LLVMValueRef *res,
+               LLVMValueRef index)
 {
-   LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
-   LLVMTypeRef aosf_t, aosi_t;
-   LLVMValueRef zero = LLVMConstNull(i32_t);
+   LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef stride, buffer_overflowed, aos, index_valid;
-   unsigned i;
+   LLVMValueRef stride;
+   LLVMValueRef buffer_overflowed;
+   LLVMValueRef temp_ptr =
+      lp_build_alloca(gallivm,
+                      lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
+   struct lp_build_if_state if_ctx;
 
-   aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
-   aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
+   if (format_desc->format == PIPE_FORMAT_NONE) {
+      *res = lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
+      return;
+   }
 
-   /* This mul can overflow. Wraparound is ok. */
-   stride = LLVMBuildMul(builder, vb_stride, index, "");
+   stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
+   stride = lp_build_uadd_overflow(gallivm, stride, stride_fixed, &ofbit);
 
-   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
+   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
                                      stride, buffer_size_adj,
                                      "buffer_overflowed");
+   buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
 
    if (0) {
-      lp_build_print_value(gallivm, "   instance index = ", index);
+      lp_build_printf(gallivm, "   stride = %u\n", stride);
+      lp_build_printf(gallivm, "   buffer size adj = %u\n", buffer_size_adj);
       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
    }
 
-   index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
-   index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
-   stride = LLVMBuildAnd(builder, stride, index_valid, "");
-
-   aos = lp_build_fetch_rgba_aos(gallivm,
-                                 format_desc,
-                                 lp_float32_vec4_type(),
-                                 FALSE,
-                                 map_ptr,
-                                 stride, zero, zero,
-                                 NULL);
-
-   index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
-   aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
-   aos = LLVMBuildAnd(builder, aos, index_valid, "");
-   aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
-
-   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-      LLVMValueRef index = lp_build_const_int32(gallivm, i);
-      inputs[i] = lp_build_extract_broadcast(gallivm,
-                                             lp_float32_vec4_type(),
-                                             vs_type, aos, index);
+   lp_build_if(&if_ctx, gallivm, buffer_overflowed);
+   {
+      LLVMValueRef val =
+         lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
+      LLVMBuildStore(builder, val, temp_ptr);
    }
-}
+   lp_build_else(&if_ctx);
+   {
+      LLVMValueRef val;
+      map_ptr = LLVMBuildGEP(builder, map_ptr, &stride, 1, "");
+
+      val = lp_build_fetch_rgba_aos(gallivm,
+                                    format_desc,
+                                    lp_float32_vec4_type(),
+                                    FALSE,
+                                    map_ptr,
+                                    zero, zero, zero,
+                                    NULL);
+      LLVMBuildStore(builder, val, temp_ptr);
+   }
+   lp_build_endif(&if_ctx);
 
+   *res = LLVMBuildLoad(builder, temp_ptr, "aos");
+}
 
 static void
-fetch_vector(struct gallivm_state *gallivm,
-             const struct util_format_description *format_desc,
-             struct lp_type vs_type,
-             LLVMValueRef vb_stride,
-             LLVMValueRef map_ptr,
-             LLVMValueRef buffer_size_adj,
-             LLVMValueRef *inputs,
-             LLVMValueRef indices)
+convert_to_soa(struct gallivm_state *gallivm,
+               LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32],
+               LLVMValueRef (*dst_soa)[TGSI_NUM_CHANNELS],
+               unsigned attrib, const struct lp_type soa_type)
 {
-   LLVMBuilderRef builder = gallivm->builder;
-   struct lp_build_context blduivec;
-   struct lp_type fetch_type = vs_type;
-   LLVMValueRef offset, valid_mask;
-   unsigned i;
-
-   lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
+   unsigned j, k;
+   struct lp_type aos_channel_type = soa_type;
 
-   vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
-   buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
+   LLVMValueRef aos_channels[TGSI_NUM_CHANNELS];
+   unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS;
 
-   /* This mul can overflow. Wraparound is ok. */
-   offset = lp_build_mul(&blduivec, vb_stride, indices);
+   debug_assert(TGSI_NUM_CHANNELS == 4);
+   debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0);
 
-   valid_mask = lp_build_compare(gallivm, blduivec.type,
-                                 PIPE_FUNC_LESS, offset, buffer_size_adj);
+   aos_channel_type.length >>= 1;
 
-   /* not valid elements use offset 0 */
-   offset = LLVMBuildAnd(builder, offset, valid_mask, "");
+   for (j = 0; j < TGSI_NUM_CHANNELS; ++j) {
+      LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
 
-   if (0) {
-      lp_build_print_value(gallivm, "   indices = ", indices);
-      lp_build_print_value(gallivm, "   offsets = ", offset);
-      lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
-   }
-
-   /*
-    * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
-    * This should always produce better code.
-    */
+      assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
 
-   /* The type handling is annoying here... */
-   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
-       format_desc->channel[0].pure_integer) {
-      if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
-         fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
-      }
-      else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
-         fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
+      for (k = 0; k < pixels_per_channel; ++k) {
+         channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k];
       }
-   }
 
-   lp_build_fetch_rgba_soa(gallivm, format_desc,
-                           fetch_type, FALSE, map_ptr, offset,
-                           blduivec.zero, blduivec.zero,
-                           NULL, inputs);
-
-   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-      inputs[i] = LLVMBuildBitCast(builder, inputs[i],
-                                   lp_build_vec_type(gallivm, vs_type), "");
+      aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
    }
 
-   /* out-of-bound fetches return all zeros */
-   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-      inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
-      inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
-      inputs[i] = LLVMBuildBitCast(builder, inputs[i],
-                                   lp_build_vec_type(gallivm, vs_type), "");
-   }
+   lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa[attrib]);
 }
 
 
@@ -1336,21 +1302,20 @@ generate_clipmask(struct draw_llvm *llvm,
 
 /**
  * Returns boolean if any clipping has occurred
- * Used zero/one i8 value to represent boolean
+ * Used zero/non-zero i32 value to represent boolean
  */
 static LLVMValueRef
-clipmask_booli8(struct gallivm_state *gallivm,
-                const struct lp_type vs_type,
-                LLVMValueRef clipmask_bool_ptr,
-                boolean edgeflag_in_clipmask)
+clipmask_booli32(struct gallivm_state *gallivm,
+                 const struct lp_type vs_type,
+                 LLVMValueRef clipmask_bool_ptr,
+                 boolean edgeflag_in_clipmask)
 {
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
+   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
-   LLVMValueRef ret;
-   struct lp_build_context bldivec;
-
-   lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
+   LLVMValueRef ret = LLVMConstNull(int32_type);
+   LLVMValueRef temp;
+   int i;
 
    /*
     * We need to invert the edgeflag bit from the clipmask here
@@ -1358,20 +1323,19 @@ clipmask_booli8(struct gallivm_state *gallivm,
     * and we (may) need it if edgeflag was 0).
     */
    if (edgeflag_in_clipmask) {
-      LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
+      struct lp_type i32_type = lp_int_type(vs_type);
+      LLVMValueRef edge = lp_build_const_int_vec(gallivm, i32_type,
                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
    }
-
    /*
-    * XXX: probably should mask off bits from the mask which come from
-    * vertices which were beyond the count (i.e. indices_valid for
-    * linear fetches, for elts ones we don't have the correct mask
-    * right now). Otherwise might run the pipeline for nothing,
-    * though everything should still work.
+    * Could do much better with just cmp/movmskps.
     */
-   ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
-   ret = LLVMBuildZExt(builder, ret, int8_type, "");
+   for (i=0; i < vs_type.length; i++) {
+      temp = LLVMBuildExtractElement(builder, clipmask_bool,
+                                     lp_build_const_int32(gallivm, i) , "");
+      ret = LLVMBuildOr(builder, ret, temp, "");
+   }
    return ret;
 }
 
@@ -1518,38 +1482,43 @@ draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
 }
 
 static void
-draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
+                   boolean elts)
 {
    struct gallivm_state *gallivm = variant->gallivm;
    LLVMContextRef context = gallivm->context;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
    LLVMTypeRef arg_types[11];
-   unsigned num_arg_types = ARRAY_SIZE(arg_types);
+   unsigned num_arg_types =
+      elts ? ARRAY_SIZE(arg_types) : ARRAY_SIZE(arg_types) - 1;
    LLVMTypeRef func_type;
    LLVMValueRef context_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    char func_name[64];
    struct lp_type vs_type;
-   LLVMValueRef count, fetch_elts, start_or_maxelt;
+   LLVMValueRef end, start;
+   LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
    LLVMValueRef vertex_id_offset, start_instance;
    LLVMValueRef stride, step, io_itr;
-   LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
-   LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
-   LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
-   LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
-   LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
-   LLVMValueRef fake_buf_ptr, fake_buf;
+   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+   LLVMValueRef one = lp_build_const_int32(gallivm, 1);
+   LLVMValueRef vb_stride[PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef map_ptr[PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef buffer_size_adj[PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef stride_fixed[PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef ofbit[PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef instance_index[PIPE_MAX_SHADER_INPUTS];
 
    struct draw_context *draw = llvm->draw;
    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
    unsigned i, j;
-   struct lp_build_context bld, blduivec;
+   struct lp_build_context bld;
    struct lp_build_loop_state lp_loop;
-   struct lp_build_if_state if_ctx;
    const int vector_length = lp_native_vector_width / 32;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef fetch_max;
    struct lp_build_sampler_soa *sampler = 0;
    LLVMValueRef ret, clipmask_bool_ptr;
    struct draw_llvm_variant_key *key = &variant->key;
@@ -1561,80 +1530,93 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
     * the values).
     */
    const boolean bypass_viewport = key->has_gs || key->bypass_viewport ||
-                                   vs_info->writes_viewport_index;
+                                   llvm->draw->vs.vertex_shader->info.writes_viewport_index;
    const boolean enable_cliptest = !key->has_gs && (key->clip_xy ||
                                                     key->clip_z ||
                                                     key->clip_user ||
                                                     key->need_edgeflags);
    LLVMValueRef variant_func;
-   const unsigned pos = draw->vs.position_output;
-   const unsigned cv = draw->vs.clipvertex_output;
+   const unsigned pos = llvm->draw->vs.position_output;
+   const unsigned cv = llvm->draw->vs.clipvertex_output;
    boolean have_clipdist = FALSE;
    struct lp_bld_tgsi_system_values system_values;
 
    memset(&system_values, 0, sizeof(system_values));
 
-   util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u",
-                 variant->shader->variants_cached);
+   util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u_%s",
+                 variant->shader->variants_cached, elts ? "elts" : "linear");
 
    i = 0;
    arg_types[i++] = get_context_ptr_type(variant);       /* context */
    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
-   arg_types[i++] = int32_type;                          /* count */
-   arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
-   arg_types[i++] = int32_type;                          /* stride */
-   arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
-   arg_types[i++] = int32_type;                          /* instance_id */
-   arg_types[i++] = int32_type;                          /* vertex_id_offset */
-   arg_types[i++] = int32_type;                          /* start_instance */
-   arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
-
-   func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
-                                arg_types, num_arg_types, 0);
+   if (elts) {
+      arg_types[i++] = LLVMPointerType(int32_type, 0);/* fetch_elts  */
+      arg_types[i++] = int32_type;                  /* fetch_elt_max */
+   } else
+      arg_types[i++] = int32_type;                  /* start */
+   arg_types[i++] = int32_type;                     /* fetch_count / count */
+   arg_types[i++] = int32_type;                     /* stride */
+   arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
+   arg_types[i++] = int32_type;                     /* instance_id */
+   arg_types[i++] = int32_type;                     /* vertex_id_offset */
+   arg_types[i++] = int32_type;                     /* start_instance */
+
+   func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
 
    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
-   variant->function = variant_func;
+
+   if (elts)
+      variant->function_elts = variant_func;
+   else
+      variant->function = variant_func;
 
    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
    for (i = 0; i < num_arg_types; ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+         LLVMAddAttribute(LLVMGetParam(variant_func, i),
+                          LLVMNoAliasAttribute);
 
    context_ptr               = LLVMGetParam(variant_func, 0);
    io_ptr                    = LLVMGetParam(variant_func, 1);
    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
-   count                     = LLVMGetParam(variant_func, 3);
-   /*
-    * XXX: the maxelt part is unused. Not really useful, since we cannot
-    * get index buffer overflows due to vsplit (which provides its own
-    * elts buffer, with a different size than what's passed in here).
-    */
-   start_or_maxelt           = LLVMGetParam(variant_func, 4);
    /*
     * XXX: stride is actually unused. The stride we use is strictly calculated
     * from the number of outputs (including the draw_extra outputs).
     * Should probably fix some day (we need a new vs just because of extra
     * outputs which the generated vs won't touch).
     */
-   stride                    = LLVMGetParam(variant_func, 5);
-   vb_ptr                    = LLVMGetParam(variant_func, 6);
-   system_values.instance_id = LLVMGetParam(variant_func, 7);
-   vertex_id_offset          = LLVMGetParam(variant_func, 8);
-   start_instance            = LLVMGetParam(variant_func, 9);
-   fetch_elts                = LLVMGetParam(variant_func, 10);
+   stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
+   vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
+   system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
+   vertex_id_offset          = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
+   start_instance            = LLVMGetParam(variant_func, 9 + (elts ? 1 : 0));
 
    lp_build_name(context_ptr, "context");
    lp_build_name(io_ptr, "io");
    lp_build_name(vbuffers_ptr, "vbuffers");
-   lp_build_name(count, "count");
-   lp_build_name(start_or_maxelt, "start_or_maxelt");
    lp_build_name(stride, "stride");
    lp_build_name(vb_ptr, "vb");
    lp_build_name(system_values.instance_id, "instance_id");
    lp_build_name(vertex_id_offset, "vertex_id_offset");
    lp_build_name(start_instance, "start_instance");
-   lp_build_name(fetch_elts, "fetch_elts");
+
+   if (elts) {
+      fetch_elts    = LLVMGetParam(variant_func, 3);
+      fetch_elt_max = LLVMGetParam(variant_func, 4);
+      fetch_count   = LLVMGetParam(variant_func, 5);
+      lp_build_name(fetch_elts, "fetch_elts");
+      lp_build_name(fetch_elt_max, "fetch_elt_max");
+      lp_build_name(fetch_count, "fetch_count");
+      start = count = NULL;
+   }
+   else {
+      start        = LLVMGetParam(variant_func, 3);
+      count        = LLVMGetParam(variant_func, 4);
+      lp_build_name(start, "start");
+      lp_build_name(count, "count");
+      fetch_elts = fetch_count = NULL;
+   }
 
    /*
     * Function body
@@ -1644,6 +1626,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    builder = gallivm->builder;
    LLVMPositionBuilderAtEnd(builder, block);
 
+   lp_build_context_init(&bld, gallivm, lp_type_int(32));
+
    memset(&vs_type, 0, sizeof vs_type);
    vs_type.floating = TRUE; /* floating point values */
    vs_type.sign = TRUE;     /* values are signed */
@@ -1651,143 +1635,87 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    vs_type.width = 32;      /* 32-bit float */
    vs_type.length = vector_length;
 
-   lp_build_context_init(&bld, gallivm, lp_type_uint(32));
-   lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
-
    /* hold temporary "bool" clipmask */
-   clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
-
-   fake_buf = lp_build_alloca_undef(gallivm,
-                 LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
-   fake_buf = LLVMBuildBitCast(builder, fake_buf,
-                 LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
-   fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, "");
+   clipmask_bool_ptr = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, vs_type), "");
+   LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), clipmask_bool_ptr);
 
    /* code generated texture sampling */
    sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key));
 
-   step = lp_build_const_int32(gallivm, vector_length);
-
-   ind_vec = blduivec.undef;
-   for (i = 0; i < vs_type.length; i++) {
-      LLVMValueRef index = lp_build_const_int32(gallivm, i);
-      ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
+   if (elts) {
+      start = zero;
+      end = fetch_count;
+      count = fetch_count;
+   }
+   else {
+      end = lp_build_add(&bld, start, count);
    }
 
-   have_elts = LLVMBuildICmp(builder, LLVMIntNE,
-                             LLVMConstPointerNull(arg_types[10]), fetch_elts, "");
+   step = lp_build_const_int32(gallivm, vector_length);
 
-   fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
-   fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
-   /*
-    * Only needed for non-indexed path.
-    */
-   start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt);
+   fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
 
    /*
     * Pre-calculate everything which is constant per shader invocation.
     */
-   for (j = 0; j < key->nr_vertex_elements; ++j) {
-      LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
-      LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
-      struct pipe_vertex_element *velem = &key->vertex_element[j];
+   for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+      LLVMValueRef vb_buffer_offset, buffer_size;
+      LLVMValueRef vb_info, vbuffer_ptr;
+      struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
       LLVMValueRef vb_index =
          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
       LLVMValueRef bsize = lp_build_const_int32(gallivm,
                                                 util_format_get_blocksize(velem->src_format));
       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
                                                      velem->src_offset);
-      struct lp_build_if_state if_ctx;
-
-      if (velem->src_format != PIPE_FORMAT_NONE) {
-         vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, "");
-         vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
-         vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info);
-         vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j],
-                                      LLVMInt32TypeInContext(context), "");
-         vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info);
-         map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
-         buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
-
-         ofbit = NULL;
-         /*
-          * We'll set buffer_size_adj to zero if we have of, so it will
-          * always overflow later automatically without having to keep ofbit.
-          * Overflows (with normal wraparound) doing the actual offset
-          * calculation should be ok, just not for the buffer size calc.
-          * It would also be possible to detect such overflows and return
-          * zeros if that happens, but this would be more complex.
-          */
-         buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
-         tmp = lp_build_sub(&bld, bsize, bld.one);
-         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
-                                                     &ofbit);
-         buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
-                                                     buf_offset, &ofbit);
-
-         /*
-          * We can't easily set fake vertex buffers outside the generated code.
-          * Hence, set fake vertex buffers here instead basically, so fetch
-          * code can always fetch using offset 0, eliminating all control flow
-          * inside the main loop.
-          * (Alternatively, could have control flow per vector skipping fetch
-          * if ofbit is true.)
-          */
-         if (velem->instance_divisor) {
-            /*
-             * Index is equal to the start instance plus the number of current
-             * instance divided by the divisor. In this case we compute it as:
-             * index = start_instance + (instance_id  / divisor).
-             * Note we could actually do the fetch here, outside the loop -
-             * it's all constant, hopefully llvm recognizes this.
-             */
-            LLVMValueRef current_instance;
-            current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
-                                             lp_build_const_int32(gallivm,
-                                                                  velem->instance_divisor),
-                                             "instance_divisor");
-            instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance,
-                                                       current_instance, &ofbit);
-         }
-
-         buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
-                                              buffer_size_adj[j], "");
 
-         temp_ptr = lp_build_alloca_undef(gallivm,
-                       LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
+      vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, "");
+      vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
+      vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info);
+      vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info);
+      map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
+      buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
+
+      ofbit[j] = NULL;
+      stride_fixed[j] = lp_build_uadd_overflow(gallivm, vb_buffer_offset,
+                                               src_offset, &ofbit[j]);
+      buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, bsize,
+                                                   &ofbit[j]);
+
+      if (velem->instance_divisor) {
+         /* Index is equal to the start instance plus the number of current 
+          * instance divided by the divisor. In this case we compute it as:
+          * index = start_instance + (instance_id  / divisor)
+          */
+         LLVMValueRef current_instance;
+         current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
+                                          lp_build_const_int32(gallivm,
+                                                               velem->instance_divisor),
+                                          "instance_divisor");
+         instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance,
+                                                    current_instance, &ofbit[j]);
+      }
 
-         lp_build_if(&if_ctx, gallivm, ofbit);
-         {
-            LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
-         }
-         lp_build_else(&if_ctx);
-         {
-            map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, "");
-            LLVMBuildStore(builder, map_ptr[j], temp_ptr);
-         }
-         lp_build_endif(&if_ctx);
-         map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr");
-
-         if (0) {
-            lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
-                            lp_build_const_int32(gallivm, j),
-                            vb_index, vb_stride[j]);
-            lp_build_printf(gallivm,
-                            "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
-                            vb_buffer_offset, src_offset, buf_offset);
-            lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
-                            buffer_size, bsize);
-            lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
-         }
+      if (0) {
+         lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
+                         vb_index, vb_stride[j]);
+         lp_build_printf(gallivm, "   vb_buffer_offset = %u, src_offset is %u\n",
+                         vb_buffer_offset, src_offset);
+         lp_build_print_value(gallivm, "   blocksize = ", bsize);
+         lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
+         lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
       }
    }
 
-   lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
+   lp_build_loop_begin(&lp_loop, gallivm, zero);
    {
       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+      LLVMValueRef aos_attribs[LP_MAX_VECTOR_WIDTH / 32] = { 0 };
       LLVMValueRef io;
       LLVMValueRef clipmask;   /* holds the clipmask value */
-      LLVMValueRef true_index_array, index_store;
+      LLVMValueRef true_index_array = lp_build_zero(gallivm,
+                                                    lp_type_uint_vec(32, 32*vector_length));
+      LLVMValueRef true_indices[LP_MAX_VECTOR_WIDTH / 32];
       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
 
       io_itr = lp_loop.counter;
@@ -1798,86 +1726,73 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
                       io_itr, io, lp_loop.counter);
 #endif
 
-      true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
-      true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
-
-      /*
-       * Limit indices to fetch_max, otherwise might try to access indices
-       * beyond index buffer (or rather vsplit elt buffer) size.
-       * Could probably safely (?) skip this for non-indexed draws and
-       * simplify things minimally (by removing it could combine the ind_vec
-       * and start_vec adds). I think the only effect for non-indexed draws will
-       * be that for the invalid elements they will be all fetched from the
-       * same location as the last valid one, but noone should really care.
-       */
-      true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
-
-      index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
-
-      lp_build_if(&if_ctx, gallivm, have_elts);
-      {
-         /*
-          * Note: you'd expect some comparison/clamp against fetch_elt_max
-          * here.
-          * There used to be one here but it was incorrect: overflow was
-          * detected if index > fetch_elt_max - but the correct condition
-          * would be index >= fetch_elt_max (since this is just size of elts
-          * buffer / element size).
-          * Using the correct condition however will cause failures - due to
-          * vsplit/vcache code which rebases indices. So, as an example, if
-          * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
-          * replace all invalid indices with 0 - which in case of elt_bias
-          * not being zero will get a different fetch index than the valid
-          * index 0. So, just rely on vsplit code preventing out-of-bounds
-          * fetches. This is also why it's safe to do elts fetch even if there
-          * was no index buffer bound - the real buffer is never seen here, at
-          * least not if there are index buffer overflows...
-          */
-
-         /*
-          * XXX should not have to do this, as scale can be handled
-          * natively by loads (hits asserts though).
-          */
-         tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
-         fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
-                                       LLVMPointerType(LLVMInt8TypeInContext(context),
-                                                       0), "");
-         tmp = lp_build_gather(gallivm, vs_type.length,
-                               32, bld.type, TRUE,
-                               fetch_elts, tmp, FALSE);
-         LLVMBuildStore(builder, tmp, index_store);
-      }
-      lp_build_else(&if_ctx);
-      {
-         tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
-         LLVMBuildStore(builder, tmp, index_store);
+      for (i = 0; i < vector_length; ++i) {
+         LLVMValueRef vert_index =
+            LLVMBuildAdd(builder,
+                         lp_loop.counter,
+                         lp_build_const_int32(gallivm, i), "");
+         LLVMValueRef true_index =
+            LLVMBuildAdd(builder, start, vert_index, "");
+
+         /* make sure we're not out of bounds which can happen
+          * if fetch_count % 4 != 0, because on the last iteration
+          * a few of the 4 vertex fetches will be out of bounds */
+         true_index = lp_build_min(&bld, true_index, fetch_max);
+
+         if (elts) {
+            LLVMValueRef fetch_ptr;
+            LLVMValueRef index_overflowed;
+            LLVMValueRef index_ptr =
+               lp_build_alloca(
+                  gallivm,
+                  lp_build_vec_type(gallivm, lp_type_int(32)), "");
+            struct lp_build_if_state if_ctx;
+            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
+                                             true_index, fetch_elt_max,
+                                             "index_overflowed");
+
+            lp_build_if(&if_ctx, gallivm, index_overflowed);
+            {
+               /* Generate maximum possible index so that
+                * generate_fetch can treat it just like
+                * any other overflow and return zeros.
+                * We don't have to worry about the restart
+                * primitive index because it has already been 
+                * handled
+                */
+               LLVMValueRef val =
+                  lp_build_const_int32(gallivm, 0xffffffff);
+               LLVMBuildStore(builder, val, index_ptr);
+            }
+            lp_build_else(&if_ctx);
+            {
+               LLVMValueRef val;
+               fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+                                        &true_index, 1, "");
+               val = LLVMBuildLoad(builder, fetch_ptr, "");
+               LLVMBuildStore(builder, val, index_ptr);
+            }
+            lp_build_endif(&if_ctx);
+            true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
+         }
+         true_indices[i] = true_index;
+         true_index_array = LLVMBuildInsertElement(
+            gallivm->builder, true_index_array, true_index,
+            lp_build_const_int32(gallivm, i), "");
       }
-      lp_build_endif(&if_ctx);
-
-      true_index_array = LLVMBuildLoad(builder, index_store, "");
 
       for (j = 0; j < key->nr_vertex_elements; ++j) {
-         struct pipe_vertex_element *velem = &key->vertex_element[j];
+         struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
          const struct util_format_description *format_desc =
             util_format_description(velem->src_format);
 
-         if (format_desc->format == PIPE_FORMAT_NONE) {
-            for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
-               inputs[j][i] = lp_build_zero(gallivm, vs_type);
-            }
-         }
-         else if (velem->instance_divisor) {
-            fetch_instanced(gallivm, format_desc, vs_type,
-                            vb_stride[j], map_ptr[j],
-                            buffer_size_adj[j],
-                            inputs[j], instance_index[j]);
-         }
-         else {
-            fetch_vector(gallivm, format_desc, vs_type,
-                         vb_stride[j], map_ptr[j],
-                         buffer_size_adj[j],
-                         inputs[j], true_index_array);
+         for (i = 0; i < vector_length; ++i) {
+            generate_fetch(gallivm, draw, format_desc,
+                           vb_stride[j], stride_fixed[j], map_ptr[j],
+                           buffer_size_adj[j], ofbit[j], &aos_attribs[i],
+                           velem->instance_divisor ? instance_index[j] : true_indices[i]);
          }
+         convert_to_soa(gallivm, aos_attribs, inputs, j, vs_type);
       }
 
       /* In the paths with elts vertex id has to be unaffected by the
@@ -1891,8 +1806,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
        * most 4095-vertices) we need to back out the original start
        * index out of our vertex id here.
        */
-      system_values.basevertex = lp_build_broadcast_scalar(&blduivec,
-                                                           vertex_id_offset);
+      system_values.basevertex = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm,
+                                                       lp_type_uint_vec(32, 32*vector_length)),
+                                                    vertex_id_offset);
       system_values.vertex_id = true_index_array;
       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
                                                       system_values.basevertex, "");
@@ -1927,7 +1843,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
          }
          else {
-            clipmask = blduivec.zero;
+            clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
          }
 
          /* do viewport mapping */
@@ -1936,7 +1852,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
          }
       }
       else {
-         clipmask = blduivec.zero;
+         clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
       }
 
       /* store clipmask in vertex header,
@@ -1952,8 +1868,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
    sampler->destroy(sampler);
 
    /* return clipping boolean value for function */
-   ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr,
-                         enable_cliptest && key->need_edgeflags);
+   ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr,
+                          enable_cliptest && key->need_edgeflags);
 
    LLVMBuildRet(builder, ret);
 
@@ -2072,7 +1988,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
 
 void
 draw_llvm_set_mapped_texture(struct draw_context *draw,
-                             enum pipe_shader_type shader_stage,
+                             unsigned shader_stage,
                              unsigned sview_idx,
                              uint32_t width, uint32_t height, uint32_t depth,
                              uint32_t first_level, uint32_t last_level,
@@ -2117,7 +2033,7 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
 
 void
 draw_llvm_set_sampler_state(struct draw_context *draw, 
-                            enum pipe_shader_type shader_type)
+                            unsigned shader_type)
 {
    unsigned i;
 
@@ -2273,7 +2189,8 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
 
    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
-         lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
+         LLVMAddAttribute(LLVMGetParam(variant_func, i),
+                          LLVMNoAliasAttribute);
 
    context_ptr               = LLVMGetParam(variant_func, 0);
    input_array               = LLVMGetParam(variant_func, 1);
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.h b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.h
index a968be01f..271433c8e 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_llvm.h
@@ -172,7 +172,7 @@ enum {
    lp_build_struct_get(_gallivm, _ptr, 0, "stride")
 
 #define draw_jit_vbuffer_offset(_gallivm, _ptr)         \
-   lp_build_struct_get(_gallivm, _ptr, 2, "buffer_offset")
+   lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset")
 
 enum {
    DRAW_JIT_DVBUFFER_MAP = 0,
@@ -261,18 +261,31 @@ enum {
 
 
 
-typedef boolean
+typedef int
 (*draw_jit_vert_func)(struct draw_jit_context *context,
                       struct vertex_header *io,
                       const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS],
+                      unsigned start,
                       unsigned count,
-                      unsigned start_or_maxelt,
                       unsigned stride,
                       struct pipe_vertex_buffer *vertex_buffers,
                       unsigned instance_id,
                       unsigned vertex_id_offset,
-                      unsigned start_instance,
-                      const unsigned *fetch_elts);
+                      unsigned start_instance);
+
+
+typedef int
+(*draw_jit_vert_func_elts)(struct draw_jit_context *context,
+                           struct vertex_header *io,
+                           const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS],
+                           const unsigned *fetch_elts,
+                           unsigned fetch_max_elt,
+                           unsigned fetch_count,
+                           unsigned stride,
+                           struct pipe_vertex_buffer *vertex_buffers,
+                           unsigned instance_id,
+                           unsigned vertex_id_offset,
+                           unsigned start_instance);
 
 
 typedef int
@@ -380,7 +393,9 @@ struct draw_llvm_variant
    LLVMTypeRef vertex_header_ptr_type;
 
    LLVMValueRef function;
+   LLVMValueRef function_elts;
    draw_jit_vert_func jit_func;
+   draw_jit_vert_func_elts jit_func_elts;
 
    struct llvm_vertex_shader *shader;
 
@@ -508,12 +523,11 @@ struct lp_build_sampler_soa *
 draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_state);
 
 void
-draw_llvm_set_sampler_state(struct draw_context *draw,
-                            enum pipe_shader_type shader_stage);
+draw_llvm_set_sampler_state(struct draw_context *draw, unsigned shader_stage);
 
 void
 draw_llvm_set_mapped_texture(struct draw_context *draw,
-                             enum pipe_shader_type shader_stage,
+                             unsigned shader_stage,
                              unsigned sview_idx,
                              uint32_t width, uint32_t height, uint32_t depth,
                              uint32_t first_level, uint32_t last_level,
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index a859dbc02..c236caa3e 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -1,5 +1,5 @@
 /**************************************************************************
- *
+ * 
  * Copyright 2007 VMware, Inc.
  * All Rights Reserved.
  *
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- *
+ * 
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- *
+ * 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ * 
  **************************************************************************/
 
 /**
@@ -285,7 +285,7 @@ aa_transform_epilog(struct tgsi_transform_context *ctx)
                               TGSI_FILE_OUTPUT, aactx->colorOutput,
                               TGSI_WRITEMASK_W,
                               TGSI_FILE_TEMPORARY, aactx->colorTemp,
-                              TGSI_FILE_TEMPORARY, aactx->texTemp, false);
+                              TGSI_FILE_TEMPORARY, aactx->texTemp);
    }
 }
 
@@ -423,9 +423,9 @@ aaline_create_texture(struct aaline_stage *aaline)
 
       assert(aaline->texture->width0 == aaline->texture->height0);
 
-      u_box_origin_2d(size, size, &box);
+      u_box_origin_2d( size, size, &box );
 
-      /* This texture is new, no need to flush.
+      /* This texture is new, no need to flush. 
        */
       data = pipe->transfer_map(pipe,
                                 aaline->texture,
@@ -502,7 +502,8 @@ bind_aaline_fragment_shader(struct aaline_stage *aaline)
    struct draw_context *draw = aaline->stage.draw;
    struct pipe_context *pipe = draw->pipe;
 
-   if (!aaline->fs->aaline_fs && !generate_aaline_fs(aaline))
+   if (!aaline->fs->aaline_fs && 
+       !generate_aaline_fs(aaline))
       return FALSE;
 
    draw->suspend_flushing = TRUE;
@@ -515,7 +516,7 @@ bind_aaline_fragment_shader(struct aaline_stage *aaline)
 
 
 static inline struct aaline_stage *
-aaline_stage(struct draw_stage *stage)
+aaline_stage( struct draw_stage *stage )
 {
    return (struct aaline_stage *) stage;
 }
@@ -572,12 +573,12 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
    pos[1] += (-dx * s_a + -dy * c_a);
 
    pos = v[2]->data[posPos];
-   pos[0] += (dx * c_a -  dy * s_a);
-   pos[1] += (dx * s_a +  dy * c_a);
+   pos[0] += ( dx * c_a -  dy * s_a);
+   pos[1] += ( dx * s_a +  dy * c_a);
 
    pos = v[3]->data[posPos];
-   pos[0] += (dx * c_a - -dy * s_a);
-   pos[1] += (dx * s_a + -dy * c_a);
+   pos[0] += ( dx * c_a - -dy * s_a);
+   pos[1] += ( dx * s_a + -dy * c_a);
 
    pos = v[4]->data[posPos];
    pos[0] += (-dx * c_a -  dy * s_a);
@@ -588,12 +589,12 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
    pos[1] += (-dx * s_a + -dy * c_a);
 
    pos = v[6]->data[posPos];
-   pos[0] += (dx * c_a -  dy * s_a);
-   pos[1] += (dx * s_a +  dy * c_a);
+   pos[0] += ( dx * c_a -  dy * s_a);
+   pos[1] += ( dx * s_a +  dy * c_a);
 
    pos = v[7]->data[posPos];
-   pos[0] += (dx * c_a - -dy * s_a);
-   pos[1] += (dx * s_a + -dy * c_a);
+   pos[0] += ( dx * c_a - -dy * s_a);
+   pos[1] += ( dx * s_a + -dy * c_a);
 
    /* new texcoords */
    tex = v[0]->data[texPos];
@@ -622,22 +623,22 @@ aaline_line(struct draw_stage *stage, struct prim_header *header)
 
    /* emit 6 tris for the quad strip */
    tri.v[0] = v[2];  tri.v[1] = v[1];  tri.v[2] = v[0];
-   stage->next->tri(stage->next, &tri);
+   stage->next->tri( stage->next, &tri );
 
    tri.v[0] = v[3];  tri.v[1] = v[1];  tri.v[2] = v[2];
-   stage->next->tri(stage->next, &tri);
+   stage->next->tri( stage->next, &tri );
 
    tri.v[0] = v[4];  tri.v[1] = v[3];  tri.v[2] = v[2];
-   stage->next->tri(stage->next, &tri);
+   stage->next->tri( stage->next, &tri );
 
    tri.v[0] = v[5];  tri.v[1] = v[3];  tri.v[2] = v[4];
-   stage->next->tri(stage->next, &tri);
+   stage->next->tri( stage->next, &tri );
 
    tri.v[0] = v[6];  tri.v[1] = v[5];  tri.v[2] = v[4];
-   stage->next->tri(stage->next, &tri);
+   stage->next->tri( stage->next, &tri );
 
    tri.v[0] = v[7];  tri.v[1] = v[5];  tri.v[2] = v[6];
-   stage->next->tri(stage->next, &tri);
+   stage->next->tri( stage->next, &tri );
 }
 
 
@@ -707,7 +708,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
    struct pipe_context *pipe = draw->pipe;
 
    stage->line = aaline_first_line;
-   stage->next->flush(stage->next, flags);
+   stage->next->flush( stage->next, flags );
 
    /* restore original frag shader, texture, sampler state */
    draw->suspend_flushing = TRUE;
@@ -735,7 +736,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
 static void
 aaline_reset_stipple_counter(struct draw_stage *stage)
 {
-   stage->next->reset_stipple_counter(stage->next);
+   stage->next->reset_stipple_counter( stage->next );
 }
 
 
@@ -760,7 +761,7 @@ aaline_destroy(struct draw_stage *stage)
       pipe_sampler_view_reference(&aaline->sampler_view, NULL);
    }
 
-   draw_free_temp_verts(stage);
+   draw_free_temp_verts( stage );
 
    /* restore the old entry points */
    pipe->create_fs_state = aaline->driver_create_fs_state;
@@ -770,7 +771,7 @@ aaline_destroy(struct draw_stage *stage)
    pipe->bind_sampler_states = aaline->driver_bind_sampler_states;
    pipe->set_sampler_views = aaline->driver_set_sampler_views;
 
-   FREE(stage);
+   FREE( stage );
 }
 
 
@@ -791,7 +792,7 @@ draw_aaline_stage(struct draw_context *draw)
    aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter;
    aaline->stage.destroy = aaline_destroy;
 
-   if (!draw_alloc_temp_verts(&aaline->stage, 8))
+   if (!draw_alloc_temp_verts( &aaline->stage, 8 ))
       goto fail;
 
    return aaline;
@@ -969,7 +970,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
    /*
     * Create / install AA line drawing / prim stage
     */
-   aaline = draw_aaline_stage(draw);
+   aaline = draw_aaline_stage( draw );
    if (!aaline)
       goto fail;
 
@@ -995,16 +996,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe)
 
    pipe->bind_sampler_states = aaline_bind_sampler_states;
    pipe->set_sampler_views = aaline_set_sampler_views;
-
+   
    /* Install once everything is known to be OK:
     */
    draw->pipeline.aaline = &aaline->stage;
 
    return TRUE;
 
-fail:
+ fail:
    if (aaline)
-      aaline->stage.destroy(&aaline->stage);
-
+      aaline->stage.destroy( &aaline->stage );
+   
    return FALSE;
 }
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 2b96b8ad4..33ef8ec17 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -213,13 +213,13 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
                            TGSI_FILE_INPUT, texInput,
-                           TGSI_FILE_INPUT, texInput, false);
+                           TGSI_FILE_INPUT, texInput);
 
    /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
-                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false);
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
 
 #if NORMALIZE  /* OPTIONAL normalization of length */
    /* RSQ t0.x, t0.x; */
@@ -237,7 +237,7 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
-                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false);
+                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);
 
    /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
    tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
@@ -246,10 +246,10 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
    /* compute coverage factor = (1-d)/(1-k) */
 
    /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
-   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z,
                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
-                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true);
+                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z);
 
    /* RCP t0.z, t0.z;  # t0.z = 1 / m */
    newInst = tgsi_default_full_instruction();
@@ -265,22 +265,22 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
    ctx->emit_instruction(ctx, &newInst);
 
    /* SUB t0.y, 1, t0.x;  # d = 1 - d */
-   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
-                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true);
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
 
    /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
-                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false);
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z);
 
    /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
-                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false);
+                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z);
 
    /* CMP t0.w, -t0.y, tex.w, t0.w;
     *  # if -t0.y < 0 then
@@ -318,7 +318,7 @@ aa_transform_epilog(struct tgsi_transform_context *ctx)
                            TGSI_FILE_OUTPUT, aactx->colorOutput,
                            TGSI_WRITEMASK_W,
                            TGSI_FILE_TEMPORARY, aactx->colorTemp,
-                           TGSI_FILE_TEMPORARY, aactx->tmp0, false);
+                           TGSI_FILE_TEMPORARY, aactx->tmp0);
 }
 
 
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_clip.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_clip.c
index c22758bc7..cf2b41738 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -32,6 +32,7 @@
  */
 
 
+#include "util/u_bitcast.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
@@ -58,19 +59,26 @@
 struct clip_stage {
    struct draw_stage stage;      /**< base class */
 
-   /* List of the attributes to be flatshaded. */
-   uint num_flat_attribs;
-   uint flat_attribs[PIPE_MAX_SHADER_OUTPUTS];
+   unsigned pos_attr;
+   boolean have_clipdist;
+   int cv_attr;
 
-   /* Mask of attributes in noperspective mode */
-   boolean noperspective_attribs[PIPE_MAX_SHADER_OUTPUTS];
+   /* List of the attributes to be constant interpolated. */
+   uint num_const_attribs;
+   uint8_t const_attribs[PIPE_MAX_SHADER_OUTPUTS];
+   /* List of the attributes to be linear interpolated. */
+   uint num_linear_attribs;
+   uint8_t linear_attribs[PIPE_MAX_SHADER_OUTPUTS];
+   /* List of the attributes to be perspective interpolated. */
+   uint num_perspect_attribs;
+   uint8_t perspect_attribs[PIPE_MAX_SHADER_OUTPUTS];
 
    float (*plane)[4];
 };
 
 
 /** Cast wrapper */
-static inline struct clip_stage *clip_stage( struct draw_stage *stage )
+static inline struct clip_stage *clip_stage(struct draw_stage *stage)
 {
    return (struct clip_stage *)stage;
 }
@@ -83,7 +91,7 @@ draw_viewport_index(struct draw_context *draw,
       unsigned viewport_index_output =
          draw_current_shader_viewport_index_output(draw);
       unsigned viewport_index =
-         *((unsigned*)leading_vertex->data[viewport_index_output]);
+         u_bitcast_f2u(leading_vertex->data[viewport_index_output][0]);
       return draw_clamp_viewport_idx(viewport_index);
    } else {
       return 0;
@@ -96,10 +104,10 @@ draw_viewport_index(struct draw_context *draw,
 
 /* All attributes are float[4], so this is easy:
  */
-static void interp_attr( float dst[4],
-			 float t,
-			 const float in[4],
-			 const float out[4] )
+static void interp_attr(float dst[4],
+                        float t,
+                        const float in[4],
+                        const float out[4])
 {
    dst[0] = LINTERP( t, out[0], in[0] );
    dst[1] = LINTERP( t, out[1], in[1] );
@@ -111,30 +119,28 @@ static void interp_attr( float dst[4],
 /**
  * Copy flat shaded attributes src vertex to dst vertex.
  */
-static void copy_flat( struct draw_stage *stage,
-                       struct vertex_header *dst,
-                       const struct vertex_header *src )
+static void copy_flat(struct draw_stage *stage,
+                      struct vertex_header *dst,
+                      const struct vertex_header *src)
 {
    const struct clip_stage *clipper = clip_stage(stage);
    uint i;
-   for (i = 0; i < clipper->num_flat_attribs; i++) {
-      const uint attr = clipper->flat_attribs[i];
+   for (i = 0; i < clipper->num_const_attribs; i++) {
+      const uint attr = clipper->const_attribs[i];
       COPY_4FV(dst->data[attr], src->data[attr]);
    }
 }
 
 /* Interpolate between two vertices to produce a third.  
  */
-static void interp( const struct clip_stage *clip,
-		    struct vertex_header *dst,
-		    float t,
-		    const struct vertex_header *out, 
-		    const struct vertex_header *in,
-                    unsigned viewport_index )
+static void interp(const struct clip_stage *clip,
+                   struct vertex_header *dst,
+                   float t,
+                   const struct vertex_header *out,
+                   const struct vertex_header *in,
+                   unsigned viewport_index)
 {
-   const unsigned nr_attrs = draw_num_shader_outputs(clip->stage.draw);
-   const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw);
-   const unsigned clip_attr = draw_current_shader_clipvertex_output(clip->stage.draw);
+   const unsigned pos_attr = clip->pos_attr;
    unsigned j;
    float t_nopersp;
 
@@ -142,20 +148,23 @@ static void interp( const struct clip_stage *clip,
     */
    dst->clipmask = 0;
    dst->edgeflag = 0;        /* will get overwritten later */
-   dst->have_clipdist = in->have_clipdist;
+   dst->pad = 0;
    dst->vertex_id = UNDEFINED_VERTEX_ID;
 
    /* Interpolate the clip-space coords.
     */
-   interp_attr(dst->clip, t, in->clip, out->clip);
+   if (clip->cv_attr >= 0) {
+      interp_attr(dst->data[clip->cv_attr], t,
+                  in->data[clip->cv_attr], out->data[clip->cv_attr]);
+   }
    /* interpolate the clip-space position */
-   interp_attr(dst->pre_clip_pos, t, in->pre_clip_pos, out->pre_clip_pos);
+   interp_attr(dst->clip_pos, t, in->clip_pos, out->clip_pos);
 
    /* Do the projective divide and viewport transformation to get
     * new window coordinates:
     */
    {
-      const float *pos = dst->pre_clip_pos;
+      const float *pos = dst->clip_pos;
       const float *scale =
          clip->stage.draw->viewports[viewport_index].scale;
       const float *trans =
@@ -168,6 +177,13 @@ static void interp( const struct clip_stage *clip,
       dst->data[pos_attr][3] = oow;
    }
    
+
+   /* interp perspective attribs */
+   for (j = 0; j < clip->num_perspect_attribs; j++) {
+      const unsigned attr = clip->perspect_attribs[j];
+      interp_attr(dst->data[attr], t, in->data[attr], out->data[attr]);
+   }
+
    /**
     * Compute the t in screen-space instead of 3d space to use
     * for noperspective interpolation.
@@ -177,43 +193,36 @@ static void interp( const struct clip_stage *clip,
     * pick whatever value (the interpolated point won't be in front
     * anyway), so just use the 3d t.
     */
-   {
+   if (clip->num_linear_attribs) {
       int k;
       t_nopersp = t;
       /* find either in.x != out.x or in.y != out.y */
       for (k = 0; k < 2; k++) {
-         if (in->clip[k] != out->clip[k]) {
+         if (in->clip_pos[k] != out->clip_pos[k]) {
             /* do divide by W, then compute linear interpolation factor */
-            float in_coord = in->clip[k] / in->clip[3];
-            float out_coord = out->clip[k] / out->clip[3];
-            float dst_coord = dst->clip[k] / dst->clip[3];
+            float in_coord = in->clip_pos[k] / in->clip_pos[3];
+            float out_coord = out->clip_pos[k] / out->clip_pos[3];
+            float dst_coord = dst->clip_pos[k] / dst->clip_pos[3];
             t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
             break;
          }
       }
-   }
-
-   /* Other attributes
-    */
-   for (j = 0; j < nr_attrs; j++) {
-      if (j != pos_attr && j != clip_attr) {
-         if (clip->noperspective_attribs[j])
-            interp_attr(dst->data[j], t_nopersp, in->data[j], out->data[j]);
-         else
-            interp_attr(dst->data[j], t, in->data[j], out->data[j]);
+      for (j = 0; j < clip->num_linear_attribs; j++) {
+         const unsigned attr = clip->linear_attribs[j];
+         interp_attr(dst->data[attr], t_nopersp, in->data[attr], out->data[attr]);
       }
    }
 }
 
 /**
- * Checks whether the specifed triangle is empty and if it is returns
+ * Checks whether the specified triangle is empty and if it is returns
  * true, otherwise returns false.
- * Triangle is considered null/empty if it's area is qual to zero.
+ * Triangle is considered null/empty if its area is equal to zero.
  */
 static inline boolean
-is_tri_null(struct draw_context *draw, const struct prim_header *header)
+is_tri_null(const struct clip_stage *clip, const struct prim_header *header)
 {
-   const unsigned pos_attr = draw_current_shader_position_output(draw);
+   const unsigned pos_attr = clip->pos_attr;
    float x1 = header->v[1]->data[pos_attr][0] - header->v[0]->data[pos_attr][0];
    float y1 = header->v[1]->data[pos_attr][1] - header->v[0]->data[pos_attr][1];
    float z1 = header->v[1]->data[pos_attr][2] - header->v[0]->data[pos_attr][2];
@@ -233,12 +242,13 @@ is_tri_null(struct draw_context *draw, const struct prim_header *header)
  * Emit a post-clip polygon to the next pipeline stage.  The polygon
  * will be convex and the provoking vertex will always be vertex[0].
  */
-static void emit_poly( struct draw_stage *stage,
-		       struct vertex_header **inlist,
-                       const boolean *edgeflags,
-		       unsigned n,
-		       const struct prim_header *origPrim)
+static void emit_poly(struct draw_stage *stage,
+                      struct vertex_header **inlist,
+                      const boolean *edgeflags,
+                      unsigned n,
+                      const struct prim_header *origPrim)
 {
+   const struct clip_stage *clipper = clip_stage(stage);
    struct prim_header header;
    unsigned i;
    ushort edge_first, edge_middle, edge_last;
@@ -278,7 +288,7 @@ static void emit_poly( struct draw_stage *stage,
          header.v[2] = inlist[0];  /* the provoking vertex */
       }
 
-      tri_null = is_tri_null(stage->draw, &header);
+      tri_null = is_tri_null(clipper, &header);
       /* If we generated a triangle with an area, aka. non-null triangle,
        * or if the previous triangle was also null then skip all subsequent
        * null triangles */
@@ -303,11 +313,18 @@ static void emit_poly( struct draw_stage *stage,
          debug_printf("Clipped tri: (flat-shade-first = %d)\n",
                       stage->draw->rasterizer->flatshade_first);
          for (j = 0; j < 3; j++) {
-            debug_printf("  Vert %d: clip: %f %f %f %f\n", j,
-                         header.v[j]->clip[0],
-                         header.v[j]->clip[1],
-                         header.v[j]->clip[2],
-                         header.v[j]->clip[3]);
+            debug_printf("  Vert %d: clip pos: %f %f %f %f\n", j,
+                         header.v[j]->clip_pos[0],
+                         header.v[j]->clip_pos[1],
+                         header.v[j]->clip_pos[2],
+                         header.v[j]->clip_pos[3]);
+            if (clipper->cv_attr >= 0) {
+               debug_printf("  Vert %d: cv: %f %f %f %f\n", j,
+                            header.v[j]->data[clipper->cv_attr][0],
+                            header.v[j]->data[clipper->cv_attr][1],
+                            header.v[j]->data[clipper->cv_attr][2],
+                            header.v[j]->data[clipper->cv_attr][3]);
+            }
             for (k = 0; k < draw_num_shader_outputs(stage->draw); k++) {
                debug_printf("  Vert %d: Attr %d:  %f %f %f %f\n", j, k,
                             header.v[j]->data[k][0],
@@ -317,7 +334,7 @@ static void emit_poly( struct draw_stage *stage,
             }
          }
       }
-      stage->next->tri( stage->next, &header );
+      stage->next->tri(stage->next, &header);
    }
 }
 
@@ -342,15 +359,28 @@ static inline float getclipdist(const struct clip_stage *clipper,
 {
    const float *plane;
    float dp;
-   if (vert->have_clipdist && plane_idx >= 6) {
+   if (plane_idx < 6) {
+      /* ordinary xyz view volume clipping uses pos output */
+      plane = clipper->plane[plane_idx];
+      dp = dot4(vert->clip_pos, plane);
+   }
+   else if (clipper->have_clipdist) {
       /* pick the correct clipdistance element from the output vectors */
       int _idx = plane_idx - 6;
       int cdi = _idx >= 4;
       int vidx = cdi ? _idx - 4 : _idx;
-      dp = vert->data[draw_current_shader_clipdistance_output(clipper->stage.draw, cdi)][vidx];
+      dp = vert->data[draw_current_shader_ccdistance_output(clipper->stage.draw, cdi)][vidx];
    } else {
+      /*
+       * legacy user clip planes or gl_ClipVertex
+       */
       plane = clipper->plane[plane_idx];
-      dp = dot4(vert->clip, plane);
+      if (clipper->cv_attr >= 0) {
+         dp = dot4(vert->data[clipper->cv_attr], plane);
+      }
+      else {
+         dp = dot4(vert->clip_pos, plane);
+      }
    }
    return dp;
 }
@@ -358,15 +388,16 @@ static inline float getclipdist(const struct clip_stage *clipper,
 /* Clip a triangle against the viewport and user clip planes.
  */
 static void
-do_clip_tri( struct draw_stage *stage, 
-	     struct prim_header *header,
-	     unsigned clipmask )
+do_clip_tri(struct draw_stage *stage,
+            struct prim_header *header,
+            unsigned clipmask)
 {
    struct clip_stage *clipper = clip_stage( stage );
    struct vertex_header *a[MAX_CLIPPED_VERTICES];
    struct vertex_header *b[MAX_CLIPPED_VERTICES];
    struct vertex_header **inlist = a;
    struct vertex_header **outlist = b;
+   struct vertex_header *prov_vertex;
    unsigned tmpnr = 0;
    unsigned n = 3;
    unsigned i;
@@ -380,16 +411,38 @@ do_clip_tri( struct draw_stage *stage,
    inlist[1] = header->v[1];
    inlist[2] = header->v[2];
 
-   viewport_index = draw_viewport_index(clipper->stage.draw, inlist[0]);
+   /*
+    * For d3d10, we need to take this from the leading (first) vertex.
+    * For GL, we could do anything (as long as we advertise
+    * GL_UNDEFINED_VERTEX for the VIEWPORT_INDEX_PROVOKING_VERTEX query),
+    * but it needs to be consistent with what other parts (i.e. driver)
+    * will do, and that seems easier with GL_PROVOKING_VERTEX logic.
+    */
+   if (stage->draw->rasterizer->flatshade_first) {
+      prov_vertex = inlist[0];
+   }
+   else {
+      prov_vertex = inlist[2];
+   }
+   viewport_index = draw_viewport_index(clipper->stage.draw, prov_vertex);
 
    if (DEBUG_CLIP) {
-      const float *v0 = header->v[0]->clip;
-      const float *v1 = header->v[1]->clip;
-      const float *v2 = header->v[2]->clip;
-      debug_printf("Clip triangle:\n");
+      const float *v0 = header->v[0]->clip_pos;
+      const float *v1 = header->v[1]->clip_pos;
+      const float *v2 = header->v[2]->clip_pos;
+      debug_printf("Clip triangle pos:\n");
       debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
       debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
       debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
+      if (clipper->cv_attr >= 0) {
+         const float *v0 = header->v[0]->data[clipper->cv_attr];
+         const float *v1 = header->v[1]->data[clipper->cv_attr];
+         const float *v2 = header->v[2]->data[clipper->cv_attr];
+         debug_printf("Clip triangle cv:\n");
+         debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
+         debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
+         debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
+      }
    }
 
    /*
@@ -425,7 +478,7 @@ do_clip_tri( struct draw_stage *stage,
       inEdges[n] = inEdges[0];
 
       for (i = 1; i <= n; i++) {
-	 struct vertex_header *vert = inlist[i];
+         struct vertex_header *vert = inlist[i];
          boolean *edge = &inEdges[i];
 
          float dp = getclipdist(clipper, vert, plane_idx);
@@ -433,16 +486,16 @@ do_clip_tri( struct draw_stage *stage,
          if (util_is_inf_or_nan(dp))
             return; //discard nan
 
-	 if (dp_prev >= 0.0f) {
+         if (dp_prev >= 0.0f) {
             assert(outcount < MAX_CLIPPED_VERTICES);
             if (outcount >= MAX_CLIPPED_VERTICES)
                return;
             outEdges[outcount] = *edge_prev;
-	    outlist[outcount++] = vert_prev;
-	 }
+            outlist[outcount++] = vert_prev;
+         }
 
-	 if (DIFFERENT_SIGNS(dp, dp_prev)) {
-	    struct vertex_header *new_vert;
+         if (DIFFERENT_SIGNS(dp, dp_prev)) {
+            struct vertex_header *new_vert;
             boolean *new_edge;
 
             assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
@@ -455,19 +508,19 @@ do_clip_tri( struct draw_stage *stage,
                return;
 
             new_edge = &outEdges[outcount];
-	    outlist[outcount++] = new_vert;
-
-	    if (dp < 0.0f) {
-	       /* Going out of bounds.  Avoid division by zero as we
-		* know dp != dp_prev from DIFFERENT_SIGNS, above.
-		*/
-	       float t = dp / (dp - dp_prev);
-	       interp( clipper, new_vert, t, vert, vert_prev, viewport_index );
-	       
-	       /* Whether or not to set edge flag for the new vert depends
+            outlist[outcount++] = new_vert;
+
+            if (dp < 0.0f) {
+               /* Going out of bounds.  Avoid division by zero as we
+                * know dp != dp_prev from DIFFERENT_SIGNS, above.
+                */
+               float t = dp / (dp - dp_prev);
+               interp( clipper, new_vert, t, vert, vert_prev, viewport_index );
+
+               /* Whether or not to set edge flag for the new vert depends
                 * on whether it's a user-defined clipping plane.  We're
                 * copying NVIDIA's behaviour here.
-		*/
+                */
                if (is_user_clip_plane) {
                   /* we want to see an edge along the clip plane */
                   *new_edge = TRUE;
@@ -478,31 +531,31 @@ do_clip_tri( struct draw_stage *stage,
                   *new_edge = *edge_prev;
                   new_vert->edgeflag = FALSE;
                }
-	    }
+            }
             else {
-	       /* Coming back in.
-		*/
-	       float t = dp_prev / (dp_prev - dp);
-	       interp( clipper, new_vert, t, vert_prev, vert, viewport_index );
-
-	       /* Copy starting vert's edgeflag:
-		*/
-	       new_vert->edgeflag = vert_prev->edgeflag;
+               /* Coming back in.
+                */
+               float t = dp_prev / (dp_prev - dp);
+               interp( clipper, new_vert, t, vert_prev, vert, viewport_index );
+
+               /* Copy starting vert's edgeflag:
+                */
+               new_vert->edgeflag = vert_prev->edgeflag;
                *new_edge = *edge_prev;
-	    }
-	 }
+            }
+         }
 
-	 vert_prev = vert;
+         vert_prev = vert;
          edge_prev = edge;
-	 dp_prev = dp;
+         dp_prev = dp;
       }
 
       /* swap in/out lists */
       {
-	 struct vertex_header **tmp = inlist;
-	 inlist = outlist;
-	 outlist = tmp;
-	 n = outcount;
+         struct vertex_header **tmp = inlist;
+         inlist = outlist;
+         outlist = tmp;
+         n = outcount;
       }
       {
          boolean *tmp = inEdges;
@@ -512,10 +565,10 @@ do_clip_tri( struct draw_stage *stage,
 
    }
 
-   /* If flat-shading, copy provoking vertex color to polygon vertex[0]
+   /* If constant interpolated, copy provoking vertex attrib to polygon vertex[0]
     */
    if (n >= 3) {
-      if (clipper->num_flat_attribs) {
+      if (clipper->num_const_attribs) {
          if (stage->draw->rasterizer->flatshade_first) {
             if (inlist[0] != header->v[0]) {
                assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
@@ -535,10 +588,10 @@ do_clip_tri( struct draw_stage *stage,
             }
          }
       }
-      
+
       /* Emit the polygon as triangles to the setup stage:
        */
-      emit_poly( stage, inlist, inEdges, n, header );
+      emit_poly(stage, inlist, inEdges, n, header);
    }
 }
 
@@ -546,17 +599,28 @@ do_clip_tri( struct draw_stage *stage,
 /* Clip a line against the viewport and user clip planes.
  */
 static void
-do_clip_line( struct draw_stage *stage,
-	      struct prim_header *header,
-	      unsigned clipmask )
+do_clip_line(struct draw_stage *stage,
+             struct prim_header *header,
+             unsigned clipmask)
 {
-   const struct clip_stage *clipper = clip_stage( stage );
+   const struct clip_stage *clipper = clip_stage(stage);
    struct vertex_header *v0 = header->v[0];
    struct vertex_header *v1 = header->v[1];
+   struct vertex_header *prov_vertex;
    float t0 = 0.0F;
    float t1 = 0.0F;
    struct prim_header newprim;
-   int viewport_index = draw_viewport_index(clipper->stage.draw, v0);
+   int viewport_index;
+
+   newprim.flags = header->flags;
+
+   if (stage->draw->rasterizer->flatshade_first) {
+      prov_vertex = v0;
+   }
+   else {
+      prov_vertex = v1;
+   }
+   viewport_index = draw_viewport_index(clipper->stage.draw, prov_vertex);
 
    while (clipmask) {
       const unsigned plane_idx = ffs(clipmask)-1;
@@ -567,17 +631,17 @@ do_clip_line( struct draw_stage *stage,
          return; //discard nan
 
       if (dp1 < 0.0F) {
-	 float t = dp1 / (dp1 - dp0);
+         float t = dp1 / (dp1 - dp0);
          t1 = MAX2(t1, t);
       } 
 
       if (dp0 < 0.0F) {
-	 float t = dp0 / (dp0 - dp1);
+         float t = dp0 / (dp0 - dp1);
          t0 = MAX2(t0, t);
       }
 
       if (t0 + t1 >= 1.0F)
-	 return; /* discard */
+         return; /* discard */
 
       clipmask &= ~(1 << plane_idx);  /* turn off this plane's bit */
    }
@@ -615,8 +679,7 @@ do_clip_line( struct draw_stage *stage,
 
 
 static void
-clip_point( struct draw_stage *stage, 
-            struct prim_header *header )
+clip_point(struct draw_stage *stage, struct prim_header *header)
 {
    if (header->v[0]->clipmask == 0)
       stage->next->point( stage->next, header );
@@ -630,8 +693,7 @@ clip_point( struct draw_stage *stage,
  * the guard band and not just outside the vp.)
  */
 static void
-clip_point_guard_xy( struct draw_stage *stage,
-                     struct prim_header *header )
+clip_point_guard_xy(struct draw_stage *stage, struct prim_header *header)
 {
    unsigned clipmask = header->v[0]->clipmask;
    if ((clipmask & 0xffffffff) == 0)
@@ -647,9 +709,9 @@ clip_point_guard_xy( struct draw_stage *stage,
           * automatically). These would usually be captured by depth clip
           * too but this can be disabled.
           */
-         if (header->v[0]->clip[3] <= 0.0f ||
-             util_is_inf_or_nan(header->v[0]->clip[0]) ||
-             util_is_inf_or_nan(header->v[0]->clip[1]))
+         if (header->v[0]->clip_pos[3] <= 0.0f ||
+             util_is_inf_or_nan(header->v[0]->clip_pos[0]) ||
+             util_is_inf_or_nan(header->v[0]->clip_pos[1]))
             return;
       }
       stage->next->point(stage->next, header);
@@ -658,8 +720,7 @@ clip_point_guard_xy( struct draw_stage *stage,
 
 
 static void
-clip_first_point( struct draw_stage *stage,
-                  struct prim_header *header )
+clip_first_point(struct draw_stage *stage, struct prim_header *header)
 {
    stage->point = stage->draw->guard_band_points_xy ? clip_point_guard_xy : clip_point;
    stage->point(stage, header);
@@ -667,8 +728,7 @@ clip_first_point( struct draw_stage *stage,
 
 
 static void
-clip_line( struct draw_stage *stage,
-	   struct prim_header *header )
+clip_line(struct draw_stage *stage, struct prim_header *header)
 {
    unsigned clipmask = (header->v[0]->clipmask | 
                         header->v[1]->clipmask);
@@ -686,8 +746,7 @@ clip_line( struct draw_stage *stage,
 
 
 static void
-clip_tri( struct draw_stage *stage,
-          struct prim_header *header )
+clip_tri(struct draw_stage *stage, struct prim_header *header)
 {
    unsigned clipmask = (header->v[0]->clipmask | 
                         header->v[1]->clipmask | 
@@ -715,12 +774,24 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
    if (semantic_name == TGSI_SEMANTIC_COLOR ||
        semantic_name == TGSI_SEMANTIC_BCOLOR) {
       interp = indexed_interp[semantic_index];
+   } else if (semantic_name == TGSI_SEMANTIC_POSITION ||
+              semantic_name == TGSI_SEMANTIC_CLIPVERTEX) {
+      /* these inputs are always handled specially */
+      return -1;
    } else {
       /* Otherwise, search in the FS inputs, with a decent default
        * if we don't find it.
+       * This probably only matters for layer, vpindex, culldist, maybe
+       * front_face.
        */
       uint j;
-      interp = TGSI_INTERPOLATE_PERSPECTIVE;
+      if (semantic_name == TGSI_SEMANTIC_LAYER ||
+          semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
+         interp = TGSI_INTERPOLATE_CONSTANT;
+      }
+      else {
+         interp = TGSI_INTERPOLATE_PERSPECTIVE;
+      }
       if (fs) {
          for (j = 0; j < fs->info.num_inputs; j++) {
             if (semantic_name == fs->info.input_semantic_name[j] &&
@@ -738,13 +809,23 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
  * primitive that really requires clipping.
  */
 static void 
-clip_init_state( struct draw_stage *stage )
+clip_init_state(struct draw_stage *stage)
 {
-   struct clip_stage *clipper = clip_stage( stage );
+   struct clip_stage *clipper = clip_stage(stage);
    const struct draw_context *draw = stage->draw;
    const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
    const struct tgsi_shader_info *info = draw_get_shader_info(draw);
    uint i, j;
+   int indexed_interp[2];
+
+   clipper->pos_attr = draw_current_shader_position_output(draw);
+   clipper->have_clipdist = draw_current_shader_num_written_clipdistances(draw) > 0;
+   if (draw_current_shader_clipvertex_output(draw) != clipper->pos_attr) {
+      clipper->cv_attr = (int)draw_current_shader_clipvertex_output(draw);
+   }
+   else {
+      clipper->cv_attr = -1;
+   }
 
    /* We need to know for each attribute what kind of interpolation is
     * done on it (flat, smooth or noperspective).  But the information
@@ -765,7 +846,6 @@ clip_init_state( struct draw_stage *stage )
    /* First pick up the interpolation mode for
     * gl_Color/gl_SecondaryColor, with the correct default.
     */
-   int indexed_interp[2];
    indexed_interp[0] = indexed_interp[1] = draw->rasterizer->flatshade ?
       TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
 
@@ -778,29 +858,33 @@ clip_init_state( struct draw_stage *stage )
       }
    }
 
-   /* Then resolve the interpolation mode for every output attribute.
-    *
-    * Given how the rest of the code, the most efficient way is to
-    * have a vector of flat-mode attributes, and a mask for
-    * noperspective attributes.
-    */
+   /* Then resolve the interpolation mode for every output attribute. */
 
-   clipper->num_flat_attribs = 0;
-   memset(clipper->noperspective_attribs, 0, sizeof(clipper->noperspective_attribs));
+   clipper->num_const_attribs = 0;
+   clipper->num_linear_attribs = 0;
+   clipper->num_perspect_attribs = 0;
    for (i = 0; i < info->num_outputs; i++) {
       /* Find the interpolation mode for a specific attribute */
       int interp = find_interp(fs, indexed_interp,
                                info->output_semantic_name[i],
                                info->output_semantic_index[i]);
-      /* If it's flat, add it to the flat vector.  Otherwise update
-       * the noperspective mask.
-       */
-
-      if (interp == TGSI_INTERPOLATE_CONSTANT) {
-         clipper->flat_attribs[clipper->num_flat_attribs] = i;
-         clipper->num_flat_attribs++;
-      } else
-         clipper->noperspective_attribs[i] = interp == TGSI_INTERPOLATE_LINEAR;
+      switch (interp) {
+      case TGSI_INTERPOLATE_CONSTANT:
+         clipper->const_attribs[clipper->num_const_attribs] = i;
+         clipper->num_const_attribs++;
+         break;
+      case TGSI_INTERPOLATE_LINEAR:
+         clipper->linear_attribs[clipper->num_linear_attribs] = i;
+         clipper->num_linear_attribs++;
+         break;
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
+         clipper->num_perspect_attribs++;
+         break;
+      default:
+         assert(interp == -1);
+         break;
+      }
    }
    /* Search the extra vertex attributes */
    for (j = 0; j < draw->extra_shader_outputs.num; j++) {
@@ -808,39 +892,47 @@ clip_init_state( struct draw_stage *stage )
       int interp = find_interp(fs, indexed_interp,
                                draw->extra_shader_outputs.semantic_name[j],
                                draw->extra_shader_outputs.semantic_index[j]);
-      /* If it's flat, add it to the flat vector.  Otherwise update
-       * the noperspective mask.
-       */
-      if (interp == TGSI_INTERPOLATE_CONSTANT) {
-         clipper->flat_attribs[clipper->num_flat_attribs] = i + j;
-         clipper->num_flat_attribs++;
-      } else
-         clipper->noperspective_attribs[i + j] = interp == TGSI_INTERPOLATE_LINEAR;
+      switch (interp) {
+      case TGSI_INTERPOLATE_CONSTANT:
+         clipper->const_attribs[clipper->num_const_attribs] = i + j;
+         clipper->num_const_attribs++;
+         break;
+      case TGSI_INTERPOLATE_LINEAR:
+         clipper->linear_attribs[clipper->num_linear_attribs] = i + j;
+         clipper->num_linear_attribs++;
+         break;
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         clipper->perspect_attribs[clipper->num_perspect_attribs] = i + j;
+         clipper->num_perspect_attribs++;
+         break;
+      default:
+         assert(interp == -1);
+         break;
+      }
    }
-   
+
    stage->tri = clip_tri;
    stage->line = clip_line;
 }
 
 
 
-static void clip_first_tri( struct draw_stage *stage,
-			    struct prim_header *header )
+static void clip_first_tri(struct draw_stage *stage,
+                           struct prim_header *header)
 {
    clip_init_state( stage );
    stage->tri( stage, header );
 }
 
-static void clip_first_line( struct draw_stage *stage,
-			     struct prim_header *header )
+static void clip_first_line(struct draw_stage *stage,
+                            struct prim_header *header)
 {
    clip_init_state( stage );
    stage->line( stage, header );
 }
 
 
-static void clip_flush( struct draw_stage *stage, 
-			     unsigned flags )
+static void clip_flush(struct draw_stage *stage, unsigned flags)
 {
    stage->tri = clip_first_tri;
    stage->line = clip_first_line;
@@ -848,13 +940,13 @@ static void clip_flush( struct draw_stage *stage,
 }
 
 
-static void clip_reset_stipple_counter( struct draw_stage *stage )
+static void clip_reset_stipple_counter(struct draw_stage *stage)
 {
    stage->next->reset_stipple_counter( stage->next );
 }
 
 
-static void clip_destroy( struct draw_stage *stage )
+static void clip_destroy(struct draw_stage *stage)
 {
    draw_free_temp_verts( stage );
    FREE( stage );
@@ -865,10 +957,10 @@ static void clip_destroy( struct draw_stage *stage )
  * Allocate a new clipper stage.
  * \return pointer to new stage object
  */
-struct draw_stage *draw_clip_stage( struct draw_context *draw )
+struct draw_stage *draw_clip_stage(struct draw_context *draw)
 {
    struct clip_stage *clipper = CALLOC_STRUCT(clip_stage);
-   if (clipper == NULL)
+   if (!clipper)
       goto fail;
 
    clipper->stage.draw = draw;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 0ea740861..cd285e6f9 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -309,7 +309,7 @@ static void flatshade_destroy( struct draw_stage *stage )
 struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
 {
    struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
-   if (flatshade == NULL)
+   if (!flatshade)
       goto fail;
 
    flatshade->stage.draw = draw;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 381aa4153..0d39ee4ec 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -108,11 +108,11 @@ emit_segment(struct draw_stage *stage, struct prim_header *header,
 }
 
 
-static inline unsigned
+static inline bool
 stipple_test(int counter, ushort pattern, int factor)
 {
    int b = (counter / factor) & 0xf;
-   return (1 << b) & pattern;
+   return !!((1 << b) & pattern);
 }
 
 
@@ -126,7 +126,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
    const float *pos0 = v0->data[pos];
    const float *pos1 = v1->data[pos];
    float start = 0;
-   int state = 0;
+   bool state = 0;
 
    float x0 = pos0[0];
    float x1 = pos1[0];
@@ -143,29 +143,29 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
       stipple->counter = 0;
 
 
-   /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table.
+   /* XXX ToDo: instead of iterating pixel-by-pixel, use a look-up table.
     */
    for (i = 0; i < length; i++) {
-      int result = stipple_test( (int) stipple->counter+i,
-                                 (ushort) stipple->pattern, stipple->factor );
+      bool result = stipple_test((int)stipple->counter + i,
+                                 (ushort)stipple->pattern, stipple->factor);
       if (result != state) {
          /* changing from "off" to "on" or vice versa */
-	 if (state) {
-	    if (start != i) {
+         if (state) {
+            if (start != i) {
                /* finishing an "on" segment */
-	       emit_segment( stage, header, start / length, i / length );
+               emit_segment(stage, header, start / length, i / length);
             }
-	 }
-	 else {
+         }
+         else {
             /* starting an "on" segment */
-	    start = (float) i;
-	 }
-	 state = result;	   
+            start = (float)i;
+         }
+         state = result;
       }
    }
 
    if (state && start < length)
-      emit_segment( stage, header, start / length, 1.0 );
+      emit_segment(stage, header, start / length, 1.0);
 
    stipple->counter += length;
 }
@@ -235,7 +235,7 @@ stipple_destroy( struct draw_stage *stage )
 struct draw_stage *draw_stipple_stage( struct draw_context *draw )
 {
    struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
-   if (stipple == NULL)
+   if (!stipple)
       goto fail;
 
    stipple->stage.draw = draw;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 7f958d9b9..52d87c6b2 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -165,7 +165,7 @@ static void twoside_destroy( struct draw_stage *stage )
 struct draw_stage *draw_twoside_stage( struct draw_context *draw )
 {
    struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
-   if (twoside == NULL)
+   if (!twoside)
       goto fail;
 
    twoside->stage.draw = draw;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_validate.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_validate.c
index 846cd4db3..01d07593d 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -156,10 +156,9 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
     */
    stage->next = next;
 
-   /* drawing wide, non-AA lines? */
-   wide_lines = rast->line_width != 1.0f &&
-                roundf(rast->line_width) > draw->pipeline.wide_line_threshold &&
-                !rast->line_smooth;
+   /* drawing wide, non-AA lines? */
+   wide_lines = (roundf(rast->line_width) > draw->pipeline.wide_line_threshold
+                 && !rast->line_smooth);
 
    /* drawing large/sprite points (but not AA points)? */
    if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 8592f51db..6df7149b5 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -28,7 +28,7 @@
 /**
  * \file
  * Vertex buffer drawing stage.
- *
+ * 
  * \author Jose Fonseca <jfonseca@vmware.com>
  * \author Keith Whitwell <keithw@vmware.com>
  */
@@ -37,6 +37,7 @@
 #include "util/u_debug.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+
 #include "draw_vbuf.h"
 #include "draw_private.h"
 #include "draw_vertex.h"
@@ -52,14 +53,14 @@ struct vbuf_stage {
    struct draw_stage stage; /**< This must be first (base class) */
 
    struct vbuf_render *render;
-
+   
    const struct vertex_info *vinfo;
-
+   
    /** Vertex size in bytes */
    unsigned vertex_size;
 
    struct translate *translate;
-
+   
    /* FIXME: we have no guarantee that 'unsigned' is 32bit */
 
    /** Vertices in hardware format */
@@ -67,7 +68,7 @@ struct vbuf_stage {
    unsigned *vertex_ptr;
    unsigned max_vertices;
    unsigned nr_vertices;
-
+   
    /** Indices */
    ushort *indices;
    unsigned max_indices;
@@ -86,28 +87,39 @@ struct vbuf_stage {
  * Basically a cast wrapper.
  */
 static inline struct vbuf_stage *
-vbuf_stage(struct draw_stage *stage)
+vbuf_stage( struct draw_stage *stage )
 {
    assert(stage);
    return (struct vbuf_stage *)stage;
 }
 
 
-static void vbuf_flush_vertices(struct vbuf_stage *vbuf);
-static void vbuf_alloc_vertices(struct vbuf_stage *vbuf);
+static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
+static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
+
+
+static inline boolean 
+overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
+{
+   unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
+   return (used + bytes) > bufsz;
+}
 
 
-static inline void
-check_space(struct vbuf_stage *vbuf, unsigned nr)
+static inline void 
+check_space( struct vbuf_stage *vbuf, unsigned nr )
 {
    if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
-       vbuf->nr_indices + nr > vbuf->max_indices) {
-      vbuf_flush_vertices(vbuf);
-      vbuf_alloc_vertices(vbuf);
+       vbuf->nr_indices + nr > vbuf->max_indices)
+   {
+      vbuf_flush_vertices( vbuf );
+      vbuf_alloc_vertices( vbuf );
    }
 }
 
 
+
+
 /**
  * Extract the needed fields from post-transformed vertex and emit
  * a hardware(driver) vertex.
@@ -115,21 +127,22 @@ check_space(struct vbuf_stage *vbuf, unsigned nr)
  * have a couple of slots at the beginning (1-dword header, 4-dword
  * clip pos) that we ignore here.  We only use the vertex->data[] fields.
  */
-static inline ushort
-emit_vertex(struct vbuf_stage *vbuf, struct vertex_header *vertex)
+static inline ushort 
+emit_vertex( struct vbuf_stage *vbuf,
+             struct vertex_header *vertex )
 {
    if (vertex->vertex_id == UNDEFINED_VERTEX_ID && vbuf->vertex_ptr) {
       /* Hmm - vertices are emitted one at a time - better make sure
        * set_buffer is efficient.  Consider a special one-shot mode for
        * translate.
        */
-      /* Note: we really do want data[0] here, not data[pos]:
+      /* Note: we really do want data[0] here, not data[pos]: 
        */
       vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0);
       vbuf->translate->run(vbuf->translate, 0, 1, 0, 0, vbuf->vertex_ptr);
 
       if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
-
+      
       vbuf->vertex_ptr += vbuf->vertex_size/4;
       vertex->vertex_id = vbuf->nr_vertices++;
    }
@@ -138,52 +151,57 @@ emit_vertex(struct vbuf_stage *vbuf, struct vertex_header *vertex)
 }
 
 
-static void
-vbuf_tri(struct draw_stage *stage, struct prim_header *prim)
+static void 
+vbuf_tri( struct draw_stage *stage,
+          struct prim_header *prim )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
    unsigned i;
 
-   check_space(vbuf, 3);
+   check_space( vbuf, 3 );
 
    for (i = 0; i < 3; i++) {
-      vbuf->indices[vbuf->nr_indices++] = emit_vertex(vbuf, prim->v[i]);
+      vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
    }
 }
 
 
-static void
-vbuf_line(struct draw_stage *stage, struct prim_header *prim)
+static void 
+vbuf_line( struct draw_stage *stage, 
+           struct prim_header *prim )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
    unsigned i;
 
-   check_space(vbuf, 2);
+   check_space( vbuf, 2 );
 
    for (i = 0; i < 2; i++) {
-      vbuf->indices[vbuf->nr_indices++] = emit_vertex(vbuf, prim->v[i]);
-   }
+      vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
+   }   
 }
 
 
-static void
-vbuf_point(struct draw_stage *stage, struct prim_header *prim)
+static void 
+vbuf_point( struct draw_stage *stage, 
+            struct prim_header *prim )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
 
-   check_space(vbuf, 1);
+   check_space( vbuf, 1 );
 
-   vbuf->indices[vbuf->nr_indices++] = emit_vertex(vbuf, prim->v[0]);
+   vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
 }
 
 
+
+
 /**
  * Set the prim type for subsequent vertices.
  * This may result in a new vertex size.  The existing vbuffer (if any)
  * will be flushed if needed and a new one allocated.
  */
 static void
-vbuf_start_prim(struct vbuf_stage *vbuf, uint prim)
+vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
 {
    struct translate_key hw_key;
    unsigned dst_offset;
@@ -193,7 +211,7 @@ vbuf_start_prim(struct vbuf_stage *vbuf, uint prim)
    vbuf->render->set_primitive(vbuf->render, prim);
 
    /* Must do this after set_primitive() above:
-    *
+    * 
     * XXX: need some state managment to track when this needs to be
     * recalculated.  The driver should tell us whether there was a
     * state change.
@@ -210,7 +228,7 @@ vbuf_start_prim(struct vbuf_stage *vbuf, uint prim)
       unsigned emit_sz = 0;
       unsigned src_buffer = 0;
       enum pipe_format output_format;
-      unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float));
+      unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
 
       output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
       emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
@@ -245,7 +263,8 @@ vbuf_start_prim(struct vbuf_stage *vbuf, uint prim)
    /* Don't bother with caching at this stage:
     */
    if (!vbuf->translate ||
-       translate_key_compare(&vbuf->translate->key, &hw_key) != 0) {
+       translate_key_compare(&vbuf->translate->key, &hw_key) != 0) 
+   {
       translate_key_sanitize(&hw_key);
       vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);
 
@@ -262,39 +281,42 @@ vbuf_start_prim(struct vbuf_stage *vbuf, uint prim)
 }
 
 
-static void
-vbuf_first_tri(struct draw_stage *stage, struct prim_header *prim)
+static void 
+vbuf_first_tri( struct draw_stage *stage,
+                struct prim_header *prim )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
 
-   vbuf_flush_vertices(vbuf);
+   vbuf_flush_vertices( vbuf );
    vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES);
    stage->tri = vbuf_tri;
-   stage->tri(stage, prim);
+   stage->tri( stage, prim );
 }
 
 
-static void
-vbuf_first_line(struct draw_stage *stage, struct prim_header *prim)
+static void 
+vbuf_first_line( struct draw_stage *stage,
+                 struct prim_header *prim )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
 
-   vbuf_flush_vertices(vbuf);
+   vbuf_flush_vertices( vbuf );
    vbuf_start_prim(vbuf, PIPE_PRIM_LINES);
    stage->line = vbuf_line;
-   stage->line(stage, prim);
+   stage->line( stage, prim );
 }
 
 
-static void
-vbuf_first_point(struct draw_stage *stage, struct prim_header *prim)
+static void 
+vbuf_first_point( struct draw_stage *stage,
+                  struct prim_header *prim )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
 
    vbuf_flush_vertices(vbuf);
    vbuf_start_prim(vbuf, PIPE_PRIM_POINTS);
    stage->point = vbuf_point;
-   stage->point(stage, prim);
+   stage->point( stage, prim );
 }
 
 
@@ -302,26 +324,28 @@ vbuf_first_point(struct draw_stage *stage, struct prim_header *prim)
 /**
  * Flush existing vertex buffer and allocate a new one.
  */
-static void
-vbuf_flush_vertices(struct vbuf_stage *vbuf)
+static void 
+vbuf_flush_vertices( struct vbuf_stage *vbuf )
 {
-   if (vbuf->vertices) {
-      vbuf->render->unmap_vertices(vbuf->render, 0, vbuf->nr_vertices - 1);
+   if(vbuf->vertices) {
 
-      if (vbuf->nr_indices) {
-         vbuf->render->draw_elements(vbuf->render,
-                                     vbuf->indices,
-                                     vbuf->nr_indices);
+      vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 );
 
+      if (vbuf->nr_indices) 
+      {
+         vbuf->render->draw_elements(vbuf->render, 
+                                     vbuf->indices, 
+                                     vbuf->nr_indices );
+   
          vbuf->nr_indices = 0;
       }
-
+     
       /* Reset temporary vertices ids */
-      if (vbuf->nr_vertices)
-         draw_reset_vertex_ids(vbuf->stage.draw);
-
+      if(vbuf->nr_vertices)
+	 draw_reset_vertex_ids( vbuf->stage.draw );
+      
       /* Free the vertex buffer */
-      vbuf->render->release_vertices(vbuf->render);
+      vbuf->render->release_vertices( vbuf->render );
 
       vbuf->max_vertices = vbuf->nr_vertices = 0;
       vbuf->vertex_ptr = vbuf->vertices = NULL;
@@ -337,21 +361,20 @@ vbuf_flush_vertices(struct vbuf_stage *vbuf)
    vbuf->stage.line = vbuf_first_line;
    vbuf->stage.tri = vbuf_first_tri;
 }
+   
 
-
-static void
-vbuf_alloc_vertices(struct vbuf_stage *vbuf)
+static void 
+vbuf_alloc_vertices( struct vbuf_stage *vbuf )
 {
    if (vbuf->vertex_ptr) {
       assert(!vbuf->nr_indices);
       assert(!vbuf->vertices);
    }
-
+   
    /* Allocate a new vertex buffer */
-   vbuf->max_vertices =
-      vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
+   vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
 
-   if (vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
+   if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
       vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
 
    /* Must always succeed -- driver gives us a
@@ -363,23 +386,24 @@ vbuf_alloc_vertices(struct vbuf_stage *vbuf)
                                    (ushort) vbuf->vertex_size,
                                    (ushort) vbuf->max_vertices);
 
-   vbuf->vertices = (uint *) vbuf->render->map_vertices(vbuf->render);
-
+   vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render );
+   
    vbuf->vertex_ptr = vbuf->vertices;
 }
 
 
-static void
-vbuf_flush(struct draw_stage *stage, unsigned flags)
+
+static void 
+vbuf_flush( struct draw_stage *stage, unsigned flags )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
 
-   vbuf_flush_vertices(vbuf);
+   vbuf_flush_vertices( vbuf );
 }
 
 
-static void
-vbuf_reset_stipple_counter(struct draw_stage *stage)
+static void 
+vbuf_reset_stipple_counter( struct draw_stage *stage )
 {
    /* XXX: Need to do something here for hardware with linestipple.
     */
@@ -387,29 +411,28 @@ vbuf_reset_stipple_counter(struct draw_stage *stage)
 }
 
 
-static void
-vbuf_destroy(struct draw_stage *stage)
+static void vbuf_destroy( struct draw_stage *stage )
 {
-   struct vbuf_stage *vbuf = vbuf_stage(stage);
-
-   if (vbuf->indices)
-      align_free(vbuf->indices);
+   struct vbuf_stage *vbuf = vbuf_stage( stage );
 
+   if(vbuf->indices)
+      align_free( vbuf->indices );
+   
    if (vbuf->render)
-      vbuf->render->destroy(vbuf->render);
+      vbuf->render->destroy( vbuf->render );
 
    if (vbuf->cache)
       translate_cache_destroy(vbuf->cache);
 
-   FREE(stage);
+   FREE( stage );
 }
 
 
 /**
  * Create a new primitive vbuf/render stage.
  */
-struct draw_stage *
-draw_vbuf_stage(struct draw_context *draw, struct vbuf_render *render)
+struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
+                                    struct vbuf_render *render )
 {
    struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
    if (!vbuf)
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index cdcc43a13..ae4a00eb6 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -39,15 +39,26 @@
 
 struct wideline_stage {
    struct draw_stage stage;
+
+   float half_line_width;
 };
 
 
+
+static inline struct wideline_stage *wideline_stage( struct draw_stage *stage )
+{
+   return (struct wideline_stage *)stage;
+}
+
+
+
 /**
  * Draw a wide line by drawing a quad (two triangles).
  */
 static void wideline_line( struct draw_stage *stage,
                            struct prim_header *header )
 {
+   /*const struct wideline_stage *wide = wideline_stage(stage);*/
    const unsigned pos = draw_current_shader_position_output(stage->draw);
    const float half_width = 0.5f * stage->draw->rasterizer->line_width;
 
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 348b0e93b..adb6120d8 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -315,7 +315,7 @@ static void widepoint_destroy( struct draw_stage *stage )
 struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
 {
    struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
-   if (wide == NULL)
+   if (!wide)
       goto fail;
 
    wide->stage.draw = draw;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h
index 030bb2cec..a6aa61052 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_private.h
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_private.h
@@ -489,10 +489,11 @@ void draw_update_viewport_flags(struct draw_context *draw);
 
 /** 
  * Return index i from the index buffer.
- * If the index buffer would overflow we return index 0.
+ * If the index buffer would overflow we return the
+ * maximum possible index.
  */
 #define DRAW_GET_IDX(_elts, _i)                   \
-   (((_i) >= draw->pt.user.eltMax) ? 0 : (_elts)[_i])
+   (((_i) >= draw->pt.user.eltMax) ? DRAW_MAX_FETCH_IDX : (_elts)[_i])
 
 /**
  * Return index of the given viewport clamping it
@@ -514,7 +515,7 @@ draw_overflow_uadd(unsigned a, unsigned b,
                    unsigned overflow_value)
 {
    unsigned res = a + b;
-   if (res < a) {
+   if (res < a || res < b) {
       res = overflow_value;
    }
    return res;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c
index 3236e523a..5a49acb64 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt.c
@@ -109,7 +109,7 @@ draw_pt_arrays(struct draw_context *draw,
 
    frontend = draw->pt.frontend;
 
-   if (frontend ) {
+   if (frontend) {
       if (draw->pt.prim != prim || draw->pt.opt != opt) {
          /* In certain conditions switching primitives requires us to flush
           * and validate the different stages. One example is when smooth
@@ -524,7 +524,7 @@ draw_vbo(struct draw_context *draw,
 #endif
    {
       if (index_limit == 0) {
-      /* one of the buffers is too small to do any valid drawing */
+         /* one of the buffers is too small to do any valid drawing */
          debug_warning("draw: VBO too small to draw anything\n");
          util_fpstate_set(fpstate);
          return;
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 0277cbfc8..4033eddca 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -353,9 +353,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
    const struct draw_prim_info *prim_info = in_prim_info;
    boolean free_prim_info = FALSE;
    unsigned opt = fpme->opt;
-   boolean clipped = 0;
-   unsigned start_or_maxelt, vid_base;
-   const unsigned *elts;
+   unsigned clipped = 0;
 
    llvm_vert_info.count = fetch_info->count;
    llvm_vert_info.vertex_size = fpme->vertex_size;
@@ -375,27 +373,29 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
       draw->statistics.vs_invocations += fetch_info->count;
    }
 
-   if (fetch_info->linear) {
-      start_or_maxelt = fetch_info->start;
-      vid_base = draw->start_index;
-      elts = NULL;
-   }
-   else {
-      start_or_maxelt = draw->pt.user.eltMax;
-      vid_base = draw->pt.user.eltBias;
-      elts = fetch_info->elts;
-   }
-   clipped = fpme->current_variant->jit_func(&fpme->llvm->jit_context,
-                                             llvm_vert_info.verts,
-                                             draw->pt.user.vbuffer,
-                                             fetch_info->count,
-                                             start_or_maxelt,
-                                             fpme->vertex_size,
-                                             draw->pt.vertex_buffer,
-                                             draw->instance_id,
-                                             vid_base,
-                                             draw->start_instance,
-                                             elts);
+   if (fetch_info->linear)
+      clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context,
+                                       llvm_vert_info.verts,
+                                       draw->pt.user.vbuffer,
+                                       fetch_info->start,
+                                       fetch_info->count,
+                                       fpme->vertex_size,
+                                       draw->pt.vertex_buffer,
+                                       draw->instance_id,
+                                       draw->start_index,
+                                       draw->start_instance);
+   else
+      clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
+                                            llvm_vert_info.verts,
+                                            draw->pt.user.vbuffer,
+                                            fetch_info->elts,
+                                            draw->pt.user.eltMax,
+                                            fetch_info->count,
+                                            fpme->vertex_size,
+                                            draw->pt.vertex_buffer,
+                                            draw->instance_id,
+                                            draw->pt.user.eltBias,
+                                            draw->start_instance);
 
    /* Finished with fetch and vs:
     */
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c
index a68d5bf97..8d448f92a 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -33,7 +33,7 @@
 #define SEGMENT_SIZE 1024
 #define MAP_SIZE     256
 
-/* The largest possible index within an index buffer */
+/* The largest possible index within an index buffer */
 #define MAX_ELT_IDX 0xffffffff
 
 struct vsplit_frontend {
@@ -85,7 +85,7 @@ vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
  * Add a fetch element and add it to the draw elements.
  */
 static inline void
-vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias)
 {
    unsigned hash;
 
@@ -93,7 +93,7 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
 
    /* If the value isn't in the cache or it's an overflow due to the
     * element bias */
-   if (vsplit->cache.fetches[hash] != fetch) {
+   if (vsplit->cache.fetches[hash] != fetch || ofbias) {
       /* update cache */
       vsplit->cache.fetches[hash] = fetch;
       vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
@@ -108,22 +108,67 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
 
 /**
  * Returns the base index to the elements array.
- * The value is checked for integer overflow (not sure it can happen?).
+ * The value is checked for overflows (both integer overflows
+ * and the elements array overflow).
  */
 static inline unsigned
-vsplit_get_base_idx(unsigned start, unsigned fetch)
+vsplit_get_base_idx(struct vsplit_frontend *vsplit,
+                    unsigned start, unsigned fetch, unsigned *ofbit)
 {
-   return draw_overflow_uadd(start, fetch, MAX_ELT_IDX);
+   struct draw_context *draw = vsplit->draw;
+   unsigned elt_idx = draw_overflow_uadd(start, fetch, MAX_ELT_IDX);
+   if (ofbit)
+      *ofbit = 0;
+
+   /* Overflown indices need to wrap to the first element
+    * in the index buffer */
+   if (elt_idx >= draw->pt.user.eltMax) {
+      if (ofbit)
+         *ofbit = 1;
+      elt_idx = 0;
+   }
+
+   return elt_idx;
 }
 
-/*
- * The final element index is just element index plus element bias.
+/**
+ * Returns the element index adjusted for the element bias.
+ * The final element index is created from the actual element
+ * index, plus the element bias, clamped to the maximum element
+ * index if that addition overflows.
  */
+static inline unsigned
+vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
+                    int idx, int bias, unsigned *ofbias)
+{
+   int res = idx + bias;
+
+   if (ofbias)
+      *ofbias = 0;
+
+   if (idx > 0 && bias > 0) {
+      if (res < idx || res < bias) {
+         res = DRAW_MAX_FETCH_IDX;
+         if (ofbias)
+            *ofbias = 1;
+      }
+   } else if (idx < 0 && bias < 0) {
+      if (res > idx || res > bias) {
+         res = DRAW_MAX_FETCH_IDX;
+         if (ofbias)
+            *ofbias = 1;
+      }
+   }
+
+   return res;
+}
+
 #define VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias)    \
    unsigned elt_idx;                                       \
-   elt_idx = vsplit_get_base_idx(start, fetch);            \
-   elt_idx = (unsigned)((int)(DRAW_GET_IDX(elts, elt_idx)) + (int)elt_bias);
-
+   unsigned ofbit;                                         \
+   unsigned ofbias;                                        \
+   elt_idx = vsplit_get_base_idx(vsplit, start, fetch, &ofbit);          \
+   elt_idx = vsplit_get_bias_idx(vsplit, ofbit ? 0 : DRAW_GET_IDX(elts, elt_idx), elt_bias, &ofbias)
 
 static inline void
 vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
@@ -131,13 +176,7 @@ vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
 {
    struct draw_context *draw = vsplit->draw;
    VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
-   /* unlike the uint case this can only happen with elt_bias */
-   if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
-      unsigned hash = fetch % MAP_SIZE;
-      vsplit->cache.fetches[hash] = 0;
-      vsplit->cache.has_max_fetch = TRUE;
-   }
-   vsplit_add_cache(vsplit, elt_idx);
+   vsplit_add_cache(vsplit, elt_idx, ofbias);
 }
 
 static inline void
@@ -146,13 +185,7 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
 {
    struct draw_context *draw = vsplit->draw;
    VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
-   /* unlike the uint case this can only happen with elt_bias */
-   if (elt_bias && elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
-      unsigned hash = fetch % MAP_SIZE;
-      vsplit->cache.fetches[hash] = 0;
-      vsplit->cache.has_max_fetch = TRUE;
-   }
-   vsplit_add_cache(vsplit, elt_idx);
+   vsplit_add_cache(vsplit, elt_idx, ofbias);
 }
 
 
@@ -165,15 +198,17 @@ vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts,
                       unsigned start, unsigned fetch, int elt_bias)
 {
    struct draw_context *draw = vsplit->draw;
+   unsigned raw_elem_idx = start + fetch + elt_bias;
    VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
-   /* Take care for DRAW_MAX_FETCH_IDX (since cache is initialized to -1). */
-   if (elt_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
+
+   /* special care for DRAW_MAX_FETCH_IDX */
+   if (raw_elem_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
       unsigned hash = fetch % MAP_SIZE;
-      /* force update - any value will do except DRAW_MAX_FETCH_IDX */
-      vsplit->cache.fetches[hash] = 0;
+      vsplit->cache.fetches[hash] = raw_elem_idx - 1; /* force update */
       vsplit->cache.has_max_fetch = TRUE;
    }
-   vsplit_add_cache(vsplit, elt_idx);
+
+   vsplit_add_cache(vsplit, elt_idx, ofbias);
 }
 
 
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index be353c418..6da79b949 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -49,8 +49,9 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
 
    /* If the index buffer overflows we'll need to run
     * through the normal paths */
-   if (end >= draw->pt.user.eltMax ||
-       end < istart)
+   if (start >= draw->pt.user.eltMax ||
+       end > draw->pt.user.eltMax ||
+       end < istart || end < icount)
       return FALSE;
 
    /* use the ib directly */
@@ -156,7 +157,7 @@ CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
       if (close)
          ADD_CACHE(vsplit, ib, 0, iclose, 0);
    }
-   else {
+   else if (ibias > 0) {
       if (spoken)
          ADD_CACHE(vsplit, ib, 0, ispoken, ibias);
 
@@ -166,6 +167,19 @@ CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
       if (close)
          ADD_CACHE(vsplit, ib, 0, iclose, ibias);
    }
+   else {
+      if (spoken) {
+         ADD_CACHE(vsplit, ib, 0, ispoken, ibias);
+      }
+
+      for (i = spoken; i < icount; i++) {
+         ADD_CACHE(vsplit, ib, istart, i, ibias);
+      }
+
+      if (close) {
+         ADD_CACHE(vsplit, ib, 0, iclose, ibias);
+      }
+   }
 
    vsplit_flush_cache(vsplit, flags);
 }
diff --git a/lib/mesa/src/gallium/auxiliary/draw/draw_vs_exec.c b/lib/mesa/src/gallium/auxiliary/draw/draw_vs_exec.c
index 17b54b6fe..5125eb4df 100644
--- a/lib/mesa/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/lib/mesa/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -70,7 +70,9 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
    if (evs->machine->Tokens != shader->state.tokens) {
       tgsi_exec_machine_bind_shader(evs->machine,
                                     shader->state.tokens,
-                                    draw->vs.tgsi.sampler);
+                                    draw->vs.tgsi.sampler,
+                                    draw->vs.tgsi.image,
+                                    draw->vs.tgsi.buffer);
    }
 }
 
@@ -103,9 +105,9 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
 
    if (shader->info.uses_instanceid) {
       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID];
-      assert(i < Elements(machine->SystemValue));
+      assert(i < ARRAY_SIZE(machine->SystemValue));
       for (j = 0; j < TGSI_QUAD_SIZE; j++)
-         machine->SystemValue[i].i[j] = shader->draw->instance_id;
+         machine->SystemValue[i].xyzw[0].i[j] = shader->draw->instance_id;
    }
 
    for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
@@ -127,20 +129,20 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
 
          if (shader->info.uses_vertexid) {
             unsigned vid = machine->SysSemanticToIndex[TGSI_SEMANTIC_VERTEXID];
-            assert(vid < Elements(machine->SystemValue));
-            machine->SystemValue[vid].i[j] = i + j;
+            assert(vid < ARRAY_SIZE(machine->SystemValue));
+            machine->SystemValue[vid].xyzw[0].i[j] = i + j;
             /* XXX this should include base vertex. Where to get it??? */
          }
          if (shader->info.uses_basevertex) {
             unsigned vid = machine->SysSemanticToIndex[TGSI_SEMANTIC_BASEVERTEX];
-            assert(vid < Elements(machine->SystemValue));
-            machine->SystemValue[vid].i[j] = 0;
+            assert(vid < ARRAY_SIZE(machine->SystemValue));
+            machine->SystemValue[vid].xyzw[0].i[j] = 0;
             /* XXX Where to get it??? */
          }
          if (shader->info.uses_vertexid_nobase) {
             unsigned vid = machine->SysSemanticToIndex[TGSI_SEMANTIC_VERTEXID_NOBASE];
-            assert(vid < Elements(machine->SystemValue));
-            machine->SystemValue[vid].i[j] = i + j;
+            assert(vid < ARRAY_SIZE(machine->SystemValue));
+            machine->SystemValue[vid].xyzw[0].i[j] = i + j;
          }
 
          for (slot = 0; slot < shader->info.num_inputs; slot++) {
@@ -159,14 +161,9 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
          input = (const float (*)[4])((const char *)input + input_stride);
       } 
 
-      tgsi_set_exec_mask(machine,
-                         1,
-                         max_vertices > 1,
-                         max_vertices > 2,
-                         max_vertices > 3);
-
+      machine->NonHelperMask = (1 << max_vertices) - 1;
       /* run interpreter */
-      tgsi_exec_machine_run( machine );
+      tgsi_exec_machine_run( machine, 0 );
 
       /* Unswizzle all output results.  
        */
@@ -225,7 +222,7 @@ draw_create_vs_exec(struct draw_context *draw,
 {
    struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
 
-   if (vs == NULL) 
+   if (!vs)
       return NULL;
 
    /* we make a private copy of the tokens */
author	Jonathan Gray <jsg@cvs.openbsd.org>	2018-01-08 05:41:34 +0000
committer	Jonathan Gray <jsg@cvs.openbsd.org>	2018-01-08 05:41:34 +0000
commit	c00801de923e125863aaf8180439d59d610b2517 (patch)
tree	e2896aa2785f3cf2151aeeb3c95fb5cc09a2fe02 /lib/mesa/src/gallium/auxiliary/draw
parent	be30e6efb92db21299b936c0e068e7088941e9c9 (diff)