Merge Mesa 21.1.5

author: Jonathan Gray <jsg@cvs.openbsd.org> 2021-07-22 10:50:50 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2021-07-22 10:50:50 +0000
commit: 9130ec005fbc78a62420643414d8354d0929ca50 (patch)
tree: 6762777acdd2d4eee17ef87290e80dc7afe2b73d /lib/mesa/src/gallium/drivers/llvmpipe
parent: ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (diff)
24 files changed, 2308 insertions, 717 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/SConscript b/lib/mesa/src/gallium/drivers/llvmpipe/SConscript
deleted file mode 100644
index 1af686771..000000000
--- a/lib/mesa/src/gallium/drivers/llvmpipe/SConscript
+++ /dev/null
@@ -1,46 +0,0 @@
-from sys import executable as python_cmd
-import distutils.version
-
-Import('*')
-
-if not env['llvm']:
-    print('warning: LLVM disabled: not building llvmpipe')
-    Return()
-
-env = env.Clone()
-
-env.MSVC2013Compat()
-
-llvmpipe = env.ConvenienceLibrary(
-	target = 'llvmpipe',
-	source = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
-	)
-
-env.Alias('llvmpipe', llvmpipe)
-
-env.Append(CPPPATH = [
-    '../../../compiler/nir',
-])
-
-if not env['embedded']:
-    env = env.Clone()
-
-    env.Prepend(LIBS = [llvmpipe, gallium, mesautil])
-
-    tests = [
-        'arit',
-        'format',
-        'blend',
-        'conv',
-        'printf',
-    ]
-
-    for test in tests:
-        testname = 'lp_test_' + test
-        target = env.Program(
-            target = testname,
-            source = [testname + '.c', 'lp_test_main.c'],
-        )
-        env.UnitTest(testname, target)
-
-Export('llvmpipe')
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 63c2fb5d8..dc559bc3f 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -66,6 +66,7 @@
 #include "gallivm/lp_bld_pack.h"
 
 #include "lp_bld_depth.h"
+#include "lp_state_fs.h"
 
 
 /** Used to select fields from pipe_stencil_state */
@@ -435,7 +436,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
    assert(type.length <= 16);
    assert(type.floating);
 
-   if(util_cpu_caps.has_sse && type.length == 4) {
+   if(util_get_cpu_caps()->has_sse && type.length == 4) {
       const char *movmskintr = "llvm.x86.sse.movmsk.ps";
       const char *popcntintr = "llvm.ctpop.i32";
       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
@@ -446,7 +447,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
                                        LLVMInt32TypeInContext(context), bits);
       count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
    }
-   else if(util_cpu_caps.has_avx && type.length == 8) {
+   else if(util_get_cpu_caps()->has_avx && type.length == 8) {
       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
       const char *popcntintr = "llvm.ctpop.i32";
       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
@@ -469,7 +470,11 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
       countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
 
        for (i = 0; i < type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
           shuffles[i] = lp_build_const_int32(gallivm, 4*i);
+#else
+          shuffles[i] = lp_build_const_int32(gallivm, (4*i) + 3);
+#endif
        }
 
        shufflev = LLVMConstVector(shuffles, type.length);
@@ -599,6 +604,12 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                   LLVMConstVector(shuffles, zs_type.length), "");
    *s_fb = *z_fb;
 
+   if (format_desc->block.bits == 8) {
+      /* Extend stencil-only 8 bit values (S8_UINT) */
+      *s_fb = LLVMBuildZExt(builder, *s_fb,
+                            lp_build_int_vec_type(gallivm, z_src_type), "");
+   }
+
    if (format_desc->block.bits < z_src_type.width) {
       /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
       *z_fb = LLVMBuildZExt(builder, *z_fb,
@@ -648,7 +659,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
  * \param is_1d  whether this resource has only one dimension
- * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param mask_value the alive/dead pixel mask for the quad (vector)
  * \param z_fb  z values read from fb (with padding)
  * \param s_fb  s values read from fb (with padding)
  * \param loop_counter  the current loop iteration
@@ -662,7 +673,7 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       boolean is_1d,
-                                      struct lp_build_mask_context *mask,
+                                      LLVMValueRef mask_value,
                                       LLVMValueRef z_fb,
                                       LLVMValueRef s_fb,
                                       LLVMValueRef loop_counter,
@@ -674,7 +685,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
    struct lp_build_context z_bld;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef mask_value = NULL;
    LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
    LLVMValueRef depth_offset1, depth_offset2;
@@ -732,8 +742,7 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
    }
 
-   if (mask) {
-      mask_value = lp_build_mask_value(mask);
+   if (mask_value) {
       z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
       if (format_desc->block.bits > 32) {
          s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
@@ -806,6 +815,7 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
  * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param cov_mask coverage mask
  * \param stencil_refs  the front/back stencil ref values (scalar)
  * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
  * \param zs_dst  the depth/stencil values in framebuffer
@@ -813,11 +823,12 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
  */
 void
 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
-                            const struct pipe_depth_state *depth,
+                            const struct lp_depth_state *depth,
                             const struct pipe_stencil_state stencil[2],
                             struct lp_type z_src_type,
                             const struct util_format_description *format_desc,
                             struct lp_build_mask_context *mask,
+                            LLVMValueRef *cov_mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
                             LLVMValueRef z_fb,
@@ -837,7 +848,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    LLVMValueRef stencil_vals = NULL;
    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
-   LLVMValueRef current_mask = lp_build_mask_value(mask);
+   LLVMValueRef current_mask = mask ? lp_build_mask_value(mask) : *cov_mask;
    LLVMValueRef front_facing = NULL;
    boolean have_z, have_s;
 
@@ -870,8 +881,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
    /* Sanity checking */
    {
-      const unsigned z_swizzle = format_desc->swizzle[0];
-      const unsigned s_swizzle = format_desc->swizzle[1];
+      ASSERTED const unsigned z_swizzle = format_desc->swizzle[0];
+      ASSERTED const unsigned s_swizzle = format_desc->swizzle[1];
 
       assert(z_swizzle != PIPE_SWIZZLE_NONE ||
              s_swizzle != PIPE_SWIZZLE_NONE);
@@ -1066,7 +1077,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
          current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
       }
 
-      if (!stencil[0].enabled) {
+      if (!stencil[0].enabled && mask) {
          /* We can potentially skip all remaining operations here, but only
           * if stencil is disabled because we still need to update the stencil
           * buffer values.  Don't need to update Z buffer values.
@@ -1141,10 +1152,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       *s_value = stencil_vals;
    }
 
-   if (s_pass_mask)
-      lp_build_mask_update(mask, s_pass_mask);
+   if (mask) {
+      if (s_pass_mask)
+         lp_build_mask_update(mask, s_pass_mask);
 
-   if (depth->enabled && stencil[0].enabled)
-      lp_build_mask_update(mask, z_pass);
+      if (depth->enabled && stencil[0].enabled)
+         lp_build_mask_update(mask, z_pass);
+   } else {
+      LLVMValueRef tmp_mask = *cov_mask;
+      if (s_pass_mask)
+         tmp_mask = LLVMBuildAnd(builder, tmp_mask, s_pass_mask, "");
+
+      /* for multisample we don't do the stencil optimisation so update always */
+      if (depth->enabled)
+         tmp_mask = LLVMBuildAnd(builder, tmp_mask, z_pass, "");
+      *cov_mask = tmp_mask;
+   }
 }
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
index c49e66914..aaf6a80e8 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
@@ -46,6 +46,7 @@
 #include "lp_surface.h"
 #include "lp_query.h"
 #include "lp_setup.h"
+#include "lp_screen.h"
 
 /* This is only safe if there's just one concurrent context */
 #ifdef EMBEDDED_DEVICE
@@ -55,7 +56,7 @@
 static void llvmpipe_destroy( struct pipe_context *pipe )
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
-   uint i, j;
+   uint i;
 
    lp_print_counters();
 
@@ -80,21 +81,18 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
 
    pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
 
-   for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
-      pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
-   }
-
-   for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
-      pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
-   }
-
-   for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
-      pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL);
-   }
-
-   for (i = 0; i < ARRAY_SIZE(llvmpipe->constants); i++) {
-      for (j = 0; j < ARRAY_SIZE(llvmpipe->constants[i]); j++) {
-         pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL);
+   for (enum pipe_shader_type s = PIPE_SHADER_VERTEX; s < PIPE_SHADER_TYPES; s++) {
+      for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
+         pipe_sampler_view_reference(&llvmpipe->sampler_views[s][i], NULL);
+      }
+      for (i = 0; i < LP_MAX_TGSI_SHADER_IMAGES; i++) {
+         pipe_resource_reference(&llvmpipe->images[s][i].resource, NULL);
+      }
+      for (i = 0; i < LP_MAX_TGSI_SHADER_BUFFERS; i++) {
+         pipe_resource_reference(&llvmpipe->ssbos[s][i].buffer, NULL);
+      }
+      for (i = 0; i < ARRAY_SIZE(llvmpipe->constants[s]); i++) {
+         pipe_resource_reference(&llvmpipe->constants[s][i].buffer, NULL);
       }
    }
 
@@ -134,6 +132,47 @@ llvmpipe_render_condition(struct pipe_context *pipe,
    llvmpipe->render_cond_cond = condition;
 }
 
+static void
+llvmpipe_render_condition_mem(struct pipe_context *pipe,
+                              struct pipe_resource *buffer,
+                              unsigned offset,
+                              bool condition)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+
+   llvmpipe->render_cond_buffer = llvmpipe_resource(buffer);
+   llvmpipe->render_cond_offset = offset;
+   llvmpipe->render_cond_cond = condition;
+}
+
+static void
+llvmpipe_texture_barrier(struct pipe_context *pipe, unsigned flags)
+{
+   llvmpipe_flush(pipe, NULL, __FUNCTION__);
+}
+
+static void lp_draw_disk_cache_find_shader(void *cookie,
+                                           struct lp_cached_code *cache,
+                                           unsigned char ir_sha1_cache_key[20])
+{
+   struct llvmpipe_screen *screen = cookie;
+   lp_disk_cache_find_shader(screen, cache, ir_sha1_cache_key);
+}
+
+static void lp_draw_disk_cache_insert_shader(void *cookie,
+                                             struct lp_cached_code *cache,
+                                             unsigned char ir_sha1_cache_key[20])
+{
+   struct llvmpipe_screen *screen = cookie;
+   lp_disk_cache_insert_shader(screen, cache, ir_sha1_cache_key);
+}
+
+static enum pipe_reset_status
+llvmpipe_get_device_reset_status(struct pipe_context *pipe)
+{
+   return PIPE_NO_RESET;
+}
+
 struct pipe_context *
 llvmpipe_create_context(struct pipe_screen *screen, void *priv,
                         unsigned flags)
@@ -162,9 +201,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,
    llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
    llvmpipe->pipe.clear = llvmpipe_clear;
    llvmpipe->pipe.flush = do_flush;
+   llvmpipe->pipe.texture_barrier = llvmpipe_texture_barrier;
 
    llvmpipe->pipe.render_condition = llvmpipe_render_condition;
+   llvmpipe->pipe.render_condition_mem = llvmpipe_render_condition_mem;
 
+   llvmpipe->pipe.get_device_reset_status = llvmpipe_get_device_reset_status;
    llvmpipe_init_blend_funcs(llvmpipe);
    llvmpipe_init_clip_funcs(llvmpipe);
    llvmpipe_init_draw_funcs(llvmpipe);
@@ -176,6 +218,7 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,
    llvmpipe_init_fs_funcs(llvmpipe);
    llvmpipe_init_vs_funcs(llvmpipe);
    llvmpipe_init_gs_funcs(llvmpipe);
+   llvmpipe_init_tess_funcs(llvmpipe);
    llvmpipe_init_rasterizer_funcs(llvmpipe);
    llvmpipe_init_context_resource_funcs( &llvmpipe->pipe );
    llvmpipe_init_surface_functions(llvmpipe);
@@ -197,6 +240,13 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,
    if (!llvmpipe->draw)
       goto fail;
 
+   draw_set_disk_cache_callbacks(llvmpipe->draw,
+                                 llvmpipe_screen(screen),
+                                 lp_draw_disk_cache_find_shader,
+                                 lp_draw_disk_cache_insert_shader);
+
+   draw_set_constant_buffer_stride(llvmpipe->draw, lp_get_constant_buffer_stride(screen));
+
    /* FIXME: devise alternative to draw_texture_samplers */
 
    llvmpipe->setup = lp_setup_create( &llvmpipe->pipe,
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
index 0e029f591..b1adba61d 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
@@ -67,12 +67,15 @@ struct llvmpipe_context {
    struct lp_fragment_shader *fs;
    struct draw_vertex_shader *vs;
    const struct lp_geometry_shader *gs;
+   const struct lp_tess_ctrl_shader *tcs;
+   const struct lp_tess_eval_shader *tes;
    struct lp_compute_shader *cs;
    const struct lp_velems_state *velems;
    const struct lp_so_state *so;
 
    /** Other rendering state */
    unsigned sample_mask;
+   unsigned min_samples;
    struct pipe_blend_color blend_color;
    struct pipe_stencil_ref stencil_ref;
    struct pipe_clip_state clip;
@@ -96,7 +99,7 @@ struct llvmpipe_context {
 
    struct draw_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
    int num_so_targets;
-   struct pipe_query_data_so_statistics so_stats;
+   struct pipe_query_data_so_statistics so_stats[PIPE_MAX_VERTEX_STREAMS];
 
    struct pipe_query_data_pipeline_statistics pipeline_statistics;
    unsigned active_statistics_queries;
@@ -167,6 +170,10 @@ struct llvmpipe_context {
    enum pipe_render_cond_flag render_cond_mode;
    boolean render_cond_cond;
 
+   /** VK render cond */
+   struct llvmpipe_resource *render_cond_buffer;
+   unsigned render_cond_offset;
+
    /** The LLVMContext to use for LLVM related work */
    LLVMContextRef context;
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index cf81111b4..e8f0ae609 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -51,8 +51,14 @@
  * the drawing to the 'draw' module.
  */
 static void
-llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
+                  const struct pipe_draw_indirect_info *indirect,
+                  const struct pipe_draw_start_count *draws,
+                  unsigned num_draws)
 {
+   if (!indirect && (!draws[0].count || !info->instance_count))
+      return;
+
    struct llvmpipe_context *lp = llvmpipe_context(pipe);
    struct draw_context *draw = lp->draw;
    const void *mapped_indices = NULL;
@@ -61,8 +67,8 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    if (!llvmpipe_check_render_cond(lp))
       return;
 
-   if (info->indirect) {
-      util_draw_indirect(pipe, info);
+   if (indirect && indirect->buffer) {
+      util_draw_indirect(pipe, info, indirect);
       return;
    }
 
@@ -105,6 +111,12 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    llvmpipe_prepare_geometry_sampling(lp,
                                       lp->num_sampler_views[PIPE_SHADER_GEOMETRY],
                                       lp->sampler_views[PIPE_SHADER_GEOMETRY]);
+   llvmpipe_prepare_tess_ctrl_sampling(lp,
+                                       lp->num_sampler_views[PIPE_SHADER_TESS_CTRL],
+                                       lp->sampler_views[PIPE_SHADER_TESS_CTRL]);
+   llvmpipe_prepare_tess_eval_sampling(lp,
+                                       lp->num_sampler_views[PIPE_SHADER_TESS_EVAL],
+                                       lp->sampler_views[PIPE_SHADER_TESS_EVAL]);
 
    llvmpipe_prepare_vertex_images(lp,
                                   lp->num_images[PIPE_SHADER_VERTEX],
@@ -112,6 +124,12 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    llvmpipe_prepare_geometry_images(lp,
                                     lp->num_images[PIPE_SHADER_GEOMETRY],
                                     lp->images[PIPE_SHADER_GEOMETRY]);
+   llvmpipe_prepare_tess_ctrl_images(lp,
+                                     lp->num_images[PIPE_SHADER_TESS_CTRL],
+                                     lp->images[PIPE_SHADER_TESS_CTRL]);
+   llvmpipe_prepare_tess_eval_images(lp,
+                                     lp->num_images[PIPE_SHADER_TESS_EVAL],
+                                     lp->images[PIPE_SHADER_TESS_EVAL]);
    if (lp->gs && lp->gs->no_tokens) {
       /* we have an empty geometry shader with stream output, so
          attach the stream output info to the current vertex shader */
@@ -127,7 +145,7 @@ llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                      !lp->queries_disabled);
 
    /* draw! */
-   draw_vbo(draw, info);
+   draw_vbo(draw, info, indirect, draws, num_draws);
 
    /*
     * unmap vertex/index buffers
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
index 00b6477f9..80d8d9e5c 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -52,6 +52,8 @@ create_jit_texture_type(struct gallivm_state *gallivm)
    elem_types[LP_JIT_TEXTURE_WIDTH]  =
    elem_types[LP_JIT_TEXTURE_HEIGHT] =
    elem_types[LP_JIT_TEXTURE_DEPTH] =
+   elem_types[LP_JIT_TEXTURE_NUM_SAMPLES] =
+   elem_types[LP_JIT_TEXTURE_SAMPLE_STRIDE] =
    elem_types[LP_JIT_TEXTURE_FIRST_LEVEL] =
    elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc);
    elem_types[LP_JIT_TEXTURE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
@@ -90,6 +92,12 @@ create_jit_texture_type(struct gallivm_state *gallivm)
    LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, mip_offsets,
                           gallivm->target, texture_type,
                           LP_JIT_TEXTURE_MIP_OFFSETS);
+   LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, num_samples,
+                          gallivm->target, texture_type,
+                          LP_JIT_TEXTURE_NUM_SAMPLES);
+   LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, sample_stride,
+                          gallivm->target, texture_type,
+                          LP_JIT_TEXTURE_SAMPLE_STRIDE);
    LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
                         gallivm->target, texture_type);
    return texture_type;
@@ -138,7 +146,9 @@ create_jit_image_type(struct gallivm_state *gallivm)
    elem_types[LP_JIT_IMAGE_DEPTH] = LLVMInt32TypeInContext(lc);
    elem_types[LP_JIT_IMAGE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
    elem_types[LP_JIT_IMAGE_ROW_STRIDE] =
-   elem_types[LP_JIT_IMAGE_IMG_STRIDE] = LLVMInt32TypeInContext(lc);
+   elem_types[LP_JIT_IMAGE_IMG_STRIDE] =
+   elem_types[LP_JIT_IMAGE_NUM_SAMPLES] =
+   elem_types[LP_JIT_IMAGE_SAMPLE_STRIDE] = LLVMInt32TypeInContext(lc);
 
    image_type = LLVMStructTypeInContext(lc, elem_types,
                                         ARRAY_SIZE(elem_types), 0);
@@ -160,6 +170,12 @@ create_jit_image_type(struct gallivm_state *gallivm)
    LP_CHECK_MEMBER_OFFSET(struct lp_jit_image, img_stride,
                           gallivm->target, image_type,
                           LP_JIT_IMAGE_IMG_STRIDE);
+   LP_CHECK_MEMBER_OFFSET(struct lp_jit_image, num_samples,
+                          gallivm->target, image_type,
+                          LP_JIT_IMAGE_NUM_SAMPLES);
+   LP_CHECK_MEMBER_OFFSET(struct lp_jit_image, sample_stride,
+                          gallivm->target, image_type,
+                          LP_JIT_IMAGE_SAMPLE_STRIDE);
    return image_type;
 }
 
@@ -210,6 +226,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       elem_types[LP_JIT_CTX_IMAGES] = LLVMArrayType(image_type,
                                                     PIPE_MAX_SHADER_IMAGES);
       elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc);
+      elem_types[LP_JIT_CTX_SAMPLE_MASK] =
       elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] =
       elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc);
       elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
@@ -261,6 +278,9 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, num_ssbos,
                              gallivm->target, context_type,
                              LP_JIT_CTX_NUM_SSBOS);
+      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, sample_mask,
+                             gallivm->target, context_type,
+                             LP_JIT_CTX_SAMPLE_MASK);
       LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
                            gallivm->target, context_type);
 
@@ -277,6 +297,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
       elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc);
       elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
+      elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEW_INDEX] =
             LLVMInt32TypeInContext(lc);
 
       thread_data_type = LLVMStructTypeInContext(lc, elem_types,
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
index ef783ea6f..6b5160667 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -33,7 +33,7 @@
 #include "util/u_pack_color.h"
 #include "util/u_string.h"
 #include "util/u_thread.h"
-
+#include "util/u_memset.h"
 #include "util/os_time.h"
 
 #include "lp_scene_queue.h"
@@ -56,6 +56,10 @@ const struct lp_rast_state *jit_state = NULL;
 const struct lp_rasterizer_task *jit_task = NULL;
 #endif
 
+const float lp_sample_pos_4x[4][2] = { { 0.375, 0.125 },
+                                       { 0.875, 0.375 },
+                                       { 0.125, 0.625 },
+                                       { 0.625, 0.875 } };
 
 /**
  * Begin rasterizing a scene.
@@ -152,18 +156,20 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
    LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
           __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]);
 
-
-   util_fill_box(scene->cbufs[cbuf].map,
-                 format,
-                 scene->cbufs[cbuf].stride,
-                 scene->cbufs[cbuf].layer_stride,
-                 task->x,
-                 task->y,
-                 0,
-                 task->width,
-                 task->height,
-                 scene->fb_max_layer + 1,
-                 &uc);
+   for (unsigned s = 0; s < scene->cbufs[cbuf].nr_samples; s++) {
+      void *map = (char *)scene->cbufs[cbuf].map + scene->cbufs[cbuf].sample_stride * s;
+      util_fill_box(map,
+                    format,
+                    scene->cbufs[cbuf].stride,
+                    scene->cbufs[cbuf].layer_stride,
+                    task->x,
+                    task->y,
+                    0,
+                    task->width,
+                    task->height,
+                    scene->fb_max_layer + 1,
+                    &uc);
+   }
 
    /* this will increase for each rb which probably doesn't mean much */
    LP_COUNT(nr_color_tile_clear);
@@ -200,86 +206,89 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
    if (scene->fb.zsbuf) {
       unsigned layer;
-      uint8_t *dst_layer = task->depth_tile;
-      block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
 
-      clear_value &= clear_mask;
+      for (unsigned s = 0; s < scene->zsbuf.nr_samples; s++) {
+         uint8_t *dst_layer = task->depth_tile + (s * scene->zsbuf.sample_stride);
+         block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
 
-      for (layer = 0; layer <= scene->fb_max_layer; layer++) {
-         dst = dst_layer;
+         clear_value &= clear_mask;
 
-         switch (block_size) {
-         case 1:
-            assert(clear_mask == 0xff);
-            memset(dst, (uint8_t) clear_value, height * width);
-            break;
-         case 2:
-            if (clear_mask == 0xffff) {
+         for (layer = 0; layer <= scene->fb_max_layer; layer++) {
+            dst = dst_layer;
+
+            switch (block_size) {
+            case 1:
+               assert(clear_mask == 0xff);
                for (i = 0; i < height; i++) {
-                  uint16_t *row = (uint16_t *)dst;
-                  for (j = 0; j < width; j++)
-                     *row++ = (uint16_t) clear_value;
+                  uint8_t *row = (uint8_t *)dst;
+                  memset(row, (uint8_t) clear_value, width);
                   dst += dst_stride;
                }
-            }
-            else {
-               for (i = 0; i < height; i++) {
-                  uint16_t *row = (uint16_t *)dst;
-                  for (j = 0; j < width; j++) {
-                     uint16_t tmp = ~clear_mask & *row;
-                     *row++ = clear_value | tmp;
+               break;
+            case 2:
+               if (clear_mask == 0xffff) {
+                  for (i = 0; i < height; i++) {
+                     uint16_t *row = (uint16_t *)dst;
+                     for (j = 0; j < width; j++)
+                        *row++ = (uint16_t) clear_value;
+                     dst += dst_stride;
                   }
-                  dst += dst_stride;
                }
-            }
-            break;
-         case 4:
-            if (clear_mask == 0xffffffff) {
-               for (i = 0; i < height; i++) {
-                  uint32_t *row = (uint32_t *)dst;
-                  for (j = 0; j < width; j++)
-                     *row++ = clear_value;
-                  dst += dst_stride;
+               else {
+                  for (i = 0; i < height; i++) {
+                     uint16_t *row = (uint16_t *)dst;
+                     for (j = 0; j < width; j++) {
+                        uint16_t tmp = ~clear_mask & *row;
+                        *row++ = clear_value | tmp;
+                     }
+                     dst += dst_stride;
+                  }
                }
-            }
-            else {
-               for (i = 0; i < height; i++) {
-                  uint32_t *row = (uint32_t *)dst;
-                  for (j = 0; j < width; j++) {
-                     uint32_t tmp = ~clear_mask & *row;
-                     *row++ = clear_value | tmp;
+               break;
+            case 4:
+               if (clear_mask == 0xffffffff) {
+                  for (i = 0; i < height; i++) {
+                     util_memset32(dst, clear_value, width);
+                     dst += dst_stride;
                   }
-                  dst += dst_stride;
                }
-            }
-            break;
-         case 8:
-            clear_value64 &= clear_mask64;
-            if (clear_mask64 == 0xffffffffffULL) {
-               for (i = 0; i < height; i++) {
-                  uint64_t *row = (uint64_t *)dst;
-                  for (j = 0; j < width; j++)
-                     *row++ = clear_value64;
-                  dst += dst_stride;
+               else {
+                  for (i = 0; i < height; i++) {
+                     uint32_t *row = (uint32_t *)dst;
+                     for (j = 0; j < width; j++) {
+                        uint32_t tmp = ~clear_mask & *row;
+                        *row++ = clear_value | tmp;
+                     }
+                     dst += dst_stride;
+                  }
                }
-            }
-            else {
-               for (i = 0; i < height; i++) {
-                  uint64_t *row = (uint64_t *)dst;
-                  for (j = 0; j < width; j++) {
-                     uint64_t tmp = ~clear_mask64 & *row;
-                     *row++ = clear_value64 | tmp;
+               break;
+            case 8:
+               clear_value64 &= clear_mask64;
+               if (clear_mask64 == 0xffffffffffULL) {
+                  for (i = 0; i < height; i++) {
+                     util_memset64(dst, clear_value64, width);
+                     dst += dst_stride;
                   }
-                  dst += dst_stride;
                }
-            }
-            break;
+               else {
+                  for (i = 0; i < height; i++) {
+                     uint64_t *row = (uint64_t *)dst;
+                     for (j = 0; j < width; j++) {
+                        uint64_t tmp = ~clear_mask64 & *row;
+                        *row++ = clear_value64 | tmp;
+                     }
+                     dst += dst_stride;
+                  }
+               }
+               break;
 
-         default:
-            assert(0);
-            break;
+            default:
+               assert(0);
+               break;
+            }
+            dst_layer += scene->zsbuf.layer_stride;
          }
-         dst_layer += scene->zsbuf.layer_stride;
       }
    }
 }
@@ -321,19 +330,23 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
       for (x = 0; x < task->width; x += 4) {
          uint8_t *color[PIPE_MAX_COLOR_BUFS];
          unsigned stride[PIPE_MAX_COLOR_BUFS];
+         unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
          uint8_t *depth = NULL;
          unsigned depth_stride = 0;
+         unsigned depth_sample_stride = 0;
          unsigned i;
 
          /* color buffer */
          for (i = 0; i < scene->fb.nr_cbufs; i++){
             if (scene->fb.cbufs[i]) {
                stride[i] = scene->cbufs[i].stride;
+               sample_stride[i] = scene->cbufs[i].sample_stride;
                color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
-                                                          tile_y + y, inputs->layer);
+                                                          tile_y + y, inputs->layer + inputs->view_index);
             }
             else {
                stride[i] = 0;
+               sample_stride[i] = 0;
                color[i] = NULL;
             }
          }
@@ -341,12 +354,18 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
          /* depth buffer */
          if (scene->zsbuf.map) {
             depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
-                                                    tile_y + y, inputs->layer);
+                                                    tile_y + y, inputs->layer + inputs->view_index);
             depth_stride = scene->zsbuf.stride;
+            depth_sample_stride = scene->zsbuf.sample_stride;
          }
 
+         uint64_t mask = 0;
+         for (unsigned i = 0; i < scene->fb_max_samples; i++)
+            mask |= (uint64_t)(0xffff) << (16 * i);
+
          /* Propagate non-interpolated raster state. */
          task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+         task->thread_data.raster_state.view_index = inputs->view_index;
 
          /* run shader on 4x4 block */
          BEGIN_JIT_CALL(state, task);
@@ -358,10 +377,12 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
                                             GET_DADY(inputs),
                                             color,
                                             depth,
-                                            0xffff,
+                                            mask,
                                             &task->thread_data,
                                             stride,
-                                            depth_stride);
+                                            depth_stride,
+                                            sample_stride,
+                                            depth_sample_stride);
          END_JIT_CALL();
       }
    }
@@ -395,18 +416,20 @@ lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
  * \param y  Y position of quad in window coords
  */
 void
-lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
-                         const struct lp_rast_shader_inputs *inputs,
-                         unsigned x, unsigned y,
-                         unsigned mask)
+lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
+                                const struct lp_rast_shader_inputs *inputs,
+                                unsigned x, unsigned y,
+                                uint64_t mask)
 {
    const struct lp_rast_state *state = task->state;
    struct lp_fragment_shader_variant *variant = state->variant;
    const struct lp_scene *scene = task->scene;
    uint8_t *color[PIPE_MAX_COLOR_BUFS];
    unsigned stride[PIPE_MAX_COLOR_BUFS];
+   unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
    uint8_t *depth = NULL;
    unsigned depth_stride = 0;
+   unsigned depth_sample_stride = 0;
    unsigned i;
 
    assert(state);
@@ -424,11 +447,13 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    for (i = 0; i < scene->fb.nr_cbufs; i++) {
       if (scene->fb.cbufs[i]) {
          stride[i] = scene->cbufs[i].stride;
+         sample_stride[i] = scene->cbufs[i].sample_stride;
          color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
-                                                    inputs->layer);
+                                                    inputs->layer + inputs->view_index);
       }
       else {
          stride[i] = 0;
+         sample_stride[i] = 0;
          color[i] = NULL;
       }
    }
@@ -436,7 +461,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    /* depth buffer */
    if (scene->zsbuf.map) {
       depth_stride = scene->zsbuf.stride;
-      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer);
+      depth_sample_stride = scene->zsbuf.sample_stride;
+      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index);
    }
 
    assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
@@ -448,6 +474,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
       /* Propagate non-interpolated raster state. */
       task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+      task->thread_data.raster_state.view_index = inputs->view_index;
 
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
@@ -462,12 +489,24 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                                             mask,
                                             &task->thread_data,
                                             stride,
-                                            depth_stride);
+                                            depth_stride,
+                                            sample_stride,
+                                            depth_sample_stride);
       END_JIT_CALL();
    }
 }
 
-
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+                         const struct lp_rast_shader_inputs *inputs,
+                         unsigned x, unsigned y,
+                         unsigned mask)
+{
+   uint64_t new_mask = 0; /* widened mask handed to the per-sample variant below */
+   for (unsigned i = 0; i < task->scene->fb_max_samples; i++)
+      new_mask |= ((uint64_t)mask) << (16 * i); /* copy of 'mask' at bit offset 16*i, one copy per framebuffer sample */
+   lp_rast_shade_quads_mask_sample(task, inputs, x, y, new_mask);
+}
 
 /**
  * Begin a new occlusion query.
@@ -588,7 +627,18 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
    lp_rast_triangle_32_8,
    lp_rast_triangle_32_3_4,
    lp_rast_triangle_32_3_16,
-   lp_rast_triangle_32_4_16
+   lp_rast_triangle_32_4_16,
+   lp_rast_triangle_ms_1,
+   lp_rast_triangle_ms_2,
+   lp_rast_triangle_ms_3,
+   lp_rast_triangle_ms_4,
+   lp_rast_triangle_ms_5,
+   lp_rast_triangle_ms_6,
+   lp_rast_triangle_ms_7,
+   lp_rast_triangle_ms_8,
+   lp_rast_triangle_ms_3_4,
+   lp_rast_triangle_ms_3_16,
+   lp_rast_triangle_ms_4_16,
 };
 
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 4b5ca8192..c8154348e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -131,7 +131,11 @@ struct lp_rasterizer
    util_barrier barrier;
 };
 
-
+void
+lp_rast_shade_quads_mask_sample(struct lp_rasterizer_task *task,
+                                const struct lp_rast_shader_inputs *inputs,
+                                unsigned x, unsigned y,
+                                uint64_t mask);
 void
 lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                          const struct lp_rast_shader_inputs *inputs,
@@ -230,28 +234,37 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
    struct lp_fragment_shader_variant *variant = state->variant;
    uint8_t *color[PIPE_MAX_COLOR_BUFS];
    unsigned stride[PIPE_MAX_COLOR_BUFS];
+   unsigned sample_stride[PIPE_MAX_COLOR_BUFS];
    uint8_t *depth = NULL;
    unsigned depth_stride = 0;
+   unsigned depth_sample_stride = 0;
    unsigned i;
 
    /* color buffer */
    for (i = 0; i < scene->fb.nr_cbufs; i++) {
       if (scene->fb.cbufs[i]) {
          stride[i] = scene->cbufs[i].stride;
+         sample_stride[i] = scene->cbufs[i].sample_stride;
          color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
-                                                    inputs->layer);
+                                                    inputs->layer + inputs->view_index);
       }
       else {
          stride[i] = 0;
+         sample_stride[i] = 0;
          color[i] = NULL;
       }
    }
 
    if (scene->zsbuf.map) {
-      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer);
+      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer + inputs->view_index);
+      depth_sample_stride = scene->zsbuf.sample_stride;
       depth_stride = scene->zsbuf.stride;
    }
 
+   uint64_t mask = 0;
+   for (unsigned i = 0; i < scene->fb_max_samples; i++)
+      mask |= (uint64_t)0xffff << (16 * i);
+
    /*
     * The rasterizer may produce fragments outside our
     * allocated 4x4 blocks hence need to filter them out here.
@@ -259,6 +272,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
       /* Propagate non-interpolated raster state. */
       task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+      task->thread_data.raster_state.view_index = inputs->view_index;
 
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
@@ -270,10 +284,12 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
                                          GET_DADY(inputs),
                                          color,
                                          depth,
-                                         0xffff,
+                                         mask,
                                          &task->thread_data,
                                          stride,
-                                         depth_stride);
+                                         depth_stride,
+                                         sample_stride,
+                                         depth_sample_stride);
       END_JIT_CALL();
    }
 }
@@ -331,6 +347,58 @@ void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *,
 void lp_rast_triangle_32_4_16( struct lp_rasterizer_task *, 
                             const union lp_rast_cmd_arg );
 
+void lp_rast_triangle_ms_1( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_2( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_3( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_4( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_5( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_6( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_7( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_8( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_3_4(struct lp_rasterizer_task *,
+                          const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_3_16( struct lp_rasterizer_task *,
+                            const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_4_16( struct lp_rasterizer_task *,
+                            const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_32_1( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_2( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_3( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_4( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_5( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_6( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_7( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+void lp_rast_triangle_ms_32_8( struct lp_rasterizer_task *,
+                         const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_32_3_4(struct lp_rasterizer_task *,
+                          const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_32_3_16( struct lp_rasterizer_task *,
+                            const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_ms_32_4_16( struct lp_rasterizer_task *,
+                            const union lp_rast_cmd_arg );
+
 void
 lp_rast_set_state(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
index beab118ac..539b84c65 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -34,6 +34,8 @@
 #include "lp_scene.h"
 #include "lp_fence.h"
 #include "lp_debug.h"
+#include "lp_context.h"
+#include "lp_state_fs.h"
 
 
 #define RESOURCE_REF_SZ 32
@@ -45,6 +47,14 @@ struct resource_ref {
    struct resource_ref *next;
 };
 
+#define SHADER_REF_SZ 32 /* number of variant slots in one shader_ref block */
+/** List of shader variant references */
+struct shader_ref {
+   struct lp_fragment_shader_variant *variant[SHADER_REF_SZ];
+   int count; /* number of entries of variant[] currently in use */
+   struct shader_ref *next; /* next block in the list, NULL at the end */
+};
+
 
 /**
  * Create a new scene object.
@@ -106,8 +116,8 @@ lp_scene_is_empty(struct lp_scene *scene )
 {
    unsigned x, y;
 
-   for (y = 0; y < TILES_Y; y++) {
-      for (x = 0; x < TILES_X; x++) {
+   for (y = 0; y < scene->tiles_y; y++) {
+      for (x = 0; x < scene->tiles_x; x++) {
          const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
          if (bin->head) {
             return FALSE;
@@ -160,6 +170,8 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
       if (!cbuf) {
          scene->cbufs[i].stride = 0;
          scene->cbufs[i].layer_stride = 0;
+         scene->cbufs[i].sample_stride = 0;
+         scene->cbufs[i].nr_samples = 0;
          scene->cbufs[i].map = NULL;
          continue;
       }
@@ -169,18 +181,22 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                            cbuf->u.tex.level);
          scene->cbufs[i].layer_stride = llvmpipe_layer_stride(cbuf->texture,
                                                               cbuf->u.tex.level);
+         scene->cbufs[i].sample_stride = llvmpipe_sample_stride(cbuf->texture);
 
          scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture,
                                                      cbuf->u.tex.level,
                                                      cbuf->u.tex.first_layer,
                                                      LP_TEX_USAGE_READ_WRITE);
          scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
+         scene->cbufs[i].nr_samples = util_res_sample_count(cbuf->texture);
       }
       else {
          struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
          unsigned pixstride = util_format_get_blocksize(cbuf->format);
          scene->cbufs[i].stride = cbuf->texture->width0;
          scene->cbufs[i].layer_stride = 0;
+         scene->cbufs[i].sample_stride = 0;
+         scene->cbufs[i].nr_samples = 1;
          scene->cbufs[i].map = lpr->data;
          scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
          scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
@@ -191,7 +207,8 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
       struct pipe_surface *zsbuf = scene->fb.zsbuf;
       scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level);
       scene->zsbuf.layer_stride = llvmpipe_layer_stride(zsbuf->texture, zsbuf->u.tex.level);
-
+      scene->zsbuf.sample_stride = llvmpipe_sample_stride(zsbuf->texture);
+      scene->zsbuf.nr_samples = util_res_sample_count(zsbuf->texture);
       scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
                                                zsbuf->u.tex.level,
                                                zsbuf->u.tex.first_layer,
@@ -274,6 +291,22 @@ lp_scene_end_rasterization(struct lp_scene *scene )
                       j, scene->resource_reference_size);
    }
 
+   /* Decrement shader variant ref counts
+    */
+   {
+      struct shader_ref *ref;
+      int i, j = 0;
+
+      for (ref = scene->frag_shaders; ref; ref = ref->next) {
+         for (i = 0; i < ref->count; i++) {
+            if (LP_DEBUG & DEBUG_SETUP)
+               debug_printf("shader %d: %p\n", j, (void *) ref->variant[i]);
+            j++;
+            lp_fs_variant_reference(llvmpipe_context(scene->pipe), &ref->variant[i], NULL);
+         }
+      }
+   }
+
    /* Free all scene data blocks:
     */
    {
@@ -292,6 +325,7 @@ lp_scene_end_rasterization(struct lp_scene *scene )
    lp_fence_reference(&scene->fence, NULL);
 
    scene->resources = NULL;
+   scene->frag_shaders = NULL;
    scene->scene_size = 0;
    scene->resource_reference_size = 0;
 
@@ -428,6 +462,53 @@ lp_scene_add_resource_reference(struct lp_scene *scene,
 
 
 /**
+ * Add a reference to a fragment shader variant
+ */
+boolean
+lp_scene_add_frag_shader_reference(struct lp_scene *scene,
+                                   struct lp_fragment_shader_variant *variant)
+{
+   struct shader_ref *ref, **last = &scene->frag_shaders;
+   int i;
+
+   /* Look at existing shader reference blocks:
+    */
+   for (ref = scene->frag_shaders; ref; ref = ref->next) {
+      last = &ref->next;
+
+      /* Search for this variant:
+       */
+      for (i = 0; i < ref->count; i++)
+         if (ref->variant[i] == variant)
+            return TRUE; /* already referenced by this scene */
+
+      if (ref->count < SHADER_REF_SZ) {
+         /* If the block still has a free slot, then append the reference here.
+          */
+         break;
+      }
+   }
+
+   /* Create a new block if no block with a free slot was found.
+    */
+   if (!ref) {
+      assert(*last == NULL);
+      *last = lp_scene_alloc(scene, sizeof *ref);
+      if (*last == NULL)
+          return FALSE; /* lp_scene_alloc() could not provide a new block */
+
+      ref = *last;
+      memset(ref, 0, sizeof *ref);
+   }
+
+   /* Append the reference to the reference block.
+    */
+   lp_fs_variant_reference(llvmpipe_context(scene->pipe), &ref->variant[ref->count++], variant);
+
+   return TRUE;
+}
+
+/**
  * Does this scene have a reference to the given resource?
  */
 boolean
@@ -545,6 +626,13 @@ void lp_scene_begin_binning(struct lp_scene *scene,
       max_layer = MIN2(max_layer, zsbuf->u.tex.last_layer - zsbuf->u.tex.first_layer);
    }
    scene->fb_max_layer = max_layer;
+   scene->fb_max_samples = util_framebuffer_get_num_samples(fb);
+   if (scene->fb_max_samples == 4) {
+      for (unsigned i = 0; i < 4; i++) {
+         scene->fixed_sample_pos[i][0] = util_iround(lp_sample_pos_4x[i][0] * FIXED_ONE);
+         scene->fixed_sample_pos[i][1] = util_iround(lp_sample_pos_4x[i][1] * FIXED_ONE);
+      }
+   }
 }
 
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
index b4ed8817e..ba6b20139 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -60,7 +60,7 @@ struct lp_rast_state;
 
 /* Scene temporary storage is clamped to this size:
  */
-#define LP_SCENE_MAX_SIZE (9*1024*1024)
+#define LP_SCENE_MAX_SIZE (36*1024*1024)
 
 /* The maximum amount of texture storage referenced by a scene is
  * clamped to this size:
@@ -117,6 +117,8 @@ struct data_block_list {
 
 struct resource_ref;
 
+struct shader_ref;
+
 /**
  * All bins and bin data are contained here.
  * Per-bin data goes into the 'tile' bins.
@@ -143,17 +145,28 @@ struct lp_scene {
       unsigned stride;
       unsigned layer_stride;
       unsigned format_bytes;
+      unsigned sample_stride;
+      unsigned nr_samples;
    } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
 
    /* The amount of layers in the fb (minimum of all attachments) */
    unsigned fb_max_layer;
 
+   /* fixed point sample positions. */
+   int32_t fixed_sample_pos[LP_MAX_SAMPLES][2];
+
+   /* max samples for bound framebuffer */
+   unsigned fb_max_samples;
+
    /** the framebuffer to render the scene into */
    struct pipe_framebuffer_state fb;
 
    /** list of resources referenced by the scene commands */
    struct resource_ref *resources;
 
+   /** list of frag shaders referenced by the scene commands */
+   struct shader_ref *frag_shaders;
+
    /** Total memory used by the scene (in bytes).  This sums all the
     * data blocks and counts all bins, state, resource references and
     * other random allocations within the scene.
@@ -201,6 +214,10 @@ boolean lp_scene_add_resource_reference(struct lp_scene *scene,
 boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
                                         const struct pipe_resource *resource );
 
+boolean lp_scene_add_frag_shader_reference(struct lp_scene *scene,
+                                           struct lp_fragment_shader_variant *variant);
+
+
 
 /**
  * Allocate space for a command/data in the bin's data buffer.
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
index e951baa06..2adf8b786 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -38,7 +38,7 @@
 #include "draw/draw_context.h"
 #include "gallivm/lp_bld_type.h"
 #include "gallivm/lp_bld_nir.h"
-
+#include "util/disk_cache.h"
 #include "util/os_misc.h"
 #include "util/os_time.h"
 #include "lp_texture.h"
@@ -52,7 +52,7 @@
 #include "lp_rast.h"
 #include "lp_cs_tpool.h"
 
-#include "state_tracker/sw_winsys.h"
+#include "frontend/sw_winsys.h"
 
 #include "nir.h"
 
@@ -74,7 +74,7 @@ static const struct debug_named_value lp_debug_flags[] = {
    { "fs", DEBUG_FS, NULL },
    { "cs", DEBUG_CS, NULL },
    { "tgsi_ir", DEBUG_TGSI_IR, NULL },
-   { "cl", DEBUG_CL, NULL },
+   { "cache_stats", DEBUG_CACHE_STATS, NULL },
    DEBUG_NAMED_VALUE_END
 };
 #endif
@@ -96,7 +96,7 @@ static const struct debug_named_value lp_perf_flags[] = {
 static const char *
 llvmpipe_get_vendor(struct pipe_screen *screen)
 {
-   return "VMware, Inc.";
+   return "Mesa/X.org";
 }
 
 
@@ -126,16 +126,11 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 1;
    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
       return PIPE_MAX_SO_BUFFERS;
-   case PIPE_CAP_ANISOTROPIC_FILTER:
-      return 0;
    case PIPE_CAP_POINT_SPRITE:
       return 1;
    case PIPE_CAP_MAX_RENDER_TARGETS:
       return PIPE_MAX_COLOR_BUFS;
    case PIPE_CAP_OCCLUSION_QUERY:
-      return 1;
-   case PIPE_CAP_QUERY_TIME_ELAPSED:
-      return 0;
    case PIPE_CAP_QUERY_TIMESTAMP:
       return 1;
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@@ -144,10 +139,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
       return 1;
    case PIPE_CAP_TEXTURE_SWIZZLE:
+   case PIPE_CAP_TEXTURE_SHADOW_LOD:
       return 1;
-   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
-   case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:
-      return 0;
    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
       return 1 << (LP_MAX_TEXTURE_2D_LEVELS - 1);
    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
@@ -166,10 +159,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
       return 1;
-   case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
-   case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
-      return 0;
    case PIPE_CAP_PRIMITIVE_RESTART:
+   case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
       return 1;
    case PIPE_CAP_DEPTH_CLIP_DISABLE:
       return 1;
@@ -178,9 +169,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TGSI_INSTANCEID:
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
    case PIPE_CAP_START_INSTANCE:
-      return 1;
-   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
-      return 0;
    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
@@ -194,41 +182,35 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_TEXEL_OFFSET:
       return 31;
    case PIPE_CAP_CONDITIONAL_RENDER:
-      return 1;
    case PIPE_CAP_TEXTURE_BARRIER:
-      return 0;
+      return 1;
    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
       return 16*4;
    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
       return 1024;
-   case PIPE_CAP_MAX_VERTEX_STREAMS:
-      return 1;
+   case PIPE_CAP_MAX_VERTEX_STREAMS: {
+      struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
+      return lscreen->use_tgsi ? 1 : 4;
+   }
    case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
       return 2048;
    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
-      return 1;
-   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
-      return 0;
    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
       return 1;
-   case PIPE_CAP_GLSL_FEATURE_LEVEL:
-      return 330;
+   case PIPE_CAP_GLSL_FEATURE_LEVEL: {
+      struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
+      return lscreen->use_tgsi ? 330 : 450;
+   }
    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
       return 140;
-   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
-      return 0;
    case PIPE_CAP_COMPUTE:
       return GALLIVM_HAVE_CORO;
    case PIPE_CAP_USER_VERTEX_BUFFERS:
       return 1;
-   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
-   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
-   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
-      return 0;
    case PIPE_CAP_TGSI_TEXCOORD:
    case PIPE_CAP_DRAW_INDIRECT:
       return 1;
@@ -237,14 +219,13 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 1;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
       return 16;
-   case PIPE_CAP_TEXTURE_MULTISAMPLE:
-      return 0;
    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
       return 64;
    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
       return 1;
+   /* Addressing that many 64bpp texels fits in an i32 so this is a reasonable value */
    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 134217728;
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
       return 16;
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
@@ -253,16 +234,13 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return PIPE_MAX_VIEWPORTS;
    case PIPE_CAP_ENDIANNESS:
       return PIPE_ENDIAN_NATIVE;
+   case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
       return 1;
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
       return 1;
    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
       return 4;
-   case PIPE_CAP_TEXTURE_GATHER_SM5:
-   case PIPE_CAP_SAMPLE_SHADING:
-   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-      return 0;
    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
       return 1;
    case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
@@ -270,8 +248,10 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TGSI_TEX_TXF_LZ:
    case PIPE_CAP_SAMPLER_VIEW_TARGET:
       return 1;
-   case PIPE_CAP_FAKE_SW_MSAA:
-      return 1;
+   case PIPE_CAP_FAKE_SW_MSAA: {
+      struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
+      return lscreen->use_tgsi ? 1 : 0;
+   }
    case PIPE_CAP_TEXTURE_QUERY_LOD:
    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
    case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
@@ -307,8 +287,6 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return 0;
    case PIPE_CAP_CLIP_HALFZ:
       return 1;
-   case PIPE_CAP_VERTEXID_NOBASE:
-      return 0;
    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
@@ -322,71 +300,39 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_VARYINGS:
       return 32;
    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
-      return 1;
+      return 16;
    case PIPE_CAP_QUERY_BUFFER_OBJECT:
       return 1;
    case PIPE_CAP_DRAW_PARAMETERS:
       return 1;
+   case PIPE_CAP_FBFETCH:
+      return 8;
+   case PIPE_CAP_FBFETCH_COHERENT:
+      return 0;
    case PIPE_CAP_MULTI_DRAW_INDIRECT:
    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
       return 1;
-   case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
-   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+      return 1;
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
-   case PIPE_CAP_DEPTH_BOUNDS_TEST:
-   case PIPE_CAP_TGSI_TXQS:
-   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
-   case PIPE_CAP_SHAREABLE_SHADERS:
-   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
-   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
-   case PIPE_CAP_INVALIDATE_BUFFER:
-   case PIPE_CAP_GENERATE_MIPMAP:
-   case PIPE_CAP_STRING_MARKER:
-   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
-   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
-   case PIPE_CAP_QUERY_MEMORY_INFO:
+      return 32;
+   case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS:
+      return 8;
    case PIPE_CAP_PCI_GROUP:
    case PIPE_CAP_PCI_BUS:
    case PIPE_CAP_PCI_DEVICE:
    case PIPE_CAP_PCI_FUNCTION:
-   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
-   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
-   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
-   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
-   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
-   case PIPE_CAP_NATIVE_FENCE_FD:
    case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
-   case PIPE_CAP_FBFETCH:
-   case PIPE_CAP_TGSI_MUL_ZERO_WINS:
-   case PIPE_CAP_TGSI_CLOCK:
-   case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
-   case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
-   case PIPE_CAP_TGSI_BALLOT:
-   case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
-   case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
    case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
-   case PIPE_CAP_POST_DEPTH_COVERAGE:
-   case PIPE_CAP_BINDLESS_TEXTURE:
-   case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
-   case PIPE_CAP_MEMOBJ:
-   case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
-   case PIPE_CAP_TILE_RASTER_ORDER:
-   case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
-   case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:
-   case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
-   case PIPE_CAP_CONTEXT_PRIORITY_MASK:
-   case PIPE_CAP_FENCE_SIGNAL:
-   case PIPE_CAP_CONSTBUF0_FLAGS:
-   case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
-   case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
-   case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
-   case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
-   case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
-   case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:
-   case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
       return 0;
+
+   case PIPE_CAP_SHAREABLE_SHADERS:
+      /* Can't expose shareable shaders because the draw shaders reference the
+       * draw module's state, which is per-context.
+       */
+      return 0;
+
    case PIPE_CAP_MAX_GS_INVOCATIONS:
       return 32;
    case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
@@ -395,8 +341,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:
    case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
       return 1;
+   case PIPE_CAP_SAMPLER_REDUCTION_MINMAX:
+   case PIPE_CAP_TGSI_TXQS:
    case PIPE_CAP_TGSI_VOTE:
    case PIPE_CAP_LOAD_CONSTBUF:
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+   case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_GL_SPIRV:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
    case PIPE_CAP_PACKED_UNIFORMS: {
       struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
       return !lscreen->use_tgsi;
@@ -411,14 +363,15 @@ llvmpipe_get_shader_param(struct pipe_screen *screen,
                           enum pipe_shader_type shader,
                           enum pipe_shader_cap param)
 {
+   struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
    switch(shader)
    {
    case PIPE_SHADER_COMPUTE:
-      if ((LP_DEBUG & DEBUG_CL) && param == PIPE_SHADER_CAP_SUPPORTED_IRS)
+      if ((lscreen->allow_cl) && param == PIPE_SHADER_CAP_SUPPORTED_IRS)
          return (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_NIR_SERIALIZED);
+      FALLTHROUGH;
    case PIPE_SHADER_FRAGMENT:
       if (param == PIPE_SHADER_CAP_PREFERRED_IR) {
-         struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
          if (lscreen->use_tgsi)
             return PIPE_SHADER_IR_TGSI;
          else
@@ -428,10 +381,16 @@ llvmpipe_get_shader_param(struct pipe_screen *screen,
       default:
          return gallivm_get_shader_param(param);
       }
+      FALLTHROUGH;
+   case PIPE_SHADER_TESS_CTRL:
+   case PIPE_SHADER_TESS_EVAL:
+      /* Tessellation shader needs llvm coroutines support */
+      if (!GALLIVM_HAVE_CORO || lscreen->use_tgsi)
+         return 0;
+      FALLTHROUGH;
    case PIPE_SHADER_VERTEX:
    case PIPE_SHADER_GEOMETRY:
       if (param == PIPE_SHADER_CAP_PREFERRED_IR) {
-         struct llvmpipe_screen *lscreen = llvmpipe_screen(screen);
          if (lscreen->use_tgsi)
             return PIPE_SHADER_IR_TGSI;
          else
@@ -466,13 +425,13 @@ llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
 {
    switch (param) {
    case PIPE_CAPF_MAX_LINE_WIDTH:
-      /* fall-through */
+      FALLTHROUGH;
    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
       return 255.0; /* arbitrary */
    case PIPE_CAPF_MAX_POINT_WIDTH:
-      /* fall-through */
+      FALLTHROUGH;
    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-      return 255.0; /* arbitrary */
+      return LP_MAX_POINT_WIDTH; /* arbitrary */
    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
       return 16.0; /* not actually signficant at this time */
    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
@@ -559,7 +518,7 @@ llvmpipe_get_compute_param(struct pipe_screen *_screen,
    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
       if (ret) {
          uint32_t *images = ret;
-         *images = 0;
+         *images = LP_MAX_TGSI_SHADER_IMAGES;
       }
       return sizeof(uint32_t);
    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
@@ -599,27 +558,40 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .lower_fsat = true,
    .lower_bitfield_insert_to_shifts = true,
    .lower_bitfield_extract_to_shifts = true,
-   .lower_sub = true,
-   .lower_ffma = true,
+   .lower_fdot = true,
+   .lower_fdph = true,
+   .lower_ffma16 = true,
+   .lower_ffma32 = true,
+   .lower_ffma64 = true,
    .lower_fmod = true,
    .lower_hadd = true,
    .lower_add_sat = true,
+   .lower_ldexp = true,
    .lower_pack_snorm_2x16 = true,
    .lower_pack_snorm_4x8 = true,
    .lower_pack_unorm_2x16 = true,
    .lower_pack_unorm_4x8 = true,
+   .lower_pack_half_2x16 = true,
+   .lower_pack_split = true,
    .lower_unpack_snorm_2x16 = true,
    .lower_unpack_snorm_4x8 = true,
    .lower_unpack_unorm_2x16 = true,
    .lower_unpack_unorm_4x8 = true,
+   .lower_unpack_half_2x16 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
    .lower_rotate = true,
+   .lower_uadd_carry = true,
+   .lower_usub_borrow = true,
+   .lower_mul_2x32_64 = true,
    .lower_ifind_msb = true,
-   .optimize_sample_mask_in = true,
    .max_unroll_iterations = 32,
    .use_interpolated_input_intrinsics = true,
    .lower_to_scalar = true,
+   .lower_cs_local_index_from_id = true,
+   .lower_uniforms_to_ubo = true,
+   .lower_vector_cmp = true,
+   .lower_device_index_to_zero = true,
 };
 
 static void
@@ -671,7 +643,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
           target == PIPE_TEXTURE_CUBE ||
           target == PIPE_TEXTURE_CUBE_ARRAY);
 
-   if (sample_count > 1)
+   if (sample_count != 0 && sample_count != 1 && sample_count != 4)
       return false;
 
    if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
@@ -716,6 +688,10 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
       }
    }
 
+   if (!(bind & PIPE_BIND_VERTEX_BUFFER) &&
+       util_format_is_scaled(format))
+      return false;
+
    if (bind & PIPE_BIND_DISPLAY_TARGET) {
       if(!winsys->is_displaytarget_format_supported(winsys, bind, format))
          return false;
@@ -727,16 +703,10 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 
       if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
          return false;
-
-      /* TODO: Support stencil-only formats */
-      if (format_desc->swizzle[0] == PIPE_SWIZZLE_NONE) {
-         return false;
-      }
    }
 
    if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC ||
-       format_desc->layout == UTIL_FORMAT_LAYOUT_ATC ||
-       format_desc->layout == UTIL_FORMAT_LAYOUT_FXT1) {
+       format_desc->layout == UTIL_FORMAT_LAYOUT_ATC) {
       /* Software decoding is not hooked up. */
       return false;
    }
@@ -758,6 +728,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 
 static void
 llvmpipe_flush_frontbuffer(struct pipe_screen *_screen,
+                           struct pipe_context *_pipe,
                            struct pipe_resource *resource,
                            unsigned level, unsigned layer,
                            void *context_private,
@@ -786,6 +757,10 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen )
 
    lp_jit_screen_cleanup(screen);
 
+   if (LP_DEBUG & DEBUG_CACHE_STATS)
+      printf("disk shader cache:   hits = %u, misses = %u\n", screen->num_disk_shader_cache_hits,
+             screen->num_disk_shader_cache_misses);
+   disk_cache_destroy(screen->disk_shader_cache);
    if(winsys->destroy)
       winsys->destroy(winsys);
 
@@ -843,6 +818,77 @@ llvmpipe_get_timestamp(struct pipe_screen *_screen)
    return os_time_get_nano();
 }
 
+static void update_cache_sha1_cpu(struct mesa_sha1 *ctx)
+{
+   const struct util_cpu_caps_t *cpu_caps = util_get_cpu_caps();
+   /*
+    * Don't need the cpu cache affinity stuff. The rest
+    * is contained in first 5 dwords.
+    */
+   STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t));
+   _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t));
+}
+
+static void lp_disk_cache_create(struct llvmpipe_screen *screen)
+{
+   struct mesa_sha1 ctx;
+   unsigned gallivm_perf = gallivm_get_perf_flags();
+   unsigned char sha1[20];
+   char cache_id[20 * 2 + 1];
+   _mesa_sha1_init(&ctx);
+
+   if (!disk_cache_get_function_identifier(lp_disk_cache_create, &ctx) ||
+       !disk_cache_get_function_identifier(LLVMLinkInMCJIT, &ctx))
+      return;
+
+   _mesa_sha1_update(&ctx, &gallivm_perf, sizeof(gallivm_perf));
+   update_cache_sha1_cpu(&ctx);
+   _mesa_sha1_final(&ctx, sha1);
+   disk_cache_format_hex_id(cache_id, sha1, 20 * 2);
+
+   screen->disk_shader_cache = disk_cache_create("llvmpipe", cache_id, 0);
+}
+
+static struct disk_cache *lp_get_disk_shader_cache(struct pipe_screen *_screen)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+
+   return screen->disk_shader_cache;
+}
+
+void lp_disk_cache_find_shader(struct llvmpipe_screen *screen,
+                               struct lp_cached_code *cache,
+                               unsigned char ir_sha1_cache_key[20])
+{
+   unsigned char sha1[CACHE_KEY_SIZE];
+
+   if (!screen->disk_shader_cache)
+      return;
+   disk_cache_compute_key(screen->disk_shader_cache, ir_sha1_cache_key, 20, sha1);
+
+   size_t binary_size;
+   uint8_t *buffer = disk_cache_get(screen->disk_shader_cache, sha1, &binary_size);
+   if (!buffer) {
+      cache->data_size = 0;
+      p_atomic_inc(&screen->num_disk_shader_cache_misses);
+      return;
+   }
+   cache->data_size = binary_size;
+   cache->data = buffer;
+   p_atomic_inc(&screen->num_disk_shader_cache_hits);
+}
+
+void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen,
+                                 struct lp_cached_code *cache,
+                                 unsigned char ir_sha1_cache_key[20])
+{
+   unsigned char sha1[CACHE_KEY_SIZE];
+
+   if (!screen->disk_shader_cache || !cache->data_size || cache->dont_cache)
+      return;
+   disk_cache_compute_key(screen->disk_shader_cache, ir_sha1_cache_key, 20, sha1);
+   disk_cache_put(screen->disk_shader_cache, sha1, cache->data, cache->data_size, NULL);
+}
 /**
  * Create a new pipe_screen object
  * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
@@ -893,10 +939,13 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
    screen->base.get_timestamp = llvmpipe_get_timestamp;
 
    screen->base.finalize_nir = llvmpipe_finalize_nir;
+
+   screen->base.get_disk_shader_cache = lp_get_disk_shader_cache;
    llvmpipe_init_screen_resource_funcs(&screen->base);
 
+   screen->allow_cl = !!getenv("LP_CL");
    screen->use_tgsi = (LP_DEBUG & DEBUG_TGSI_IR);
-   screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
+   screen->num_threads = util_get_cpu_caps()->nr_cpus > 1 ? util_get_cpu_caps()->nr_cpus : 0;
 #ifdef EMBEDDED_DEVICE
    screen->num_threads = 0;
 #endif
@@ -920,5 +969,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
    }
    (void) mtx_init(&screen->cs_mutex, mtx_plain);
 
+   lp_disk_cache_create(screen);
    return &screen->base;
 }
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
index 7c57f3ec1..a790c199c 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -38,7 +38,7 @@
 #include "pipe/p_defines.h"
 #include "os/os_thread.h"
 #include "gallivm/lp_bld.h"
-
+#include "gallivm/lp_bld_misc.h"
 
 struct sw_winsys;
 struct lp_cs_tpool;
@@ -62,9 +62,19 @@ struct llvmpipe_screen
    mtx_t cs_mutex;
 
    bool use_tgsi;
-};
+   bool allow_cl;
 
+   struct disk_cache *disk_shader_cache;
+   unsigned num_disk_shader_cache_hits;
+   unsigned num_disk_shader_cache_misses;
+};
 
+void lp_disk_cache_find_shader(struct llvmpipe_screen *screen,
+                               struct lp_cached_code *cache,
+                               unsigned char ir_sha1_cache_key[20]);
+void lp_disk_cache_insert_shader(struct llvmpipe_screen *screen,
+                                 struct lp_cached_code *cache,
+                                 unsigned char ir_sha1_cache_key[20]);
 
 
 static inline struct llvmpipe_screen *
@@ -73,6 +83,10 @@ llvmpipe_screen( struct pipe_screen *pipe )
    return (struct llvmpipe_screen *)pipe;
 }
 
-
+static inline unsigned lp_get_constant_buffer_stride(struct pipe_screen *_screen)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+   return screen->use_tgsi ? (sizeof(float) * 4) : sizeof(float);
+}
 
 #endif /* LP_SCREEN_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
index 002c8b8a2..883473919 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -53,7 +53,7 @@
 #include "lp_setup_context.h"
 #include "lp_screen.h"
 #include "lp_state.h"
-#include "state_tracker/sw_winsys.h"
+#include "frontend/sw_winsys.h"
 
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
@@ -128,6 +128,7 @@ void lp_setup_reset( struct lp_setup_context *setup )
       setup->constants[i].stored_size = 0;
       setup->constants[i].stored_data = NULL;
    }
+
    setup->fs.stored = NULL;
    setup->dirty = ~0;
 
@@ -409,23 +410,7 @@ lp_setup_try_clear_color_buffer(struct lp_setup_context *setup,
 
    LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
 
-   if (util_format_is_pure_integer(format)) {
-      /*
-       * We expect int/uint clear values here, though some APIs
-       * might disagree (but in any case util_pack_color()
-       * couldn't handle it)...
-       */
-      if (util_format_is_pure_sint(format)) {
-         util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
-      }
-      else {
-         assert(util_format_is_pure_uint(format));
-         util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
-      }
-   }
-   else {
-      util_pack_color(color->f, format, &uc);
-   }
+   util_pack_color_union(format, &uc, color);
 
    if (setup->state == SETUP_ACTIVE) {
       struct lp_scene *scene = setup->scene;
@@ -456,7 +441,7 @@ lp_setup_try_clear_color_buffer(struct lp_setup_context *setup,
    else {
       /* Put ourselves into the 'pre-clear' state, specifically to try
        * and accumulate multiple clears to color and depth_stencil
-       * buffers which the app or state-tracker might issue
+       * buffers which the app or gallium frontend might issue
        * separately.
        */
       set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
@@ -520,7 +505,7 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
    else {
       /* Put ourselves into the 'pre-clear' state, specifically to try
        * and accumulate multiple clears to color and depth_stencil
-       * buffers which the app or state-tracker might issue
+       * buffers which the app or gallium frontend might issue
        * separately.
        */
       set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
@@ -583,13 +568,15 @@ lp_setup_set_triangle_state( struct lp_setup_context *setup,
                              boolean ccw_is_frontface,
                              boolean scissor,
                              boolean half_pixel_center,
-                             boolean bottom_edge_rule)
+                             boolean bottom_edge_rule,
+                             boolean multisample)
 {
    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
 
    setup->ccw_is_frontface = ccw_is_frontface;
    setup->cullmode = cull_mode;
    setup->triangle = first_triangle;
+   setup->multisample = multisample;
    setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f;
    setup->bottom_edge_rule = bottom_edge_rule;
 
@@ -638,7 +625,6 @@ lp_setup_set_fs_variant( struct lp_setup_context *setup,
 {
    LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__,
           variant);
-   /* FIXME: reference count */
 
    setup->fs.current.variant = variant;
    setup->dirty |= LP_SETUP_NEW_FS;
@@ -656,10 +642,10 @@ lp_setup_set_fs_constants(struct lp_setup_context *setup,
    assert(num <= ARRAY_SIZE(setup->constants));
 
    for (i = 0; i < num; ++i) {
-      util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]);
+      util_copy_constant_buffer(&setup->constants[i].current, &buffers[i], false);
    }
    for (; i < ARRAY_SIZE(setup->constants); i++) {
-      util_copy_constant_buffer(&setup->constants[i].current, NULL);
+      util_copy_constant_buffer(&setup->constants[i].current, NULL, false);
    }
    setup->dirty |= LP_SETUP_NEW_CONSTANTS;
 }
@@ -716,6 +702,7 @@ lp_setup_set_fs_images(struct lp_setup_context *setup,
          jit_image->width = res->width0;
          jit_image->height = res->height0;
          jit_image->depth = res->depth0;
+         jit_image->num_samples = res->nr_samples;
 
          if (llvmpipe_resource_is_texture(res)) {
             uint32_t mip_offset = lp_res->mip_offsets[image->u.tex.level];
@@ -741,6 +728,7 @@ lp_setup_set_fs_images(struct lp_setup_context *setup,
 
             jit_image->row_stride = lp_res->row_stride[image->u.tex.level];
             jit_image->img_stride = lp_res->img_stride[image->u.tex.level];
+            jit_image->sample_stride = lp_res->sample_stride;
             jit_image->base = (uint8_t *)jit_image->base + mip_offset;
          }
          else {
@@ -753,7 +741,7 @@ lp_setup_set_fs_images(struct lp_setup_context *setup,
    for (; i < ARRAY_SIZE(setup->images); i++) {
       util_copy_image_view(&setup->images[i].current, NULL);
    }
-   setup->dirty |= LP_SETUP_NEW_IMAGES;
+   setup->dirty |= LP_SETUP_NEW_FS;
 }
 
 void
@@ -815,6 +803,15 @@ lp_setup_set_scissors( struct lp_setup_context *setup,
    setup->dirty |= LP_SETUP_NEW_SCISSOR;
 }
 
+void
+lp_setup_set_sample_mask(struct lp_setup_context *setup,
+                         uint32_t sample_mask)
+{
+   if (setup->fs.current.jit_context.sample_mask != sample_mask) {
+      setup->fs.current.jit_context.sample_mask = sample_mask;
+      setup->dirty |= LP_SETUP_NEW_FS;
+   }
+}
 
 void 
 lp_setup_set_flatshade_first(struct lp_setup_context *setup,
@@ -938,6 +935,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                jit_tex->mip_offsets[0] = 0;
                jit_tex->row_stride[0] = 0;
                jit_tex->img_stride[0] = 0;
+               jit_tex->num_samples = 0;
+               jit_tex->sample_stride = 0;
             }
             else {
                jit_tex->width = res->width0;
@@ -945,6 +944,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                jit_tex->depth = res->depth0;
                jit_tex->first_level = first_level;
                jit_tex->last_level = last_level;
+               jit_tex->num_samples = res->nr_samples;
+               jit_tex->sample_stride = 0;
 
                if (llvmpipe_resource_is_texture(res)) {
                   for (j = first_level; j <= last_level; j++) {
@@ -953,6 +954,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                      jit_tex->img_stride[j] = lp_tex->img_stride[j];
                   }
 
+                  jit_tex->sample_stride = lp_tex->sample_stride;
+
                   if (res->target == PIPE_TEXTURE_1D_ARRAY ||
                       res->target == PIPE_TEXTURE_2D_ARRAY ||
                       res->target == PIPE_TEXTURE_CUBE ||
@@ -1003,7 +1006,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
             struct llvmpipe_screen *screen = llvmpipe_screen(res->screen);
             struct sw_winsys *winsys = screen->winsys;
             jit_tex->base = winsys->displaytarget_map(winsys, lp_tex->dt,
-                                                         PIPE_TRANSFER_READ);
+                                                         PIPE_MAP_READ);
             jit_tex->row_stride[0] = lp_tex->row_stride[0];
             jit_tex->img_stride[0] = lp_tex->img_stride[0];
             jit_tex->mip_offsets[0] = 0;
@@ -1011,6 +1014,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
             jit_tex->height = res->height0;
             jit_tex->depth = res->depth0;
             jit_tex->first_level = jit_tex->last_level = 0;
+            jit_tex->num_samples = res->nr_samples;
+            jit_tex->sample_stride = 0;
             assert(jit_tex->base);
          }
       }
@@ -1177,6 +1182,12 @@ try_update_scene_state( struct lp_setup_context *setup )
       setup->dirty |= LP_SETUP_NEW_FS;
    }
 
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(setup->pipe);
+   if (llvmpipe->dirty & LP_NEW_FS_CONSTANTS)
+      lp_setup_set_fs_constants(llvmpipe->setup,
+                                ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_FRAGMENT]),
+                                llvmpipe->constants[PIPE_SHADER_FRAGMENT]);
+
    if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
       for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
          struct pipe_resource *buffer = setup->constants[i].current.buffer;
@@ -1196,7 +1207,7 @@ try_update_scene_state( struct lp_setup_context *setup )
             current_data = (ubyte *) setup->constants[i].current.user_buffer;
          }
 
-         if (current_data) {
+         if (current_data && current_size >= sizeof(float)) {
             current_data += setup->constants[i].current.buffer_offset;
 
             /* TODO: copy only the actually used constants? */
@@ -1230,7 +1241,7 @@ try_update_scene_state( struct lp_setup_context *setup )
          }
 
          num_constants =
-            DIV_ROUND_UP(setup->constants[i].stored_size, (sizeof(float) * 4));
+            DIV_ROUND_UP(setup->constants[i].stored_size, lp_get_constant_buffer_stride(scene->pipe->screen));
          setup->fs.current.jit_context.num_constants[i] = num_constants;
          setup->dirty |= LP_SETUP_NEW_FS;
       }
@@ -1275,9 +1286,14 @@ try_update_scene_state( struct lp_setup_context *setup )
             return FALSE;
          }
 
-         memcpy(stored,
-                &setup->fs.current,
-                sizeof setup->fs.current);
+         memcpy(&stored->jit_context,
+                &setup->fs.current.jit_context,
+                sizeof setup->fs.current.jit_context);
+         stored->variant = setup->fs.current.variant;
+
+         if (!lp_scene_add_frag_shader_reference(scene,
+                                                 setup->fs.current.variant))
+            return FALSE;
          setup->fs.stored = stored;
          
          /* The scene now references the textures in the rasterization
@@ -1504,7 +1520,6 @@ void
 lp_setup_begin_query(struct lp_setup_context *setup,
                      struct llvmpipe_query *pq)
 {
-
    set_scene_state(setup, SETUP_ACTIVE, "begin_query");
 
    if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 701dcadfd..82fc14b5e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -50,7 +50,6 @@
 #define LP_SETUP_NEW_SCISSOR     0x08
 #define LP_SETUP_NEW_VIEWPORTS   0x10
 #define LP_SETUP_NEW_SSBOS       0x20
-#define LP_SETUP_NEW_IMAGES      0x40
 
 struct lp_setup_variant;
 
@@ -76,6 +75,7 @@ struct lp_setup_context
 
    struct pipe_context *pipe;
    struct vertex_info *vertex_info;
+   uint view_index;
    uint prim;
    uint vertex_size;
    uint nr_vertices;
@@ -101,6 +101,7 @@ struct lp_setup_context
    boolean scissor_test;
    boolean point_size_per_vertex;
    boolean rasterizer_discard;
+   boolean multisample;
    unsigned cullmode;
    unsigned bottom_edge_rule;
    float pixel_offset;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 5e26b1e9f..0535138df 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -298,7 +298,7 @@ try_setup_line( struct lp_setup_context *setup,
    int nr_planes = 4;
    unsigned viewport_index = 0;
    unsigned layer = 0;
-   
+   float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset;
    /* linewidth should be interpreted as integer */
    int fixed_width = util_iround(width) * FIXED_ONE;
 
@@ -320,6 +320,10 @@ try_setup_line( struct lp_setup_context *setup,
    boolean will_draw_start;
    boolean will_draw_end;
 
+   if (lp_context->active_statistics_queries) {
+      lp_context->pipeline_statistics.c_primitives++;
+   }
+
    if (0)
       print_line(setup, v1, v2);
 
@@ -357,10 +361,10 @@ try_setup_line( struct lp_setup_context *setup,
    if (fabsf(dx) >= fabsf(dy)) {
       float dydx = dy / dx;
 
-      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
-      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
-      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
-      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+      x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f;
+      y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f;
+      x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f;
+      y2diff = v2[0][1] - floorf(v2[0][1]) - 0.5f;
 
       if (y2diff==-0.5 && dy<0){
          y2diff = 0.5;
@@ -440,25 +444,25 @@ try_setup_line( struct lp_setup_context *setup,
       }
   
       /* x/y positions in fixed point */
-      x[0] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset);
-      x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
-      x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
-      x[3] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset);
+      x[0] = subpixel_snap(v1[0][0] + x_offset     - pixel_offset);
+      x[1] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset);
+      x[2] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset);
+      x[3] = subpixel_snap(v1[0][0] + x_offset     - pixel_offset);
       
-      y[0] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset) - fixed_width/2;
-      y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2;
-      y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2;
-      y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset) + fixed_width/2;
+      y[0] = subpixel_snap(v1[0][1] + y_offset     - pixel_offset) - fixed_width/2;
+      y[1] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset) - fixed_width/2;
+      y[2] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset) + fixed_width/2;
+      y[3] = subpixel_snap(v1[0][1] + y_offset     - pixel_offset) + fixed_width/2;
       
    }
    else {
       const float dxdy = dx / dy;
 
       /* Y-MAJOR LINE */      
-      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
-      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
-      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
-      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+      x1diff = v1[0][0] - floorf(v1[0][0]) - 0.5f;
+      y1diff = v1[0][1] - floorf(v1[0][1]) - 0.5f;
+      x2diff = v2[0][0] - floorf(v2[0][0]) - 0.5f;
+      y2diff = v2[0][1] - floorf(v2[0][1]) - 0.5f;
 
       if (x2diff==-0.5 && dx<0) {
          x2diff = 0.5;
@@ -537,15 +541,15 @@ try_setup_line( struct lp_setup_context *setup,
       }
 
       /* x/y positions in fixed point */
-      x[0] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset) - fixed_width/2;
-      x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
-      x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2;
-      x[3] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset) + fixed_width/2;
+      x[0] = subpixel_snap(v1[0][0] + x_offset     - pixel_offset) - fixed_width/2;
+      x[1] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset) - fixed_width/2;
+      x[2] = subpixel_snap(v2[0][0] + x_offset_end - pixel_offset) + fixed_width/2;
+      x[3] = subpixel_snap(v1[0][0] + x_offset     - pixel_offset) + fixed_width/2;
      
-      y[0] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset); 
-      y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
-      y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
-      y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset);
+      y[0] = subpixel_snap(v1[0][1] + y_offset     - pixel_offset);
+      y[1] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset);
+      y[2] = subpixel_snap(v2[0][1] + y_offset_end - pixel_offset);
+      y[3] = subpixel_snap(v1[0][1] + y_offset     - pixel_offset);
    }
 
    /* Bounding rectangle (in pixels) */
@@ -593,12 +597,9 @@ try_setup_line( struct lp_setup_context *setup,
     * Determine how many scissor planes we need, that is drop scissor
     * edges if the bounding box of the tri is fully inside that edge.
     */
-   if (setup->scissor_test) {
-      /* why not just use draw_regions */
-      scissor = &setup->scissors[viewport_index];
-      scissor_planes_needed(s_planes, &bboxpos, scissor);
-      nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
-   }
+   scissor = &setup->draw_regions[viewport_index];
+   scissor_planes_needed(s_planes, &bboxpos, scissor);
+   nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
 
    line = lp_setup_alloc_triangle(scene,
                                   key->num_inputs,
@@ -616,10 +617,6 @@ try_setup_line( struct lp_setup_context *setup,
 
    LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries) {
-      lp_context->pipeline_statistics.c_primitives++;
-   }
-
    /* calculate the deltas */
    plane = GET_PLANES(line);
    plane[0].dcdy = x[0] - x[1];
@@ -651,6 +648,7 @@ try_setup_line( struct lp_setup_context *setup,
    line->inputs.opaque = FALSE;
    line->inputs.layer = layer;
    line->inputs.viewport_index = viewport_index;
+   line->inputs.view_index = setup->view_index;
 
    /*
     * XXX: this code is mostly identical to the one in lp_setup_tri, except it
@@ -673,7 +671,7 @@ try_setup_line( struct lp_setup_context *setup,
          plane[i].c++;
       }
       else if (plane[i].dcdx == 0) {
-         if (setup->pixel_offset == 0) {
+         if (setup->bottom_edge_rule == 0) {
             /* correct for top-left fill convention:
              */
             if (plane[i].dcdy > 0) plane[i].c++;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 092febdba..696612309 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -81,7 +81,7 @@ point_persp_coeff(struct lp_setup_context *setup,
 {
    /*
     * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A
-    * better stratergy would be to take the primitive in consideration when
+    * better strategy would be to take the primitive in consideration when
     * generating the fragment shader key, and therefore avoid the per-fragment
     * perspective divide.
     */
@@ -240,7 +240,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
 
       case LP_INTERP_LINEAR:
          /* Sprite tex coords may use linear interpolation someday */
-         /* fall-through */
+         FALLTHROUGH;
       case LP_INTERP_PERSPECTIVE: {
          /* check if the sprite coord flag is set for this attribute.
           * If so, set it up so it up so x and y vary from 0 to 1.
@@ -270,7 +270,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
             break;
          }
       }
-         /* fall-through */
+         FALLTHROUGH;
       case LP_INTERP_CONSTANT:
          for (i = 0; i < NUM_CHANNELS; i++) {
             if (usage_mask & (1 << i)) {
@@ -337,17 +337,20 @@ try_setup_point( struct lp_setup_context *setup,
    /* x/y positions in fixed point */
    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
    const int sizeAttr = setup->psize_slot;
-   const float size
+   float size
       = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
       : setup->point_size;
 
+   if (size > LP_MAX_POINT_WIDTH)
+      size = LP_MAX_POINT_WIDTH;
+
    /* Yes this is necessary to accurately calculate bounding boxes
     * with the two fill-conventions we support.  GL (normally) ends
     * up needing a bottom-left fill convention, which requires
     * slightly different rounding.
     */
    int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
-
+   float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset;
    struct lp_scene *scene = setup->scene;
    struct lp_rast_triangle *point;
    unsigned bytes;
@@ -382,8 +385,8 @@ try_setup_point( struct lp_setup_context *setup,
        */
       fixed_width = MAX2(FIXED_ONE, subpixel_snap(size));
 
-      x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2;
-      y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2;
+      x0 = subpixel_snap(v0[0][0] - pixel_offset) - fixed_width/2;
+      y0 = subpixel_snap(v0[0][1] - pixel_offset) - fixed_width/2;
 
       bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER;
       bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER;
@@ -401,7 +404,7 @@ try_setup_point( struct lp_setup_context *setup,
        * Per OpenGL 2.1 spec, section 3.3.1, "Basic Point Rasterization".
        *
        * This type of point rasterization is only available in pre 3.0 contexts
-       * (or compatibilility contexts which we don't support) anyway.
+       * (or compatibility contexts which we don't support) anyway.
        */
 
       const int x0 = subpixel_snap(v0[0][0]);
@@ -444,6 +447,10 @@ try_setup_point( struct lp_setup_context *setup,
                    bbox.x1, bbox.y1);
    }
 
+   if (lp_context->active_statistics_queries) {
+      lp_context->pipeline_statistics.c_primitives++;
+   }
+
    if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
       if (0) debug_printf("offscreen\n");
       LP_COUNT(nr_culled_tris);
@@ -466,10 +473,6 @@ try_setup_point( struct lp_setup_context *setup,
 
    LP_COUNT(nr_tris);
 
-   if (lp_context->active_statistics_queries) {
-      lp_context->pipeline_statistics.c_primitives++;
-   }
-
    if (draw_will_inject_frontface(lp_context->draw) &&
        setup->face_slot > 0) {
       point->inputs.frontfacing = v0[setup->face_slot][0];
@@ -495,6 +498,7 @@ try_setup_point( struct lp_setup_context *setup,
    point->inputs.opaque = FALSE;
    point->inputs.layer = layer;
    point->inputs.viewport_index = viewport_index;
+   point->inputs.view_index = setup->view_index;
 
    {
       struct lp_rast_plane *plane = GET_PLANES(point);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index d24a4b4af..4fb76dd22 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -104,8 +104,9 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
    tri->inputs.stride = input_array_sz;
 
    {
-      char *a = (char *)tri;
-      char *b = (char *)&GET_PLANES(tri)[nr_planes];
+      ASSERTED char *a = (char *)tri;
+      ASSERTED char *b = (char *)&GET_PLANES(tri)[nr_planes];
+
       assert(b - a == *tri_size);
    }
 
@@ -204,7 +205,18 @@ lp_rast_32_tri_tab[MAX_PLANES+1] = {
    LP_RAST_OP_TRIANGLE_32_8
 };
 
-
+static unsigned
+lp_rast_ms_tri_tab[MAX_PLANES+1] = {   /* rasterizer op used when setup->multisample is set, indexed by plane count */
+   0,               /* zero planes: should be impossible */
+   LP_RAST_OP_MS_TRIANGLE_1,
+   LP_RAST_OP_MS_TRIANGLE_2,
+   LP_RAST_OP_MS_TRIANGLE_3,
+   LP_RAST_OP_MS_TRIANGLE_4,
+   LP_RAST_OP_MS_TRIANGLE_5,
+   LP_RAST_OP_MS_TRIANGLE_6,
+   LP_RAST_OP_MS_TRIANGLE_7,
+   LP_RAST_OP_MS_TRIANGLE_8
+};
 
 /**
  * The primitive covers the whole tile- shade whole tile.
@@ -249,7 +261,7 @@ lp_setup_whole_tile(struct lp_setup_context *setup,
    } else {
       LP_COUNT(nr_shade_64);
       return lp_scene_bin_cmd_with_state( scene, tx, ty,
-                                          setup->fs.stored, 
+                                          setup->fs.stored,
                                           LP_RAST_OP_SHADE_TILE,
                                           lp_rast_arg_inputs(inputs) );
    }
@@ -273,7 +285,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
    struct lp_rast_triangle *tri;
    struct lp_rast_plane *plane;
-   const struct u_rect *scissor;
+   const struct u_rect *scissor = NULL;
    struct u_rect bbox, bboxpos;
    boolean s_planes[4];
    unsigned tri_bytes;
@@ -348,12 +360,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
     * Determine how many scissor planes we need, that is drop scissor
     * edges if the bounding box of the tri is fully inside that edge.
     */
-   if (setup->scissor_test) {
-      /* why not just use draw_regions */
-      scissor = &setup->scissors[viewport_index];
-      scissor_planes_needed(s_planes, &bboxpos, scissor);
-      nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
-   }
+   scissor = &setup->draw_regions[viewport_index];
+   scissor_planes_needed(s_planes, &bboxpos, scissor);
+   nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
 
    tri = lp_setup_alloc_triangle(scene,
                                  key->num_inputs,
@@ -386,6 +395,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
    tri->inputs.opaque = setup->fs.current.variant->opaque;
    tri->inputs.layer = layer;
    tri->inputs.viewport_index = viewport_index;
+   tri->inputs.view_index = setup->view_index;
 
    if (0)
       lp_dump_setup_coef(&setup->setup.variant->key,
@@ -759,6 +769,8 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
    struct lp_scene *scene = setup->scene;
    struct u_rect trimmed_box = *bbox;   
    int i;
+   unsigned cmd;
+
    /* What is the largest power-of-two boundary this triangle crosses:
     */
    int dx = floor_pot((bbox->x0 ^ bbox->x1) |
@@ -808,11 +820,12 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
              */
             assert(px + 4 <= TILE_SIZE);
             assert(py + 4 <= TILE_SIZE);
+            if (setup->multisample)
+               cmd = LP_RAST_OP_MS_TRIANGLE_3_4;
+            else
+               cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_4 : LP_RAST_OP_TRIANGLE_3_4;
             return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
-                                                setup->fs.stored,
-                                                use_32bits ?
-                                                LP_RAST_OP_TRIANGLE_32_3_4 :
-                                                LP_RAST_OP_TRIANGLE_3_4,
+                                                setup->fs.stored, cmd,
                                                 lp_rast_arg_triangle_contained(tri, px, py) );
          }
 
@@ -832,11 +845,12 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
             assert(px + 16 <= TILE_SIZE);
             assert(py + 16 <= TILE_SIZE);
 
+            if (setup->multisample)
+               cmd = LP_RAST_OP_MS_TRIANGLE_3_16;
+            else
+               cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_3_16 : LP_RAST_OP_TRIANGLE_3_16;
             return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
-                                                setup->fs.stored,
-                                                use_32bits ?
-                                                LP_RAST_OP_TRIANGLE_32_3_16 :
-                                                LP_RAST_OP_TRIANGLE_3_16,
+                                                setup->fs.stored, cmd,
                                                 lp_rast_arg_triangle_contained(tri, px, py) );
          }
       }
@@ -848,20 +862,24 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
          assert(px + 16 <= TILE_SIZE);
          assert(py + 16 <= TILE_SIZE);
 
+         if (setup->multisample)
+            cmd = LP_RAST_OP_MS_TRIANGLE_4_16;
+         else
+            cmd = use_32bits ? LP_RAST_OP_TRIANGLE_32_4_16 : LP_RAST_OP_TRIANGLE_4_16;
          return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
-                                            setup->fs.stored,
-                                            use_32bits ?
-                                            LP_RAST_OP_TRIANGLE_32_4_16 :
-                                            LP_RAST_OP_TRIANGLE_4_16,
+                                            setup->fs.stored, cmd,
                                             lp_rast_arg_triangle_contained(tri, px, py));
       }
 
 
       /* Triangle is contained in a single tile:
        */
+      if (setup->multisample)
+         cmd = lp_rast_ms_tri_tab[nr_planes];
+      else
+         cmd = use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes];
       return lp_scene_bin_cmd_with_state(
-         scene, ix0, iy0, setup->fs.stored,
-         use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes],
+         scene, ix0, iy0, setup->fs.stored, cmd,
          lp_rast_arg_triangle(tri, (1<<nr_planes)-1));
    }
    else
@@ -933,12 +951,13 @@ lp_setup_bin_triangle(struct lp_setup_context *setup,
                 */
                int count = util_bitcount(partial);
                in = TRUE;
-               
+
+               if (setup->multisample)
+                  cmd = lp_rast_ms_tri_tab[count];
+               else
+                  cmd = use_32bits ? lp_rast_32_tri_tab[count] : lp_rast_tri_tab[count];
                if (!lp_scene_bin_cmd_with_state( scene, x, y,
-                                                 setup->fs.stored,
-                                                 use_32bits ?
-                                                 lp_rast_32_tri_tab[count] :
-                                                 lp_rast_tri_tab[count],
+                                                 setup->fs.stored, cmd,
                                                  lp_rast_arg_triangle(tri, partial) ))
                   goto fail;
 
@@ -1008,6 +1027,7 @@ calc_fixed_position(struct lp_setup_context *setup,
                     const float (*v1)[4],
                     const float (*v2)[4])
 {
+   float pixel_offset = setup->multisample ? 0.0 : setup->pixel_offset;
    /*
     * The rounding may not be quite the same with PIPE_ARCH_SSE
     * (util_iround right now only does nearest/even on x87,
@@ -1019,7 +1039,7 @@ calc_fixed_position(struct lp_setup_context *setup,
    __m128 vxy0xy2, vxy1xy0;
    __m128i vxy0xy2i, vxy1xy0i;
    __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
-   __m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
+   __m128 pix_offset = _mm_set1_ps(pixel_offset);
    __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
    v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
    vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
@@ -1045,14 +1065,14 @@ calc_fixed_position(struct lp_setup_context *setup,
    _mm_store_si128((__m128i *)&position->y[0], y0120);
 
 #else
-   position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
-   position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
-   position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+   position->x[0] = subpixel_snap(v0[0][0] - pixel_offset);
+   position->x[1] = subpixel_snap(v1[0][0] - pixel_offset);
+   position->x[2] = subpixel_snap(v2[0][0] - pixel_offset);
    position->x[3] = 0; // should be unused
 
-   position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
-   position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
-   position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+   position->y[0] = subpixel_snap(v0[0][1] - pixel_offset);
+   position->y[1] = subpixel_snap(v1[0][1] - pixel_offset);
+   position->y[2] = subpixel_snap(v2[0][1] - pixel_offset);
    position->y[3] = 0; // should be unused
 
    position->dx01 = position->x[0] - position->x[1];
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 6f8e855e8..04899dd9b 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -178,7 +178,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
  * Called just prior to drawing anything (pipe::draw_arrays(), etc).
  *
  * Hopefully this will remain quite simple, otherwise need to pull in
- * something like the state tracker mechanism.
+ * something like the gallium frontend mechanism.
  */
 void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
 {
@@ -195,6 +195,8 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
    if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
                           LP_NEW_FS |
                           LP_NEW_GS |
+                          LP_NEW_TCS |
+                          LP_NEW_TES |
                           LP_NEW_VS))
       compute_vertex_info(llvmpipe);
 
@@ -212,6 +214,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
    if (llvmpipe->dirty & (LP_NEW_FS |
                           LP_NEW_FRAMEBUFFER |
                           LP_NEW_RASTERIZER |
+                          LP_NEW_SAMPLE_MASK |
                           LP_NEW_DEPTH_STENCIL_ALPHA)) {
 
       /*
@@ -223,10 +226,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
       boolean null_fs = !llvmpipe->fs ||
                         llvmpipe->fs->info.base.num_instructions <= 1;
       boolean discard =
-         (llvmpipe->sample_mask & 1) == 0 ||
+         (llvmpipe->sample_mask) == 0 ||
          (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE) ||
          (null_fs &&
-          !llvmpipe->depth_stencil->depth.enabled &&
+          !llvmpipe->depth_stencil->depth_enabled &&
           !llvmpipe->depth_stencil->stencil[0].enabled);
       lp_setup_set_rasterizer_discard(llvmpipe->setup, discard);
    }
@@ -236,6 +239,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
                           LP_NEW_RASTERIZER))
       llvmpipe_update_setup( llvmpipe );
 
+   if (llvmpipe->dirty & LP_NEW_SAMPLE_MASK)
+      lp_setup_set_sample_mask(llvmpipe->setup, llvmpipe->sample_mask);
+
    if (llvmpipe->dirty & LP_NEW_BLEND_COLOR)
       lp_setup_set_blend_color(llvmpipe->setup,
                                &llvmpipe->blend_color);
@@ -245,7 +251,7 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
 
    if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) {
       lp_setup_set_alpha_ref_value(llvmpipe->setup, 
-                                   llvmpipe->depth_stencil->alpha.ref_value);
+                                   llvmpipe->depth_stencil->alpha_ref_value);
       lp_setup_set_stencil_ref_values(llvmpipe->setup,
                                       llvmpipe->stencil_ref.ref_value);
    }
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 1c81155aa..2fe01ce48 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -89,6 +89,7 @@
 #include "gallivm/lp_bld_pack.h"
 #include "gallivm/lp_bld_format.h"
 #include "gallivm/lp_bld_quad.h"
+#include "gallivm/lp_bld_gather.h"
 
 #include "lp_bld_alpha.h"
 #include "lp_bld_blend.h"
@@ -105,9 +106,106 @@
 #include "lp_rast.h"
 #include "nir/nir_to_tgsi_info.h"
 
+#include "lp_screen.h"
+#include "compiler/nir/nir_serialize.h"
+#include "util/mesa-sha1.h"
 /** Fragment shader number (for debugging) */
 static unsigned fs_no = 0;
 
+static void   /* forward declaration: fs_fb_fetch() calls this before its definition is seen */
+load_unswizzled_block(struct gallivm_state *gallivm,
+                      LLVMValueRef base_ptr,
+                      LLVMValueRef stride,
+                      unsigned block_width,
+                      unsigned block_height,
+                      LLVMValueRef* dst,
+                      struct lp_type dst_type,
+                      unsigned dst_count,
+                      unsigned dst_alignment,
+                      LLVMValueRef x_offset,
+                      LLVMValueRef y_offset,
+                      bool fb_fetch_twiddle);
+/**
+ * Tell whether a format description is an "arithmetic" format.
+ *
+ * True when any channel's size differs from channel 0's or is not a
+ * whole number of bytes, e.g. R3_G3_B2 or R5_G6_B5.
+ */
+static inline boolean
+is_arithmetic_format(const struct util_format_description *format_desc)
+{
+   const unsigned first_size = format_desc->channel[0].size;
+
+   for (unsigned c = 0; c < format_desc->nr_channels; ++c) {
+      const unsigned size = format_desc->channel[c].size;
+      if (size != first_size || (size % 8) != 0)
+         return true;
+   }
+   return false;
+}
+
+/**
+ * Tell whether blending this format needs expansion to floats while the
+ * format also has a "natural" packed AoS -> unpacked SoA conversion.
+ * That is the case for R11G11B10_FLOAT and for every sRGB-colorspace format.
+ */
+static inline boolean
+format_expands_to_float_soa(const struct util_format_description *format_desc)
+{
+   const boolean is_r11g11b10f =
+      format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT;
+   const boolean is_srgb =
+      format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
+   return is_r11g11b10f || is_srgb;
+}
+
+
+/**
+ * Fill *type with the lp_type describing a format's layout in memory
+ *
+ * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
+ */
+static inline void
+lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
+                             struct lp_type* type)
+{
+   unsigned chan = 0;
+
+   if (format_expands_to_float_soa(format_desc)) {
+      /* a single plain unsigned integer as wide as the whole block */
+      type->floating = false;
+      type->fixed = false;
+      type->sign = false;
+      type->norm = false;
+      type->width = format_desc->block.bits;
+      type->length = 1;
+      return;
+   }
+
+   /* pick the first non-void channel (chan ends at 4 if all are void) */
+   while (chan < 4 &&
+          format_desc->channel[chan].type == UTIL_FORMAT_TYPE_VOID)
+      chan++;
+
+   memset(type, 0, sizeof(struct lp_type));
+   type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
+   type->fixed    = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
+   type->sign     = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
+   type->norm     = format_desc->channel[chan].normalized;
+
+   if (!is_arithmetic_format(format_desc)) {
+      type->width = format_desc->channel[chan].size;
+      type->length = format_desc->nr_channels;
+      return;
+   }
+
+   /* irregular channels: one element spanning the bits of every channel */
+   unsigned total_bits = 0;
+   for (unsigned c = 0; c < format_desc->nr_channels; ++c)
+      total_bits += format_desc->channel[c].size;
+   type->width = total_bits;
+   type->length = 1;
+}
 
 /**
  * Expand the relevant bits of mask_input to a n*4-dword mask for the
@@ -123,7 +221,8 @@ static LLVMValueRef
 generate_quad_mask(struct gallivm_state *gallivm,
                    struct lp_type fs_type,
                    unsigned first_quad,
-                   LLVMValueRef mask_input) /* int32 */
+                   unsigned sample,
+                   LLVMValueRef mask_input) /* int64 */
 {
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_type mask_type;
@@ -162,6 +261,11 @@ generate_quad_mask(struct gallivm_state *gallivm,
       shift = 0;
    }
 
+   mask_input = LLVMBuildLShr(builder, mask_input, lp_build_const_int64(gallivm, 16 * sample), "");
+   mask_input = LLVMBuildTrunc(builder, mask_input,
+                               i32t, "");
+   mask_input = LLVMBuildAnd(builder, mask_input, lp_build_const_int32(gallivm, 0xffff), "");
+
    mask_input = LLVMBuildLShr(builder,
                               mask_input,
                               LLVMConstInt(i32t, shift, 0),
@@ -287,6 +391,163 @@ lp_build_depth_clamp(struct gallivm_state *gallivm,
    return lp_build_clamp(&f32_bld, z, min_depth, max_depth);
 }
 
+/** Alpha-to-coverage: sample s keeps coverage only where alpha > s / coverage_samples. */
+static void
+lp_build_sample_alpha_to_coverage(struct gallivm_state *gallivm,
+                                  struct lp_type type,
+                                  unsigned coverage_samples,
+                                  LLVMValueRef num_loop,
+                                  LLVMValueRef loop_counter,
+                                  LLVMValueRef coverage_mask_store,
+                                  LLVMValueRef alpha)
+{
+   struct lp_build_context bld;
+   LLVMBuilderRef builder = gallivm->builder;
+   float step = 1.0 / coverage_samples;
+   lp_build_context_init(&bld, gallivm, type);
+   for (unsigned s = 0; s < coverage_samples; s++) {
+      LLVMValueRef alpha_ref_value = lp_build_const_vec(gallivm, type, step * s);
+      LLVMValueRef test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);
+      /* sample s's mask lives at index s * num_loop + loop_counter */
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, lp_build_const_int32(gallivm, s), num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_counter, "");
+      LLVMValueRef s_mask_ptr = LLVMBuildGEP(builder, coverage_mask_store, &s_mask_idx, 1, "");
+      LLVMValueRef s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, test, "");
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+   }
+}
+
+struct lp_build_fs_llvm_iface {
+   struct lp_build_fs_iface base;   /* must stay first: fs_interp()/fs_fb_fetch() cast the base pointer back to this struct */
+   struct lp_build_interp_soa_context *interp;   /* handed to lp_build_interp_soa() by fs_interp() */
+   struct lp_build_for_loop_state *loop_state;   /* its counter is the current loop iteration */
+   LLVMValueRef mask_store;   /* forwarded to lp_build_interp_soa() by fs_interp() */
+   LLVMValueRef sample_id;   /* multiplied by the sample stride in fs_fb_fetch() */
+   LLVMValueRef color_ptr_ptr;   /* array of color buffer pointers, indexed by cbuf */
+   LLVMValueRef color_stride_ptr;   /* array of color buffer strides, indexed by cbuf */
+   LLVMValueRef color_sample_stride_ptr;   /* array of per-sample strides, indexed by cbuf */
+   const struct lp_fragment_shader_variant_key *key;   /* shader variant key read by fs_fb_fetch() */
+};
+
+/* Interpolate one attribute channel with lp_build_interp_soa(); when both
+ * flags are set, `sample` takes precedence over `centroid`. */
+static LLVMValueRef fs_interp(const struct lp_build_fs_iface *iface,
+                              struct lp_build_context *bld,
+                              unsigned attrib, unsigned chan,
+                              bool centroid, bool sample,
+                              LLVMValueRef attrib_indir,
+                              LLVMValueRef offsets[2])
+{
+   struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
+   const unsigned loc = sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
+                        centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
+                                   TGSI_INTERPOLATE_LOC_CENTER;
+
+   return lp_build_interp_soa(fs_iface->interp, bld->gallivm,
+                              fs_iface->loop_state->counter,
+                              fs_iface->mask_store,
+                              attrib, chan, loc, attrib_indir, offsets);
+}
+
+static void fs_fb_fetch(const struct lp_build_fs_iface *iface,
+                                struct lp_build_context *bld,
+                                unsigned cbuf,   /* index of the color buffer to read */
+                                LLVMValueRef result[4])   /* out: channels unpacked by lp_build_unpack_rgba_soa() */
+{
+   struct lp_build_fs_llvm_iface *fs_iface = (struct lp_build_fs_llvm_iface *)iface;
+   struct gallivm_state *gallivm = bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   const struct lp_fragment_shader_variant_key *key = fs_iface->key;
+   LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
+   LLVMValueRef color_ptr = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_ptr_ptr, &index, 1, ""), "");
+   LLVMValueRef stride = LLVMBuildLoad(builder, LLVMBuildGEP(builder, fs_iface->color_stride_ptr, &index, 1, ""), "");
+   /* Overview: load a block_width x block_height block of color buffer cbuf via load_unswizzled_block(), then unpack it into result[]. */
+   LLVMValueRef dst[4 * 4];
+   enum pipe_format cbuf_format = key->cbuf_format[cbuf];
+   const struct util_format_description* out_format_desc = util_format_description(cbuf_format);
+   struct lp_type dst_type;
+   unsigned block_size = bld->type.length;   /* vector length of the build context */
+   unsigned block_height = key->resource_1d ? 1 : 2;   /* 1D resources use a single row */
+   unsigned block_width = block_size / block_height;
+
+   lp_mem_type_from_format_desc(out_format_desc, &dst_type);
+   /* blend_type is only used below, to build the vector pointer type color_ptr is cast to */
+   struct lp_type blend_type;
+   memset(&blend_type, 0, sizeof blend_type);
+   blend_type.floating = FALSE; /* values are integers */
+   blend_type.sign = FALSE;     /* values are unsigned */
+   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
+   blend_type.width = 8;        /* 8-bit ubyte values */
+   blend_type.length = 16;      /* 16 elements per vector */
+
+   uint32_t dst_alignment;
+   /*
+    * Compute the alignment of the destination pointer in bytes
+    * We fetch 1-4 pixels, if the format has pot alignment then those fetches
+    * are always aligned by MIN2(16, fetch_width) except for buffers (not
+    * 1d tex but can't distinguish here) so need to stick with per-pixel
+    * alignment in this case.
+    */
+   if (key->resource_1d) {
+      dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
+   }
+   else {
+      dst_alignment = dst_type.length * dst_type.width / 8;
+   }
+   /* Force power-of-two alignment by extracting only the least-significant-bit */
+   dst_alignment = 1 << (ffs(dst_alignment) - 1);
+   /*
+    * Resource base and stride pointers are aligned to 16 bytes, so that's
+    * the maximum alignment we can guarantee
+    */
+   dst_alignment = MIN2(16, dst_alignment);
+
+   LLVMTypeRef blend_vec_type = lp_build_vec_type(gallivm, blend_type);
+   color_ptr = LLVMBuildBitCast(builder, color_ptr, LLVMPointerType(blend_vec_type, 0), "");
+   /* NOTE(review): color_ptr now points to blend_vec_type, so the GEP below steps in units of that vector - confirm sample_stride's units match */
+   if (key->multisample) {
+      LLVMValueRef sample_stride = LLVMBuildLoad(builder,
+                                                 LLVMBuildGEP(builder, fs_iface->color_sample_stride_ptr,
+                                                              &index, 1, ""), "");
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, fs_iface->sample_id, "");
+      color_ptr = LLVMBuildGEP(builder, color_ptr, &sample_offset, 1, "");
+   }
+   /* The fragment shader executes on 4x4 blocks; depending on vector width that takes 2 or 4 loop iterations.
+    * Only move to the next row once the top row is complete: 1 iteration when 8 wide, 2 iterations when 4 wide. */
+   LLVMValueRef x_offset = NULL, y_offset = NULL;   /* stay NULL for 1D resources */
+   if (!key->resource_1d) {
+      LLVMValueRef counter = fs_iface->loop_state->counter;
+
+      if (block_size == 4) {   /* x_offset = (counter & 1) * 2; the row counter advances every second iteration */
+         x_offset = LLVMBuildShl(builder,
+                                 LLVMBuildAnd(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), ""),
+                                 lp_build_const_int32(gallivm, 1), "");
+         counter = LLVMBuildLShr(builder, fs_iface->loop_state->counter, lp_build_const_int32(gallivm, 1), "");
+      }
+      y_offset = LLVMBuildMul(builder, counter, lp_build_const_int32(gallivm, 2), "");   /* two rows per row step */
+   }
+   load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, dst, dst_type, block_size, dst_alignment, x_offset, y_offset, true);
+   /* reinterpret every loaded element as a 32-bit integer and gather them into one vector */
+   for (unsigned i = 0; i < block_size; i++) {
+      dst[i] = LLVMBuildBitCast(builder, dst[i], LLVMInt32TypeInContext(gallivm->context), "");
+   }
+   LLVMValueRef packed = lp_build_gather_values(gallivm, dst, block_size);
+   /* pure-integer RGB formats are unpacked as int/uint vectors instead of bld->type */
+   struct lp_type texel_type = bld->type;
+   if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+       out_format_desc->channel[0].pure_integer) {
+      if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+         texel_type = lp_type_int_vec(bld->type.width, bld->type.width * bld->type.length);
+      }
+      else if (out_format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+         texel_type = lp_type_uint_vec(bld->type.width, bld->type.width * bld->type.length);
+      }
+   }
+   lp_build_unpack_rgba_soa(gallivm, out_format_desc,
+                            texel_type,
+                            packed, result);
+}
 
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
@@ -298,14 +559,19 @@ generate_fs_loop(struct gallivm_state *gallivm,
                  LLVMBuilderRef builder,
                  struct lp_type type,
                  LLVMValueRef context_ptr,
+                 LLVMValueRef sample_pos_array,
                  LLVMValueRef num_loop,
                  struct lp_build_interp_soa_context *interp,
                  const struct lp_build_sampler_soa *sampler,
                  const struct lp_build_image_soa *image,
                  LLVMValueRef mask_store,
                  LLVMValueRef (*out_color)[4],
-                 LLVMValueRef depth_ptr,
+                 LLVMValueRef depth_base_ptr,
                  LLVMValueRef depth_stride,
+                 LLVMValueRef depth_sample_stride,
+                 LLVMValueRef color_ptr_ptr,
+                 LLVMValueRef color_stride_ptr,
+                 LLVMValueRef color_sample_stride_ptr,
                  LLVMValueRef facing,
                  LLVMValueRef thread_data_ptr)
 {
@@ -313,15 +579,17 @@ generate_fs_loop(struct gallivm_state *gallivm,
    const struct tgsi_token *tokens = shader->base.tokens;
    struct lp_type int_type = lp_int_type(type);
    LLVMTypeRef vec_type, int_vec_type;
-   LLVMValueRef mask_ptr, mask_val;
+   LLVMValueRef mask_ptr = NULL, mask_val = NULL;
    LLVMValueRef consts_ptr, num_consts_ptr;
    LLVMValueRef ssbo_ptr, num_ssbo_ptr;
    LLVMValueRef z;
    LLVMValueRef z_value, s_value;
    LLVMValueRef z_fb, s_fb;
+   LLVMValueRef depth_ptr;
    LLVMValueRef stencil_refs[2];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
-   struct lp_build_for_loop_state loop_state;
+   LLVMValueRef zs_samples = lp_build_const_int32(gallivm, key->zsbuf_nr_samples);
+   struct lp_build_for_loop_state loop_state, sample_loop_state;
    struct lp_build_mask_context mask;
    /*
     * TODO: figure out if simple_shader optimization is really worthwile to
@@ -333,6 +601,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
                             shader->info.base.num_instructions < 8) && 0;
    const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
                                      util_blend_state_is_dual(&key->blend, 0);
+   const bool post_depth_coverage = shader->info.base.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE];
    unsigned attrib;
    unsigned chan;
    unsigned cbuf;
@@ -345,7 +614,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
    /* truncate then sign extend. */
    system_values.front_facing = LLVMBuildTrunc(gallivm->builder, facing, LLVMInt1TypeInContext(gallivm->context), "");
    system_values.front_facing = LLVMBuildSExt(gallivm->builder, system_values.front_facing, LLVMInt32TypeInContext(gallivm->context), "");
-
+   system_values.view_index = lp_jit_thread_data_raster_state_view_index(gallivm,
+                                                                         thread_data_ptr);
    if (key->depth.enabled ||
        key->stencil[0].enabled) {
 
@@ -406,24 +676,19 @@ generate_fs_loop(struct gallivm_state *gallivm,
    ssbo_ptr = lp_jit_context_ssbos(gallivm, context_ptr);
    num_ssbo_ptr = lp_jit_context_num_ssbos(gallivm, context_ptr);
 
-   lp_build_for_loop_begin(&loop_state, gallivm,
-                           lp_build_const_int32(gallivm, 0),
-                           LLVMIntULT,
-                           num_loop,
-                           lp_build_const_int32(gallivm, 1));
-
-   mask_ptr = LLVMBuildGEP(builder, mask_store,
-                           &loop_state.counter, 1, "mask_ptr");
-   mask_val = LLVMBuildLoad(builder, mask_ptr, "");
-
    memset(outputs, 0, sizeof outputs);
 
+   /* Allocate color storage for each fragment sample */
+   LLVMValueRef color_store_size = num_loop;
+   if (key->min_samples > 1)
+      color_store_size = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, key->min_samples), "");
+
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
          out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
                                                        lp_build_vec_type(gallivm,
                                                                          type),
-                                                       num_loop, "color");
+                                                       color_store_size, "color");
       }
    }
    if (dual_source_blend) {
@@ -432,10 +697,41 @@ generate_fs_loop(struct gallivm_state *gallivm,
          out_color[1][chan] = lp_build_array_alloca(gallivm,
                                                     lp_build_vec_type(gallivm,
                                                                       type),
-                                                    num_loop, "color1");
+                                                    color_store_size, "color1");
       }
    }
 
+   lp_build_for_loop_begin(&loop_state, gallivm,
+                           lp_build_const_int32(gallivm, 0),
+                           LLVMIntULT,
+                           num_loop,
+                           lp_build_const_int32(gallivm, 1));
+
+   LLVMValueRef sample_mask_in;
+   if (key->multisample) {
+      sample_mask_in = lp_build_const_int_vec(gallivm, type, 0);
+      /* create shader execution mask by combining all sample masks. */
+      for (unsigned s = 0; s < key->coverage_samples; s++) {
+         LLVMValueRef s_mask_idx = LLVMBuildMul(builder, num_loop, lp_build_const_int32(gallivm, s), "");
+         s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+         LLVMValueRef s_mask = lp_build_pointer_get(builder, mask_store, s_mask_idx);
+         if (s == 0)
+            mask_val = s_mask;
+         else
+            mask_val = LLVMBuildOr(builder, s_mask, mask_val, "");
+
+         LLVMValueRef mask_in = LLVMBuildAnd(builder, s_mask, lp_build_const_int_vec(gallivm, type, (1ll << s)), "");
+         sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
+      }
+   } else {
+      sample_mask_in = lp_build_const_int_vec(gallivm, type, 1);
+      mask_ptr = LLVMBuildGEP(builder, mask_store,
+                              &loop_state.counter, 1, "mask_ptr");
+      mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+
+      LLVMValueRef mask_in = LLVMBuildAnd(builder, mask_val, lp_build_const_int_vec(gallivm, type, 1), "");
+      sample_mask_in = LLVMBuildOr(builder, sample_mask_in, mask_in, "");
+   }
 
    /* 'mask' will control execution based on quad's pixel alive/killed state */
    lp_build_mask_begin(&mask, gallivm, type, mask_val);
@@ -443,9 +739,70 @@ generate_fs_loop(struct gallivm_state *gallivm,
    if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
       lp_build_mask_check(&mask);
 
-   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
+   /* Create storage for recombining sample masks after early Z pass. */
+   LLVMValueRef s_mask_or = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, type), "cov_mask_early_depth");
+   LLVMBuildStore(builder, LLVMConstNull(lp_build_int_vec_type(gallivm, type)), s_mask_or);
+
+   /* Create storage for post depth sample mask */
+   LLVMValueRef post_depth_sample_mask_in = NULL;
+   if (post_depth_coverage)
+      post_depth_sample_mask_in = lp_build_alloca(gallivm, int_vec_type, "post_depth_sample_mask_in");
+
+   LLVMValueRef s_mask = NULL, s_mask_ptr = NULL;
+   LLVMValueRef z_sample_value_store = NULL, s_sample_value_store = NULL;
+   LLVMValueRef z_fb_store = NULL, s_fb_store = NULL;
+   LLVMTypeRef z_type = NULL, z_fb_type = NULL;
+
+   /* Run early depth once per sample */
+   if (key->multisample) {
+
+      if (zs_format_desc) {
+         struct lp_type zs_type = lp_depth_type(zs_format_desc, type.length);
+         struct lp_type z_type = zs_type;
+         struct lp_type s_type = zs_type;
+         if (zs_format_desc->block.bits < type.width)
+            z_type.width = type.width;
+         if (zs_format_desc->block.bits == 8)
+            s_type.width = type.width;
+
+         else if (zs_format_desc->block.bits > 32) {
+            z_type.width = z_type.width / 2;
+            s_type.width = s_type.width / 2;
+            s_type.floating = 0;
+         }
+         z_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type),
+                                                      zs_samples, "z_sample_store");
+         s_sample_value_store = lp_build_array_alloca(gallivm, lp_build_int_vec_type(gallivm, type),
+                                                      zs_samples, "s_sample_store");
+         z_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, z_type),
+                                            zs_samples, "z_fb_store");
+         s_fb_store = lp_build_array_alloca(gallivm, lp_build_vec_type(gallivm, s_type),
+                                            zs_samples, "s_fb_store");
+      }
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      s_mask = LLVMBuildAnd(builder, s_mask, mask_val, "");
+   }
+
+
+   /* for multisample, Z needs to be interpolated at the sample points for testing. */
+   lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL);
    z = interp->pos[2];
 
+   depth_ptr = depth_base_ptr;
+   if (key->multisample) {
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, "");
+      depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, "");
+   }
+
    if (depth_mode & EARLY_DEPTH_TEST) {
       /*
        * Clamp according to ARB_depth_clamp semantics.
@@ -463,12 +820,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   key->stencil,
                                   type,
                                   zs_format_desc,
-                                  &mask,
+                                  key->multisample ? NULL : &mask,
+                                  &s_mask,
                                   stencil_refs,
                                   z, z_fb, s_fb,
                                   facing,
                                   &z_value, &s_value,
-                                  !simple_shader);
+                                  !simple_shader && !key->multisample);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type,
@@ -482,17 +840,114 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * stencil test otherwise new stencil values may not get written if all
        * fragments got killed by depth/stencil test.
        */
-      if (!simple_shader && key->stencil[0].enabled)
+      if (!simple_shader && key->stencil[0].enabled && !key->multisample)
          lp_build_mask_check(&mask);
+
+      if (key->multisample) {
+         z_fb_type = LLVMTypeOf(z_fb);
+         z_type = LLVMTypeOf(z_value);
+         lp_build_pointer_set(builder, z_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, z_value, lp_build_int_vec_type(gallivm, type), ""));
+         lp_build_pointer_set(builder, s_sample_value_store, sample_loop_state.counter, LLVMBuildBitCast(builder, s_value, lp_build_int_vec_type(gallivm, type), ""));
+         lp_build_pointer_set(builder, z_fb_store, sample_loop_state.counter, z_fb);
+         lp_build_pointer_set(builder, s_fb_store, sample_loop_state.counter, s_fb);
+      }
    }
 
-   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+   if (key->multisample) {
+      /*
+       * Store the post-early Z coverage mask.
+       * Recombine the resulting coverage masks post early Z into the fragment
+       * shader execution mask.
+       */
+      LLVMValueRef tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
+      tmp_s_mask_or = LLVMBuildOr(builder, tmp_s_mask_or, s_mask, "");
+      LLVMBuildStore(builder, tmp_s_mask_or, s_mask_or);
+
+      if (post_depth_coverage) {
+         LLVMValueRef mask_bit_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         LLVMValueRef post_depth_mask_in = LLVMBuildLoad(builder, post_depth_sample_mask_in, "");
+         mask_bit_idx = LLVMBuildAnd(builder, s_mask, lp_build_broadcast(gallivm, int_vec_type, mask_bit_idx), "");
+         post_depth_mask_in = LLVMBuildOr(builder, post_depth_mask_in, mask_bit_idx, "");
+         LLVMBuildStore(builder, post_depth_mask_in, post_depth_sample_mask_in);
+      }
+
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+
+      lp_build_for_loop_end(&sample_loop_state);
+
+      /* recombine all the coverage masks into the shader exec mask. */
+      tmp_s_mask_or = LLVMBuildLoad(builder, s_mask_or, "");
+      lp_build_mask_update(&mask, tmp_s_mask_or);
+
+      if (key->min_samples == 1) {
+         /* for multisample, Z needs to be re-interpolated at the pixel center */
+         lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, NULL);
+         z = interp->pos[2];
+         lp_build_mask_update(&mask, tmp_s_mask_or);
+      }
+   } else {
+      if (post_depth_coverage) {
+         LLVMValueRef post_depth_mask_in = LLVMBuildAnd(builder, lp_build_mask_value(&mask), lp_build_const_int_vec(gallivm, type, 1), "");
+         LLVMBuildStore(builder, post_depth_mask_in, post_depth_sample_mask_in);
+      }
+   }
+
+   LLVMValueRef out_sample_mask_storage = NULL;
+   if (shader->info.base.writes_samplemask) {
+      out_sample_mask_storage = lp_build_alloca(gallivm, int_vec_type, "write_mask");
+      if (key->min_samples > 1)
+         LLVMBuildStore(builder, LLVMConstNull(int_vec_type), out_sample_mask_storage);
+   }
+
+   if (post_depth_coverage) {
+      system_values.sample_mask_in = LLVMBuildLoad(builder, post_depth_sample_mask_in, "");
+   }
+   else
+      system_values.sample_mask_in = sample_mask_in;
+   if (key->multisample && key->min_samples > 1) {
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT,
+                              lp_build_const_int32(gallivm, key->min_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      lp_build_mask_force(&mask, s_mask);
+      lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, sample_loop_state.counter);
+      system_values.sample_id = sample_loop_state.counter;
+      system_values.sample_mask_in = LLVMBuildAnd(builder, system_values.sample_mask_in,
+                                                  lp_build_broadcast(gallivm, int_vec_type,
+                                                                     LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "")), "");
+   } else {
+      system_values.sample_id = lp_build_const_int32(gallivm, 0);
+
+   }
+   system_values.sample_pos = sample_pos_array;
+
+   lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter, mask_store, sample_loop_state.counter);
+
+   struct lp_build_fs_llvm_iface fs_iface = {
+     .base.interp_fn = fs_interp,
+     .base.fb_fetch = fs_fb_fetch,
+     .interp = interp,
+     .loop_state = &loop_state,
+     .sample_id = system_values.sample_id,
+     .mask_store = mask_store,
+     .color_ptr_ptr = color_ptr_ptr,
+     .color_stride_ptr = color_stride_ptr,
+     .color_sample_stride_ptr = color_sample_stride_ptr,
+     .key = key,
+   };
 
    struct lp_build_tgsi_params params;
    memset(&params, 0, sizeof(params));
 
    params.type = type;
    params.mask = &mask;
+   params.fs_iface = &fs_iface.base;
    params.consts_ptr = consts_ptr;
    params.const_sizes_ptr = num_consts_ptr;
    params.system_values = &system_values;
@@ -544,29 +999,121 @@ generate_fs_loop(struct gallivm_state *gallivm,
       if (color0 != -1 && outputs[color0][3]) {
          LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
 
-         lp_build_alpha_to_coverage(gallivm, type,
-                                    &mask, alpha,
-                                    (depth_mode & LATE_DEPTH_TEST) != 0);
+         if (!key->multisample) {
+            lp_build_alpha_to_coverage(gallivm, type,
+                                       &mask, alpha,
+                                       (depth_mode & LATE_DEPTH_TEST) != 0);
+         } else {
+            lp_build_sample_alpha_to_coverage(gallivm, type, key->coverage_samples, num_loop,
+                                              loop_state.counter,
+                                              mask_store, alpha);
+         }
+      }
+   }
+   if (key->blend.alpha_to_one && key->multisample) {
+      for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) {
+         unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+         if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+             ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+            if (outputs[cbuf][3]) {
+               LLVMBuildStore(builder, lp_build_const_vec(gallivm, type, 1.0), outputs[cbuf][3]);
+            }
       }
    }
-
    if (shader->info.base.writes_samplemask) {
+      LLVMValueRef output_smask = NULL;
       int smaski = find_output_by_semantic(&shader->info.base,
                                            TGSI_SEMANTIC_SAMPLEMASK,
                                            0);
-      LLVMValueRef smask;
       struct lp_build_context smask_bld;
       lp_build_context_init(&smask_bld, gallivm, int_type);
 
       assert(smaski >= 0);
-      smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
-      /*
-       * Pixel is alive according to the first sample in the mask.
-       */
-      smask = LLVMBuildBitCast(builder, smask, smask_bld.vec_type, "");
-      smask = lp_build_and(&smask_bld, smask, smask_bld.one);
-      smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, smask, smask_bld.zero);
-      lp_build_mask_update(&mask, smask);
+      output_smask = LLVMBuildLoad(builder, outputs[smaski][0], "smask");
+      output_smask = LLVMBuildBitCast(builder, output_smask, smask_bld.vec_type, "");
+      if (!key->multisample && key->no_ms_sample_mask_out) {
+         output_smask = lp_build_and(&smask_bld, output_smask, smask_bld.one);
+         output_smask = lp_build_cmp(&smask_bld, PIPE_FUNC_NOTEQUAL, output_smask, smask_bld.zero);
+         lp_build_mask_update(&mask, output_smask);
+      }
+
+      if (key->min_samples > 1) {
+         /* only the bit corresponding to this sample is to be used. */
+         LLVMValueRef tmp_mask = LLVMBuildLoad(builder, out_sample_mask_storage, "tmp_mask");
+         LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, lp_build_broadcast(gallivm, int_vec_type, out_smask_idx), "");
+         output_smask = LLVMBuildOr(builder, tmp_mask, smask_bit, "");
+      }
+
+      LLVMBuildStore(builder, output_smask, out_sample_mask_storage);
+   }
+
+   /* Color write - per fragment sample */
+   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+   {
+      unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+      if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+           ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+      {
+         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+            if(outputs[attrib][chan]) {
+               /* XXX: just initialize outputs to point at colors[] and
+                * skip this.
+                */
+               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+               LLVMValueRef color_ptr;
+               LLVMValueRef color_idx = loop_state.counter;
+               if (key->min_samples > 1)
+                  color_idx = LLVMBuildAdd(builder, color_idx,
+                                           LLVMBuildMul(builder, sample_loop_state.counter, num_loop, ""), "");
+               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
+                                        &color_idx, 1, "");
+               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
+               LLVMBuildStore(builder, out, color_ptr);
+            }
+         }
+      }
+   }
+
+   if (key->multisample && key->min_samples > 1) {
+      LLVMBuildStore(builder, lp_build_mask_value(&mask), s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
+   }
+
+   if (key->multisample) {
+      /* execute depth test for each sample */
+      lp_build_for_loop_begin(&sample_loop_state, gallivm,
+                              lp_build_const_int32(gallivm, 0),
+                              LLVMIntULT, lp_build_const_int32(gallivm, key->coverage_samples),
+                              lp_build_const_int32(gallivm, 1));
+
+      /* load the per-sample coverage mask */
+      LLVMValueRef s_mask_idx = LLVMBuildMul(builder, sample_loop_state.counter, num_loop, "");
+      s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_state.counter, "");
+      s_mask_ptr = LLVMBuildGEP(builder, mask_store, &s_mask_idx, 1, "");
+
+      /* combine the execution mask post fragment shader with the coverage mask. */
+      s_mask = LLVMBuildLoad(builder, s_mask_ptr, "");
+      if (key->min_samples == 1)
+         s_mask = LLVMBuildAnd(builder, s_mask, lp_build_mask_value(&mask), "");
+
+      /* if the shader writes sample mask use that */
+      if (shader->info.base.writes_samplemask) {
+         LLVMValueRef out_smask_idx = LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), sample_loop_state.counter, "");
+         out_smask_idx = lp_build_broadcast(gallivm, int_vec_type, out_smask_idx);
+         LLVMValueRef output_smask = LLVMBuildLoad(builder, out_sample_mask_storage, "");
+         LLVMValueRef smask_bit = LLVMBuildAnd(builder, output_smask, out_smask_idx, "");
+         LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int_vec(gallivm, int_type, 0), "");
+         smask_bit = LLVMBuildSExt(builder, cmp, int_vec_type, "");
+
+         s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+      }
+   }
+
+   depth_ptr = depth_base_ptr;
+   if (key->multisample) {
+      LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_loop_state.counter, depth_sample_stride, "");
+      depth_ptr = LLVMBuildGEP(builder, depth_ptr, &sample_offset, 1, "");
    }
 
    /* Late Z test */
@@ -579,13 +1126,25 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                           0);
       if (pos0 != -1 && outputs[pos0][2]) {
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+      } else {
+         if (key->multisample) {
+            lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter, key->multisample ? sample_loop_state.counter : NULL);
+            z = interp->pos[2];
+         }
       }
+
       /*
        * Clamp according to ARB_depth_clamp semantics.
        */
       if (key->depth_clamp) {
          z = lp_build_depth_clamp(gallivm, builder, type, context_ptr,
                                   thread_data_ptr, z);
+      } else {
+         struct lp_build_context f32_bld;
+         lp_build_context_init(&f32_bld, gallivm, type);
+         z = lp_build_clamp(&f32_bld, z,
+                            lp_build_const_vec(gallivm, type, 0.0),
+                            lp_build_const_vec(gallivm, type, 1.0));
       }
 
       if (s_out != -1 && outputs[s_out][1]) {
@@ -607,7 +1166,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   key->stencil,
                                   type,
                                   zs_format_desc,
-                                  &mask,
+                                  key->multisample ? NULL : &mask,
+                                  &s_mask,
                                   stencil_refs,
                                   z, z_fb, s_fb,
                                   facing,
@@ -629,46 +1189,36 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * depth value, update from zs_value with the new mask value and
        * write that out.
        */
+      if (key->multisample) {
+         z_value = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_sample_value_store, sample_loop_state.counter), z_type, "");;
+         s_value = lp_build_pointer_get(builder, s_sample_value_store, sample_loop_state.counter);
+         z_fb = LLVMBuildBitCast(builder, lp_build_pointer_get(builder, z_fb_store, sample_loop_state.counter), z_fb_type, "");
+         s_fb = lp_build_pointer_get(builder, s_fb_store, sample_loop_state.counter);
+      }
       lp_build_depth_stencil_write_swizzled(gallivm, type,
                                             zs_format_desc, key->resource_1d,
-                                            &mask, z_fb, s_fb, loop_state.counter,
+                                            key->multisample ? s_mask : lp_build_mask_value(&mask), z_fb, s_fb, loop_state.counter,
                                             depth_ptr, depth_stride,
                                             z_value, s_value);
    }
 
-
-   /* Color write  */
-   for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
-   {
-      unsigned cbuf = shader->info.base.output_semantic_index[attrib];
-      if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
-           ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
-      {
-         for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-            if(outputs[attrib][chan]) {
-               /* XXX: just initialize outputs to point at colors[] and
-                * skip this.
-                */
-               LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
-               LLVMValueRef color_ptr;
-               color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
-                                        &loop_state.counter, 1, "");
-               lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
-               LLVMBuildStore(builder, out, color_ptr);
-            }
-         }
-      }
-   }
-
    if (key->occlusion_count) {
       LLVMValueRef counter = lp_jit_thread_data_counter(gallivm, thread_data_ptr);
       lp_build_name(counter, "counter");
+
       lp_build_occlusion_count(gallivm, type,
-                               lp_build_mask_value(&mask), counter);
+                               key->multisample ? s_mask : lp_build_mask_value(&mask), counter);
+   }
+
+   if (key->multisample) {
+      /* store the sample mask for this loop */
+      LLVMBuildStore(builder, s_mask, s_mask_ptr);
+      lp_build_for_loop_end(&sample_loop_state);
    }
 
    mask_val = lp_build_mask_end(&mask);
-   LLVMBuildStore(builder, mask_val, mask_ptr);
+   if (!key->multisample)
+      LLVMBuildStore(builder, mask_val, mask_ptr);
    lp_build_for_loop_end(&loop_state);
 }
 
@@ -921,7 +1471,10 @@ load_unswizzled_block(struct gallivm_state *gallivm,
                       LLVMValueRef* dst,
                       struct lp_type dst_type,
                       unsigned dst_count,
-                      unsigned dst_alignment)
+                      unsigned dst_alignment,
+                      LLVMValueRef x_offset,
+                      LLVMValueRef y_offset,
+                      bool fb_fetch_twiddle)
 {
    LLVMBuilderRef builder = gallivm->builder;
    unsigned row_size = dst_count / block_height;
@@ -934,8 +1487,28 @@ load_unswizzled_block(struct gallivm_state *gallivm,
       unsigned x = i % row_size;
       unsigned y = i / row_size;
 
-      LLVMValueRef bx = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
-      LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, "");
+      if (block_height == 2 && dst_count == 8 && fb_fetch_twiddle) {
+         /* remap the raw slots into the fragment shader execution mode. */
+         /* slot bit 0 -> x within a 2x2 quad, bit 1 -> y, bit 2 -> which quad along x. */
+         x = (i & 1) + ((i >> 2) << 1);
+         y = (i & 2) >> 1;
+      }
+
+      LLVMValueRef x_val;
+      if (x_offset) {
+         x_val = lp_build_const_int32(gallivm, x);
+         if (x_offset)
+            x_val = LLVMBuildAdd(builder, x_val, x_offset, "");
+         x_val = LLVMBuildMul(builder, x_val, lp_build_const_int32(gallivm, (dst_type.width / 8) * dst_type.length), "");
+      } else
+         x_val = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
+
+      LLVMValueRef bx = x_val;
+
+      LLVMValueRef y_val = lp_build_const_int32(gallivm, y);
+      if (y_offset)
+         y_val = LLVMBuildAdd(builder, y_val, y_offset, "");
+      LLVMValueRef by = LLVMBuildMul(builder, y_val, stride, "");
 
       LLVMValueRef gep[2];
       LLVMValueRef dst_ptr;
@@ -999,89 +1572,6 @@ store_unswizzled_block(struct gallivm_state *gallivm,
 }
 
 
-/**
- * Checks if a format description is an arithmetic format
- *
- * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
- */
-static inline boolean
-is_arithmetic_format(const struct util_format_description *format_desc)
-{
-   boolean arith = false;
-   unsigned i;
-
-   for (i = 0; i < format_desc->nr_channels; ++i) {
-      arith |= format_desc->channel[i].size != format_desc->channel[0].size;
-      arith |= (format_desc->channel[i].size % 8) != 0;
-   }
-
-   return arith;
-}
-
-
-/**
- * Checks if this format requires special handling due to required expansion
- * to floats for blending, and furthermore has "natural" packed AoS -> unpacked
- * SoA conversion.
- */
-static inline boolean
-format_expands_to_float_soa(const struct util_format_description *format_desc)
-{
-   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
-       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
-      return true;
-   }
-   return false;
-}
-
-
-/**
- * Retrieves the type representing the memory layout for a format
- *
- * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
- */
-static inline void
-lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
-                             struct lp_type* type)
-{
-   unsigned i;
-   unsigned chan;
-
-   if (format_expands_to_float_soa(format_desc)) {
-      /* just make this a uint with width of block */
-      type->floating = false;
-      type->fixed = false;
-      type->sign = false;
-      type->norm = false;
-      type->width = format_desc->block.bits;
-      type->length = 1;
-      return;
-   }
-
-   for (i = 0; i < 4; i++)
-      if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
-         break;
-   chan = i;
-
-   memset(type, 0, sizeof(struct lp_type));
-   type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
-   type->fixed    = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
-   type->sign     = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
-   type->norm     = format_desc->channel[chan].normalized;
-
-   if (is_arithmetic_format(format_desc)) {
-      type->width = 0;
-      type->length = 1;
-
-      for (i = 0; i < format_desc->nr_channels; ++i) {
-         type->width += format_desc->channel[i].size;
-      }
-   } else {
-      type->width = format_desc->channel[chan].size;
-      type->length = format_desc->nr_channels;
-   }
-}
-
 
 /**
  * Retrieves the type for a format which is usable in the blending code.
@@ -1590,6 +2080,7 @@ convert_from_blend_type(struct gallivm_state *gallivm,
       for (j = 0; j < src_fmt->nr_channels; ++j) {
          unsigned mask = 0;
          unsigned sa = src_fmt->channel[j].shift;
+         unsigned sz_a = src_fmt->channel[j].size;
 #if UTIL_ARCH_LITTLE_ENDIAN
          unsigned from_lsb = j;
 #else
@@ -1618,6 +2109,10 @@ convert_from_blend_type(struct gallivm_state *gallivm,
          if (src_type.norm) {
             chans[j] = scale_bits(gallivm, blend_type.width,
                                   src_fmt->channel[j].size, chans[j], src_type);
+         } else if (!src_type.floating && sz_a < blend_type.width) {
+            LLVMValueRef mask_val = lp_build_const_int_vec(gallivm, src_type, (1UL << sz_a) - 1);
+            LLVMValueRef mask = LLVMBuildICmp(builder, LLVMIntUGT, chans[j], mask_val, "");
+            chans[j] = LLVMBuildSelect(builder, mask, mask_val, chans[j], "");
          }
 
          /* Insert bits */
@@ -1868,7 +2363,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
          continue;
       }
 
-      /* Ensure we havn't already found all channels */
+      /* Ensure we haven't already found all channels */
       if (dst_channels >= out_format_desc->nr_channels) {
          continue;
       }
@@ -2294,7 +2789,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
    if (is_1d) {
       load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
-                            dst, ls_type, dst_count / 4, dst_alignment);
+                            dst, ls_type, dst_count / 4, dst_alignment, NULL, NULL, false);
       for (i = dst_count / 4; i < dst_count; i++) {
          dst[i] = lp_build_undef(gallivm, ls_type);
       }
@@ -2302,7 +2797,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    }
    else {
       load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
-                            dst, ls_type, dst_count, dst_alignment);
+                            dst, ls_type, dst_count, dst_alignment, NULL, NULL, false);
    }
 
 
@@ -2442,7 +2937,7 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
    LLVMTypeRef blend_vec_type;
-   LLVMTypeRef arg_types[13];
+   LLVMTypeRef arg_types[15];
    LLVMTypeRef func_type;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
@@ -2454,8 +2949,10 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef dady_ptr;
    LLVMValueRef color_ptr_ptr;
    LLVMValueRef stride_ptr;
+   LLVMValueRef color_sample_stride_ptr;
    LLVMValueRef depth_ptr;
    LLVMValueRef depth_stride;
+   LLVMValueRef depth_sample_stride;
    LLVMValueRef mask_input;
    LLVMValueRef thread_data_ptr;
    LLVMBasicBlockRef block;
@@ -2463,8 +2960,8 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_build_sampler_soa *sampler;
    struct lp_build_image_soa *image;
    struct lp_build_interp_soa_context interp;
-   LLVMValueRef fs_mask[16 / 4];
-   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
+   LLVMValueRef fs_mask[(16 / 4) * LP_MAX_SAMPLES];
+   LLVMValueRef fs_out_color[LP_MAX_SAMPLES][PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
    LLVMValueRef function;
    LLVMValueRef facing;
    unsigned num_fs;
@@ -2519,8 +3016,8 @@ generate_fragment(struct llvmpipe_context *lp,
 
    blend_vec_type = lp_build_vec_type(gallivm, blend_type);
 
-   snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
-            shader->no, variant->no, partial_mask ? "partial" : "whole");
+   snprintf(func_name, sizeof(func_name), "fs_variant_%s",
+            partial_mask ? "partial" : "whole");
 
    arg_types[0] = variant->jit_context_ptr_type;       /* context */
    arg_types[1] = int32_type;                          /* x */
@@ -2529,12 +3026,14 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
    arg_types[6] = LLVMPointerType(fs_elem_type, 0);    /* dady */
-   arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
+   arg_types[7] = LLVMPointerType(LLVMPointerType(int8_type, 0), 0);  /* color */
    arg_types[8] = LLVMPointerType(int8_type, 0);       /* depth */
-   arg_types[9] = int32_type;                          /* mask_input */
+   arg_types[9] = LLVMInt64TypeInContext(gallivm->context);  /* mask_input */
    arg_types[10] = variant->jit_thread_data_ptr_type;  /* per thread data */
    arg_types[11] = LLVMPointerType(int32_type, 0);     /* stride */
    arg_types[12] = int32_type;                         /* depth_stride */
+   arg_types[13] = LLVMPointerType(int32_type, 0);     /* color sample strides */
+   arg_types[14] = int32_type;                         /* depth sample stride */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                 arg_types, ARRAY_SIZE(arg_types), 0);
@@ -2551,6 +3050,9 @@ generate_fragment(struct llvmpipe_context *lp,
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
          lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
 
+   if (variant->gallivm->cache->data_size)
+      return;
+
    context_ptr  = LLVMGetParam(function, 0);
    x            = LLVMGetParam(function, 1);
    y            = LLVMGetParam(function, 2);
@@ -2564,6 +3066,8 @@ generate_fragment(struct llvmpipe_context *lp,
    thread_data_ptr  = LLVMGetParam(function, 10);
    stride_ptr   = LLVMGetParam(function, 11);
    depth_stride = LLVMGetParam(function, 12);
+   color_sample_stride_ptr = LLVMGetParam(function, 13);
+   depth_sample_stride = LLVMGetParam(function, 14);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x, "x");
@@ -2577,6 +3081,8 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(stride_ptr, "stride_ptr");
    lp_build_name(depth_stride, "depth_stride");
+   lp_build_name(color_sample_stride_ptr, "color_sample_stride_ptr");
+   lp_build_name(depth_sample_stride, "depth_sample_stride");
 
    /*
     * Function body
@@ -2607,8 +3113,8 @@ generate_fragment(struct llvmpipe_context *lp,
    }
 
    /* code generated texture sampling */
-   sampler = lp_llvm_sampler_soa_create(key->samplers);
-   image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key));
+   sampler = lp_llvm_sampler_soa_create(key->samplers, key->nr_samplers);
+   image = lp_llvm_image_soa_create(lp_fs_variant_key_images(key), key->nr_images);
 
    num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
    /* for 1d resources only run "upper half" of stamp */
@@ -2618,8 +3124,29 @@ generate_fragment(struct llvmpipe_context *lp,
    {
       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
+      LLVMValueRef num_loop_samp = lp_build_const_int32(gallivm, num_fs * key->coverage_samples);
       LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
-                                                      num_loop, "mask_store");
+                                                      num_loop_samp, "mask_store");
+
+      LLVMTypeRef flt_type = LLVMFloatTypeInContext(gallivm->context);
+      LLVMValueRef glob_sample_pos = LLVMAddGlobal(gallivm->module, LLVMArrayType(flt_type, key->coverage_samples * 2), "");
+      LLVMValueRef sample_pos_array;
+
+      if (key->multisample && key->coverage_samples == 4) {
+         LLVMValueRef sample_pos_arr[8];
+         for (unsigned i = 0; i < 4; i++) {
+            sample_pos_arr[i * 2] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][0]);
+            sample_pos_arr[i * 2 + 1] = LLVMConstReal(flt_type, lp_sample_pos_4x[i][1]);
+         }
+         sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 8);
+      } else {
+         LLVMValueRef sample_pos_arr[2];
+         sample_pos_arr[0] = LLVMConstReal(flt_type, 0.5);
+         sample_pos_arr[1] = LLVMConstReal(flt_type, 0.5);
+         sample_pos_array = LLVMConstArray(LLVMFloatTypeInContext(gallivm->context), sample_pos_arr, 2);
+      }
+      LLVMSetInitializer(glob_sample_pos, sample_pos_array);
+
       LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
       boolean pixel_center_integer =
          shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
@@ -2634,25 +3161,53 @@ generate_fragment(struct llvmpipe_context *lp,
                                shader->info.base.num_inputs,
                                inputs,
                                pixel_center_integer,
+                               key->coverage_samples, glob_sample_pos,
+                               num_loop,
                                key->depth_clamp,
                                builder, fs_type,
                                a0_ptr, dadx_ptr, dady_ptr,
                                x, y);
 
       for (i = 0; i < num_fs; i++) {
-         LLVMValueRef mask;
-         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-         LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
-                                              &indexi, 1, "mask_ptr");
-
-         if (partial_mask) {
-            mask = generate_quad_mask(gallivm, fs_type,
-                                      i*fs_type.length/4, mask_input);
-         }
-         else {
-            mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+         if (key->multisample) {
+            LLVMValueRef smask_val = LLVMBuildLoad(builder, lp_jit_context_sample_mask(gallivm, context_ptr), "");
+
+            /*
+             * For multisampling, extract the per-sample mask from the incoming 64-bit mask,
+             * store to the per sample mask storage. Or all of them together to generate
+             * the fragment shader mask. (sample shading TODO).
+             * Take the incoming state coverage mask into account.
+             */
+            for (unsigned s = 0; s < key->coverage_samples; s++) {
+               LLVMValueRef sindexi = lp_build_const_int32(gallivm, i + (s * num_fs));
+               LLVMValueRef sample_mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                                           &sindexi, 1, "sample_mask_ptr");
+               LLVMValueRef s_mask = generate_quad_mask(gallivm, fs_type,
+                                                        i*fs_type.length/4, s, mask_input);
+
+               LLVMValueRef smask_bit = LLVMBuildAnd(builder, smask_val, lp_build_const_int32(gallivm, (1 << s)), "");
+               LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntNE, smask_bit, lp_build_const_int32(gallivm, 0), "");
+               smask_bit = LLVMBuildSExt(builder, cmp, int32_type, "");
+               smask_bit = lp_build_broadcast(gallivm, mask_type, smask_bit);
+
+               s_mask = LLVMBuildAnd(builder, s_mask, smask_bit, "");
+               LLVMBuildStore(builder, s_mask, sample_mask_ptr);
+            }
+         } else {
+            LLVMValueRef mask;
+            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+            LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
+                                                 &indexi, 1, "mask_ptr");
+
+            if (partial_mask) {
+               mask = generate_quad_mask(gallivm, fs_type,
+                                         i*fs_type.length/4, 0, mask_input);
+            }
+            else {
+               mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+            }
+            LLVMBuildStore(builder, mask, mask_ptr);
          }
-         LLVMBuildStore(builder, mask, mask_ptr);
       }
 
       generate_fs_loop(gallivm,
@@ -2660,6 +3215,7 @@ generate_fragment(struct llvmpipe_context *lp,
                        builder,
                        fs_type,
                        context_ptr,
+                       glob_sample_pos,
                        num_loop,
                        &interp,
                        sampler,
@@ -2668,30 +3224,43 @@ generate_fragment(struct llvmpipe_context *lp,
                        color_store,
                        depth_ptr,
                        depth_stride,
+                       depth_sample_stride,
+                       color_ptr_ptr,
+                       stride_ptr,
+                       color_sample_stride_ptr,
                        facing,
                        thread_data_ptr);
 
       for (i = 0; i < num_fs; i++) {
-         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
-         LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
-                                         &indexi, 1, "");
-         fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
-         /* This is fucked up need to reorganize things */
-         for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
-            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-               ptr = LLVMBuildGEP(builder,
-                                  color_store[cbuf * !cbuf0_write_all][chan],
-                                  &indexi, 1, "");
-               fs_out_color[cbuf][chan][i] = ptr;
-            }
+         LLVMValueRef ptr;
+         for (unsigned s = 0; s < key->coverage_samples; s++) {
+            int idx = (i + (s * num_fs));
+            LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx);
+            ptr = LLVMBuildGEP(builder, mask_store, &sindexi, 1, "");
+
+            fs_mask[idx] = LLVMBuildLoad(builder, ptr, "smask");
          }
-         if (dual_source_blend) {
-            /* only support one dual source blend target hence always use output 1 */
-            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
-               ptr = LLVMBuildGEP(builder,
-                                  color_store[1][chan],
-                                  &indexi, 1, "");
-               fs_out_color[1][chan][i] = ptr;
+
+         for (unsigned s = 0; s < key->min_samples; s++) {
+            /* This is fucked up need to reorganize things */
+            int idx = s * num_fs + i;
+            LLVMValueRef sindexi = lp_build_const_int32(gallivm, idx);
+            for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+                  ptr = LLVMBuildGEP(builder,
+                                     color_store[cbuf * !cbuf0_write_all][chan],
+                                     &sindexi, 1, "");
+                  fs_out_color[s][cbuf][chan][i] = ptr;
+               }
+            }
+            if (dual_source_blend) {
+               /* only support one dual source blend target hence always use output 1 */
+               for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+                  ptr = LLVMBuildGEP(builder,
+                                     color_store[1][chan],
+                                     &sindexi, 1, "");
+                  fs_out_color[s][1][chan][i] = ptr;
+               }
             }
          }
       }
@@ -2705,6 +3274,7 @@ generate_fragment(struct llvmpipe_context *lp,
       if (key->cbuf_format[cbuf] != PIPE_FORMAT_NONE) {
          LLVMValueRef color_ptr;
          LLVMValueRef stride;
+         LLVMValueRef sample_stride = NULL;
          LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
 
          boolean do_branch = ((key->depth.enabled
@@ -2717,17 +3287,34 @@ generate_fragment(struct llvmpipe_context *lp,
                                                 &index, 1, ""),
                                    "");
 
-         lp_build_name(color_ptr, "color_ptr%d", cbuf);
-
          stride = LLVMBuildLoad(builder,
                                 LLVMBuildGEP(builder, stride_ptr, &index, 1, ""),
                                 "");
 
-         generate_unswizzled_blend(gallivm, cbuf, variant,
-                                   key->cbuf_format[cbuf],
-                                   num_fs, fs_type, fs_mask, fs_out_color,
-                                   context_ptr, color_ptr, stride,
-                                   partial_mask, do_branch);
+         if (key->cbuf_nr_samples[cbuf] > 1)
+            sample_stride = LLVMBuildLoad(builder,
+                                          LLVMBuildGEP(builder, color_sample_stride_ptr,
+                                                       &index, 1, ""), "");
+
+         for (unsigned s = 0; s < key->cbuf_nr_samples[cbuf]; s++) {
+            unsigned mask_idx = num_fs * (key->multisample ? s : 0);
+            unsigned out_idx = key->min_samples == 1 ? 0 : s;
+            LLVMValueRef out_ptr = color_ptr;;
+
+            if (sample_stride) {
+               LLVMValueRef sample_offset = LLVMBuildMul(builder, sample_stride, lp_build_const_int32(gallivm, s), "");
+               out_ptr = LLVMBuildGEP(builder, out_ptr, &sample_offset, 1, "");
+            }
+            out_ptr = LLVMBuildBitCast(builder, out_ptr, LLVMPointerType(blend_vec_type, 0), "");
+
+            lp_build_name(out_ptr, "color_ptr%d", cbuf);
+
+            generate_unswizzled_blend(gallivm, cbuf, variant,
+                                      key->cbuf_format[cbuf],
+                                      num_fs, fs_type, &fs_mask[mask_idx], fs_out_color[out_idx],
+                                      context_ptr, out_ptr, stride,
+                                      partial_mask, do_branch);
+         }
       }
    }
 
@@ -2747,11 +3334,18 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
    if (key->flatshade) {
       debug_printf("flatshade = 1\n");
    }
+   if (key->multisample) {
+      debug_printf("multisample = 1\n");
+      debug_printf("coverage samples = %d\n", key->coverage_samples);
+      debug_printf("min samples = %d\n", key->min_samples);
+   }
    for (i = 0; i < key->nr_cbufs; ++i) {
       debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
+      debug_printf("cbuf nr_samples[%u] = %d\n", i, key->cbuf_nr_samples[i]);
    }
    if (key->depth.enabled || key->stencil[0].enabled) {
       debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+      debug_printf("depth nr_samples = %d\n", key->zsbuf_nr_samples);
    }
    if (key->depth.enabled) {
       debug_printf("depth.func = %s\n", util_str_func(key->depth.func, TRUE));
@@ -2812,6 +3406,7 @@ dump_fs_variant_key(struct lp_fragment_shader_variant_key *key)
       debug_printf("  .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
       debug_printf("  .apply_min_lod = %u\n", sampler->apply_min_lod);
       debug_printf("  .apply_max_lod = %u\n", sampler->apply_max_lod);
+      debug_printf("  .reduction_mode = %u\n", sampler->reduction_mode);
    }
    for (i = 0; i < key->nr_sampler_views; ++i) {
       const struct lp_static_texture_state *texture = &key->samplers[i].texture_state;
@@ -2859,6 +3454,27 @@ lp_debug_fs_variant(struct lp_fragment_shader_variant *variant)
    debug_printf("\n");
 }
 
+static void
+lp_fs_get_ir_cache_key(struct lp_fragment_shader_variant *variant,
+                            unsigned char ir_sha1_cache_key[20])
+{
+   struct blob blob = { 0 };
+   unsigned ir_size;
+   void *ir_binary;
+
+   blob_init(&blob);
+   nir_serialize(&blob, variant->shader->base.ir.nir, true);
+   ir_binary = blob.data;
+   ir_size = blob.size;
+
+   struct mesa_sha1 ctx;
+   _mesa_sha1_init(&ctx);
+   _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
+   _mesa_sha1_update(&ctx, ir_binary, ir_size);
+   _mesa_sha1_final(&ctx, ir_sha1_cache_key);
+
+   blob_finish(&blob);
+}
 
 /**
  * Generate a new fragment shader variant from the shader code and
@@ -2869,11 +3485,14 @@ generate_variant(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
                  const struct lp_fragment_shader_variant_key *key)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    struct lp_fragment_shader_variant *variant;
    const struct util_format_description *cbuf0_format_desc = NULL;
    boolean fullcolormask;
    char module_name[64];
-
+   unsigned char ir_sha1_cache_key[20];
+   struct lp_cached_code cached = { 0 };
+   bool needs_caching = false;
    variant = MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
    if (!variant)
       return NULL;
@@ -2882,18 +3501,29 @@ generate_variant(struct llvmpipe_context *lp,
    snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
             shader->no, shader->variants_created);
 
-   variant->gallivm = gallivm_create(module_name, lp->context);
+   pipe_reference_init(&variant->reference, 1);
+   lp_fs_reference(lp, &variant->shader, shader);
+
+   memcpy(&variant->key, key, shader->variant_key_size);
+
+   if (shader->base.ir.nir) {
+      lp_fs_get_ir_cache_key(variant, ir_sha1_cache_key);
+
+      lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
+      if (!cached.data_size)
+         needs_caching = true;
+   }
+   variant->gallivm = gallivm_create(module_name, lp->context, &cached);
    if (!variant->gallivm) {
       FREE(variant);
       return NULL;
    }
 
-   variant->shader = shader;
    variant->list_item_global.base = variant;
    variant->list_item_local.base = variant;
    variant->no = shader->variants_created++;
 
-   memcpy(&variant->key, key, shader->variant_key_size);
+
 
    /*
     * Determine whether we are touching all channels in the color buffer.
@@ -2910,6 +3540,7 @@ generate_variant(struct llvmpipe_context *lp,
          fullcolormask &&
          !key->stencil[0].enabled &&
          !key->alpha.enabled &&
+         !key->multisample &&
          !key->blend.alpha_to_coverage &&
          !key->depth.enabled &&
          !shader->info.base.uses_kill &&
@@ -2954,6 +3585,10 @@ generate_variant(struct llvmpipe_context *lp,
       variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
    }
 
+   if (needs_caching) {
+      lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
+   }
+
    gallivm_free_ir(variant->gallivm);
 
    return variant;
@@ -2975,6 +3610,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    if (!shader)
       return NULL;
 
+   pipe_reference_init(&shader->reference, 1);
    shader->no = fs_no++;
    make_empty_list(&shader->variants);
 
@@ -3005,6 +3641,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    for (i = 0; i < shader->info.base.num_inputs; i++) {
       shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
       shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];
+      shader->inputs[i].location = shader->info.base.input_interpolate_loc[i];
 
       switch (shader->info.base.input_interpolate[i]) {
       case TGSI_INTERPOLATE_CONSTANT:
@@ -3066,15 +3703,17 @@ static void
 llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
-
-   if (llvmpipe->fs == fs)
+   struct lp_fragment_shader *lp_fs = (struct lp_fragment_shader *)fs;
+   if (llvmpipe->fs == lp_fs)
       return;
 
-   llvmpipe->fs = (struct lp_fragment_shader *) fs;
-
    draw_bind_fragment_shader(llvmpipe->draw,
-                             (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
+                             (lp_fs ? lp_fs->draw_data : NULL));
 
+   lp_fs_reference(llvmpipe, &llvmpipe->fs, lp_fs);
+
+   /* invalidate the setup link, NEW_FS will make it update */
+   lp_setup_set_fs_variant(llvmpipe->setup, NULL);
    llvmpipe->dirty |= LP_NEW_FS;
 }
 
@@ -3083,9 +3722,10 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
  * Remove shader variant from two lists: the shader's variant list
  * and the context's variant list.
  */
-static void
-llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
-                               struct lp_fragment_shader_variant *variant)
+
+static
+void llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
+                                    struct lp_fragment_shader_variant *variant)
 {
    if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
       debug_printf("llvmpipe: del fs #%u var %u v created %u v cached %u "
@@ -3096,8 +3736,6 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
                    lp->nr_fs_variants, variant->nr_instrs, lp->nr_fs_instrs);
    }
 
-   gallivm_destroy(variant->gallivm);
-
    /* remove from shader's list */
    remove_from_list(&variant->list_item_local);
    variant->shader->variants_cached--;
@@ -3106,10 +3744,32 @@ llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
    remove_from_list(&variant->list_item_global);
    lp->nr_fs_variants--;
    lp->nr_fs_instrs -= variant->nr_instrs;
+}
+
+void
+llvmpipe_destroy_shader_variant(struct llvmpipe_context *lp,
+                               struct lp_fragment_shader_variant *variant)
+{
+   gallivm_destroy(variant->gallivm);
+
+   lp_fs_reference(lp, &variant->shader, NULL);
 
    FREE(variant);
 }
 
+void
+llvmpipe_destroy_fs(struct llvmpipe_context *llvmpipe,
+                    struct lp_fragment_shader *shader)
+{
+   /* Delete draw module's data */
+   draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
+
+   if (shader->base.ir.nir)
+      ralloc_free(shader->base.ir.nir);
+   assert(shader->variants_cached == 0);
+   FREE((void *) shader->base.tokens);
+   FREE(shader);
+}
 
 static void
 llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
@@ -3118,38 +3778,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
    struct lp_fragment_shader *shader = fs;
    struct lp_fs_variant_list_item *li;
 
-   assert(fs != llvmpipe->fs);
-
-   /*
-    * XXX: we need to flush the context until we have some sort of reference
-    * counting in fragment shaders as they may still be binned
-    * Flushing alone might not sufficient we need to wait on it too.
-    */
-   llvmpipe_finish(pipe, __FUNCTION__);
-
    /* Delete all the variants */
    li = first_elem(&shader->variants);
    while(!at_end(&shader->variants, li)) {
       struct lp_fs_variant_list_item *next = next_elem(li);
+      struct lp_fragment_shader_variant *variant;
+      variant = li->base;
       llvmpipe_remove_shader_variant(llvmpipe, li->base);
+      lp_fs_variant_reference(llvmpipe, &variant, NULL);
       li = next;
    }
 
-   /* Delete draw module's data */
-   draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);
-
-   if (shader->base.ir.nir)
-      ralloc_free(shader->base.ir.nir);
-   assert(shader->variants_cached == 0);
-   FREE((void *) shader->base.tokens);
-   FREE(shader);
+   lp_fs_reference(llvmpipe, &shader, NULL);
 }
 
-
-
 static void
 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
                              enum pipe_shader_type shader, uint index,
+                             bool take_ownership,
                              const struct pipe_constant_buffer *cb)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
@@ -3159,7 +3805,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    assert(index < ARRAY_SIZE(llvmpipe->constants[shader]));
 
    /* note: reference counting */
-   util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb);
+   util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb,
+                             take_ownership);
 
    if (constants) {
        if (!(constants->bind & PIPE_BIND_CONSTANT_BUFFER)) {
@@ -3169,7 +3816,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    }
 
    if (shader == PIPE_SHADER_VERTEX ||
-       shader == PIPE_SHADER_GEOMETRY) {
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
       /* Pass the constants to the 'draw' module */
       const unsigned size = cb ? cb->buffer_size : 0;
       const ubyte *data;
@@ -3214,7 +3863,9 @@ llvmpipe_set_shader_buffers(struct pipe_context *pipe,
       util_copy_shader_buffer(&llvmpipe->ssbos[shader][i], buffer);
 
       if (shader == PIPE_SHADER_VERTEX ||
-          shader == PIPE_SHADER_GEOMETRY) {
+          shader == PIPE_SHADER_GEOMETRY ||
+          shader == PIPE_SHADER_TESS_CTRL ||
+          shader == PIPE_SHADER_TESS_EVAL) {
          const unsigned size = buffer ? buffer->buffer_size : 0;
          const ubyte *data = NULL;
          if (buffer && buffer->buffer)
@@ -3234,7 +3885,8 @@ llvmpipe_set_shader_buffers(struct pipe_context *pipe,
 static void
 llvmpipe_set_shader_images(struct pipe_context *pipe,
                             enum pipe_shader_type shader, unsigned start_slot,
-                           unsigned count, const struct pipe_image_view *images)
+                           unsigned count, unsigned unbind_num_trailing_slots,
+                           const struct pipe_image_view *images)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    unsigned i, idx;
@@ -3248,7 +3900,9 @@ llvmpipe_set_shader_images(struct pipe_context *pipe,
 
    llvmpipe->num_images[shader] = start_slot + count;
    if (shader == PIPE_SHADER_VERTEX ||
-       shader == PIPE_SHADER_GEOMETRY) {
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
       draw_set_images(llvmpipe->draw,
                       shader,
                       llvmpipe->images[shader],
@@ -3257,6 +3911,11 @@ llvmpipe_set_shader_images(struct pipe_context *pipe,
       llvmpipe->cs_dirty |= LP_CSNEW_IMAGES;
    else
       llvmpipe->dirty |= LP_NEW_FS_IMAGES;
+
+   if (unbind_num_trailing_slots) {
+      llvmpipe_set_shader_images(pipe, shader, start_slot + count,
+                                 unbind_num_trailing_slots, 0, NULL);
+   }
 }
 
 /**
@@ -3305,10 +3964,12 @@ make_variant_key(struct llvmpipe_context *lp,
       const struct util_format_description *zsbuf_desc =
          util_format_description(zsbuf_format);
 
-      if (lp->depth_stencil->depth.enabled &&
+      if (lp->depth_stencil->depth_enabled &&
           util_format_has_depth(zsbuf_desc)) {
          key->zsbuf_format = zsbuf_format;
-         memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+         key->depth.enabled = lp->depth_stencil->depth_enabled;
+         key->depth.writemask = lp->depth_stencil->depth_writemask;
+         key->depth.func = lp->depth_stencil->depth_func;
       }
       if (lp->depth_stencil->stencil[0].enabled &&
           util_format_has_stencil(zsbuf_desc)) {
@@ -3318,46 +3979,41 @@ make_variant_key(struct llvmpipe_context *lp,
       if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
          key->resource_1d = TRUE;
       }
+      key->zsbuf_nr_samples = util_res_sample_count(lp->framebuffer.zsbuf->texture);
    }
 
    /*
     * Propagate the depth clamp setting from the rasterizer state.
     * depth_clip == 0 implies depth clamping is enabled.
     *
-    * When clip_halfz is enabled, then always clamp the depth values.
-    *
-    * XXX: This is incorrect for GL, but correct for d3d10 (depth
-    * clamp is always active in d3d10, regardless if depth clip is
-    * enabled or not).
-    * (GL has an always-on [0,1] clamp on fs depth output instead
-    * to ensure the depth values stay in range. Doesn't look like
-    * we do that, though...)
     */
-   if (lp->rasterizer->clip_halfz) {
-      key->depth_clamp = 1;
-   } else {
-      key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0;
-   }
+   key->depth_clamp = (lp->rasterizer->depth_clip_near == 0) ? 1 : 0;
 
    /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
    if (!lp->framebuffer.nr_cbufs ||
        !lp->framebuffer.cbufs[0] ||
        !util_format_is_pure_integer(lp->framebuffer.cbufs[0]->format)) {
-      key->alpha.enabled = lp->depth_stencil->alpha.enabled;
+      key->alpha.enabled = lp->depth_stencil->alpha_enabled;
    }
    if(key->alpha.enabled)
-      key->alpha.func = lp->depth_stencil->alpha.func;
+      key->alpha.func = lp->depth_stencil->alpha_func;
    /* alpha.ref_value is passed in jit_context */
 
    key->flatshade = lp->rasterizer->flatshade;
+   key->multisample = lp->rasterizer->multisample;
+   key->no_ms_sample_mask_out = lp->rasterizer->no_ms_sample_mask_out;
    if (lp->active_occlusion_queries && !lp->queries_disabled) {
       key->occlusion_count = TRUE;
    }
 
-   if (lp->framebuffer.nr_cbufs) {
-      memcpy(&key->blend, lp->blend, sizeof key->blend);
-   }
+   memcpy(&key->blend, lp->blend, sizeof key->blend);
 
+   key->coverage_samples = 1;
+   key->min_samples = 1;
+   if (key->multisample) {
+      key->coverage_samples = util_framebuffer_get_num_samples(&lp->framebuffer);
+      key->min_samples = lp->min_samples == 1 ? 1 : key->coverage_samples;
+   }
    key->nr_cbufs = lp->framebuffer.nr_cbufs;
 
    if (!key->blend.independent_blend_enable) {
@@ -3376,6 +4032,7 @@ make_variant_key(struct llvmpipe_context *lp,
          const struct util_format_description *format_desc;
 
          key->cbuf_format[i] = format;
+         key->cbuf_nr_samples[i] = util_res_sample_count(lp->framebuffer.cbufs[i]->texture);
 
          /*
           * Figure out if this is a 1d resource. Note that OpenGL allows crazy
@@ -3435,6 +4092,7 @@ make_variant_key(struct llvmpipe_context *lp,
       else {
          /* no color buffer for this fragment output */
          key->cbuf_format[i] = PIPE_FORMAT_NONE;
+         key->cbuf_nr_samples[i] = 0;
          blend_rt->colormask = 0x0;
          blend_rt->blend_enable = 0;
       }
@@ -3551,8 +4209,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
 
       if (variants_to_cull ||
           lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
-         struct pipe_context *pipe = &lp->pipe;
-
          if (gallivm_debug & GALLIVM_DEBUG_PERF) {
             debug_printf("Evicting FS: %u fs variants,\t%u total variants,"
                          "\t%u instrs,\t%u instrs/variant\n",
@@ -3562,13 +4218,6 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
          }
 
          /*
-          * XXX: we need to flush the context until we have some sort of
-          * reference counting in fragment shaders as they may still be binned
-          * Flushing alone might not be sufficient we need to wait on it too.
-          */
-         llvmpipe_finish(pipe, __FUNCTION__);
-
-         /*
           * We need to re-check lp->nr_fs_variants because an arbitrarliy large
           * number of shader variants (potentially all of them) could be
           * pending for destruction on flush.
@@ -3583,6 +4232,8 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
             assert(item);
             assert(item->base);
             llvmpipe_remove_shader_variant(lp, item->base);
+            struct lp_fragment_shader_variant *variant = item->base;
+            lp_fs_variant_reference(lp, &variant, NULL);
          }
       }
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 0428b5cd2..613e5286a 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -38,7 +38,8 @@
 #include "lp_screen.h"
 #include "lp_state.h"
 #include "lp_debug.h"
-#include "state_tracker/sw_winsys.h"
+#include "frontend/sw_winsys.h"
+#include "lp_flush.h"
 
 
 static void *
@@ -81,7 +82,11 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
 
    /* set the new samplers */
    for (i = 0; i < num; i++) {
-      llvmpipe->samplers[shader][start + i] = samplers[i];
+      void *sampler = NULL;
+
+      if (samplers && samplers[i])
+	 sampler = samplers[i];
+      llvmpipe->samplers[shader][start + i] = sampler;
    }
 
    /* find highest non-null samplers[] entry */
@@ -92,7 +97,10 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
       llvmpipe->num_samplers[shader] = j;
    }
 
-   if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
+   if (shader == PIPE_SHADER_VERTEX ||
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
       draw_set_samplers(llvmpipe->draw,
                         shader,
                         llvmpipe->samplers[shader],
@@ -111,6 +119,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
                            enum pipe_shader_type shader,
                            unsigned start,
                            unsigned num,
+                           unsigned unbind_num_trailing_slots,
                            struct pipe_sampler_view **views)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
@@ -125,17 +134,29 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
 
    /* set the new sampler views */
    for (i = 0; i < num; i++) {
+      struct pipe_sampler_view *view = NULL;
+
+      if (views && views[i])
+	 view = views[i];
       /*
        * Warn if someone tries to set a view created in a different context
        * (which is why we need the hack above in the first place).
        * An assert would be better but st/mesa relies on it...
        */
-      if (views[i] && views[i]->context != pipe) {
+      if (view && view->context != pipe) {
          debug_printf("Illegal setting of sampler_view %d created in another "
                       "context\n", i);
       }
+
+      if (view)
+         llvmpipe_flush_resource(pipe, view->texture, 0, true, false, false, "sampler_view");
       pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
-                                  views[i]);
+                                  view);
+   }
+
+   for (; i < num + unbind_num_trailing_slots; i++) {
+      pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
+                                  NULL);
    }
 
    /* find highest non-null sampler_views[] entry */
@@ -146,7 +167,10 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
       llvmpipe->num_sampler_views[shader] = j;
    }
 
-   if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
+   if (shader == PIPE_SHADER_VERTEX ||
+       shader == PIPE_SHADER_GEOMETRY ||
+       shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL) {
       draw_set_sampler_views(llvmpipe->draw,
                              shader,
                              llvmpipe->sampler_views[shader],
@@ -185,7 +209,7 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe,
 #ifdef DEBUG
      /*
       * This is possibly too lenient, but the primary reason is just
-      * to catch state trackers which forget to initialize this, so
+      * to catch gallium frontends which forget to initialize this, so
       * it only catches clearly impossible view targets.
       */
       if (view->target != texture->target) {
@@ -262,6 +286,8 @@ prepare_shader_sampling(
          unsigned num_layers = tex->depth0;
          unsigned first_level = 0;
          unsigned last_level = 0;
+         unsigned sample_stride = 0;
+         unsigned num_samples = tex->nr_samples;
 
          if (!lp_tex->dt) {
             /* regular texture - setup array of mipmap level offsets */
@@ -275,6 +301,8 @@ prepare_shader_sampling(
                assert(last_level <= res->last_level);
                addr = lp_tex->tex_data;
 
+               sample_stride = lp_tex->sample_stride;
+
                for (j = first_level; j <= last_level; j++) {
                   mip_offsets[j] = lp_tex->mip_offsets[j];
                   row_stride[j] = lp_tex->row_stride[j];
@@ -319,7 +347,7 @@ prepare_shader_sampling(
             struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
             struct sw_winsys *winsys = screen->winsys;
             addr = winsys->displaytarget_map(winsys, lp_tex->dt,
-                                                PIPE_TRANSFER_READ);
+                                                PIPE_MAP_READ);
             row_stride[0] = lp_tex->row_stride[0];
             img_stride[0] = lp_tex->img_stride[0];
             mip_offsets[0] = 0;
@@ -330,6 +358,7 @@ prepare_shader_sampling(
                                  i,
                                  width0, tex->height0, num_layers,
                                  first_level, last_level,
+                                 num_samples, sample_stride,
                                  addr,
                                  row_stride, img_stride, mip_offsets);
       }
@@ -360,6 +389,28 @@ llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp,
    prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY);
 }
 
+/** Forward num/views to prepare_shader_sampling() for PIPE_SHADER_TESS_CTRL.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
+ */
+void
+llvmpipe_prepare_tess_ctrl_sampling(struct llvmpipe_context *lp,
+				    unsigned num,
+				    struct pipe_sampler_view **views)
+{
+   prepare_shader_sampling(lp, num, views, PIPE_SHADER_TESS_CTRL);
+}
+
+/** Forward num/views to prepare_shader_sampling() for PIPE_SHADER_TESS_EVAL.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
+ */
+void
+llvmpipe_prepare_tess_eval_sampling(struct llvmpipe_context *lp,
+				    unsigned num,
+				    struct pipe_sampler_view **views)
+{
+   prepare_shader_sampling(lp, num, views, PIPE_SHADER_TESS_EVAL);
+}
+
 static void
 prepare_shader_images(
    struct llvmpipe_context *lp,
@@ -371,6 +422,7 @@ prepare_shader_images(
    unsigned i;
    uint32_t row_stride;
    uint32_t img_stride;
+   uint32_t sample_stride;
    const void *addr;
 
    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
@@ -389,6 +441,7 @@ prepare_shader_images(
          unsigned width = u_minify(img->width0, view->u.tex.level);
          unsigned height = u_minify(img->height0, view->u.tex.level);
          unsigned num_layers = img->depth0;
+         unsigned num_samples = img->nr_samples;
 
          if (!lp_img->dt) {
             /* regular texture - setup array of mipmap level offsets */
@@ -410,6 +463,7 @@ prepare_shader_images(
 
                row_stride = lp_img->row_stride[view->u.tex.level];
                img_stride = lp_img->img_stride[view->u.tex.level];
+               sample_stride = lp_img->sample_stride;
                addr = (uint8_t *)addr + mip_offset;
             }
             else {
@@ -418,6 +472,7 @@ prepare_shader_images(
                /* probably don't really need to fill that out */
                row_stride = 0;
                img_stride = 0;
+               sample_stride = 0;
 
                /* everything specified in number of elements here. */
                width = view->u.buf.size / view_blocksize;
@@ -433,9 +488,10 @@ prepare_shader_images(
             struct llvmpipe_screen *screen = llvmpipe_screen(img->screen);
             struct sw_winsys *winsys = screen->winsys;
             addr = winsys->displaytarget_map(winsys, lp_img->dt,
-                                                PIPE_TRANSFER_READ);
+                                                PIPE_MAP_READ);
             row_stride = lp_img->row_stride[0];
             img_stride = lp_img->img_stride[0];
+            sample_stride = 0;
             assert(addr);
          }
          draw_set_mapped_image(lp->draw,
@@ -443,7 +499,8 @@ prepare_shader_images(
                                i,
                                width, height, num_layers,
                                addr,
-                               row_stride, img_stride);
+                               row_stride, img_stride,
+                               num_samples, sample_stride);
       }
    }
 }
@@ -472,6 +529,28 @@ llvmpipe_prepare_geometry_images(struct llvmpipe_context *lp,
    prepare_shader_images(lp, num, views, PIPE_SHADER_GEOMETRY);
 }
 
+/** Forward num/views to prepare_shader_images() for PIPE_SHADER_TESS_CTRL.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
+ */
+void
+llvmpipe_prepare_tess_ctrl_images(struct llvmpipe_context *lp,
+                                  unsigned num,
+                                  struct pipe_image_view *views)
+{
+   prepare_shader_images(lp, num, views, PIPE_SHADER_TESS_CTRL);
+}
+
+/** Forward num/views to prepare_shader_images() for PIPE_SHADER_TESS_EVAL.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
+ */
+void
+llvmpipe_prepare_tess_eval_images(struct llvmpipe_context *lp,
+                                  unsigned num,
+                                  struct pipe_image_view *views)
+{
+   prepare_shader_images(lp, num, views, PIPE_SHADER_TESS_EVAL);
+}
+
 void
 llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
 {
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 915e21db5..2bc94d5d4 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -481,7 +481,7 @@ apply_perspective_corr( struct gallivm_state *gallivm,
 
 
 /**
- * Applys cylindrical wrapping to vertex attributes if enabled.
+ * Apply cylindrical wrapping to vertex attributes if enabled.
  * Input coordinates must be in [0, 1] range, otherwise results are undefined.
  *
  * @param cyl_wrap  TGSI_CYLINDRICAL_WRAP_x flags
@@ -652,7 +652,7 @@ init_args(struct gallivm_state *gallivm,
    load_attribute(gallivm, args, key, 0, attr_pos);
 
    pixel_center = lp_build_const_vec(gallivm, typef4,
-                                     key->pixel_center_half ? 0.5 : 0.0);
+                                     (!key->multisample && key->pixel_center_half) ? 0.5 : 0.0);
 
    /*
     * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
@@ -730,7 +730,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
    snprintf(func_name, sizeof(func_name), "setup_variant_%u",
             variant->no);
 
-   variant->gallivm = gallivm = gallivm_create(func_name, lp->context);
+   variant->gallivm = gallivm = gallivm_create(func_name, lp->context, NULL);
    if (!variant->gallivm) {
       goto fail;
    }
@@ -843,6 +843,7 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
    key->num_inputs = fs->info.base.num_inputs;
    key->flatshade_first = lp->rasterizer->flatshade_first;
    key->pixel_center_half = lp->rasterizer->half_pixel_center;
+   key->multisample = lp->rasterizer->multisample;
    key->twoside = lp->rasterizer->light_twoside;
    key->size = Offset(struct lp_setup_variant_key,
                       inputs[key->num_inputs]);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c
new file mode 100644
index 000000000..b3f8e74af
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_tess.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2019 Red Hat.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+#include "lp_debug.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+
+/** Create a tess ctrl shader CSO from templ; NULL on allocation or draw-shader failure. */
+static void *
+llvmpipe_create_tcs_state(struct pipe_context *pipe,
+                          const struct pipe_shader_state *templ)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_tess_ctrl_shader *state;
+
+   state = CALLOC_STRUCT(lp_tess_ctrl_shader);
+   if (!state)
+      goto no_state;
+
+   /* debug: tokens may be NULL (see no_tokens below), so guard the dump */
+   if (LP_DEBUG & DEBUG_TGSI) {
+      debug_printf("llvmpipe: Create tess ctrl shader %p:\n", (void *)state);
+      if (templ->tokens)
+         tgsi_dump(templ->tokens, 0);
+   }
+   /* remember whether tokens were supplied, and copy stream output info */
+   state->no_tokens = !templ->tokens;
+   memcpy(&state->stream_output, &templ->stream_output, sizeof state->stream_output);
+
+   if (templ->tokens || templ->type == PIPE_SHADER_IR_NIR) {
+      state->dtcs = draw_create_tess_ctrl_shader(llvmpipe->draw, templ);
+      if (state->dtcs == NULL) {
+         goto no_dtcs;
+      }
+   }
+
+   return state;
+
+no_dtcs:
+   FREE( state );
+no_state:
+   return NULL;
+}
+
+/** Make tcs (may be NULL) the current tess ctrl shader and flag LP_NEW_TCS. */
+static void
+llvmpipe_bind_tcs_state(struct pipe_context *pipe, void *tcs)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   llvmpipe->tcs = (struct lp_tess_ctrl_shader *)tcs;
+   /* the draw module receives the wrapped draw shader, or NULL when tcs is NULL */
+   draw_bind_tess_ctrl_shader(llvmpipe->draw,
+                              (llvmpipe->tcs ? llvmpipe->tcs->dtcs : NULL));
+
+   llvmpipe->dirty |= LP_NEW_TCS;
+}
+
+/** Destroy a tess ctrl shader CSO created by llvmpipe_create_tcs_state(); NULL is a no-op. */
+static void
+llvmpipe_delete_tcs_state(struct pipe_context *pipe, void *tcs)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   struct lp_tess_ctrl_shader *state =
+      (struct lp_tess_ctrl_shader *)tcs;
+
+   if (!state) {
+      return;
+   }
+   /* NOTE(review): dtcs is NULL if create skipped the draw shader; confirm draw accepts NULL */
+   draw_delete_tess_ctrl_shader(llvmpipe->draw, state->dtcs);
+   FREE(state);
+}
+
+/** Create a tess eval shader CSO from templ; NULL on allocation or draw-shader failure. */
+static void *
+llvmpipe_create_tes_state(struct pipe_context *pipe,
+                          const struct pipe_shader_state *templ)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_tess_eval_shader *state;
+
+   state = CALLOC_STRUCT(lp_tess_eval_shader);
+   if (!state)
+      goto no_state;
+
+   /* debug: tokens may be NULL (see no_tokens below), so guard the dump */
+   if (LP_DEBUG & DEBUG_TGSI) {
+      debug_printf("llvmpipe: Create tess eval shader %p:\n", (void *)state);
+      if (templ->tokens)
+         tgsi_dump(templ->tokens, 0);
+   }
+   /* remember whether tokens were supplied, and copy stream output info */
+   state->no_tokens = !templ->tokens;
+   memcpy(&state->stream_output, &templ->stream_output, sizeof state->stream_output);
+
+   if (templ->tokens || templ->type == PIPE_SHADER_IR_NIR) {
+      state->dtes = draw_create_tess_eval_shader(llvmpipe->draw, templ);
+      if (state->dtes == NULL) {
+         goto no_dtes;
+      }
+   }
+
+   return state;
+
+no_dtes:
+   FREE( state );
+no_state:
+   return NULL;
+}
+
+/** Make tes (may be NULL) the current tess eval shader and flag LP_NEW_TES. */
+static void
+llvmpipe_bind_tes_state(struct pipe_context *pipe, void *tes)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   llvmpipe->tes = (struct lp_tess_eval_shader *)tes;
+   /* the draw module receives the wrapped draw shader, or NULL when tes is NULL */
+   draw_bind_tess_eval_shader(llvmpipe->draw,
+                              (llvmpipe->tes ? llvmpipe->tes->dtes : NULL));
+
+   llvmpipe->dirty |= LP_NEW_TES;
+}
+
+/** Destroy a tess eval shader CSO created by llvmpipe_create_tes_state(); NULL is a no-op. */
+static void
+llvmpipe_delete_tes_state(struct pipe_context *pipe, void *tes)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   struct lp_tess_eval_shader *state =
+      (struct lp_tess_eval_shader *)tes;
+
+   if (!state) {
+      return;
+   }
+   /* NOTE(review): dtes is NULL if create skipped the draw shader; confirm draw accepts NULL */
+   draw_delete_tess_eval_shader(llvmpipe->draw, state->dtes);
+   FREE(state);
+}
+
+static void
+llvmpipe_set_tess_state(struct pipe_context *pipe,
+                        const float default_outer_level[4], /* handed to draw as-is */
+                        const float default_inner_level[2]) /* handed to draw as-is */
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   draw_set_tess_state(llvmpipe->draw, default_outer_level, default_inner_level);
+}
+
+void
+llvmpipe_init_tess_funcs(struct llvmpipe_context *llvmpipe)
+{
+   llvmpipe->pipe.create_tcs_state = llvmpipe_create_tcs_state;
+   llvmpipe->pipe.bind_tcs_state   = llvmpipe_bind_tcs_state;
+   llvmpipe->pipe.delete_tcs_state = llvmpipe_delete_tcs_state;
+   /* tess eval shader hooks, mirroring the tess ctrl ones above */
+   llvmpipe->pipe.create_tes_state = llvmpipe_create_tes_state;
+   llvmpipe->pipe.bind_tes_state   = llvmpipe_bind_tes_state;
+   llvmpipe->pipe.delete_tes_state = llvmpipe_delete_tes_state;
+   /* default tessellation levels */
+   llvmpipe->pipe.set_tess_state = llvmpipe_set_tess_state;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c
index 702ecf96a..ee87bed5d 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -77,6 +77,8 @@ llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
 static void
 llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
                             unsigned start_slot, unsigned count,
+                            unsigned unbind_num_trailing_slots,
+                            bool take_ownership,
                             const struct pipe_vertex_buffer *buffers)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
@@ -85,11 +87,14 @@ llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
 
    util_set_vertex_buffers_count(llvmpipe->vertex_buffer,
                                  &llvmpipe->num_vertex_buffers,
-                                 buffers, start_slot, count);
+                                 buffers, start_slot, count,
+                                 unbind_num_trailing_slots,
+                                 take_ownership);
 
    llvmpipe->dirty |= LP_NEW_VERTEX;
 
-   draw_set_vertex_buffers(llvmpipe->draw, start_slot, count, buffers);
+   draw_set_vertex_buffers(llvmpipe->draw, start_slot, count,
+                           unbind_num_trailing_slots, buffers);
 }
 
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
index 953b26e8c..9ba2b87b8 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -27,14 +27,60 @@
 
 #include "util/u_rect.h"
 #include "util/u_surface.h"
+#include "util/u_memset.h"
 #include "lp_context.h"
 #include "lp_flush.h"
 #include "lp_limits.h"
 #include "lp_surface.h"
 #include "lp_texture.h"
 #include "lp_query.h"
+#include "lp_rast.h"
 
+static void
+lp_resource_copy_ms(struct pipe_context *pipe,
+                    struct pipe_resource *dst, unsigned dst_level,
+                    unsigned dstx, unsigned dsty, unsigned dstz,
+                    struct pipe_resource *src, unsigned src_level,
+                    const struct pipe_box *src_box)
+{
+   struct pipe_box dst_box = *src_box;
+   enum pipe_format src_format;
+   dst_box.x = dstx;
+   dst_box.y = dsty;
+   dst_box.z = dstz;
+   /* Copy src_box of every sample of src into dst at (dstx, dsty, dstz), one sample per pass. */
+   src_format = src->format;
 
+   for (unsigned i = 0; i < src->nr_samples; i++) {
+      struct pipe_transfer *src_trans, *dst_trans;
+      const uint8_t *src_map = llvmpipe_transfer_map_ms(pipe,
+                                                        src, 0, PIPE_MAP_READ, i,
+                                                        src_box,
+                                                        &src_trans);
+      if (!src_map)
+         return;
+      /* NOTE: level 0 is mapped on both sides; src_level and dst_level are not used here. */
+      uint8_t *dst_map = llvmpipe_transfer_map_ms(pipe,
+                                                  dst, 0, PIPE_MAP_WRITE, i,
+                                                  &dst_box,
+                                                  &dst_trans);
+      if (!dst_map) {
+         pipe->transfer_unmap(pipe, src_trans);
+         return;
+      }
+      /* zero offsets: presumably each mapping already points at its box origin -- TODO confirm */
+      util_copy_box(dst_map,
+                    src_format,
+                    dst_trans->stride, dst_trans->layer_stride,
+                    0, 0, 0,
+                    src_box->width, src_box->height, src_box->depth,
+                    src_map,
+                    src_trans->stride, src_trans->layer_stride,
+                    0, 0, 0);
+      pipe->transfer_unmap(pipe, dst_trans);
+      pipe->transfer_unmap(pipe, src_trans);
+   }
+}
 static void
 lp_resource_copy(struct pipe_context *pipe,
                  struct pipe_resource *dst, unsigned dst_level,
@@ -56,6 +102,12 @@ lp_resource_copy(struct pipe_context *pipe,
                            FALSE, /* do_not_block */
                            "blit src");
 
+   if (dst->nr_samples > 1 &&
+       dst->nr_samples == src->nr_samples) {
+      lp_resource_copy_ms(pipe, dst, dst_level, dstx, dsty, dstz,
+                          src, src_level, src_box);
+      return;
+   }
    util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
                              src, src_level, src_box);
 }
@@ -70,14 +122,6 @@ static void lp_blit(struct pipe_context *pipe,
    if (blit_info->render_condition_enable && !llvmpipe_check_render_cond(lp))
       return;
 
-   if (info.src.resource->nr_samples > 1 &&
-       info.dst.resource->nr_samples <= 1 &&
-       !util_format_is_depth_or_stencil(info.src.resource->format) &&
-       !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("llvmpipe: color resolve unimplemented\n");
-      return;
-   }
-
    if (util_try_blit_via_copy_region(pipe, &info)) {
       return; /* done */
    }
@@ -89,6 +133,15 @@ static void lp_blit(struct pipe_context *pipe,
       return;
    }
 
+   /* for 32-bit unorm depth, avoid the conversions to float and back,
+      which can introduce accuracy errors. */
+   if (blit_info->src.format == PIPE_FORMAT_Z32_UNORM &&
+       blit_info->dst.format == PIPE_FORMAT_Z32_UNORM && info.filter == PIPE_TEX_FILTER_NEAREST) {
+      info.src.format = PIPE_FORMAT_R32_UINT;
+      info.dst.format = PIPE_FORMAT_R32_UINT;
+      info.mask = PIPE_MASK_R;
+   }
+
    /* XXX turn off occlusion and streamout queries */
 
    util_blitter_save_vertex_buffer_slot(lp->blitter, lp->vertex_buffer);
@@ -102,9 +155,11 @@ static void lp_blit(struct pipe_context *pipe,
    util_blitter_save_scissor(lp->blitter, &lp->scissors[0]);
    util_blitter_save_fragment_shader(lp->blitter, lp->fs);
    util_blitter_save_blend(lp->blitter, (void*)lp->blend);
+   util_blitter_save_tessctrl_shader(lp->blitter, (void*)lp->tcs);
+   util_blitter_save_tesseval_shader(lp->blitter, (void*)lp->tes);
    util_blitter_save_depth_stencil_alpha(lp->blitter, (void*)lp->depth_stencil);
    util_blitter_save_stencil_ref(lp->blitter, &lp->stencil_ref);
-   /*util_blitter_save_sample_mask(sp->blitter, lp->sample_mask);*/
+   util_blitter_save_sample_mask(lp->blitter, lp->sample_mask);
    util_blitter_save_framebuffer(lp->blitter, &lp->framebuffer);
    util_blitter_save_fragment_sampler_states(lp->blitter,
                      lp->num_samplers[PIPE_SHADER_FRAGMENT],
@@ -185,6 +240,64 @@ llvmpipe_surface_destroy(struct pipe_context *pipe,
 }
 
 
+/** Write the x/y position of sample_index into out_value[0..1]; only sample_count 4 is handled. */
+static void
+llvmpipe_get_sample_position(struct pipe_context *pipe,
+                             unsigned sample_count,
+                             unsigned sample_index,
+                             float *out_value)
+{
+   switch (sample_count) {
+   case 4: /* sample_index is used unchecked as an index into lp_sample_pos_4x */
+      out_value[0] = lp_sample_pos_4x[sample_index][0];
+      out_value[1] = lp_sample_pos_4x[sample_index][1];
+      break;
+   default: /* any other sample_count leaves out_value untouched */
+      break;
+   }
+}
+
+static void
+lp_clear_color_texture_helper(struct pipe_transfer *dst_trans,
+                                ubyte *dst_map,
+                                enum pipe_format format,
+                                const union pipe_color_union *color,
+                                unsigned width, unsigned height, unsigned depth)
+{
+   union util_color uc;
+   /* Fill a width x height x depth box at the start of dst_map with a single color. */
+   assert(dst_trans->stride > 0);
+   /* pack color for format into uc, the value util_fill_box() writes */
+   util_pack_color_union(format, &uc, color);
+
+   util_fill_box(dst_map, format,
+                 dst_trans->stride, dst_trans->layer_stride,
+                 0, 0, 0, width, height, depth, &uc);
+}
+
+static void
+lp_clear_color_texture_msaa(struct pipe_context *pipe,
+                            struct pipe_resource *texture,
+                            enum pipe_format format,
+                            const union pipe_color_union *color,
+                            unsigned sample,
+                            const struct pipe_box *box)
+{
+   struct pipe_transfer *dst_trans;
+   ubyte *dst_map;
+   /* Clear box of one sample of texture to color; level 0 of that sample is mapped for writing. */
+   dst_map = llvmpipe_transfer_map_ms(pipe, texture, 0, PIPE_MAP_WRITE,
+                                      sample, box, &dst_trans);
+   if (!dst_map)
+      return;
+   /* the helper asserts stride > 0, so a non-positive stride skips the fill */
+   if (dst_trans->stride > 0) {
+      lp_clear_color_texture_helper(dst_trans, dst_map, format, color,
+                                    box->width, box->height, box->depth);
+   }
+   pipe->transfer_unmap(pipe, dst_trans);
+}
+
 static void
 llvmpipe_clear_render_target(struct pipe_context *pipe,
                              struct pipe_surface *dst,
@@ -198,12 +311,60 @@ llvmpipe_clear_render_target(struct pipe_context *pipe,
    if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
       return;
 
-   util_clear_render_target(pipe, dst, color,
-                            dstx, dsty, width, height);
+   if (dst->texture->nr_samples > 1) {
+      struct pipe_box box;
+      u_box_2d(dstx, dsty, width, height, &box);
+      if (dst->texture->target != PIPE_BUFFER) {
+         box.z = dst->u.tex.first_layer;
+         box.depth = dst->u.tex.last_layer - dst->u.tex.first_layer + 1;
+      }
+      for (unsigned s = 0; s < util_res_sample_count(dst->texture); s++) {
+         lp_clear_color_texture_msaa(pipe, dst->texture, dst->format,
+                                     color, s, &box);
+      }
+   } else
+      util_clear_render_target(pipe, dst, color,
+                               dstx, dsty, width, height);
 }
 
 
 static void
+lp_clear_depth_stencil_texture_msaa(struct pipe_context *pipe,
+                                    struct pipe_resource *texture,
+                                    enum pipe_format format,
+                                    unsigned clear_flags,
+                                    uint64_t zstencil, unsigned sample,
+                                    const struct pipe_box *box)
+{
+   struct pipe_transfer *dst_trans;
+   ubyte *dst_map;
+   boolean need_rmw = FALSE;
+   /* need_rmw: a combined depth+stencil format with only one of the two clear flags set */
+   if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) &&
+       ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
+       util_format_is_depth_and_stencil(format))
+      need_rmw = TRUE;
+   /* map level 0 of the given sample; the mapping is also readable when need_rmw is set */
+   dst_map = llvmpipe_transfer_map_ms(pipe,
+                                      texture,
+                                      0,
+                                      (need_rmw ? PIPE_MAP_READ_WRITE :
+                                       PIPE_MAP_WRITE),
+                                      sample, box, &dst_trans);
+   assert(dst_map);
+   if (!dst_map)
+      return;
+
+   assert(dst_trans->stride > 0);
+   /* write zstencil over the box->width x box->height x box->depth region of the mapping */
+   util_fill_zs_box(dst_map, format, need_rmw, clear_flags,
+		    dst_trans->stride, dst_trans->layer_stride,
+		    box->width, box->height, box->depth, zstencil);
+
+   pipe->transfer_unmap(pipe, dst_trans);
+}
+
+static void
 llvmpipe_clear_depth_stencil(struct pipe_context *pipe,
                              struct pipe_surface *dst,
                              unsigned clear_flags,
@@ -218,11 +379,104 @@ llvmpipe_clear_depth_stencil(struct pipe_context *pipe,
    if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
       return;
 
-   util_clear_depth_stencil(pipe, dst, clear_flags,
-                            depth, stencil,
-                            dstx, dsty, width, height);
+   if (dst->texture->nr_samples > 1) {
+      uint64_t zstencil = util_pack64_z_stencil(dst->format, depth, stencil);
+      struct pipe_box box;
+      u_box_2d(dstx, dsty, width, height, &box);
+      if (dst->texture->target != PIPE_BUFFER) {
+         box.z = dst->u.tex.first_layer;
+         box.depth = dst->u.tex.last_layer - dst->u.tex.first_layer + 1;
+      }
+      for (unsigned s = 0; s < util_res_sample_count(dst->texture); s++)
+         lp_clear_depth_stencil_texture_msaa(pipe, dst->texture,
+                                             dst->format, clear_flags,
+                                             zstencil, s, &box);
+   } else
+      util_clear_depth_stencil(pipe, dst, clear_flags,
+                               depth, stencil,
+                               dstx, dsty, width, height);
 }
 
+static void
+llvmpipe_clear_texture(struct pipe_context *pipe,
+                       struct pipe_resource *tex,
+                       unsigned level,
+                       const struct pipe_box *box,
+                       const void *data)
+{
+   const struct util_format_description *desc =
+          util_format_description(tex->format);
+   if (tex->nr_samples <= 1) {
+      util_clear_texture(pipe, tex, level, box, data);
+      return;
+   }
+   union pipe_color_union color;
+   /* multisampled from here on: every sample is cleared separately and level is not used */
+   if (util_format_is_depth_or_stencil(tex->format)) {
+      unsigned clear = 0;
+      float depth = 0.0f;
+      uint8_t stencil = 0;
+      uint64_t zstencil;
+      /* unpack one value from data for each component the format has */
+      if (util_format_has_depth(desc)) {
+         clear |= PIPE_CLEAR_DEPTH;
+         util_format_unpack_z_float(tex->format, &depth, data, 1);
+      }
+
+      if (util_format_has_stencil(desc)) {
+         clear |= PIPE_CLEAR_STENCIL;
+         util_format_unpack_s_8uint(tex->format, &stencil, data, 1);
+      }
+      /* repack into the 64-bit form taken by lp_clear_depth_stencil_texture_msaa() */
+      zstencil = util_pack64_z_stencil(tex->format, depth, stencil);
+
+      for (unsigned s = 0; s < util_res_sample_count(tex); s++)
+         lp_clear_depth_stencil_texture_msaa(pipe, tex, tex->format, clear, zstencil,
+                                             s, box);
+   } else {
+      util_format_unpack_rgba(tex->format, color.ui, data, 1);
+      /* color path: one clear per sample with the unpacked value */
+      for (unsigned s = 0; s < util_res_sample_count(tex); s++) {
+         lp_clear_color_texture_msaa(pipe, tex, tex->format, &color, s,
+                                     box);
+      }
+   }
+}
+
+static void
+llvmpipe_clear_buffer(struct pipe_context *pipe,
+                      struct pipe_resource *res,
+                      unsigned offset,
+                      unsigned size,
+                      const void *clear_value,
+                      int clear_value_size)
+{
+   struct pipe_transfer *dst_t;
+   struct pipe_box box;
+   char *dst;
+   u_box_1d(offset, size, &box);
+
+   /* Fill bytes [offset, offset + size) of res with copies of clear_value. */
+   dst = pipe->transfer_map(pipe, res, 0, PIPE_MAP_WRITE, &box, &dst_t);
+   if (!dst)
+      return; /* mapping failed, so there is nothing to write or unmap */
+
+   switch (clear_value_size) {
+   case 1:
+      memset(dst, *(const uint8_t *)clear_value, size);
+      break;
+   case 4:
+      util_memset32(dst, *(const uint32_t *)clear_value, size / 4);
+      break;
+   default:
+      /* whole patterns only: never write past size, never spin on a size <= 0 */
+      for (unsigned i = 0; clear_value_size > 0 &&
+           size - i >= (unsigned)clear_value_size; i += clear_value_size)
+         memcpy(&dst[i], clear_value, clear_value_size);
+      break;
+   }
+   pipe->transfer_unmap(pipe, dst_t);
+}
 
 void
 llvmpipe_init_surface_functions(struct llvmpipe_context *lp)
@@ -232,8 +486,10 @@ llvmpipe_init_surface_functions(struct llvmpipe_context *lp)
    lp->pipe.create_surface = llvmpipe_create_surface;
    lp->pipe.surface_destroy = llvmpipe_surface_destroy;
    /* These are not actually functions dealing with surfaces */
-   lp->pipe.clear_texture = util_clear_texture;
+   lp->pipe.clear_texture = llvmpipe_clear_texture;
+   lp->pipe.clear_buffer = llvmpipe_clear_buffer;
    lp->pipe.resource_copy_region = lp_resource_copy;
    lp->pipe.blit = lp_blit;
    lp->pipe.flush_resource = lp_flush_resource;
+   lp->pipe.get_sample_position = llvmpipe_get_sample_position;
 }
author	Jonathan Gray <jsg@cvs.openbsd.org>	2021-07-22 10:50:50 +0000
committer	Jonathan Gray <jsg@cvs.openbsd.org>	2021-07-22 10:50:50 +0000
commit	9130ec005fbc78a62420643414d8354d0929ca50 (patch)
tree	6762777acdd2d4eee17ef87290e80dc7afe2b73d /lib/mesa/src/gallium/drivers/llvmpipe
parent	ca11beabae33eb59fb981b8adf50b1d47a2a98f0 (diff)