Revert to Mesa 13.0.6 to hopefully address rendering issues a handful of

people have reported with xpdf/fvwm on ivy bridge with modesetting driver.
author: Jonathan Gray <jsg@cvs.openbsd.org> 2017-08-26 16:59:42 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2017-08-26 16:59:42 +0000
commit: 81ece42815e80818f160cdd85fab57d65b56ad15 (patch)
tree: 1059ff094da1aa50334115952fcb1cfcbda3acc6 /lib/mesa/src/gallium/drivers/llvmpipe
parent: b0244145d5bb49623d58f6b5cab8143ada692b60 (diff)
15 files changed, 670 insertions, 347 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
index 1d3853e41..85ae0ae13 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
@@ -26,11 +26,11 @@ include $(top_srcdir)/src/gallium/Automake.inc
 AM_CFLAGS = \
 	$(GALLIUM_DRIVER_CFLAGS) \
 	$(LLVM_CFLAGS) \
-	$(MSVC2008_COMPAT_CFLAGS)
+	$(MSVC2013_COMPAT_CFLAGS)
 AM_CXXFLAGS= \
 	$(GALLIUM_DRIVER_CXXFLAGS) \
 	$(LLVM_CXXFLAGS) \
-	$(MSVC2008_COMPAT_CXXFLAGS)
+	$(MSVC2013_COMPAT_CXXFLAGS)
 
 noinst_LTLIBRARIES = libllvmpipe.la
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
index 27a6693d9..0a7486d64 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
@@ -78,13 +78,10 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
 	$(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
 	$(top_srcdir)/bin/depcomp \
 	$(top_srcdir)/src/gallium/Automake.inc
-@HAVE_LIBDRM_TRUE@am__append_1 = \
-@HAVE_LIBDRM_TRUE@	$(LIBDRM_LIBS)
-
-@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_DRISW_TRUE@am__append_1 = \
 @HAVE_DRISW_TRUE@	$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 
-@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@am__append_2 = \
 @HAVE_DRISW_KMS_TRUE@	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
 @HAVE_DRISW_KMS_TRUE@	$(LIBDRM_LIBS)
 
@@ -139,8 +136,7 @@ am__DEPENDENCIES_1 =
 am__DEPENDENCIES_2 = libllvmpipe.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
-	$(am__DEPENDENCIES_1)
+	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 lp_test_arit_DEPENDENCIES = $(am__DEPENDENCIES_2)
 am_lp_test_blend_OBJECTS = lp_test_blend.$(OBJEXT) \
 	lp_test_main.$(OBJEXT)
@@ -238,8 +234,6 @@ AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
 AMDGPU_LIBS = @AMDGPU_LIBS@
 AMTAR = @AMTAR@
 AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
-ANDROID_CFLAGS = @ANDROID_CFLAGS@
-ANDROID_LIBS = @ANDROID_LIBS@
 AR = @AR@
 AUTOCONF = @AUTOCONF@
 AUTOHEADER = @AUTOHEADER@
@@ -270,6 +264,8 @@ DLLTOOL = @DLLTOOL@
 DLOPEN_LIBS = @DLOPEN_LIBS@
 DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
 DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
+DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
 DRIGL_CFLAGS = @DRIGL_CFLAGS@
 DRIGL_LIBS = @DRIGL_LIBS@
 DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
@@ -282,11 +278,10 @@ ECHO_C = @ECHO_C@
 ECHO_N = @ECHO_N@
 ECHO_T = @ECHO_T@
 EGL_CFLAGS = @EGL_CFLAGS@
+EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
 EGL_LIB_DEPS = @EGL_LIB_DEPS@
 EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
 EGREP = @EGREP@
-ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
-ETNAVIV_LIBS = @ETNAVIV_LIBS@
 EXEEXT = @EXEEXT@
 EXPAT_CFLAGS = @EXPAT_CFLAGS@
 EXPAT_LIBS = @EXPAT_LIBS@
@@ -334,27 +329,31 @@ LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
 LIBDRM_LIBS = @LIBDRM_LIBS@
 LIBELF_CFLAGS = @LIBELF_CFLAGS@
 LIBELF_LIBS = @LIBELF_LIBS@
-LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
-LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
+LIBSENSORS_LDFLAGS = @LIBSENSORS_LDFLAGS@
+LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@
+LIBSHA1_LIBS = @LIBSHA1_LIBS@
 LIBTOOL = @LIBTOOL@
-LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
-LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
 LIB_DIR = @LIB_DIR@
 LIB_EXT = @LIB_EXT@
 LIPO = @LIPO@
+LLVM_BINDIR = @LLVM_BINDIR@
 LLVM_CFLAGS = @LLVM_CFLAGS@
 LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
 LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
 LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
 LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBDIR = @LLVM_LIBDIR@
 LLVM_LIBS = @LLVM_LIBS@
+LLVM_VERSION = @LLVM_VERSION@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 MAINT = @MAINT@
 MAKEINFO = @MAKEINFO@
 MANIFEST_TOOL = @MANIFEST_TOOL@
+MESA_LLVM = @MESA_LLVM@
 MKDIR_P = @MKDIR_P@
 MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
 MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
@@ -375,6 +374,8 @@ OMX_LIBS = @OMX_LIBS@
 OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
 OPENCL_LIBNAME = @OPENCL_LIBNAME@
 OPENCL_VERSION = @OPENCL_VERSION@
+OPENSSL_CFLAGS = @OPENSSL_CFLAGS@
+OPENSSL_LIBS = @OPENSSL_LIBS@
 OSMESA_LIB = @OSMESA_LIB@
 OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
 OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
@@ -394,6 +395,8 @@ PKG_CONFIG = @PKG_CONFIG@
 PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
 PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
 POSIX_SHELL = @POSIX_SHELL@
+PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
+PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
 PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
 PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
 PTHREAD_CC = @PTHREAD_CC@
@@ -409,6 +412,8 @@ SED = @SED@
 SELINUX_CFLAGS = @SELINUX_CFLAGS@
 SELINUX_LIBS = @SELINUX_LIBS@
 SET_MAKE = @SET_MAKE@
+SHA1_CFLAGS = @SHA1_CFLAGS@
+SHA1_LIBS = @SHA1_LIBS@
 SHELL = @SHELL@
 SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
 SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
@@ -417,6 +422,7 @@ STRIP = @STRIP@
 SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
 SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
 SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
+TIMESTAMP_CMD = @TIMESTAMP_CMD@
 VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
 VALGRIND_LIBS = @VALGRIND_LIBS@
 VA_CFLAGS = @VA_CFLAGS@
@@ -432,6 +438,7 @@ VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
 VDPAU_MAJOR = @VDPAU_MAJOR@
 VDPAU_MINOR = @VDPAU_MINOR@
 VERSION = @VERSION@
+VG_LIB_DEPS = @VG_LIB_DEPS@
 VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
 VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
 VL_CFLAGS = @VL_CFLAGS@
@@ -460,10 +467,9 @@ XVMC_LIBS = @XVMC_LIBS@
 XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
 XVMC_MAJOR = @XVMC_MAJOR@
 XVMC_MINOR = @XVMC_MINOR@
+XXD = @XXD@
 YACC = @YACC@
 YFLAGS = @YFLAGS@
-ZLIB_CFLAGS = @ZLIB_CFLAGS@
-ZLIB_LIBS = @ZLIB_LIBS@
 abs_builddir = @abs_builddir@
 abs_srcdir = @abs_srcdir@
 abs_top_builddir = @abs_top_builddir@
@@ -643,8 +649,12 @@ GALLIUM_TARGET_CFLAGS = \
 	$(LIBDRM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
-GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
-	$(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+GALLIUM_COMMON_LIB_DEPS = \
+	-lm \
+	$(CLOCK_LIB) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS)
+
 GALLIUM_WINSYS_CFLAGS = \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/include \
@@ -656,7 +666,7 @@ GALLIUM_WINSYS_CFLAGS = \
 GALLIUM_PIPE_LOADER_WINSYS_LIBS =  \
 	$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
 	$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
-	$(am__append_2) $(am__append_3)
+	$(am__append_1) $(am__append_2)
 AM_CFLAGS = \
 	$(GALLIUM_DRIVER_CFLAGS) \
 	$(LLVM_CFLAGS) \
@@ -678,8 +688,7 @@ TEST_LIBS = \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(LLVM_LIBS) \
 	$(DLOPEN_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(CLOCK_LIB)
+	$(PTHREAD_LIBS)
 
 lp_test_format_SOURCES = lp_test_format.c lp_test_main.c
 lp_test_format_LDADD = $(TEST_LIBS)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 564e19a15..a57670d49 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -255,13 +255,13 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
    LLVMValueRef rgb_factor_, alpha_factor_;
    enum lp_build_blend_swizzle rgb_swizzle;
 
-   if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) {
+   if (alpha_swizzle == PIPE_SWIZZLE_X && num_channels == 1) {
       return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
    }
 
    rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
 
-   if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
+   if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
       rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
       alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
       return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
@@ -312,7 +312,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    struct lp_build_blend_aos_context bld;
    LLVMValueRef src_factor, dst_factor;
    LLVMValueRef result;
-   unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
+   unsigned alpha_swizzle = PIPE_SWIZZLE_NONE;
    unsigned i;
 
    desc = util_format_description(cbuf_format);
@@ -370,7 +370,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
                               rgb_alpha_same,
                               false);
 
-      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
+      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) {
          LLVMValueRef alpha;
 
          alpha = lp_build_blend(&bld.base,
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index b25e04137..0c27c2f89 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -359,7 +359,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
 
    z_swizzle = format_desc->swizzle[0];
 
-   if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+   if (z_swizzle == PIPE_SWIZZLE_NONE)
       return FALSE;
 
    *width = format_desc->channel[z_swizzle].size;
@@ -390,7 +390,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
 
    s_swizzle = format_desc->swizzle[1];
 
-   if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+   if (s_swizzle == PIPE_SWIZZLE_NONE)
       return FALSE;
 
    /* just special case 64bit d/s format */
@@ -873,8 +873,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       const unsigned z_swizzle = format_desc->swizzle[0];
       const unsigned s_swizzle = format_desc->swizzle[1];
 
-      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
-             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+      assert(z_swizzle != PIPE_SWIZZLE_NONE ||
+             s_swizzle != PIPE_SWIZZLE_NONE);
 
       assert(depth->enabled || stencil[0].enabled);
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
index 80cb6578b..84912c6f1 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
@@ -73,20 +73,20 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
 
    pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
 
-   for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+   for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
       pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
    }
 
-   for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+   for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
       pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
    }
 
-   for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+   for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
       pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL);
    }
 
-   for (i = 0; i < Elements(llvmpipe->constants); i++) {
-      for (j = 0; j < Elements(llvmpipe->constants[i]); j++) {
+   for (i = 0; i < ARRAY_SIZE(llvmpipe->constants); i++) {
+      for (j = 0; j < ARRAY_SIZE(llvmpipe->constants[i]); j++) {
          pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL);
       }
    }
@@ -128,7 +128,8 @@ llvmpipe_render_condition ( struct pipe_context *pipe,
 }
 
 struct pipe_context *
-llvmpipe_create_context( struct pipe_screen *screen, void *priv )
+llvmpipe_create_context(struct pipe_screen *screen, void *priv,
+                        unsigned flags)
 {
    struct llvmpipe_context *llvmpipe;
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
index 9acde4f1b..21260369a 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,6 +36,7 @@
 #include "util/u_memory.h"
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_format.h"
 #include "lp_context.h"
 #include "lp_jit.h"
 
@@ -55,7 +56,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       elem_types[LP_JIT_VIEWPORT_MAX_DEPTH] = LLVMFloatTypeInContext(lc);
 
       viewport_type = LLVMStructTypeInContext(lc, elem_types,
-                                              Elements(elem_types), 0);
+                                              ARRAY_SIZE(elem_types), 0);
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, min_depth,
                              gallivm->target, viewport_type,
@@ -83,7 +84,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
          LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS);
 
       texture_type = LLVMStructTypeInContext(lc, elem_types,
-                                             Elements(elem_types), 0);
+                                             ARRAY_SIZE(elem_types), 0);
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
                              gallivm->target, texture_type,
@@ -126,7 +127,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
          LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
 
       sampler_type = LLVMStructTypeInContext(lc, elem_types,
-                                             Elements(elem_types), 0);
+                                             ARRAY_SIZE(elem_types), 0);
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, min_lod,
                              gallivm->target, sampler_type,
@@ -165,7 +166,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
                                                       PIPE_MAX_SAMPLERS);
 
       context_type = LLVMStructTypeInContext(lc, elem_types,
-                                             Elements(elem_types), 0);
+                                             ARRAY_SIZE(elem_types), 0);
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
                              gallivm->target, context_type,
@@ -208,12 +209,14 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
       LLVMTypeRef thread_data_type;
 
+      elem_types[LP_JIT_THREAD_DATA_CACHE] =
+            LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
       elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
       elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
             LLVMInt32TypeInContext(lc);
 
       thread_data_type = LLVMStructTypeInContext(lc, elem_types,
-                                                 Elements(elem_types), 0);
+                                                 ARRAY_SIZE(elem_types), 0);
 
       lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0);
    }
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
index c726707c0..9e56c962d 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -43,6 +43,7 @@
 #include "lp_query.h"
 #include "lp_rast.h"
 #include "lp_rast_priv.h"
+#include "gallivm/lp_bld_format.h"
 #include "gallivm/lp_bld_debug.h"
 #include "lp_scene.h"
 #include "lp_tex_sample.h"
@@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task,
 {
    task->scene = scene;
 
+   /* Clear the cache tags. This should not always be necessary but
+      simpler for now. */
+#if LP_USE_TEXTURE_CACHE
+   memset(task->thread_data.cache->cache_tags, 0,
+          sizeof(task->thread_data.cache->cache_tags));
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   task->thread_data.cache->cache_access_total = 0;
+   task->thread_data.cache->cache_access_miss = 0;
+#endif
+#endif
+
    if (!task->rast->no_rast && !scene->discard) {
       /* loop over scene bins, rasterize each */
       {
@@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task,
    }
 
 
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   {
+      uint64_t total, miss;
+      total = task->thread_data.cache->cache_access_total;
+      miss = task->thread_data.cache->cache_access_miss;
+      if (total) {
+         debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
+                 task->thread_index, (long long unsigned)total,
+                 (long long unsigned)miss,
+                 (float)(total - miss)/(float)total);
+      }
+   }
+#endif
+
    if (scene->fence) {
       lp_fence_signal(scene->fence);
    }
@@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads )
       goto no_full_scenes;
    }
 
-   for (i = 0; i < Elements(rast->tasks); i++) {
+   for (i = 0; i < MAX2(1, num_threads); i++) {
       struct lp_rasterizer_task *task = &rast->tasks[i];
       task->rast = rast;
       task->thread_index = i;
+      task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
+                                             16);
+      if (!task->thread_data.cache) {
+         goto no_thread_data_cache;
+      }
    }
 
    rast->num_threads = num_threads;
@@ -879,12 +910,22 @@ lp_rast_create( unsigned num_threads )
    create_rast_threads(rast);
 
    /* for synchronizing rasterization threads */
-   pipe_barrier_init( &rast->barrier, rast->num_threads );
+   if (rast->num_threads > 0) {
+      pipe_barrier_init( &rast->barrier, rast->num_threads );
+   }
 
    memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
 
    return rast;
 
+no_thread_data_cache:
+   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+      if (rast->tasks[i].thread_data.cache) {
+         align_free(rast->tasks[i].thread_data.cache);
+      }
+   }
+
+   lp_scene_queue_destroy(rast->full_scenes);
 no_full_scenes:
    FREE(rast);
 no_rast:
@@ -923,9 +964,14 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
       pipe_semaphore_destroy(&rast->tasks[i].work_ready);
       pipe_semaphore_destroy(&rast->tasks[i].work_done);
    }
+   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+      align_free(rast->tasks[i].thread_data.cache);
+   }
 
    /* for synchronizing rasterization threads */
-   pipe_barrier_destroy( &rast->barrier );
+   if (rast->num_threads > 0) {
+      pipe_barrier_destroy( &rast->barrier );
+   }
 
    lp_scene_queue_destroy(rast->full_scenes);
 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
index 2441b3c0d..223be931e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -337,7 +337,7 @@ lp_scene_new_data_block( struct lp_scene *scene )
    }
    else {
       struct data_block *block = MALLOC_STRUCT(data_block);
-      if (block == NULL)
+      if (!block)
          return NULL;
       
       scene->scene_size += sizeof *block;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
index 14eeab033..3e4f1ef44 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -109,6 +109,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    switch (param) {
    case PIPE_CAP_NPOT_TEXTURES:
    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
       return 1;
    case PIPE_CAP_TWO_SIDED_STENCIL:
       return 1;
@@ -264,6 +265,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_FAKE_SW_MSAA:
       return 1;
    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
       return 1;
 
    case PIPE_CAP_VENDOR_ID:
@@ -279,6 +281,12 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
       if (!os_get_total_physical_memory(&system_memory))
          return 0;
 
+      if (sizeof(void *) == 4)
+         /* Cap to 2 GB on 32 bits system. We do this because llvmpipe does
+          * eat application memory, which is quite limited on 32 bits. App
+          * shouldn't expect too much available memory. */
+         system_memory = MIN2(system_memory, 2048 << 20);
+
       return (int)(system_memory >> 20);
    }
    case PIPE_CAP_UMA:
@@ -291,11 +299,44 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
       return 1;
+   case PIPE_CAP_CULL_DISTANCE:
+      return 1;
+   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+      return 1;
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
    case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+   case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
+   case PIPE_CAP_DRAW_PARAMETERS:
+   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+   case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_STRING_MARKER:
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
+   case PIPE_CAP_QUERY_MEMORY_INFO:
+   case PIPE_CAP_PCI_GROUP:
+   case PIPE_CAP_PCI_BUS:
+   case PIPE_CAP_PCI_DEVICE:
+   case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+   case PIPE_CAP_TGSI_VOTE:
+   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
       return 0;
    }
    /* should only get here on unhandled cases */
@@ -421,19 +462,20 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
       if (!format_desc->is_array && !format_desc->is_bitmask &&
           format != PIPE_FORMAT_R11G11B10_FLOAT)
          return FALSE;
+   }
 
-      /*
-       * XXX refuse formats known to crash in generate_unswizzled_blend().
-       * These include all 3-channel 24bit RGB8 variants, plus 48bit
-       * (except those using floats) 3-channel RGB16 variants (the latter
-       * seems to be more of a llvm bug though).
-       * The mesa state tracker only seems to use these for SINT/UINT formats.
+   if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) &&
+       ((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) {
+      /* Disable all 3-channel formats, where channel size != 32 bits.
+       * In some cases we run into crashes (in generate_unswizzled_blend()),
+       * for 3-channel RGB16 variants, there was an apparent LLVM bug.
+       * In any case, disabling the shallower 3-channel formats avoids a
+       * number of issues with GL_ARB_copy_image support.
        */
-      if (format_desc->is_array && format_desc->nr_channels == 3) {
-         if (format_desc->block.bits == 24 || (format_desc->block.bits == 48 &&
-               !util_format_is_float(format))) {
-            return FALSE;
-         }
+      if (format_desc->is_array &&
+          format_desc->nr_channels == 3 &&
+          format_desc->block.bits != 96) {
+         return FALSE;
       }
    }
 
@@ -450,12 +492,13 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
          return FALSE;
 
       /* TODO: Support stencil-only formats */
-      if (format_desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+      if (format_desc->swizzle[0] == PIPE_SWIZZLE_NONE) {
          return FALSE;
       }
    }
 
-   if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC ||
+       format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
       /* Software decoding is not hooked up. */
       return FALSE;
    }
@@ -537,6 +580,7 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
  */
 static boolean
 llvmpipe_fence_finish(struct pipe_screen *screen,
+                      struct pipe_context *ctx,
                       struct pipe_fence_handle *fence_handle,
                       uint64_t timeout)
 {
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
index 4c8167a9e..768775b29 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -39,6 +39,7 @@
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
+#include "util/u_viewport.h"
 #include "draw/draw_pipe.h"
 #include "os/os_time.h"
 #include "lp_context.h"
@@ -69,7 +70,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup)
    assert(setup->scene == NULL);
 
    setup->scene_idx++;
-   setup->scene_idx %= Elements(setup->scenes);
+   setup->scene_idx %= ARRAY_SIZE(setup->scenes);
 
    setup->scene = setup->scenes[setup->scene_idx];
 
@@ -123,7 +124,7 @@ void lp_setup_reset( struct lp_setup_context *setup )
    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
 
    /* Reset derived state */
-   for (i = 0; i < Elements(setup->constants); ++i) {
+   for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
       setup->constants[i].stored_size = 0;
       setup->constants[i].stored_data = NULL;
    }
@@ -476,22 +477,30 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
    uint64_t zsvalue = 0;
    uint32_t zmask32;
    uint8_t smask8;
+   enum pipe_format format = setup->fb.zsbuf->format;
 
    LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
 
    zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
    smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
 
-   zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
-                                   depth,
-                                   stencil);
+   zsvalue = util_pack64_z_stencil(format, depth, stencil);
 
-   zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
-                                       zmask32,
-                                       smask8);
+   zsmask = util_pack64_mask_z_stencil(format, zmask32, smask8);
 
    zsvalue &= zsmask;
 
+   if (format == PIPE_FORMAT_Z24X8_UNORM ||
+       format == PIPE_FORMAT_X8Z24_UNORM) {
+      /*
+       * Make full mask if there's "X" bits so we can do full
+       * clear (without rmw).
+       */
+      uint32_t zsmask_full = 0;
+      zsmask_full = util_pack_mask_z_stencil(format, ~0, ~0);
+      zsmask |= ~zsmask_full;
+   }
+
    if (setup->state == SETUP_ACTIVE) {
       struct lp_scene *scene = setup->scene;
 
@@ -642,12 +651,12 @@ lp_setup_set_fs_constants(struct lp_setup_context *setup,
 
    LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);
 
-   assert(num <= Elements(setup->constants));
+   assert(num <= ARRAY_SIZE(setup->constants));
 
    for (i = 0; i < num; ++i) {
       util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]);
    }
-   for (; i < Elements(setup->constants); i++) {
+   for (; i < ARRAY_SIZE(setup->constants); i++) {
       util_copy_constant_buffer(&setup->constants[i].current, NULL);
    }
    setup->dirty |= LP_SETUP_NEW_CONSTANTS;
@@ -763,15 +772,8 @@ lp_setup_set_viewports(struct lp_setup_context *setup,
    for (i = 0; i < num_viewports; i++) {
       float min_depth;
       float max_depth;
-
-      if (lp->rasterizer->clip_halfz == 0) {
-         float half_depth = viewports[i].scale[2];
-         min_depth = viewports[i].translate[2] - half_depth;
-         max_depth = min_depth + half_depth * 2.0f;
-      } else {
-         min_depth = viewports[i].translate[2];
-         max_depth = min_depth + viewports[i].scale[2];
-      }
+      util_viewport_zmin_zmax(&viewports[i], lp->rasterizer->clip_halfz,
+                              &min_depth, &max_depth);
 
       if (setup->viewports[i].min_depth != min_depth ||
           setup->viewports[i].max_depth != max_depth) {
@@ -791,13 +793,15 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                                     unsigned num,
                                     struct pipe_sampler_view **views)
 {
-   unsigned i;
+   unsigned i, max_tex_num;
 
    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
 
    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
 
-   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+   max_tex_num = MAX2(num, setup->fs.current_tex_num);
+
+   for (i = 0; i < max_tex_num; i++) {
       struct pipe_sampler_view *view = i < num ? views[i] : NULL;
 
       if (view) {
@@ -854,10 +858,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                      jit_tex->img_stride[j] = lp_tex->img_stride[j];
                   }
 
-                  if (view->target == PIPE_TEXTURE_1D_ARRAY ||
-                      view->target == PIPE_TEXTURE_2D_ARRAY ||
-                      view->target == PIPE_TEXTURE_CUBE ||
-                      view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+                  if (res->target == PIPE_TEXTURE_1D_ARRAY ||
+                      res->target == PIPE_TEXTURE_2D_ARRAY ||
+                      res->target == PIPE_TEXTURE_CUBE ||
+                      res->target == PIPE_TEXTURE_CUBE_ARRAY) {
                      /*
                       * For array textures, we don't have first_layer, instead
                       * adjust last_layer (stored as depth) plus the mip level offsets
@@ -879,8 +883,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                }
                else {
                   /*
-                   * For buffers, we don't have first_element, instead adjust
-                   * last_element (stored as width) plus the base pointer.
+                   * For buffers, we don't have "offset", instead adjust
+                   * the size (stored as width) plus the base pointer.
                    */
                   unsigned view_blocksize = util_format_get_blocksize(view->format);
                   /* probably don't really need to fill that out */
@@ -889,12 +893,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
                   jit_tex->img_stride[0] = 0;
 
                   /* everything specified in number of elements here. */
-                  jit_tex->width = view->u.buf.last_element - view->u.buf.first_element + 1;
-                  jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.first_element *
-                                  view_blocksize;
+                  jit_tex->width = view->u.buf.size / view_blocksize;
+                  jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset;
                   /* XXX Unsure if we need to sanitize parameters? */
-                  assert(view->u.buf.first_element <= view->u.buf.last_element);
-                  assert(view->u.buf.last_element * view_blocksize < res->width0);
+                  assert(view->u.buf.offset + view->u.buf.size <= res->width0);
                }
             }
          }
@@ -917,7 +919,11 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
             assert(jit_tex->base);
          }
       }
+      else {
+         pipe_resource_reference(&setup->fs.current_tex[i], NULL);
+      }
    }
+   setup->fs.current_tex_num = num;
 
    setup->dirty |= LP_SETUP_NEW_FS;
 }
@@ -976,7 +982,7 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
    }
 
    /* check textures referenced by the scene */
-   for (i = 0; i < Elements(setup->scenes); i++) {
+   for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) {
       if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) {
          return LP_REFERENCED_FOR_READ;
       }
@@ -1067,7 +1073,7 @@ try_update_scene_state( struct lp_setup_context *setup )
    }
 
    if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
-      for (i = 0; i < Elements(setup->constants); ++i) {
+      for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
          struct pipe_resource *buffer = setup->constants[i].current.buffer;
          const unsigned current_size = MIN2(setup->constants[i].current.buffer_size,
                                             LP_MAX_TGSI_CONST_BUFFER_SIZE);
@@ -1152,7 +1158,7 @@ try_update_scene_state( struct lp_setup_context *setup )
          /* The scene now references the textures in the rasterization
           * state record.  Note that now.
           */
-         for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+         for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
             if (setup->fs.current_tex[i]) {
                if (!lp_scene_add_resource_reference(scene,
                                                     setup->fs.current_tex[i],
@@ -1207,7 +1213,7 @@ lp_setup_update_state( struct lp_setup_context *setup,
       /* Will probably need to move this somewhere else, just need  
        * to know about vertex shader point size attribute.
        */
-      setup->psize = lp->psize_slot;
+      setup->psize_slot = lp->psize_slot;
       setup->viewport_index_slot = lp->viewport_index_slot;
       setup->layer_slot = lp->layer_slot;
       setup->face_slot = lp->face_slot;
@@ -1269,16 +1275,16 @@ lp_setup_destroy( struct lp_setup_context *setup )
 
    util_unreference_framebuffer_state(&setup->fb);
 
-   for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+   for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
       pipe_resource_reference(&setup->fs.current_tex[i], NULL);
    }
 
-   for (i = 0; i < Elements(setup->constants); i++) {
+   for (i = 0; i < ARRAY_SIZE(setup->constants); i++) {
       pipe_resource_reference(&setup->constants[i].current.buffer, NULL);
    }
 
    /* free the scenes in the 'empty' queue */
-   for (i = 0; i < Elements(setup->scenes); i++) {
+   for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) {
       struct lp_scene *scene = setup->scenes[i];
 
       if (scene->fence)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 98a9d4bc2..98243a12d 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -46,6 +46,9 @@
 
 #if defined(PIPE_ARCH_SSE)
 #include <emmintrin.h>
+#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+#include <altivec.h>
+#include "util/u_pwr8.h"
 #endif
 
 static inline int
@@ -65,11 +68,11 @@ fixed_to_float(int a)
 struct fixed_position {
    int32_t x[4];
    int32_t y[4];
-   int64_t area;
    int32_t dx01;
    int32_t dy01;
    int32_t dx20;
    int32_t dy20;
+   int64_t area;
 };
 
 
@@ -91,12 +94,14 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
    unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
    struct lp_rast_triangle *tri;
 
+   STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);
+
    *tri_size = (sizeof(struct lp_rast_triangle) +
                 3 * input_array_sz +
                 plane_sz);
 
    tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
-   if (tri == NULL)
+   if (!tri)
       return NULL;
 
    tri->inputs.stride = input_array_sz;
@@ -276,6 +281,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
    int nr_planes = 3;
    unsigned viewport_index = 0;
    unsigned layer = 0;
+   const float (*pv)[4];
 
    /* Area should always be positive here */
    assert(position->area > 0);
@@ -283,18 +289,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
    if (0)
       lp_setup_print_triangle(setup, v0, v1, v2);
 
-   if (setup->scissor_test) {
-      nr_planes = 7;
-      if (setup->viewport_index_slot > 0) {
-         unsigned *udata = (unsigned*)v0[setup->viewport_index_slot];
-         viewport_index = lp_clamp_viewport_idx(*udata);
-      }
+   if (setup->flatshade_first) {
+      pv = v0;
    }
    else {
-      nr_planes = 3;
+      pv = v2;
+   }
+   if (setup->viewport_index_slot > 0) {
+      unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
+      viewport_index = lp_clamp_viewport_idx(*udata);
    }
    if (setup->layer_slot > 0) {
-      layer = *(unsigned*)v1[setup->layer_slot];
+      layer = *(unsigned*)pv[setup->layer_slot];
       layer = MIN2(layer, scene->fb_max_layer);
    }
 
@@ -336,6 +342,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
    bbox.x0 = MAX2(bbox.x0, 0);
    bbox.y0 = MAX2(bbox.y0, 0);
 
+   nr_planes = 3;
+   /*
+    * Determine how many scissor planes we need, that is drop scissor
+    * edges if the bounding box of the tri is fully inside that edge.
+    */
+   if (setup->scissor_test) {
+      /* why not just use draw_regions */
+      boolean s_planes[4];
+      scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+      nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+   }
+
    tri = lp_setup_alloc_triangle(scene,
                                  key->num_inputs,
                                  nr_planes,
@@ -356,13 +374,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
    /* Setup parameter interpolants:
     */
-   setup->setup.variant->jit_function( v0,
-				       v1,
-				       v2,
-				       frontfacing,
-				       GET_A0(&tri->inputs),
-				       GET_DADX(&tri->inputs),
-				       GET_DADY(&tri->inputs) );
+   setup->setup.variant->jit_function(v0, v1, v2,
+                                      frontfacing,
+                                      GET_A0(&tri->inputs),
+                                      GET_DADX(&tri->inputs),
+                                      GET_DADY(&tri->inputs));
 
    tri->inputs.frontfacing = frontfacing;
    tri->inputs.disable = FALSE;
@@ -372,32 +388,28 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
    if (0)
       lp_dump_setup_coef(&setup->setup.variant->key,
-			 (const float (*)[4])GET_A0(&tri->inputs),
-			 (const float (*)[4])GET_DADX(&tri->inputs),
-			 (const float (*)[4])GET_DADY(&tri->inputs));
+                         (const float (*)[4])GET_A0(&tri->inputs),
+                         (const float (*)[4])GET_DADX(&tri->inputs),
+                         (const float (*)[4])GET_DADY(&tri->inputs));
 
    plane = GET_PLANES(tri);
 
 #if defined(PIPE_ARCH_SSE)
-   if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
-       setup->fb.height <= MAX_FIXED_LENGTH32 &&
-       (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
-       (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+   if (1) {
       __m128i vertx, verty;
       __m128i shufx, shufy;
-      __m128i dcdx, dcdy, c;
-      __m128i unused;
+      __m128i dcdx, dcdy;
+      __m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
+      __m128i c01, c23, unused;
       __m128i dcdx_neg_mask;
       __m128i dcdy_neg_mask;
       __m128i dcdx_zero_mask;
-      __m128i top_left_flag;
-      __m128i c_inc_mask, c_inc;
+      __m128i top_left_flag, c_dec;
       __m128i eo, p0, p1, p2;
       __m128i zero = _mm_setzero_si128();
-      PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
 
-      vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
-      verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
+      vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
+      verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
 
       shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
       shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
@@ -411,42 +423,161 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
       top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
 
-      c_inc_mask = _mm_or_si128(dcdx_neg_mask,
-                                _mm_and_si128(dcdx_zero_mask,
-                                              _mm_xor_si128(dcdy_neg_mask,
-                                                            top_left_flag)));
-
-      c_inc = _mm_srli_epi32(c_inc_mask, 31);
-
-      c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
-                        mm_mullo_epi32(dcdy, verty));
+      c_dec = _mm_or_si128(dcdx_neg_mask,
+                           _mm_and_si128(dcdx_zero_mask,
+                                         _mm_xor_si128(dcdy_neg_mask,
+                                                       top_left_flag)));
 
-      c = _mm_add_epi32(c, c_inc);
+      /*
+       * 64 bit arithmetic.
+       * Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
+       */
+      cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
+      cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
+      c02 = _mm_sub_epi64(cdx02, cdy02);
+      c13 = _mm_sub_epi64(cdx13, cdy13);
+      c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
+                                                 _MM_SHUFFLE(2,2,0,0)));
+      c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
+                                                 _MM_SHUFFLE(3,3,1,1)));
+
+      /*
+       * Useful for very small fbs/tris (or fewer subpixel bits) only:
+       * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
+       *                   mm_mullo_epi32(dcdy, verty));
+       *
+       * c = _mm_sub_epi32(c, c_dec);
+       */
 
       /* Scale up to match c:
        */
       dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
       dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
 
-      /* Calculate trivial reject values:
+      /*
+       * Calculate trivial reject values:
+       * Note eo cannot overflow even if dcdx/dcdy would already have
+       * 31 bits (which they shouldn't have). This is because eo
+       * is never negative (albeit if we rely on that need to be careful...)
        */
       eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
                          _mm_and_si128(dcdx_neg_mask, dcdx));
 
       /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
 
+      /*
+       * Pointless transpose which gets undone immediately in
+       * rasterization.
+       * It is actually difficult to do away with it - would essentially
+       * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
+       * for this then would need to depend on the number of planes.
+       * The transpose is quite special here due to c being 64bit...
+       * The store has to be unaligned (unless we'd make the plane size
+       * a multiple of 128), and of course storing eo separately...
+       */
+      c01 = _mm_unpacklo_epi64(c02, c13);
+      c23 = _mm_unpackhi_epi64(c02, c13);
+      transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
+                         &p0, &p1, &p2, &unused);
+      _mm_storeu_si128((__m128i *)&plane[0], p0);
+      plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+      _mm_storeu_si128((__m128i *)&plane[1], p1);
+      eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
+      plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+      _mm_storeu_si128((__m128i *)&plane[2], p2);
+      eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
+      plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+   } else
+#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+   /*
+    * XXX this code is effectively disabled for all practical purposes,
+    * as the allowed fb size is tiny if FIXED_ORDER is 8.
+    */
+   if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
+       setup->fb.height <= MAX_FIXED_LENGTH32 &&
+       (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
+       (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+      unsigned int bottom_edge;
+      __m128i vertx, verty;
+      __m128i shufx, shufy;
+      __m128i dcdx, dcdy, c;
+      __m128i unused;
+      __m128i dcdx_neg_mask;
+      __m128i dcdy_neg_mask;
+      __m128i dcdx_zero_mask;
+      __m128i top_left_flag;
+      __m128i c_inc_mask, c_inc;
+      __m128i eo, p0, p1, p2;
+      __m128i_union vshuf_mask;
+      __m128i zero = vec_splats((unsigned char) 0);
+      PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+      vshuf_mask.i[0] = 0x07060504;
+      vshuf_mask.i[1] = 0x0B0A0908;
+      vshuf_mask.i[2] = 0x03020100;
+      vshuf_mask.i[3] = 0x0F0E0D0C;
+#else
+      vshuf_mask.i[0] = 0x00010203;
+      vshuf_mask.i[1] = 0x0C0D0E0F;
+      vshuf_mask.i[2] = 0x04050607;
+      vshuf_mask.i[3] = 0x08090A0B;
+#endif
+
+      /* vertex x coords */
+      vertx = vec_load_si128((const uint32_t *) position->x);
+      /* vertex y coords */
+      verty = vec_load_si128((const uint32_t *) position->y);
+
+      shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
+      shufy = vec_perm (verty, verty, vshuf_mask.m128i);
+
+      dcdx = vec_sub_epi32(verty, shufy);
+      dcdy = vec_sub_epi32(vertx, shufx);
+
+      dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
+      dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
+      dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
+
+      bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
+      top_left_flag = (__m128i) vec_splats(bottom_edge);
+
+      c_inc_mask = vec_or(dcdx_neg_mask,
+                                vec_and(dcdx_zero_mask,
+                                              vec_xor(dcdy_neg_mask,
+                                                            top_left_flag)));
+
+      c_inc = vec_srli_epi32(c_inc_mask, 31);
+
+      c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
+                        vec_mullo_epi32(dcdy, verty));
+
+      c = vec_add_epi32(c, c_inc);
+
+      /* Scale up to match c:
+       */
+      dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
+      dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
+
+      /* Calculate trivial reject values:
+       */
+      eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy),
+                         vec_and(dcdx_neg_mask, dcdx));
+
+      /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+
       /* Pointless transpose which gets undone immediately in
        * rasterization:
        */
       transpose4_epi32(&c, &dcdx, &dcdy, &eo,
                        &p0, &p1, &p2, &unused);
 
-#define STORE_PLANE(plane, vec) do {                 \
-         _mm_store_si128((__m128i *)&temp_vec, vec); \
-         plane.c    = (int64_t)temp_vec[0];          \
-         plane.dcdx = temp_vec[1];                   \
-         plane.dcdy = temp_vec[2];                   \
-         plane.eo   = temp_vec[3];                   \
+#define STORE_PLANE(plane, vec) do {                  \
+         vec_store_si128((uint32_t *)&temp_vec, vec); \
+         plane.c    = (int64_t)temp_vec[0];           \
+         plane.dcdx = temp_vec[1];                    \
+         plane.dcdy = temp_vec[2];                    \
+         plane.eo   = temp_vec[3];                    \
       } while(0)
 
       STORE_PLANE(plane[0], p0);
@@ -465,17 +596,17 @@ do_triangle_ccw(struct lp_setup_context *setup,
       plane[2].dcdx = position->dy20;
   
       for (i = 0; i < 3; i++) {
-         /* half-edge constants, will be interated over the whole render
+         /* half-edge constants, will be iterated over the whole render
           * target.
           */
          plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
-               IMUL64(plane[i].dcdy, position->y[i]);
+                      IMUL64(plane[i].dcdy, position->y[i]);
 
          /* correct for top-left vs. bottom-left fill convention.
-          */         
+          */
          if (plane[i].dcdx < 0) {
             /* both fill conventions want this - adjust for left edges */
-            plane[i].c++;            
+            plane[i].c++;
          }
          else if (plane[i].dcdx == 0) {
             if (setup->bottom_edge_rule == 0){
@@ -509,19 +640,19 @@ do_triangle_ccw(struct lp_setup_context *setup,
    }
 
    if (0) {
-      debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+      debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
                    plane[0].c,
                    plane[0].dcdx,
                    plane[0].dcdy,
                    plane[0].eo);
-      
-      debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+      debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
                    plane[1].c,
                    plane[1].dcdx,
                    plane[1].dcdy,
                    plane[1].eo);
-      
-      debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+      debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
                    plane[2].c,
                    plane[2].dcdx,
                    plane[2].dcdy,
@@ -546,29 +677,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
     * Note that otherwise, the scissor planes only vary in 'C' value,
     * and even then only on state-changes.  Could alternatively store
     * these planes elsewhere.
+    * (Or only store the c value together with a bit indicating which
+    * scissor edge this is, so rasterization would treat them differently
+    * (easier to evaluate) to ordinary planes.)
     */
-   if (nr_planes == 7) {
+   if (nr_planes > 3) {
+      /* why not just use draw_regions */
       const struct u_rect *scissor = &setup->scissors[viewport_index];
-
-      plane[3].dcdx = -1;
-      plane[3].dcdy = 0;
-      plane[3].c = 1-scissor->x0;
-      plane[3].eo = 1;
-
-      plane[4].dcdx = 1;
-      plane[4].dcdy = 0;
-      plane[4].c = scissor->x1+1;
-      plane[4].eo = 0;
-
-      plane[5].dcdx = 0;
-      plane[5].dcdy = 1;
-      plane[5].c = 1-scissor->y0;
-      plane[5].eo = 1;
-
-      plane[6].dcdx = 0;
-      plane[6].dcdy = -1;
-      plane[6].c = scissor->y1+1;
-      plane[6].eo = 0;
+      struct lp_rast_plane *plane_s = &plane[3];
+      boolean s_planes[4];
+      scissor_planes_needed(s_planes, &bbox, scissor);
+
+      if (s_planes[0]) {
+         plane_s->dcdx = -1 << 8;
+         plane_s->dcdy = 0;
+         plane_s->c = (1-scissor->x0) << 8;
+         plane_s->eo = 1 << 8;
+         plane_s++;
+      }
+      if (s_planes[1]) {
+         plane_s->dcdx = 1 << 8;
+         plane_s->dcdy = 0;
+         plane_s->c = (scissor->x1+1) << 8;
+         plane_s->eo = 0 << 8;
+         plane_s++;
+      }
+      if (s_planes[2]) {
+         plane_s->dcdx = 0;
+         plane_s->dcdy = 1 << 8;
+         plane_s->c = (1-scissor->y0) << 8;
+         plane_s->eo = 1 << 8;
+         plane_s++;
+      }
+      if (s_planes[3]) {
+         plane_s->dcdx = 0;
+         plane_s->dcdy = -1 << 8;
+         plane_s->c = (scissor->y1+1) << 8;
+         plane_s->eo = 0;
+         plane_s++;
+      }
+      assert(plane_s == &plane[nr_planes]);
    }
 
    return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
@@ -582,7 +730,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
 static inline uint32_t 
 floor_pot(uint32_t n)
 {
-#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
    if (n == 0)
       return 0;
 
@@ -730,9 +878,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
 
          ei[i] = (plane[i].dcdy - 
                   plane[i].dcdx - 
-                  plane[i].eo) << TILE_ORDER;
+                  (int64_t)plane[i].eo) << TILE_ORDER;
 
-         eo[i] = plane[i].eo << TILE_ORDER;
+         eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
          xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
          ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
       }
@@ -840,29 +988,70 @@ static void retry_triangle_ccw( struct lp_setup_context *setup,
 
 /**
  * Calculate fixed position data for a triangle
+ * It is unfortunate we need to do that here (as we need area
+ * calculated in fixed point), as there's quite some code duplication
+ * to what is done in the jit setup prog.
  */
 static inline void
-calc_fixed_position( struct lp_setup_context *setup,
-                     struct fixed_position* position,
-                     const float (*v0)[4],
-                     const float (*v1)[4],
-                     const float (*v2)[4])
+calc_fixed_position(struct lp_setup_context *setup,
+                    struct fixed_position* position,
+                    const float (*v0)[4],
+                    const float (*v1)[4],
+                    const float (*v2)[4])
 {
+   /*
+    * The rounding may not be quite the same with PIPE_ARCH_SSE
+    * (util_iround right now only does nearest/even on x87,
+    * otherwise nearest/away-from-zero).
+    * Both should be acceptable, I think.
+    */
+#if defined(PIPE_ARCH_SSE)
+   __m128 v0r, v1r;
+   __m128 vxy0xy2, vxy1xy0;
+   __m128i vxy0xy2i, vxy1xy0i;
+   __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
+   __m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
+   __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
+   v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+   vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+   v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+   vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
+   vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
+   vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
+   vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
+   vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one);
+   vxy0xy2i = _mm_cvtps_epi32(vxy0xy2);
+   vxy1xy0i = _mm_cvtps_epi32(vxy1xy0);
+   dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i);
+   _mm_store_si128((__m128i *)&position->dx01, dxdy0120);
+   /*
+    * For the mul, would need some more shuffles, plus emulation
+    * for the signed mul (without sse41), so don't bother.
+    */
+   x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0));
+   x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0));
+   x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0);
+   y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0);
+   _mm_store_si128((__m128i *)&position->x[0], x0120);
+   _mm_store_si128((__m128i *)&position->y[0], y0120);
+
+#else
    position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
    position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
    position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
-   position->x[3] = 0;
+   position->x[3] = 0; // should be unused
 
    position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
    position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
    position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
-   position->y[3] = 0;
+   position->y[3] = 0; // should be unused
 
    position->dx01 = position->x[0] - position->x[1];
    position->dy01 = position->y[0] - position->y[1];
 
    position->dx20 = position->x[2] - position->x[0];
    position->dy20 = position->y[2] - position->y[0];
+#endif
 
    position->area = IMUL64(position->dx01, position->dy20) -
          IMUL64(position->dx20, position->dy01);
@@ -924,12 +1113,12 @@ rotate_fixed_position_12( struct fixed_position* position )
 /**
  * Draw triangle if it's CW, cull otherwise.
  */
-static void triangle_cw( struct lp_setup_context *setup,
-			 const float (*v0)[4],
-			 const float (*v1)[4],
-			 const float (*v2)[4] )
+static void triangle_cw(struct lp_setup_context *setup,
+                        const float (*v0)[4],
+                        const float (*v1)[4],
+                        const float (*v2)[4])
 {
-   struct fixed_position position;
+   PIPE_ALIGN_VAR(16) struct fixed_position position;
 
    calc_fixed_position(setup, &position, v0, v1, v2);
 
@@ -945,12 +1134,12 @@ static void triangle_cw( struct lp_setup_context *setup,
 }
 
 
-static void triangle_ccw( struct lp_setup_context *setup,
-                          const float (*v0)[4],
-                          const float (*v1)[4],
-                          const float (*v2)[4])
+static void triangle_ccw(struct lp_setup_context *setup,
+                         const float (*v0)[4],
+                         const float (*v1)[4],
+                         const float (*v2)[4])
 {
-   struct fixed_position position;
+   PIPE_ALIGN_VAR(16) struct fixed_position position;
 
    calc_fixed_position(setup, &position, v0, v1, v2);
 
@@ -961,12 +1150,12 @@ static void triangle_ccw( struct lp_setup_context *setup,
 /**
  * Draw triangle whether it's CW or CCW.
  */
-static void triangle_both( struct lp_setup_context *setup,
-			   const float (*v0)[4],
-			   const float (*v1)[4],
-			   const float (*v2)[4] )
+static void triangle_both(struct lp_setup_context *setup,
+                          const float (*v0)[4],
+                          const float (*v1)[4],
+                          const float (*v2)[4])
 {
-   struct fixed_position position;
+   PIPE_ALIGN_VAR(16) struct fixed_position position;
    struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
 
    if (lp_context->active_statistics_queries &&
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
index fd6c49aac..3428eed4e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -134,7 +134,7 @@ generate_quad_mask(struct gallivm_state *gallivm,
     * XXX: We'll need a different path for 16 x u8
     */
    assert(fs_type.width == 32);
-   assert(fs_type.length <= Elements(bits));
+   assert(fs_type.length <= ARRAY_SIZE(bits));
    mask_type = lp_int_type(fs_type);
 
    /*
@@ -238,6 +238,54 @@ lp_llvm_viewport(LLVMValueRef context_ptr,
 }
 
 
+static LLVMValueRef
+lp_build_depth_clamp(struct gallivm_state *gallivm,
+                     LLVMBuilderRef builder,
+                     struct lp_type type,
+                     LLVMValueRef context_ptr,
+                     LLVMValueRef thread_data_ptr,
+                     LLVMValueRef z)
+{
+   LLVMValueRef viewport, min_depth, max_depth;
+   LLVMValueRef viewport_index;
+   struct lp_build_context f32_bld;
+
+   assert(type.floating);
+   lp_build_context_init(&f32_bld, gallivm, type);
+
+   /*
+    * Assumes clamping of the viewport index will occur in setup/gs. Value
+    * is passed through the rasterization stage via lp_rast_shader_inputs.
+    *
+    * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping
+    *      semantics.
+    */
+   viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm,
+                       thread_data_ptr);
+
+   /*
+    * Load the min and max depth from the lp_jit_context.viewports
+    * array of lp_jit_viewport structures.
+    */
+   viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index);
+
+   /* viewports[viewport_index].min_depth */
+   min_depth = LLVMBuildExtractElement(builder, viewport,
+                  lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH), "");
+   min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth);
+
+   /* viewports[viewport_index].max_depth */
+   max_depth = LLVMBuildExtractElement(builder, viewport,
+                  lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH), "");
+   max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth);
+
+   /*
+    * Clamp to the min and max depth values for the given viewport.
+    */
+   return lp_build_clamp(&f32_bld, z, min_depth, max_depth);
+}
+
+
 /**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
  */
@@ -383,6 +431,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
    z = interp->pos[2];
 
    if (depth_mode & EARLY_DEPTH_TEST) {
+      /*
+       * Clamp according to ARB_depth_clamp semantics.
+       */
+      if (key->depth_clamp) {
+         z = lp_build_depth_clamp(gallivm, builder, type, context_ptr,
+                                  thread_data_ptr, z);
+      }
       lp_build_depth_stencil_load_swizzled(gallivm, type,
                                            zs_format_desc, key->resource_1d,
                                            depth_ptr, depth_stride,
@@ -421,7 +476,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
                      consts_ptr, num_consts_ptr, &system_values,
                      interp->inputs,
-                     outputs, context_ptr,
+                     outputs, context_ptr, thread_data_ptr,
                      sampler, &shader->info.base, NULL);
 
    /* Alpha test */
@@ -471,51 +526,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                           0);
       if (pos0 != -1 && outputs[pos0][2]) {
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
-
-         /*
-          * Clamp according to ARB_depth_clamp semantics.
-          */
-         if (key->depth_clamp) {
-            LLVMValueRef viewport, min_depth, max_depth;
-            LLVMValueRef viewport_index;
-            struct lp_build_context f32_bld;
-
-            assert(type.floating);
-            lp_build_context_init(&f32_bld, gallivm, type);
-
-            /*
-             * Assumes clamping of the viewport index will occur in setup/gs. Value
-             * is passed through the rasterization stage via lp_rast_shader_inputs.
-             *
-             * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping
-             *      semantics.
-             */
-            viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm,
-                                thread_data_ptr);
-
-            /*
-             * Load the min and max depth from the lp_jit_context.viewports
-             * array of lp_jit_viewport structures.
-             */
-            viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index);
-
-            /* viewports[viewport_index].min_depth */
-            min_depth = LLVMBuildExtractElement(builder, viewport,
-                           lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH),
-                           "");
-            min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth);
-
-            /* viewports[viewport_index].max_depth */
-            max_depth = LLVMBuildExtractElement(builder, viewport,
-                           lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH),
-                           "");
-            max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth);
-
-            /*
-             * Clamp to the min and max depth values for the given viewport.
-             */
-            z = lp_build_clamp(&f32_bld, z, min_depth, max_depth);
-         }
+      }
+      /*
+       * Clamp according to ARB_depth_clamp semantics.
+       */
+      if (key->depth_clamp) {
+         z = lp_build_depth_clamp(gallivm, builder, type, context_ptr,
+                                  thread_data_ptr, z);
       }
 
       if (s_out != -1 && outputs[s_out][1]) {
@@ -646,7 +663,7 @@ generate_fs_twiddle(struct gallivm_state *gallivm,
    src_count = num_fs * src_channels;
 
    assert(pixels == 2 || pixels == 1);
-   assert(num_fs * src_channels <= Elements(src));
+   assert(num_fs * src_channels <= ARRAY_SIZE(src));
 
    /*
     * Transpose from SoA -> AoS
@@ -786,7 +803,7 @@ load_unswizzled_block(struct gallivm_state *gallivm,
 
       dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
 
-      lp_set_load_alignment(dst[i], dst_alignment);
+      LLVMSetAlignment(dst[i], dst_alignment);
    }
 }
 
@@ -830,7 +847,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
 
       src_ptr = LLVMBuildStore(builder, src[i], src_ptr);
 
-      lp_set_store_alignment(src_ptr, src_alignment);
+      LLVMSetAlignment(src_ptr, src_alignment);
    }
 }
 
@@ -1601,7 +1618,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS];
    LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS];
    LLVMValueRef src_alpha[4 * 4];
-   LLVMValueRef src1_alpha[4 * 4];
+   LLVMValueRef src1_alpha[4 * 4] = { NULL };
    LLVMValueRef src_mask[4 * 4];
    LLVMValueRef src[4 * 4];
    LLVMValueRef src1[4 * 4];
@@ -2267,7 +2284,7 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[12] = int32_type;                         /* depth_stride */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
-                                arg_types, Elements(arg_types), 0);
+                                arg_types, ARRAY_SIZE(arg_types), 0);
 
    function = LLVMAddFunction(gallivm->module, func_name, func_type);
    LLVMSetFunctionCallConv(function, LLVMCCallConv);
@@ -2277,7 +2294,7 @@ generate_fragment(struct llvmpipe_context *lp,
    /* XXX: need to propagate noalias down into color param now we are
     * passing a pointer-to-pointer?
     */
-   for(i = 0; i < Elements(arg_types); ++i)
+   for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
       if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
          LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
 
@@ -2303,8 +2320,8 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(dady_ptr, "dady");
    lp_build_name(color_ptr_ptr, "color_ptr_ptr");
    lp_build_name(depth_ptr, "depth");
-   lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(mask_input, "mask_input");
+   lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(stride_ptr, "stride_ptr");
    lp_build_name(depth_stride, "depth_stride");
 
@@ -2344,6 +2361,7 @@ generate_fragment(struct llvmpipe_context *lp,
                                shader->info.base.num_inputs,
                                inputs,
                                pixel_center_integer,
+                               key->depth_clamp,
                                builder, fs_type,
                                a0_ptr, dadx_ptr, dady_ptr,
                                x, y);
@@ -2563,7 +2581,7 @@ generate_variant(struct llvmpipe_context *lp,
    char module_name[64];
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
-   if(!variant)
+   if (!variant)
       return NULL;
 
    util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
@@ -2695,34 +2713,35 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
 
       switch (shader->info.base.input_interpolate[i]) {
       case TGSI_INTERPOLATE_CONSTANT:
-	 shader->inputs[i].interp = LP_INTERP_CONSTANT;
-	 break;
+         shader->inputs[i].interp = LP_INTERP_CONSTANT;
+         break;
       case TGSI_INTERPOLATE_LINEAR:
-	 shader->inputs[i].interp = LP_INTERP_LINEAR;
-	 break;
+         shader->inputs[i].interp = LP_INTERP_LINEAR;
+         break;
       case TGSI_INTERPOLATE_PERSPECTIVE:
-	 shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
-	 break;
+         shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+         break;
       case TGSI_INTERPOLATE_COLOR:
-	 shader->inputs[i].interp = LP_INTERP_COLOR;
-	 break;
+         shader->inputs[i].interp = LP_INTERP_COLOR;
+         break;
       default:
-	 assert(0);
-	 break;
+         assert(0);
+         break;
       }
 
       switch (shader->info.base.input_semantic_name[i]) {
       case TGSI_SEMANTIC_FACE:
-	 shader->inputs[i].interp = LP_INTERP_FACING;
-	 break;
+         shader->inputs[i].interp = LP_INTERP_FACING;
+         break;
       case TGSI_SEMANTIC_POSITION:
-	 /* Position was already emitted above
-	  */
-	 shader->inputs[i].interp = LP_INTERP_POSITION;
-	 shader->inputs[i].src_index = 0;
-	 continue;
+         /* Position was already emitted above
+          */
+         shader->inputs[i].interp = LP_INTERP_POSITION;
+         shader->inputs[i].src_index = 0;
+         continue;
       }
 
+      /* XXX this is a completely pointless index map... */
       shader->inputs[i].src_index = i+1;
    }
 
@@ -2835,17 +2854,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 static void
 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
                              uint shader, uint index,
-                             struct pipe_constant_buffer *cb)
+                             const struct pipe_constant_buffer *cb)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    struct pipe_resource *constants = cb ? cb->buffer : NULL;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(index < Elements(llvmpipe->constants[shader]));
+   assert(index < ARRAY_SIZE(llvmpipe->constants[shader]));
 
    /* note: reference counting */
    util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb);
 
+   if (constants) {
+       if (!(constants->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+         debug_printf("Illegal set constant without bind flag\n");
+         constants->bind |= PIPE_BIND_CONSTANT_BUFFER;
+      }
+   }
+
    if (shader == PIPE_SHADER_VERTEX ||
        shader == PIPE_SHADER_GEOMETRY) {
       /* Pass the constants to the 'draw' module */
@@ -2868,8 +2894,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
       draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
                                       index, data, size);
    }
-
-   llvmpipe->dirty |= LP_NEW_CONSTANTS;
+   else {
+      llvmpipe->dirty |= LP_NEW_FS_CONSTANTS;
+   }
 
    if (cb && cb->user_buffer) {
       pipe_resource_reference(&constants, NULL);
@@ -2940,6 +2967,13 @@ make_variant_key(struct llvmpipe_context *lp,
     * depth_clip == 0 implies depth clamping is enabled.
     *
     * When clip_halfz is enabled, then always clamp the depth values.
+    *
+    * XXX: This is incorrect for GL, but correct for d3d10 (depth
+    * clamp is always active in d3d10, regardless if depth clip is
+    * enabled or not).
+    * (GL has an always-on [0,1] clamp on fs depth output instead
+    * to ensure the depth values stay in range. Doesn't look like
+    * we do that, though...)
     */
    if (lp->rasterizer->clip_halfz) {
       key->depth_clamp = 1;
@@ -3026,7 +3060,7 @@ make_variant_key(struct llvmpipe_context *lp,
           * Also, force rgb/alpha func/factors match, to make AoS blending
           * easier.
           */
-         if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
+         if (format_desc->swizzle[3] > PIPE_SWIZZLE_W ||
              format_desc->swizzle[3] == format_desc->swizzle[0]) {
             /* Doesn't cover mixed snorm/unorm but can't render to them anyway */
             boolean clamped_zero = !util_format_is_float(format) &&
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index b205f02fd..01af05211 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -66,7 +66,7 @@ llvmpipe_create_sampler_state(struct pipe_context *pipe,
 
 static void
 llvmpipe_bind_sampler_states(struct pipe_context *pipe,
-                             unsigned shader,
+                             enum pipe_shader_type shader,
                              unsigned start,
                              unsigned num,
                              void **samplers)
@@ -75,7 +75,7 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
    unsigned i;
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(llvmpipe->samplers[shader]));
+   assert(start + num <= ARRAY_SIZE(llvmpipe->samplers[shader]));
 
    draw_flush(llvmpipe->draw);
 
@@ -98,14 +98,15 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
                         llvmpipe->samplers[shader],
                         llvmpipe->num_samplers[shader]);
    }
-
-   llvmpipe->dirty |= LP_NEW_SAMPLER;
+   else {
+      llvmpipe->dirty |= LP_NEW_SAMPLER;
+   }
 }
 
 
 static void
 llvmpipe_set_sampler_views(struct pipe_context *pipe,
-                           unsigned shader,
+                           enum pipe_shader_type shader,
                            unsigned start,
                            unsigned num,
                            struct pipe_sampler_view **views)
@@ -116,7 +117,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
 
    assert(shader < PIPE_SHADER_TYPES);
-   assert(start + num <= Elements(llvmpipe->sampler_views[shader]));
+   assert(start + num <= ARRAY_SIZE(llvmpipe->sampler_views[shader]));
 
    draw_flush(llvmpipe->draw);
 
@@ -128,6 +129,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
        */
       pipe_sampler_view_release(pipe,
                                 &llvmpipe->sampler_views[shader][start + i]);
+      /*
+       * Warn if someone tries to set a view created in a different context
+       * (which is why we need the hack above in the first place).
+       * An assert would be better but st/mesa relies on it...
+       */
+      if (views[i] && views[i]->context != pipe) {
+         debug_printf("Illegal setting of sampler_view %d created in another "
+                      "context\n", i);
+      }
       pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
                                   views[i]);
    }
@@ -146,8 +156,9 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
                              llvmpipe->sampler_views[shader],
                              llvmpipe->num_sampler_views[shader]);
    }
-
-   llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+   else {
+      llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+   }
 }
 
 
@@ -158,11 +169,13 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe,
 {
    struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
    /*
-    * XXX we REALLY want to see the correct bind flag here but the OpenGL
-    * state tracker can't guarantee that at least for texture buffer objects.
+    * XXX: bind flags from OpenGL state tracker are notoriously unreliable.
+    * This looks unfixable, so fix the bind flags instead when it happens.
     */
-   if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW))
+   if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW)) {
       debug_printf("Illegal sampler view creation without bind flag\n");
+      texture->bind |= PIPE_BIND_SAMPLER_VIEW;
+   }
 
    if (view) {
       *view = *templ;
@@ -228,8 +241,7 @@ prepare_shader_sampling(
    struct llvmpipe_context *lp,
    unsigned num,
    struct pipe_sampler_view **views,
-   unsigned shader_type,
-   struct pipe_resource *mapped_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS])
+   unsigned shader_type)
 {
 
    unsigned i;
@@ -242,7 +254,7 @@ prepare_shader_sampling(
    if (!num)
       return;
 
-   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+   for (i = 0; i < num; i++) {
       struct pipe_sampler_view *view = i < num ? views[i] : NULL;
 
       if (view) {
@@ -253,11 +265,6 @@ prepare_shader_sampling(
          unsigned first_level = 0;
          unsigned last_level = 0;
 
-         /* We're referencing the texture's internal data, so save a
-          * reference to it.
-          */
-         pipe_resource_reference(&mapped_tex[i], tex);
-
          if (!lp_tex->dt) {
             /* regular texture - setup array of mipmap level offsets */
             struct pipe_resource *res = view->texture;
@@ -275,10 +282,10 @@ prepare_shader_sampling(
                   row_stride[j] = lp_tex->row_stride[j];
                   img_stride[j] = lp_tex->img_stride[j];
                }
-               if (view->target == PIPE_TEXTURE_1D_ARRAY ||
-                   view->target == PIPE_TEXTURE_2D_ARRAY ||
-                   view->target == PIPE_TEXTURE_CUBE ||
-                   view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+               if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
+                   tex->target == PIPE_TEXTURE_2D_ARRAY ||
+                   tex->target == PIPE_TEXTURE_CUBE ||
+                   tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
                   num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
                   for (j = first_level; j <= last_level; j++) {
                      mip_offsets[j] += view->u.tex.first_layer *
@@ -301,11 +308,9 @@ prepare_shader_sampling(
                img_stride[0] = 0;
 
                /* everything specified in number of elements here. */
-               width0 = view->u.buf.last_element - view->u.buf.first_element + 1;
-               addr = (uint8_t *)addr + view->u.buf.first_element *
-                               view_blocksize;
-               assert(view->u.buf.first_element <= view->u.buf.last_element);
-               assert(view->u.buf.last_element * view_blocksize < res->width0);
+               width0 = view->u.buf.size / view_blocksize;
+               addr = (uint8_t *)addr + view->u.buf.offset;
+               assert(view->u.buf.offset + view->u.buf.size <= res->width0);
             }
          }
          else {
@@ -335,47 +340,28 @@ prepare_shader_sampling(
 
 
 /**
- * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
  */
 void
 llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
                                  unsigned num,
                                  struct pipe_sampler_view **views)
 {
-   prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX,
-                           lp->mapped_vs_tex);
-}
-
-void
-llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx)
-{
-   unsigned i;
-   for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) {
-      pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL);
-   }
+   prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX);
 }
 
 
 /**
- * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
  */
 void
 llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp,
                                    unsigned num,
                                    struct pipe_sampler_view **views)
 {
-   prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY,
-                           lp->mapped_gs_tex);
+   prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY);
 }
 
-void
-llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx)
-{
-   unsigned i;
-   for (i = 0; i < Elements(ctx->mapped_gs_tex); i++) {
-      pipe_resource_reference(&ctx->mapped_gs_tex[i], NULL);
-   }
-}
 
 void
 llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 6397b5196..a57e2f04b 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -135,8 +135,8 @@ emit_facing_coef(struct gallivm_state *gallivm,
    LLVMValueRef a0_0 = args->facing;
    LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
    LLVMValueRef a0, face_val;
-   const unsigned char swizzles[4] = { PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ZERO,
-                                       PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ZERO };
+   const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
+                                       PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
    /* Our face val is either 1 or 0 so we do
     * face = (val * 2) - 1
     * to make it 1 or -1
@@ -723,7 +723,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
       goto fail;
 
    variant = CALLOC_STRUCT(lp_setup_variant);
-   if (variant == NULL)
+   if (!variant)
       goto fail;
 
    variant->no = setup_no++;
@@ -760,7 +760,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
    arg_types[6] = LLVMPointerType(vec4f_type, 0);	/* dady, aligned */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
-                                arg_types, Elements(arg_types), 0);
+                                arg_types, ARRAY_SIZE(arg_types), 0);
 
    variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
    if (!variant->function)
@@ -791,7 +791,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
                                          variant->function, "entry");
    LLVMPositionBuilderAtEnd(builder, block);
 
-   set_noalias(builder, variant->function, arg_types, Elements(arg_types));
+   set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
    init_args(gallivm, &variant->key, &args);
    emit_tri_coef(gallivm, &variant->key, &args);
 
@@ -848,14 +848,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
    key->size = Offset(struct lp_setup_variant_key,
                       inputs[key->num_inputs]);
 
-   key->color_slot  = lp->color_slot [0];
+   key->color_slot = lp->color_slot[0];
    key->bcolor_slot = lp->bcolor_slot[0];
-   key->spec_slot   = lp->color_slot [1];
-   key->bspec_slot  = lp->bcolor_slot[1];
-   assert(key->color_slot  == lp->color_slot [0]);
-   assert(key->bcolor_slot == lp->bcolor_slot[0]);
-   assert(key->spec_slot   == lp->color_slot [1]);
-   assert(key->bspec_slot  == lp->bcolor_slot[1]);
+   key->spec_slot = lp->color_slot[1];
+   key->bspec_slot = lp->bcolor_slot[1];
 
    /*
     * If depth is floating point, depth bias is calculated with respect
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
index 96f8ed82c..784db7f73 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -131,8 +131,15 @@ llvmpipe_create_surface(struct pipe_context *pipe,
 {
    struct pipe_surface *ps;
 
-   if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET)))
+   if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET))) {
       debug_printf("Illegal surface creation without bind flag\n");
+      if (util_format_is_depth_or_stencil(surf_tmpl->format)) {
+         pt->bind |= PIPE_BIND_DEPTH_STENCIL;
+      }
+      else {
+         pt->bind |= PIPE_BIND_RENDER_TARGET;
+      }
+   }
 
    ps = CALLOC_STRUCT(pipe_surface);
    if (ps) {
@@ -183,11 +190,12 @@ llvmpipe_clear_render_target(struct pipe_context *pipe,
                              struct pipe_surface *dst,
                              const union pipe_color_union *color,
                              unsigned dstx, unsigned dsty,
-                             unsigned width, unsigned height)
+                             unsigned width, unsigned height,
+                             bool render_condition_enabled)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
-   if (!llvmpipe_check_render_cond(llvmpipe))
+   if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
       return;
 
    util_clear_render_target(pipe, dst, color,
@@ -202,11 +210,12 @@ llvmpipe_clear_depth_stencil(struct pipe_context *pipe,
                              double depth,
                              unsigned stencil,
                              unsigned dstx, unsigned dsty,
-                             unsigned width, unsigned height)
+                             unsigned width, unsigned height,
+                             bool render_condition_enabled)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
-   if (!llvmpipe_check_render_cond(llvmpipe))
+   if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
       return;
 
    util_clear_depth_stencil(pipe, dst, clear_flags,
author	Jonathan Gray <jsg@cvs.openbsd.org>	2017-08-26 16:59:42 +0000
committer	Jonathan Gray <jsg@cvs.openbsd.org>	2017-08-26 16:59:42 +0000
commit	81ece42815e80818f160cdd85fab57d65b56ad15 (patch)
tree	1059ff094da1aa50334115952fcb1cfcbda3acc6 /lib/mesa/src/gallium/drivers/llvmpipe
parent	b0244145d5bb49623d58f6b5cab8143ada692b60 (diff)