summary | refs | log | tree | commit | diff
path: root/lib/mesa/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
author: Jonathan Gray <jsg@cvs.openbsd.org> 2017-08-26 16:59:42 +0000
committer: Jonathan Gray <jsg@cvs.openbsd.org> 2017-08-26 16:59:42 +0000
commit: 81ece42815e80818f160cdd85fab57d65b56ad15 (patch)
tree: 1059ff094da1aa50334115952fcb1cfcbda3acc6 /lib/mesa/src/gallium/drivers/llvmpipe
parent: b0244145d5bb49623d58f6b5cab8143ada692b60 (diff)
Revert to Mesa 13.0.6 to hopefully address rendering issues a handful of
people have reported with xpdf/fvwm on ivy bridge with modesetting driver.
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am | 4
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in | 53
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c | 8
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c | 13
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c | 13
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c | 52
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c | 2
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c | 70
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c | 84
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c | 395
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c | 188
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c | 88
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c | 20
-rw-r--r-- lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c | 19
15 files changed, 670 insertions, 347 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
index 1d3853e41..85ae0ae13 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
@@ -26,11 +26,11 @@ include $(top_srcdir)/src/gallium/Automake.inc
AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CFLAGS) \
- $(MSVC2008_COMPAT_CFLAGS)
+ $(MSVC2013_COMPAT_CFLAGS)
AM_CXXFLAGS= \
$(GALLIUM_DRIVER_CXXFLAGS) \
$(LLVM_CXXFLAGS) \
- $(MSVC2008_COMPAT_CXXFLAGS)
+ $(MSVC2013_COMPAT_CXXFLAGS)
noinst_LTLIBRARIES = libllvmpipe.la
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
index 27a6693d9..0a7486d64 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
@@ -78,13 +78,10 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
$(top_srcdir)/bin/depcomp \
$(top_srcdir)/src/gallium/Automake.inc
-@HAVE_LIBDRM_TRUE@am__append_1 = \
-@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS)
-
-@HAVE_DRISW_TRUE@am__append_2 = \
+@HAVE_DRISW_TRUE@am__append_1 = \
@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
-@HAVE_DRISW_KMS_TRUE@am__append_3 = \
+@HAVE_DRISW_KMS_TRUE@am__append_2 = \
@HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
@HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS)
@@ -139,8 +136,7 @@ am__DEPENDENCIES_1 =
am__DEPENDENCIES_2 = libllvmpipe.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
- $(am__DEPENDENCIES_1)
+ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
lp_test_arit_DEPENDENCIES = $(am__DEPENDENCIES_2)
am_lp_test_blend_OBJECTS = lp_test_blend.$(OBJEXT) \
lp_test_main.$(OBJEXT)
@@ -238,8 +234,6 @@ AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
AMDGPU_LIBS = @AMDGPU_LIBS@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
-ANDROID_CFLAGS = @ANDROID_CFLAGS@
-ANDROID_LIBS = @ANDROID_LIBS@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
@@ -270,6 +264,8 @@ DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
+DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
@@ -282,11 +278,10 @@ ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
+EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGREP = @EGREP@
-ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@
-ETNAVIV_LIBS = @ETNAVIV_LIBS@
EXEEXT = @EXEEXT@
EXPAT_CFLAGS = @EXPAT_CFLAGS@
EXPAT_LIBS = @EXPAT_LIBS@
@@ -334,27 +329,31 @@ LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBELF_CFLAGS = @LIBELF_CFLAGS@
LIBELF_LIBS = @LIBELF_LIBS@
-LIBGLVND_DATADIR = @LIBGLVND_DATADIR@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
-LIBSENSORS_LIBS = @LIBSENSORS_LIBS@
+LIBSENSORS_LDFLAGS = @LIBSENSORS_LDFLAGS@
+LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@
+LIBSHA1_LIBS = @LIBSHA1_LIBS@
LIBTOOL = @LIBTOOL@
-LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@
-LIBUNWIND_LIBS = @LIBUNWIND_LIBS@
LIB_DIR = @LIB_DIR@
LIB_EXT = @LIB_EXT@
LIPO = @LIPO@
+LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
+LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAINT = @MAINT@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
+MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
@@ -375,6 +374,8 @@ OMX_LIBS = @OMX_LIBS@
OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENCL_VERSION = @OPENCL_VERSION@
+OPENSSL_CFLAGS = @OPENSSL_CFLAGS@
+OPENSSL_LIBS = @OPENSSL_LIBS@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
@@ -394,6 +395,8 @@ PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
+PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
+PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@
PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@
PTHREAD_CC = @PTHREAD_CC@
@@ -409,6 +412,8 @@ SED = @SED@
SELINUX_CFLAGS = @SELINUX_CFLAGS@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
+SHA1_CFLAGS = @SHA1_CFLAGS@
+SHA1_LIBS = @SHA1_LIBS@
SHELL = @SHELL@
SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@
SIMPENROSE_LIBS = @SIMPENROSE_LIBS@
@@ -417,6 +422,7 @@ STRIP = @STRIP@
SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@
SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@
SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@
+TIMESTAMP_CMD = @TIMESTAMP_CMD@
VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
VALGRIND_LIBS = @VALGRIND_LIBS@
VA_CFLAGS = @VA_CFLAGS@
@@ -432,6 +438,7 @@ VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
+VG_LIB_DEPS = @VG_LIB_DEPS@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
VL_CFLAGS = @VL_CFLAGS@
@@ -460,10 +467,9 @@ XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
+XXD = @XXD@
YACC = @YACC@
YFLAGS = @YFLAGS@
-ZLIB_CFLAGS = @ZLIB_CFLAGS@
-ZLIB_LIBS = @ZLIB_LIBS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
@@ -643,8 +649,12 @@ GALLIUM_TARGET_CFLAGS = \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
-GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \
- $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1)
+GALLIUM_COMMON_LIB_DEPS = \
+ -lm \
+ $(CLOCK_LIB) \
+ $(PTHREAD_LIBS) \
+ $(DLOPEN_LIBS)
+
GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
@@ -656,7 +666,7 @@ GALLIUM_WINSYS_CFLAGS = \
GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
- $(am__append_2) $(am__append_3)
+ $(am__append_1) $(am__append_2)
AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CFLAGS) \
@@ -678,8 +688,7 @@ TEST_LIBS = \
$(top_builddir)/src/util/libmesautil.la \
$(LLVM_LIBS) \
$(DLOPEN_LIBS) \
- $(PTHREAD_LIBS) \
- $(CLOCK_LIB)
+ $(PTHREAD_LIBS)
lp_test_format_SOURCES = lp_test_format.c lp_test_main.c
lp_test_format_LDADD = $(TEST_LIBS)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 564e19a15..a57670d49 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -255,13 +255,13 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
LLVMValueRef rgb_factor_, alpha_factor_;
enum lp_build_blend_swizzle rgb_swizzle;
- if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) {
+ if (alpha_swizzle == PIPE_SWIZZLE_X && num_channels == 1) {
return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
}
rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
- if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
+ if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
@@ -312,7 +312,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
struct lp_build_blend_aos_context bld;
LLVMValueRef src_factor, dst_factor;
LLVMValueRef result;
- unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
+ unsigned alpha_swizzle = PIPE_SWIZZLE_NONE;
unsigned i;
desc = util_format_description(cbuf_format);
@@ -370,7 +370,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
rgb_alpha_same,
false);
- if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
+ if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) {
LLVMValueRef alpha;
alpha = lp_build_blend(&bld.base,
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index b25e04137..0c27c2f89 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -359,7 +359,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc,
z_swizzle = format_desc->swizzle[0];
- if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ if (z_swizzle == PIPE_SWIZZLE_NONE)
return FALSE;
*width = format_desc->channel[z_swizzle].size;
@@ -390,7 +390,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
s_swizzle = format_desc->swizzle[1];
- if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ if (s_swizzle == PIPE_SWIZZLE_NONE)
return FALSE;
/* just special case 64bit d/s format */
@@ -873,8 +873,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
const unsigned z_swizzle = format_desc->swizzle[0];
const unsigned s_swizzle = format_desc->swizzle[1];
- assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
- s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+ assert(z_swizzle != PIPE_SWIZZLE_NONE ||
+ s_swizzle != PIPE_SWIZZLE_NONE);
assert(depth->enabled || stencil[0].enabled);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
index 80cb6578b..84912c6f1 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
@@ -73,20 +73,20 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
- for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
}
- for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
}
- for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) {
pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL);
}
- for (i = 0; i < Elements(llvmpipe->constants); i++) {
- for (j = 0; j < Elements(llvmpipe->constants[i]); j++) {
+ for (i = 0; i < ARRAY_SIZE(llvmpipe->constants); i++) {
+ for (j = 0; j < ARRAY_SIZE(llvmpipe->constants[i]); j++) {
pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL);
}
}
@@ -128,7 +128,8 @@ llvmpipe_render_condition ( struct pipe_context *pipe,
}
struct pipe_context *
-llvmpipe_create_context( struct pipe_screen *screen, void *priv )
+llvmpipe_create_context(struct pipe_screen *screen, void *priv,
+ unsigned flags)
{
struct llvmpipe_context *llvmpipe;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
index 9acde4f1b..21260369a 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,6 +36,7 @@
#include "util/u_memory.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_format.h"
#include "lp_context.h"
#include "lp_jit.h"
@@ -55,7 +56,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
elem_types[LP_JIT_VIEWPORT_MAX_DEPTH] = LLVMFloatTypeInContext(lc);
viewport_type = LLVMStructTypeInContext(lc, elem_types,
- Elements(elem_types), 0);
+ ARRAY_SIZE(elem_types), 0);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, min_depth,
gallivm->target, viewport_type,
@@ -83,7 +84,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS);
texture_type = LLVMStructTypeInContext(lc, elem_types,
- Elements(elem_types), 0);
+ ARRAY_SIZE(elem_types), 0);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
gallivm->target, texture_type,
@@ -126,7 +127,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
sampler_type = LLVMStructTypeInContext(lc, elem_types,
- Elements(elem_types), 0);
+ ARRAY_SIZE(elem_types), 0);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, min_lod,
gallivm->target, sampler_type,
@@ -165,7 +166,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
PIPE_MAX_SAMPLERS);
context_type = LLVMStructTypeInContext(lc, elem_types,
- Elements(elem_types), 0);
+ ARRAY_SIZE(elem_types), 0);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
gallivm->target, context_type,
@@ -208,12 +209,14 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
LLVMTypeRef thread_data_type;
+ elem_types[LP_JIT_THREAD_DATA_CACHE] =
+ LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
LLVMInt32TypeInContext(lc);
thread_data_type = LLVMStructTypeInContext(lc, elem_types,
- Elements(elem_types), 0);
+ ARRAY_SIZE(elem_types), 0);
lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0);
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
index c726707c0..9e56c962d 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -43,6 +43,7 @@
#include "lp_query.h"
#include "lp_rast.h"
#include "lp_rast_priv.h"
+#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_scene.h"
#include "lp_tex_sample.h"
@@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task,
{
task->scene = scene;
+ /* Clear the cache tags. This should not always be necessary but
+ simpler for now. */
+#if LP_USE_TEXTURE_CACHE
+ memset(task->thread_data.cache->cache_tags, 0,
+ sizeof(task->thread_data.cache->cache_tags));
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+ task->thread_data.cache->cache_access_total = 0;
+ task->thread_data.cache->cache_access_miss = 0;
+#endif
+#endif
+
if (!task->rast->no_rast && !scene->discard) {
/* loop over scene bins, rasterize each */
{
@@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task,
}
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+ {
+ uint64_t total, miss;
+ total = task->thread_data.cache->cache_access_total;
+ miss = task->thread_data.cache->cache_access_miss;
+ if (total) {
+ debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
+ task->thread_index, (long long unsigned)total,
+ (long long unsigned)miss,
+ (float)(total - miss)/(float)total);
+ }
+ }
+#endif
+
if (scene->fence) {
lp_fence_signal(scene->fence);
}
@@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads )
goto no_full_scenes;
}
- for (i = 0; i < Elements(rast->tasks); i++) {
+ for (i = 0; i < MAX2(1, num_threads); i++) {
struct lp_rasterizer_task *task = &rast->tasks[i];
task->rast = rast;
task->thread_index = i;
+ task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
+ 16);
+ if (!task->thread_data.cache) {
+ goto no_thread_data_cache;
+ }
}
rast->num_threads = num_threads;
@@ -879,12 +910,22 @@ lp_rast_create( unsigned num_threads )
create_rast_threads(rast);
/* for synchronizing rasterization threads */
- pipe_barrier_init( &rast->barrier, rast->num_threads );
+ if (rast->num_threads > 0) {
+ pipe_barrier_init( &rast->barrier, rast->num_threads );
+ }
memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
return rast;
+no_thread_data_cache:
+ for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+ if (rast->tasks[i].thread_data.cache) {
+ align_free(rast->tasks[i].thread_data.cache);
+ }
+ }
+
+ lp_scene_queue_destroy(rast->full_scenes);
no_full_scenes:
FREE(rast);
no_rast:
@@ -923,9 +964,14 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
pipe_semaphore_destroy(&rast->tasks[i].work_ready);
pipe_semaphore_destroy(&rast->tasks[i].work_done);
}
+ for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+ align_free(rast->tasks[i].thread_data.cache);
+ }
/* for synchronizing rasterization threads */
- pipe_barrier_destroy( &rast->barrier );
+ if (rast->num_threads > 0) {
+ pipe_barrier_destroy( &rast->barrier );
+ }
lp_scene_queue_destroy(rast->full_scenes);
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
index 2441b3c0d..223be931e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -337,7 +337,7 @@ lp_scene_new_data_block( struct lp_scene *scene )
}
else {
struct data_block *block = MALLOC_STRUCT(data_block);
- if (block == NULL)
+ if (!block)
return NULL;
scene->scene_size += sizeof *block;
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
index 14eeab033..3e4f1ef44 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -109,6 +109,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
switch (param) {
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
@@ -264,6 +265,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_FAKE_SW_MSAA:
return 1;
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
return 1;
case PIPE_CAP_VENDOR_ID:
@@ -279,6 +281,12 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
if (!os_get_total_physical_memory(&system_memory))
return 0;
+ if (sizeof(void *) == 4)
+ /* Cap to 2 GB on 32 bits system. We do this because llvmpipe does
+ * eat application memory, which is quite limited on 32 bits. App
+ * shouldn't expect too much available memory. */
+ system_memory = MIN2(system_memory, 2048 << 20);
+
return (int)(system_memory >> 20);
}
case PIPE_CAP_UMA:
@@ -291,11 +299,44 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
+ case PIPE_CAP_CULL_DISTANCE:
+ return 1;
+ case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+ return 1;
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ case PIPE_CAP_TGSI_TXQS:
+ case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+ case PIPE_CAP_SHAREABLE_SHADERS:
+ case PIPE_CAP_CLEAR_TEXTURE:
+ case PIPE_CAP_DRAW_PARAMETERS:
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT:
+ case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+ case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+ case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
+ case PIPE_CAP_PCI_GROUP:
+ case PIPE_CAP_PCI_BUS:
+ case PIPE_CAP_PCI_DEVICE:
+ case PIPE_CAP_PCI_FUNCTION:
+ case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+ case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+ case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+ case PIPE_CAP_TGSI_VOTE:
+ case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+ case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+ case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
return 0;
}
/* should only get here on unhandled cases */
@@ -421,19 +462,20 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
if (!format_desc->is_array && !format_desc->is_bitmask &&
format != PIPE_FORMAT_R11G11B10_FLOAT)
return FALSE;
+ }
- /*
- * XXX refuse formats known to crash in generate_unswizzled_blend().
- * These include all 3-channel 24bit RGB8 variants, plus 48bit
- * (except those using floats) 3-channel RGB16 variants (the latter
- * seems to be more of a llvm bug though).
- * The mesa state tracker only seems to use these for SINT/UINT formats.
+ if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) &&
+ ((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) {
+ /* Disable all 3-channel formats, where channel size != 32 bits.
+ * In some cases we run into crashes (in generate_unswizzled_blend()),
+ * for 3-channel RGB16 variants, there was an apparent LLVM bug.
+ * In any case, disabling the shallower 3-channel formats avoids a
+ * number of issues with GL_ARB_copy_image support.
*/
- if (format_desc->is_array && format_desc->nr_channels == 3) {
- if (format_desc->block.bits == 24 || (format_desc->block.bits == 48 &&
- !util_format_is_float(format))) {
- return FALSE;
- }
+ if (format_desc->is_array &&
+ format_desc->nr_channels == 3 &&
+ format_desc->block.bits != 96) {
+ return FALSE;
}
}
@@ -450,12 +492,13 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
return FALSE;
/* TODO: Support stencil-only formats */
- if (format_desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+ if (format_desc->swizzle[0] == PIPE_SWIZZLE_NONE) {
return FALSE;
}
}
- if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC ||
+ format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
/* Software decoding is not hooked up. */
return FALSE;
}
@@ -537,6 +580,7 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
*/
static boolean
llvmpipe_fence_finish(struct pipe_screen *screen,
+ struct pipe_context *ctx,
struct pipe_fence_handle *fence_handle,
uint64_t timeout)
{
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
index 4c8167a9e..768775b29 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -39,6 +39,7 @@
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
+#include "util/u_viewport.h"
#include "draw/draw_pipe.h"
#include "os/os_time.h"
#include "lp_context.h"
@@ -69,7 +70,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup)
assert(setup->scene == NULL);
setup->scene_idx++;
- setup->scene_idx %= Elements(setup->scenes);
+ setup->scene_idx %= ARRAY_SIZE(setup->scenes);
setup->scene = setup->scenes[setup->scene_idx];
@@ -123,7 +124,7 @@ void lp_setup_reset( struct lp_setup_context *setup )
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
/* Reset derived state */
- for (i = 0; i < Elements(setup->constants); ++i) {
+ for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
setup->constants[i].stored_size = 0;
setup->constants[i].stored_data = NULL;
}
@@ -476,22 +477,30 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup,
uint64_t zsvalue = 0;
uint32_t zmask32;
uint8_t smask8;
+ enum pipe_format format = setup->fb.zsbuf->format;
LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
- zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
- depth,
- stencil);
+ zsvalue = util_pack64_z_stencil(format, depth, stencil);
- zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
- zmask32,
- smask8);
+ zsmask = util_pack64_mask_z_stencil(format, zmask32, smask8);
zsvalue &= zsmask;
+ if (format == PIPE_FORMAT_Z24X8_UNORM ||
+ format == PIPE_FORMAT_X8Z24_UNORM) {
+ /*
+ * Make full mask if there's "X" bits so we can do full
+ * clear (without rmw).
+ */
+ uint32_t zsmask_full = 0;
+ zsmask_full = util_pack_mask_z_stencil(format, ~0, ~0);
+ zsmask |= ~zsmask_full;
+ }
+
if (setup->state == SETUP_ACTIVE) {
struct lp_scene *scene = setup->scene;
@@ -642,12 +651,12 @@ lp_setup_set_fs_constants(struct lp_setup_context *setup,
LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);
- assert(num <= Elements(setup->constants));
+ assert(num <= ARRAY_SIZE(setup->constants));
for (i = 0; i < num; ++i) {
util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]);
}
- for (; i < Elements(setup->constants); i++) {
+ for (; i < ARRAY_SIZE(setup->constants); i++) {
util_copy_constant_buffer(&setup->constants[i].current, NULL);
}
setup->dirty |= LP_SETUP_NEW_CONSTANTS;
@@ -763,15 +772,8 @@ lp_setup_set_viewports(struct lp_setup_context *setup,
for (i = 0; i < num_viewports; i++) {
float min_depth;
float max_depth;
-
- if (lp->rasterizer->clip_halfz == 0) {
- float half_depth = viewports[i].scale[2];
- min_depth = viewports[i].translate[2] - half_depth;
- max_depth = min_depth + half_depth * 2.0f;
- } else {
- min_depth = viewports[i].translate[2];
- max_depth = min_depth + viewports[i].scale[2];
- }
+ util_viewport_zmin_zmax(&viewports[i], lp->rasterizer->clip_halfz,
+ &min_depth, &max_depth);
if (setup->viewports[i].min_depth != min_depth ||
setup->viewports[i].max_depth != max_depth) {
@@ -791,13 +793,15 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
unsigned num,
struct pipe_sampler_view **views)
{
- unsigned i;
+ unsigned i, max_tex_num;
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
- for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+ max_tex_num = MAX2(num, setup->fs.current_tex_num);
+
+ for (i = 0; i < max_tex_num; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
if (view) {
@@ -854,10 +858,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->img_stride[j] = lp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (res->target == PIPE_TEXTURE_1D_ARRAY ||
+ res->target == PIPE_TEXTURE_2D_ARRAY ||
+ res->target == PIPE_TEXTURE_CUBE ||
+ res->target == PIPE_TEXTURE_CUBE_ARRAY) {
/*
* For array textures, we don't have first_layer, instead
* adjust last_layer (stored as depth) plus the mip level offsets
@@ -879,8 +883,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
}
else {
/*
- * For buffers, we don't have first_element, instead adjust
- * last_element (stored as width) plus the base pointer.
+ * For buffers, we don't have "offset", instead adjust
+ * the size (stored as width) plus the base pointer.
*/
unsigned view_blocksize = util_format_get_blocksize(view->format);
/* probably don't really need to fill that out */
@@ -889,12 +893,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
jit_tex->img_stride[0] = 0;
/* everything specified in number of elements here. */
- jit_tex->width = view->u.buf.last_element - view->u.buf.first_element + 1;
- jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.first_element *
- view_blocksize;
+ jit_tex->width = view->u.buf.size / view_blocksize;
+ jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset;
/* XXX Unsure if we need to sanitize parameters? */
- assert(view->u.buf.first_element <= view->u.buf.last_element);
- assert(view->u.buf.last_element * view_blocksize < res->width0);
+ assert(view->u.buf.offset + view->u.buf.size <= res->width0);
}
}
}
@@ -917,7 +919,11 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
assert(jit_tex->base);
}
}
+ else {
+ pipe_resource_reference(&setup->fs.current_tex[i], NULL);
+ }
}
+ setup->fs.current_tex_num = num;
setup->dirty |= LP_SETUP_NEW_FS;
}
@@ -976,7 +982,7 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
}
/* check textures referenced by the scene */
- for (i = 0; i < Elements(setup->scenes); i++) {
+ for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) {
if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) {
return LP_REFERENCED_FOR_READ;
}
@@ -1067,7 +1073,7 @@ try_update_scene_state( struct lp_setup_context *setup )
}
if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
- for (i = 0; i < Elements(setup->constants); ++i) {
+ for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) {
struct pipe_resource *buffer = setup->constants[i].current.buffer;
const unsigned current_size = MIN2(setup->constants[i].current.buffer_size,
LP_MAX_TGSI_CONST_BUFFER_SIZE);
@@ -1152,7 +1158,7 @@ try_update_scene_state( struct lp_setup_context *setup )
/* The scene now references the textures in the rasterization
* state record. Note that now.
*/
- for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+ for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
if (setup->fs.current_tex[i]) {
if (!lp_scene_add_resource_reference(scene,
setup->fs.current_tex[i],
@@ -1207,7 +1213,7 @@ lp_setup_update_state( struct lp_setup_context *setup,
/* Will probably need to move this somewhere else, just need
* to know about vertex shader point size attribute.
*/
- setup->psize = lp->psize_slot;
+ setup->psize_slot = lp->psize_slot;
setup->viewport_index_slot = lp->viewport_index_slot;
setup->layer_slot = lp->layer_slot;
setup->face_slot = lp->face_slot;
@@ -1269,16 +1275,16 @@ lp_setup_destroy( struct lp_setup_context *setup )
util_unreference_framebuffer_state(&setup->fb);
- for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+ for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) {
pipe_resource_reference(&setup->fs.current_tex[i], NULL);
}
- for (i = 0; i < Elements(setup->constants); i++) {
+ for (i = 0; i < ARRAY_SIZE(setup->constants); i++) {
pipe_resource_reference(&setup->constants[i].current.buffer, NULL);
}
/* free the scenes in the 'empty' queue */
- for (i = 0; i < Elements(setup->scenes); i++) {
+ for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) {
struct lp_scene *scene = setup->scenes[i];
if (scene->fence)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 98a9d4bc2..98243a12d 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -46,6 +46,9 @@
#if defined(PIPE_ARCH_SSE)
#include <emmintrin.h>
+#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+#include <altivec.h>
+#include "util/u_pwr8.h"
#endif
static inline int
@@ -65,11 +68,11 @@ fixed_to_float(int a)
struct fixed_position {
int32_t x[4];
int32_t y[4];
- int64_t area;
int32_t dx01;
int32_t dy01;
int32_t dx20;
int32_t dy20;
+ int64_t area;
};
@@ -91,12 +94,14 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
struct lp_rast_triangle *tri;
+ STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);
+
*tri_size = (sizeof(struct lp_rast_triangle) +
3 * input_array_sz +
plane_sz);
tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
- if (tri == NULL)
+ if (!tri)
return NULL;
tri->inputs.stride = input_array_sz;
@@ -276,6 +281,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
int nr_planes = 3;
unsigned viewport_index = 0;
unsigned layer = 0;
+ const float (*pv)[4];
/* Area should always be positive here */
assert(position->area > 0);
@@ -283,18 +289,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (0)
lp_setup_print_triangle(setup, v0, v1, v2);
- if (setup->scissor_test) {
- nr_planes = 7;
- if (setup->viewport_index_slot > 0) {
- unsigned *udata = (unsigned*)v0[setup->viewport_index_slot];
- viewport_index = lp_clamp_viewport_idx(*udata);
- }
+ if (setup->flatshade_first) {
+ pv = v0;
}
else {
- nr_planes = 3;
+ pv = v2;
+ }
+ if (setup->viewport_index_slot > 0) {
+ unsigned *udata = (unsigned*)pv[setup->viewport_index_slot];
+ viewport_index = lp_clamp_viewport_idx(*udata);
}
if (setup->layer_slot > 0) {
- layer = *(unsigned*)v1[setup->layer_slot];
+ layer = *(unsigned*)pv[setup->layer_slot];
layer = MIN2(layer, scene->fb_max_layer);
}
@@ -336,6 +342,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 3;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
tri = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
@@ -356,13 +374,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
/* Setup parameter interpolants:
*/
- setup->setup.variant->jit_function( v0,
- v1,
- v2,
- frontfacing,
- GET_A0(&tri->inputs),
- GET_DADX(&tri->inputs),
- GET_DADY(&tri->inputs) );
+ setup->setup.variant->jit_function(v0, v1, v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs));
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
@@ -372,32 +388,28 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (0)
lp_dump_setup_coef(&setup->setup.variant->key,
- (const float (*)[4])GET_A0(&tri->inputs),
- (const float (*)[4])GET_DADX(&tri->inputs),
- (const float (*)[4])GET_DADY(&tri->inputs));
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
plane = GET_PLANES(tri);
#if defined(PIPE_ARCH_SSE)
- if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
- setup->fb.height <= MAX_FIXED_LENGTH32 &&
- (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
- (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+ if (1) {
__m128i vertx, verty;
__m128i shufx, shufy;
- __m128i dcdx, dcdy, c;
- __m128i unused;
+ __m128i dcdx, dcdy;
+ __m128i cdx02, cdx13, cdy02, cdy13, c02, c13;
+ __m128i c01, c23, unused;
__m128i dcdx_neg_mask;
__m128i dcdy_neg_mask;
__m128i dcdx_zero_mask;
- __m128i top_left_flag;
- __m128i c_inc_mask, c_inc;
+ __m128i top_left_flag, c_dec;
__m128i eo, p0, p1, p2;
__m128i zero = _mm_setzero_si128();
- PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
- vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
- verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
+ vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */
+ verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */
shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
@@ -411,42 +423,161 @@ do_triangle_ccw(struct lp_setup_context *setup,
top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
- c_inc_mask = _mm_or_si128(dcdx_neg_mask,
- _mm_and_si128(dcdx_zero_mask,
- _mm_xor_si128(dcdy_neg_mask,
- top_left_flag)));
-
- c_inc = _mm_srli_epi32(c_inc_mask, 31);
-
- c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
- mm_mullo_epi32(dcdy, verty));
+ c_dec = _mm_or_si128(dcdx_neg_mask,
+ _mm_and_si128(dcdx_zero_mask,
+ _mm_xor_si128(dcdy_neg_mask,
+ top_left_flag)));
- c = _mm_add_epi32(c, c_inc);
+ /*
+ * 64 bit arithmetic.
+ * Note we need _signed_ mul (_mm_mul_epi32) which we emulate.
+ */
+ cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13);
+ cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13);
+ c02 = _mm_sub_epi64(cdx02, cdy02);
+ c13 = _mm_sub_epi64(cdx13, cdy13);
+ c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec,
+ _MM_SHUFFLE(2,2,0,0)));
+ c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec,
+ _MM_SHUFFLE(3,3,1,1)));
+
+ /*
+ * Useful for very small fbs/tris (or fewer subpixel bits) only:
+ * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
+ * mm_mullo_epi32(dcdy, verty));
+ *
+ * c = _mm_sub_epi32(c, c_dec);
+ */
/* Scale up to match c:
*/
dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
- /* Calculate trivial reject values:
+ /*
+ * Calculate trivial reject values:
+ * Note eo cannot overflow even if dcdx/dcdy would already have
+ * 31 bits (which they shouldn't have). This is because eo
+ * is never negative (albeit if we rely on that need to be careful...)
*/
eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
_mm_and_si128(dcdx_neg_mask, dcdx));
/* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+ /*
+ * Pointless transpose which gets undone immediately in
+ * rasterization.
+ * It is actually difficult to do away with it - would essentially
+ * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations
+ * for this then would need to depend on the number of planes.
+ * The transpose is quite special here due to c being 64bit...
+ * The store has to be unaligned (unless we'd make the plane size
+ * a multiple of 128), and of course storing eo separately...
+ */
+ c01 = _mm_unpacklo_epi64(c02, c13);
+ c23 = _mm_unpackhi_epi64(c02, c13);
+ transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy,
+ &p0, &p1, &p2, &unused);
+ _mm_storeu_si128((__m128i *)&plane[0], p0);
+ plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+ _mm_storeu_si128((__m128i *)&plane[1], p1);
+ eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1));
+ plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+ _mm_storeu_si128((__m128i *)&plane[2], p2);
+ eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
+ plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
+ } else
+#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+ /*
+ * XXX this code is effectively disabled for all practical purposes,
+ * as the allowed fb size is tiny if FIXED_ORDER is 8.
+ */
+ if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
+ setup->fb.height <= MAX_FIXED_LENGTH32 &&
+ (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
+ (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+ unsigned int bottom_edge;
+ __m128i vertx, verty;
+ __m128i shufx, shufy;
+ __m128i dcdx, dcdy, c;
+ __m128i unused;
+ __m128i dcdx_neg_mask;
+ __m128i dcdy_neg_mask;
+ __m128i dcdx_zero_mask;
+ __m128i top_left_flag;
+ __m128i c_inc_mask, c_inc;
+ __m128i eo, p0, p1, p2;
+ __m128i_union vshuf_mask;
+ __m128i zero = vec_splats((unsigned char) 0);
+ PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ vshuf_mask.i[0] = 0x07060504;
+ vshuf_mask.i[1] = 0x0B0A0908;
+ vshuf_mask.i[2] = 0x03020100;
+ vshuf_mask.i[3] = 0x0F0E0D0C;
+#else
+ vshuf_mask.i[0] = 0x00010203;
+ vshuf_mask.i[1] = 0x0C0D0E0F;
+ vshuf_mask.i[2] = 0x04050607;
+ vshuf_mask.i[3] = 0x08090A0B;
+#endif
+
+ /* vertex x coords */
+ vertx = vec_load_si128((const uint32_t *) position->x);
+ /* vertex y coords */
+ verty = vec_load_si128((const uint32_t *) position->y);
+
+ shufx = vec_perm (vertx, vertx, vshuf_mask.m128i);
+ shufy = vec_perm (verty, verty, vshuf_mask.m128i);
+
+ dcdx = vec_sub_epi32(verty, shufy);
+ dcdy = vec_sub_epi32(vertx, shufx);
+
+ dcdx_neg_mask = vec_srai_epi32(dcdx, 31);
+ dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero);
+ dcdy_neg_mask = vec_srai_epi32(dcdy, 31);
+
+ bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0;
+ top_left_flag = (__m128i) vec_splats(bottom_edge);
+
+ c_inc_mask = vec_or(dcdx_neg_mask,
+ vec_and(dcdx_zero_mask,
+ vec_xor(dcdy_neg_mask,
+ top_left_flag)));
+
+ c_inc = vec_srli_epi32(c_inc_mask, 31);
+
+ c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx),
+ vec_mullo_epi32(dcdy, verty));
+
+ c = vec_add_epi32(c, c_inc);
+
+ /* Scale up to match c:
+ */
+ dcdx = vec_slli_epi32(dcdx, FIXED_ORDER);
+ dcdy = vec_slli_epi32(dcdy, FIXED_ORDER);
+
+ /* Calculate trivial reject values:
+ */
+ eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy),
+ vec_and(dcdx_neg_mask, dcdx));
+
+ /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+
/* Pointless transpose which gets undone immediately in
* rasterization:
*/
transpose4_epi32(&c, &dcdx, &dcdy, &eo,
&p0, &p1, &p2, &unused);
-#define STORE_PLANE(plane, vec) do { \
- _mm_store_si128((__m128i *)&temp_vec, vec); \
- plane.c = (int64_t)temp_vec[0]; \
- plane.dcdx = temp_vec[1]; \
- plane.dcdy = temp_vec[2]; \
- plane.eo = temp_vec[3]; \
+#define STORE_PLANE(plane, vec) do { \
+ vec_store_si128((uint32_t *)&temp_vec, vec); \
+ plane.c = (int64_t)temp_vec[0]; \
+ plane.dcdx = temp_vec[1]; \
+ plane.dcdy = temp_vec[2]; \
+ plane.eo = temp_vec[3]; \
} while(0)
STORE_PLANE(plane[0], p0);
@@ -465,17 +596,17 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane[2].dcdx = position->dy20;
for (i = 0; i < 3; i++) {
- /* half-edge constants, will be interated over the whole render
+ /* half-edge constants, will be iterated over the whole render
* target.
*/
plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
- IMUL64(plane[i].dcdy, position->y[i]);
+ IMUL64(plane[i].dcdy, position->y[i]);
/* correct for top-left vs. bottom-left fill convention.
- */
+ */
if (plane[i].dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
- plane[i].c++;
+ plane[i].c++;
}
else if (plane[i].dcdx == 0) {
if (setup->bottom_edge_rule == 0){
@@ -509,19 +640,19 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
if (0) {
- debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+ debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n",
plane[0].c,
plane[0].dcdx,
plane[0].dcdy,
plane[0].eo);
-
- debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+ debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n",
plane[1].c,
plane[1].dcdx,
plane[1].dcdy,
plane[1].eo);
-
- debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+
+ debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n",
plane[2].c,
plane[2].dcdx,
plane[2].dcdy,
@@ -546,29 +677,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 7) {
+ if (nr_planes > 3) {
+ /* why not just use draw_regions */
const struct u_rect *scissor = &setup->scissors[viewport_index];
-
- plane[3].dcdx = -1;
- plane[3].dcdy = 0;
- plane[3].c = 1-scissor->x0;
- plane[3].eo = 1;
-
- plane[4].dcdx = 1;
- plane[4].dcdy = 0;
- plane[4].c = scissor->x1+1;
- plane[4].eo = 0;
-
- plane[5].dcdx = 0;
- plane[5].dcdy = 1;
- plane[5].c = 1-scissor->y0;
- plane[5].eo = 1;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = -1;
- plane[6].c = scissor->y1+1;
- plane[6].eo = 0;
+ struct lp_rast_plane *plane_s = &plane[3];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
+
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
@@ -582,7 +730,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
static inline uint32_t
floor_pot(uint32_t n)
{
-#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
if (n == 0)
return 0;
@@ -730,9 +878,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
ei[i] = (plane[i].dcdy -
plane[i].dcdx -
- plane[i].eo) << TILE_ORDER;
+ (int64_t)plane[i].eo) << TILE_ORDER;
- eo[i] = plane[i].eo << TILE_ORDER;
+ eo[i] = (int64_t)plane[i].eo << TILE_ORDER;
xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
}
@@ -840,29 +988,70 @@ static void retry_triangle_ccw( struct lp_setup_context *setup,
/**
* Calculate fixed position data for a triangle
+ * It is unfortunate we need to do that here (as we need area
+ * calculated in fixed point), as there's quite some code duplication
+ * to what is done in the jit setup prog.
*/
static inline void
-calc_fixed_position( struct lp_setup_context *setup,
- struct fixed_position* position,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4])
+calc_fixed_position(struct lp_setup_context *setup,
+ struct fixed_position* position,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
+ /*
+ * The rounding may not be quite the same with PIPE_ARCH_SSE
+ * (util_iround right now only does nearest/even on x87,
+ * otherwise nearest/away-from-zero).
+ * Both should be acceptable, I think.
+ */
+#if defined(PIPE_ARCH_SSE)
+ __m128 v0r, v1r;
+ __m128 vxy0xy2, vxy1xy0;
+ __m128i vxy0xy2i, vxy1xy0i;
+ __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
+ __m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
+ __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
+ v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+ vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+ v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+ vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
+ vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
+ vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
+ vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
+ vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one);
+ vxy0xy2i = _mm_cvtps_epi32(vxy0xy2);
+ vxy1xy0i = _mm_cvtps_epi32(vxy1xy0);
+ dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i);
+ _mm_store_si128((__m128i *)&position->dx01, dxdy0120);
+ /*
+ * For the mul, would need some more shuffles, plus emulation
+ * for the signed mul (without sse41), so don't bother.
+ */
+ x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0));
+ x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0));
+ x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0);
+ y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0);
+ _mm_store_si128((__m128i *)&position->x[0], x0120);
+ _mm_store_si128((__m128i *)&position->y[0], y0120);
+
+#else
position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
- position->x[3] = 0;
+ position->x[3] = 0; // should be unused
position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
- position->y[3] = 0;
+ position->y[3] = 0; // should be unused
position->dx01 = position->x[0] - position->x[1];
position->dy01 = position->y[0] - position->y[1];
position->dx20 = position->x[2] - position->x[0];
position->dy20 = position->y[2] - position->y[0];
+#endif
position->area = IMUL64(position->dx01, position->dy20) -
IMUL64(position->dx20, position->dy01);
@@ -924,12 +1113,12 @@ rotate_fixed_position_12( struct fixed_position* position )
/**
* Draw triangle if it's CW, cull otherwise.
*/
-static void triangle_cw( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] )
+static void triangle_cw(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- struct fixed_position position;
+ PIPE_ALIGN_VAR(16) struct fixed_position position;
calc_fixed_position(setup, &position, v0, v1, v2);
@@ -945,12 +1134,12 @@ static void triangle_cw( struct lp_setup_context *setup,
}
-static void triangle_ccw( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4])
+static void triangle_ccw(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- struct fixed_position position;
+ PIPE_ALIGN_VAR(16) struct fixed_position position;
calc_fixed_position(setup, &position, v0, v1, v2);
@@ -961,12 +1150,12 @@ static void triangle_ccw( struct lp_setup_context *setup,
/**
* Draw triangle whether it's CW or CCW.
*/
-static void triangle_both( struct lp_setup_context *setup,
- const float (*v0)[4],
- const float (*v1)[4],
- const float (*v2)[4] )
+static void triangle_both(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
{
- struct fixed_position position;
+ PIPE_ALIGN_VAR(16) struct fixed_position position;
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
if (lp_context->active_statistics_queries &&
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
index fd6c49aac..3428eed4e 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -134,7 +134,7 @@ generate_quad_mask(struct gallivm_state *gallivm,
* XXX: We'll need a different path for 16 x u8
*/
assert(fs_type.width == 32);
- assert(fs_type.length <= Elements(bits));
+ assert(fs_type.length <= ARRAY_SIZE(bits));
mask_type = lp_int_type(fs_type);
/*
@@ -238,6 +238,54 @@ lp_llvm_viewport(LLVMValueRef context_ptr,
}
+static LLVMValueRef
+lp_build_depth_clamp(struct gallivm_state *gallivm,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef context_ptr,
+ LLVMValueRef thread_data_ptr,
+ LLVMValueRef z)
+{
+ LLVMValueRef viewport, min_depth, max_depth;
+ LLVMValueRef viewport_index;
+ struct lp_build_context f32_bld;
+
+ assert(type.floating);
+ lp_build_context_init(&f32_bld, gallivm, type);
+
+ /*
+ * Assumes clamping of the viewport index will occur in setup/gs. Value
+ * is passed through the rasterization stage via lp_rast_shader_inputs.
+ *
+ * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping
+ * semantics.
+ */
+ viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm,
+ thread_data_ptr);
+
+ /*
+ * Load the min and max depth from the lp_jit_context.viewports
+ * array of lp_jit_viewport structures.
+ */
+ viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index);
+
+ /* viewports[viewport_index].min_depth */
+ min_depth = LLVMBuildExtractElement(builder, viewport,
+ lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH), "");
+ min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth);
+
+ /* viewports[viewport_index].max_depth */
+ max_depth = LLVMBuildExtractElement(builder, viewport,
+ lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH), "");
+ max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth);
+
+ /*
+ * Clamp to the min and max depth values for the given viewport.
+ */
+ return lp_build_clamp(&f32_bld, z, min_depth, max_depth);
+}
+
+
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
*/
@@ -383,6 +431,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
z = interp->pos[2];
if (depth_mode & EARLY_DEPTH_TEST) {
+ /*
+ * Clamp according to ARB_depth_clamp semantics.
+ */
+ if (key->depth_clamp) {
+ z = lp_build_depth_clamp(gallivm, builder, type, context_ptr,
+ thread_data_ptr, z);
+ }
lp_build_depth_stencil_load_swizzled(gallivm, type,
zs_format_desc, key->resource_1d,
depth_ptr, depth_stride,
@@ -421,7 +476,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
consts_ptr, num_consts_ptr, &system_values,
interp->inputs,
- outputs, context_ptr,
+ outputs, context_ptr, thread_data_ptr,
sampler, &shader->info.base, NULL);
/* Alpha test */
@@ -471,51 +526,13 @@ generate_fs_loop(struct gallivm_state *gallivm,
0);
if (pos0 != -1 && outputs[pos0][2]) {
z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
-
- /*
- * Clamp according to ARB_depth_clamp semantics.
- */
- if (key->depth_clamp) {
- LLVMValueRef viewport, min_depth, max_depth;
- LLVMValueRef viewport_index;
- struct lp_build_context f32_bld;
-
- assert(type.floating);
- lp_build_context_init(&f32_bld, gallivm, type);
-
- /*
- * Assumes clamping of the viewport index will occur in setup/gs. Value
- * is passed through the rasterization stage via lp_rast_shader_inputs.
- *
- * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping
- * semantics.
- */
- viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm,
- thread_data_ptr);
-
- /*
- * Load the min and max depth from the lp_jit_context.viewports
- * array of lp_jit_viewport structures.
- */
- viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index);
-
- /* viewports[viewport_index].min_depth */
- min_depth = LLVMBuildExtractElement(builder, viewport,
- lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH),
- "");
- min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth);
-
- /* viewports[viewport_index].max_depth */
- max_depth = LLVMBuildExtractElement(builder, viewport,
- lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH),
- "");
- max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth);
-
- /*
- * Clamp to the min and max depth values for the given viewport.
- */
- z = lp_build_clamp(&f32_bld, z, min_depth, max_depth);
- }
+ }
+ /*
+ * Clamp according to ARB_depth_clamp semantics.
+ */
+ if (key->depth_clamp) {
+ z = lp_build_depth_clamp(gallivm, builder, type, context_ptr,
+ thread_data_ptr, z);
}
if (s_out != -1 && outputs[s_out][1]) {
@@ -646,7 +663,7 @@ generate_fs_twiddle(struct gallivm_state *gallivm,
src_count = num_fs * src_channels;
assert(pixels == 2 || pixels == 1);
- assert(num_fs * src_channels <= Elements(src));
+ assert(num_fs * src_channels <= ARRAY_SIZE(src));
/*
* Transpose from SoA -> AoS
@@ -786,7 +803,7 @@ load_unswizzled_block(struct gallivm_state *gallivm,
dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
- lp_set_load_alignment(dst[i], dst_alignment);
+ LLVMSetAlignment(dst[i], dst_alignment);
}
}
@@ -830,7 +847,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
src_ptr = LLVMBuildStore(builder, src[i], src_ptr);
- lp_set_store_alignment(src_ptr, src_alignment);
+ LLVMSetAlignment(src_ptr, src_alignment);
}
}
@@ -1601,7 +1618,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS];
LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS];
LLVMValueRef src_alpha[4 * 4];
- LLVMValueRef src1_alpha[4 * 4];
+ LLVMValueRef src1_alpha[4 * 4] = { NULL };
LLVMValueRef src_mask[4 * 4];
LLVMValueRef src[4 * 4];
LLVMValueRef src1[4 * 4];
@@ -2267,7 +2284,7 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[12] = int32_type; /* depth_stride */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
- arg_types, Elements(arg_types), 0);
+ arg_types, ARRAY_SIZE(arg_types), 0);
function = LLVMAddFunction(gallivm->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
@@ -2277,7 +2294,7 @@ generate_fragment(struct llvmpipe_context *lp,
/* XXX: need to propagate noalias down into color param now we are
* passing a pointer-to-pointer?
*/
- for(i = 0; i < Elements(arg_types); ++i)
+ for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
@@ -2303,8 +2320,8 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
- lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(mask_input, "mask_input");
+ lp_build_name(thread_data_ptr, "thread_data");
lp_build_name(stride_ptr, "stride_ptr");
lp_build_name(depth_stride, "depth_stride");
@@ -2344,6 +2361,7 @@ generate_fragment(struct llvmpipe_context *lp,
shader->info.base.num_inputs,
inputs,
pixel_center_integer,
+ key->depth_clamp,
builder, fs_type,
a0_ptr, dadx_ptr, dady_ptr,
x, y);
@@ -2563,7 +2581,7 @@ generate_variant(struct llvmpipe_context *lp,
char module_name[64];
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
- if(!variant)
+ if (!variant)
return NULL;
util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
@@ -2695,34 +2713,35 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
switch (shader->info.base.input_interpolate[i]) {
case TGSI_INTERPOLATE_CONSTANT:
- shader->inputs[i].interp = LP_INTERP_CONSTANT;
- break;
+ shader->inputs[i].interp = LP_INTERP_CONSTANT;
+ break;
case TGSI_INTERPOLATE_LINEAR:
- shader->inputs[i].interp = LP_INTERP_LINEAR;
- break;
+ shader->inputs[i].interp = LP_INTERP_LINEAR;
+ break;
case TGSI_INTERPOLATE_PERSPECTIVE:
- shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
- break;
+ shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ break;
case TGSI_INTERPOLATE_COLOR:
- shader->inputs[i].interp = LP_INTERP_COLOR;
- break;
+ shader->inputs[i].interp = LP_INTERP_COLOR;
+ break;
default:
- assert(0);
- break;
+ assert(0);
+ break;
}
switch (shader->info.base.input_semantic_name[i]) {
case TGSI_SEMANTIC_FACE:
- shader->inputs[i].interp = LP_INTERP_FACING;
- break;
+ shader->inputs[i].interp = LP_INTERP_FACING;
+ break;
case TGSI_SEMANTIC_POSITION:
- /* Position was already emitted above
- */
- shader->inputs[i].interp = LP_INTERP_POSITION;
- shader->inputs[i].src_index = 0;
- continue;
+ /* Position was already emitted above
+ */
+ shader->inputs[i].interp = LP_INTERP_POSITION;
+ shader->inputs[i].src_index = 0;
+ continue;
}
+ /* XXX this is a completely pointless index map... */
shader->inputs[i].src_index = i+1;
}
@@ -2835,17 +2854,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
static void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- struct pipe_constant_buffer *cb)
+ const struct pipe_constant_buffer *cb)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct pipe_resource *constants = cb ? cb->buffer : NULL;
assert(shader < PIPE_SHADER_TYPES);
- assert(index < Elements(llvmpipe->constants[shader]));
+ assert(index < ARRAY_SIZE(llvmpipe->constants[shader]));
/* note: reference counting */
util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb);
+ if (constants) {
+ if (!(constants->bind & PIPE_BIND_CONSTANT_BUFFER)) {
+ debug_printf("Illegal set constant without bind flag\n");
+ constants->bind |= PIPE_BIND_CONSTANT_BUFFER;
+ }
+ }
+
if (shader == PIPE_SHADER_VERTEX ||
shader == PIPE_SHADER_GEOMETRY) {
/* Pass the constants to the 'draw' module */
@@ -2868,8 +2894,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
index, data, size);
}
-
- llvmpipe->dirty |= LP_NEW_CONSTANTS;
+ else {
+ llvmpipe->dirty |= LP_NEW_FS_CONSTANTS;
+ }
if (cb && cb->user_buffer) {
pipe_resource_reference(&constants, NULL);
@@ -2940,6 +2967,13 @@ make_variant_key(struct llvmpipe_context *lp,
* depth_clip == 0 implies depth clamping is enabled.
*
* When clip_halfz is enabled, then always clamp the depth values.
+ *
+ * XXX: This is incorrect for GL, but correct for d3d10 (depth
+ * clamp is always active in d3d10, regardless if depth clip is
+ * enabled or not).
+ * (GL has an always-on [0,1] clamp on fs depth output instead
+ * to ensure the depth values stay in range. Doesn't look like
+ * we do that, though...)
*/
if (lp->rasterizer->clip_halfz) {
key->depth_clamp = 1;
@@ -3026,7 +3060,7 @@ make_variant_key(struct llvmpipe_context *lp,
* Also, force rgb/alpha func/factors match, to make AoS blending
* easier.
*/
- if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
+ if (format_desc->swizzle[3] > PIPE_SWIZZLE_W ||
format_desc->swizzle[3] == format_desc->swizzle[0]) {
/* Doesn't cover mixed snorm/unorm but can't render to them anyway */
boolean clamped_zero = !util_format_is_float(format) &&
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index b205f02fd..01af05211 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -66,7 +66,7 @@ llvmpipe_create_sampler_state(struct pipe_context *pipe,
static void
llvmpipe_bind_sampler_states(struct pipe_context *pipe,
- unsigned shader,
+ enum pipe_shader_type shader,
unsigned start,
unsigned num,
void **samplers)
@@ -75,7 +75,7 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
unsigned i;
assert(shader < PIPE_SHADER_TYPES);
- assert(start + num <= Elements(llvmpipe->samplers[shader]));
+ assert(start + num <= ARRAY_SIZE(llvmpipe->samplers[shader]));
draw_flush(llvmpipe->draw);
@@ -98,14 +98,15 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
llvmpipe->samplers[shader],
llvmpipe->num_samplers[shader]);
}
-
- llvmpipe->dirty |= LP_NEW_SAMPLER;
+ else {
+ llvmpipe->dirty |= LP_NEW_SAMPLER;
+ }
}
static void
llvmpipe_set_sampler_views(struct pipe_context *pipe,
- unsigned shader,
+ enum pipe_shader_type shader,
unsigned start,
unsigned num,
struct pipe_sampler_view **views)
@@ -116,7 +117,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
assert(shader < PIPE_SHADER_TYPES);
- assert(start + num <= Elements(llvmpipe->sampler_views[shader]));
+ assert(start + num <= ARRAY_SIZE(llvmpipe->sampler_views[shader]));
draw_flush(llvmpipe->draw);
@@ -128,6 +129,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
*/
pipe_sampler_view_release(pipe,
&llvmpipe->sampler_views[shader][start + i]);
+ /*
+ * Warn if someone tries to set a view created in a different context
+ * (which is why we need the hack above in the first place).
+ * An assert would be better but st/mesa relies on it...
+ */
+ if (views[i] && views[i]->context != pipe) {
+ debug_printf("Illegal setting of sampler_view %d created in another "
+ "context\n", i);
+ }
pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
views[i]);
}
@@ -146,8 +156,9 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe,
llvmpipe->sampler_views[shader],
llvmpipe->num_sampler_views[shader]);
}
-
- llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+ else {
+ llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+ }
}
@@ -158,11 +169,13 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe,
{
struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
/*
- * XXX we REALLY want to see the correct bind flag here but the OpenGL
- * state tracker can't guarantee that at least for texture buffer objects.
+ * XXX: bind flags from OpenGL state tracker are notoriously unreliable.
+ * This looks unfixable, so fix the bind flags instead when it happens.
*/
- if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW))
+ if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW)) {
debug_printf("Illegal sampler view creation without bind flag\n");
+ texture->bind |= PIPE_BIND_SAMPLER_VIEW;
+ }
if (view) {
*view = *templ;
@@ -228,8 +241,7 @@ prepare_shader_sampling(
struct llvmpipe_context *lp,
unsigned num,
struct pipe_sampler_view **views,
- unsigned shader_type,
- struct pipe_resource *mapped_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS])
+ unsigned shader_type)
{
unsigned i;
@@ -242,7 +254,7 @@ prepare_shader_sampling(
if (!num)
return;
- for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+ for (i = 0; i < num; i++) {
struct pipe_sampler_view *view = i < num ? views[i] : NULL;
if (view) {
@@ -253,11 +265,6 @@ prepare_shader_sampling(
unsigned first_level = 0;
unsigned last_level = 0;
- /* We're referencing the texture's internal data, so save a
- * reference to it.
- */
- pipe_resource_reference(&mapped_tex[i], tex);
-
if (!lp_tex->dt) {
/* regular texture - setup array of mipmap level offsets */
struct pipe_resource *res = view->texture;
@@ -275,10 +282,10 @@ prepare_shader_sampling(
row_stride[j] = lp_tex->row_stride[j];
img_stride[j] = lp_tex->img_stride[j];
}
- if (view->target == PIPE_TEXTURE_1D_ARRAY ||
- view->target == PIPE_TEXTURE_2D_ARRAY ||
- view->target == PIPE_TEXTURE_CUBE ||
- view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ if (tex->target == PIPE_TEXTURE_1D_ARRAY ||
+ tex->target == PIPE_TEXTURE_2D_ARRAY ||
+ tex->target == PIPE_TEXTURE_CUBE ||
+ tex->target == PIPE_TEXTURE_CUBE_ARRAY) {
num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
for (j = first_level; j <= last_level; j++) {
mip_offsets[j] += view->u.tex.first_layer *
@@ -301,11 +308,9 @@ prepare_shader_sampling(
img_stride[0] = 0;
/* everything specified in number of elements here. */
- width0 = view->u.buf.last_element - view->u.buf.first_element + 1;
- addr = (uint8_t *)addr + view->u.buf.first_element *
- view_blocksize;
- assert(view->u.buf.first_element <= view->u.buf.last_element);
- assert(view->u.buf.last_element * view_blocksize < res->width0);
+ width0 = view->u.buf.size / view_blocksize;
+ addr = (uint8_t *)addr + view->u.buf.offset;
+ assert(view->u.buf.offset + view->u.buf.size <= res->width0);
}
}
else {
@@ -335,47 +340,28 @@ prepare_shader_sampling(
/**
- * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
*/
void
llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
unsigned num,
struct pipe_sampler_view **views)
{
- prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX,
- lp->mapped_vs_tex);
-}
-
-void
-llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx)
-{
- unsigned i;
- for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) {
- pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL);
- }
+ prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX);
}
/**
- * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ * Called whenever we're about to draw (no dirty flag, FIXME?).
*/
void
llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp,
unsigned num,
struct pipe_sampler_view **views)
{
- prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY,
- lp->mapped_gs_tex);
+ prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY);
}
-void
-llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx)
-{
- unsigned i;
- for (i = 0; i < Elements(ctx->mapped_gs_tex); i++) {
- pipe_resource_reference(&ctx->mapped_gs_tex[i], NULL);
- }
-}
void
llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 6397b5196..a57e2f04b 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -135,8 +135,8 @@ emit_facing_coef(struct gallivm_state *gallivm,
LLVMValueRef a0_0 = args->facing;
LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
LLVMValueRef a0, face_val;
- const unsigned char swizzles[4] = { PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ZERO,
- PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ZERO };
+ const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
/* Our face val is either 1 or 0 so we do
* face = (val * 2) - 1
* to make it 1 or -1
@@ -723,7 +723,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
goto fail;
variant = CALLOC_STRUCT(lp_setup_variant);
- if (variant == NULL)
+ if (!variant)
goto fail;
variant->no = setup_no++;
@@ -760,7 +760,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
- arg_types, Elements(arg_types), 0);
+ arg_types, ARRAY_SIZE(arg_types), 0);
variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
if (!variant->function)
@@ -791,7 +791,7 @@ generate_setup_variant(struct lp_setup_variant_key *key,
variant->function, "entry");
LLVMPositionBuilderAtEnd(builder, block);
- set_noalias(builder, variant->function, arg_types, Elements(arg_types));
+ set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
init_args(gallivm, &variant->key, &args);
emit_tri_coef(gallivm, &variant->key, &args);
@@ -848,14 +848,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
key->size = Offset(struct lp_setup_variant_key,
inputs[key->num_inputs]);
- key->color_slot = lp->color_slot [0];
+ key->color_slot = lp->color_slot[0];
key->bcolor_slot = lp->bcolor_slot[0];
- key->spec_slot = lp->color_slot [1];
- key->bspec_slot = lp->bcolor_slot[1];
- assert(key->color_slot == lp->color_slot [0]);
- assert(key->bcolor_slot == lp->bcolor_slot[0]);
- assert(key->spec_slot == lp->color_slot [1]);
- assert(key->bspec_slot == lp->bcolor_slot[1]);
+ key->spec_slot = lp->color_slot[1];
+ key->bspec_slot = lp->bcolor_slot[1];
/*
* If depth is floating point, depth bias is calculated with respect
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
index 96f8ed82c..784db7f73 100644
--- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -131,8 +131,15 @@ llvmpipe_create_surface(struct pipe_context *pipe,
{
struct pipe_surface *ps;
- if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET)))
+ if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET))) {
debug_printf("Illegal surface creation without bind flag\n");
+ if (util_format_is_depth_or_stencil(surf_tmpl->format)) {
+ pt->bind |= PIPE_BIND_DEPTH_STENCIL;
+ }
+ else {
+ pt->bind |= PIPE_BIND_RENDER_TARGET;
+ }
+ }
ps = CALLOC_STRUCT(pipe_surface);
if (ps) {
@@ -183,11 +190,12 @@ llvmpipe_clear_render_target(struct pipe_context *pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
+ unsigned width, unsigned height,
+ bool render_condition_enabled)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- if (!llvmpipe_check_render_cond(llvmpipe))
+ if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
return;
util_clear_render_target(pipe, dst, color,
@@ -202,11 +210,12 @@ llvmpipe_clear_depth_stencil(struct pipe_context *pipe,
double depth,
unsigned stencil,
unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
+ unsigned width, unsigned height,
+ bool render_condition_enabled)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
- if (!llvmpipe_check_render_cond(llvmpipe))
+ if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe))
return;
util_clear_depth_stencil(pipe, dst, clear_flags,