diff options
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am | 4 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in | 53 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 8 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c | 8 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c | 13 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c | 13 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c | 52 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c | 2 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c | 70 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c | 84 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c | 395 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c | 188 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c | 88 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c | 20 | ||||
-rw-r--r-- | lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c | 19 |
15 files changed, 670 insertions, 347 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am index 1d3853e41..85ae0ae13 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am +++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am @@ -26,11 +26,11 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CFLAGS = \ $(GALLIUM_DRIVER_CFLAGS) \ $(LLVM_CFLAGS) \ - $(MSVC2008_COMPAT_CFLAGS) + $(MSVC2013_COMPAT_CFLAGS) AM_CXXFLAGS= \ $(GALLIUM_DRIVER_CXXFLAGS) \ $(LLVM_CXXFLAGS) \ - $(MSVC2008_COMPAT_CXXFLAGS) + $(MSVC2013_COMPAT_CXXFLAGS) noinst_LTLIBRARIES = libllvmpipe.la diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in index 27a6693d9..0a7486d64 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in +++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in @@ -78,13 +78,10 @@ DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \ $(top_srcdir)/bin/depcomp \ $(top_srcdir)/src/gallium/Automake.inc -@HAVE_LIBDRM_TRUE@am__append_1 = \ -@HAVE_LIBDRM_TRUE@ $(LIBDRM_LIBS) - -@HAVE_DRISW_TRUE@am__append_2 = \ +@HAVE_DRISW_TRUE@am__append_1 = \ @HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la -@HAVE_DRISW_KMS_TRUE@am__append_3 = \ +@HAVE_DRISW_KMS_TRUE@am__append_2 = \ @HAVE_DRISW_KMS_TRUE@ $(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \ @HAVE_DRISW_KMS_TRUE@ $(LIBDRM_LIBS) @@ -139,8 +136,7 @@ am__DEPENDENCIES_1 = am__DEPENDENCIES_2 = libllvmpipe.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ $(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \ - $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ - $(am__DEPENDENCIES_1) + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) lp_test_arit_DEPENDENCIES = $(am__DEPENDENCIES_2) am_lp_test_blend_OBJECTS = lp_test_blend.$(OBJEXT) \ lp_test_main.$(OBJEXT) @@ -238,8 +234,6 @@ AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ AMDGPU_LIBS = @AMDGPU_LIBS@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ -ANDROID_CFLAGS = @ANDROID_CFLAGS@ -ANDROID_LIBS = @ANDROID_LIBS@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ @@ -270,6 +264,8 @@ DLLTOOL = @DLLTOOL@ DLOPEN_LIBS = @DLOPEN_LIBS@ DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ +DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@ +DRI3PROTO_LIBS = @DRI3PROTO_LIBS@ DRIGL_CFLAGS = @DRIGL_CFLAGS@ DRIGL_LIBS = @DRIGL_LIBS@ DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ @@ -282,11 +278,10 @@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGL_CFLAGS = @EGL_CFLAGS@ +EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ EGL_LIB_DEPS = @EGL_LIB_DEPS@ EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ EGREP = @EGREP@ -ETNAVIV_CFLAGS = @ETNAVIV_CFLAGS@ -ETNAVIV_LIBS = @ETNAVIV_LIBS@ EXEEXT = @EXEEXT@ EXPAT_CFLAGS = @EXPAT_CFLAGS@ EXPAT_LIBS = @EXPAT_LIBS@ @@ -334,27 +329,31 @@ LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ LIBDRM_LIBS = @LIBDRM_LIBS@ LIBELF_CFLAGS = @LIBELF_CFLAGS@ LIBELF_LIBS = @LIBELF_LIBS@ -LIBGLVND_DATADIR = @LIBGLVND_DATADIR@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ -LIBSENSORS_LIBS = @LIBSENSORS_LIBS@ +LIBSENSORS_LDFLAGS = @LIBSENSORS_LDFLAGS@ +LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@ +LIBSHA1_LIBS = @LIBSHA1_LIBS@ LIBTOOL = @LIBTOOL@ -LIBUNWIND_CFLAGS = @LIBUNWIND_CFLAGS@ -LIBUNWIND_LIBS = @LIBUNWIND_LIBS@ LIB_DIR = @LIB_DIR@ LIB_EXT = @LIB_EXT@ LIPO = @LIPO@ +LLVM_BINDIR = @LLVM_BINDIR@ LLVM_CFLAGS = @LLVM_CFLAGS@ LLVM_CONFIG = @LLVM_CONFIG@ +LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ LLVM_LDFLAGS = @LLVM_LDFLAGS@ +LLVM_LIBDIR = @LLVM_LIBDIR@ LLVM_LIBS = @LLVM_LIBS@ +LLVM_VERSION = @LLVM_VERSION@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ +MESA_LLVM = @MESA_LLVM@ MKDIR_P = @MKDIR_P@ MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ @@ -375,6 +374,8 @@ OMX_LIBS = @OMX_LIBS@ OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@ OPENCL_LIBNAME = @OPENCL_LIBNAME@ OPENCL_VERSION = @OPENCL_VERSION@ +OPENSSL_CFLAGS = @OPENSSL_CFLAGS@ +OPENSSL_LIBS = @OPENSSL_LIBS@ OSMESA_LIB = @OSMESA_LIB@ OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ @@ -394,6 +395,8 @@ PKG_CONFIG = @PKG_CONFIG@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ POSIX_SHELL = @POSIX_SHELL@ +PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@ +PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@ PTHREADSTUBS_CFLAGS = @PTHREADSTUBS_CFLAGS@ PTHREADSTUBS_LIBS = @PTHREADSTUBS_LIBS@ PTHREAD_CC = @PTHREAD_CC@ @@ -409,6 +412,8 @@ SED = @SED@ SELINUX_CFLAGS = @SELINUX_CFLAGS@ SELINUX_LIBS = @SELINUX_LIBS@ SET_MAKE = @SET_MAKE@ +SHA1_CFLAGS = @SHA1_CFLAGS@ +SHA1_LIBS = @SHA1_LIBS@ SHELL = @SHELL@ SIMPENROSE_CFLAGS = @SIMPENROSE_CFLAGS@ SIMPENROSE_LIBS = @SIMPENROSE_LIBS@ @@ -417,6 +422,7 @@ STRIP = @STRIP@ SWR_AVX2_CXXFLAGS = @SWR_AVX2_CXXFLAGS@ SWR_AVX_CXXFLAGS = @SWR_AVX_CXXFLAGS@ SWR_CXX11_CXXFLAGS = @SWR_CXX11_CXXFLAGS@ +TIMESTAMP_CMD = @TIMESTAMP_CMD@ VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ VALGRIND_LIBS = @VALGRIND_LIBS@ VA_CFLAGS = @VA_CFLAGS@ @@ -432,6 +438,7 @@ VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ VDPAU_MAJOR = @VDPAU_MAJOR@ VDPAU_MINOR = @VDPAU_MINOR@ VERSION = @VERSION@ +VG_LIB_DEPS = @VG_LIB_DEPS@ VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ VL_CFLAGS = @VL_CFLAGS@ @@ -460,10 +467,9 @@ XVMC_LIBS = @XVMC_LIBS@ XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ XVMC_MAJOR = @XVMC_MAJOR@ XVMC_MINOR = @XVMC_MINOR@ +XXD = @XXD@ YACC = @YACC@ YFLAGS = @YFLAGS@ -ZLIB_CFLAGS = @ZLIB_CFLAGS@ -ZLIB_LIBS = @ZLIB_LIBS@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -643,8 +649,12 @@ GALLIUM_TARGET_CFLAGS = \ $(LIBDRM_CFLAGS) \ $(VISIBILITY_CFLAGS) -GALLIUM_COMMON_LIB_DEPS = -lm $(LIBUNWIND_LIBS) $(LIBSENSORS_LIBS) \ - $(CLOCK_LIB) $(PTHREAD_LIBS) $(DLOPEN_LIBS) $(am__append_1) +GALLIUM_COMMON_LIB_DEPS = \ + -lm \ + $(CLOCK_LIB) \ + $(PTHREAD_LIBS) \ + $(DLOPEN_LIBS) + GALLIUM_WINSYS_CFLAGS = \ -I$(top_srcdir)/src \ -I$(top_srcdir)/include \ @@ -656,7 +666,7 @@ GALLIUM_WINSYS_CFLAGS = \ GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ - $(am__append_2) $(am__append_3) + $(am__append_1) $(am__append_2) AM_CFLAGS = \ $(GALLIUM_DRIVER_CFLAGS) \ $(LLVM_CFLAGS) \ @@ -678,8 +688,7 @@ TEST_LIBS = \ $(top_builddir)/src/util/libmesautil.la \ $(LLVM_LIBS) \ $(DLOPEN_LIBS) \ - $(PTHREAD_LIBS) \ - $(CLOCK_LIB) + $(PTHREAD_LIBS) lp_test_format_SOURCES = lp_test_format.c lp_test_main.c lp_test_format_LDADD = $(TEST_LIBS) diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index 564e19a15..a57670d49 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -255,13 +255,13 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld, LLVMValueRef rgb_factor_, alpha_factor_; enum lp_build_blend_swizzle rgb_swizzle; - if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) { + if (alpha_swizzle == PIPE_SWIZZLE_X && num_channels == 1) { return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); } rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); - if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { + if (alpha_swizzle != PIPE_SWIZZLE_NONE) { rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels); @@ -312,7 +312,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, struct lp_build_blend_aos_context bld; LLVMValueRef src_factor, dst_factor; LLVMValueRef result; - unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE; + unsigned alpha_swizzle = PIPE_SWIZZLE_NONE; unsigned i; desc = util_format_description(cbuf_format); @@ -370,7 +370,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm, rgb_alpha_same, false); - if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { + if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) { LLVMValueRef alpha; alpha = lp_build_blend(&bld.base, diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c index b25e04137..0c27c2f89 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -359,7 +359,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, z_swizzle = format_desc->swizzle[0]; - if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + if (z_swizzle == PIPE_SWIZZLE_NONE) return FALSE; *width = format_desc->channel[z_swizzle].size; @@ -390,7 +390,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, s_swizzle = format_desc->swizzle[1]; - if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + if (s_swizzle == PIPE_SWIZZLE_NONE) return FALSE; /* just special case 64bit d/s format */ @@ -873,8 +873,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, const unsigned z_swizzle = format_desc->swizzle[0]; const unsigned s_swizzle = format_desc->swizzle[1]; - assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || - s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + assert(z_swizzle != PIPE_SWIZZLE_NONE || + s_swizzle != PIPE_SWIZZLE_NONE); assert(depth->enabled || stencil[0].enabled); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c index 80cb6578b..84912c6f1 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c @@ -73,20 +73,20 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); - for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) { + for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL); } - for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) { + for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL); } - for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) { + for (i = 0; i < ARRAY_SIZE(llvmpipe->sampler_views[0]); i++) { pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL); } - for (i = 0; i < Elements(llvmpipe->constants); i++) { - for (j = 0; j < Elements(llvmpipe->constants[i]); j++) { + for (i = 0; i < ARRAY_SIZE(llvmpipe->constants); i++) { + for (j = 0; j < ARRAY_SIZE(llvmpipe->constants[i]); j++) { pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL); } } @@ -128,7 +128,8 @@ llvmpipe_render_condition ( struct pipe_context *pipe, } struct pipe_context * -llvmpipe_create_context( struct pipe_screen *screen, void *priv ) +llvmpipe_create_context(struct pipe_screen *screen, void *priv, + unsigned flags) { struct llvmpipe_context *llvmpipe; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c index 9acde4f1b..21260369a 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,6 +36,7 @@ #include "util/u_memory.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_format.h" #include "lp_context.h" #include "lp_jit.h" @@ -55,7 +56,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) elem_types[LP_JIT_VIEWPORT_MAX_DEPTH] = LLVMFloatTypeInContext(lc); viewport_type = LLVMStructTypeInContext(lc, elem_types, - Elements(elem_types), 0); + ARRAY_SIZE(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, min_depth, gallivm->target, viewport_type, @@ -83,7 +84,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS); texture_type = LLVMStructTypeInContext(lc, elem_types, - Elements(elem_types), 0); + ARRAY_SIZE(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, gallivm->target, texture_type, @@ -126,7 +127,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LLVMArrayType(LLVMFloatTypeInContext(lc), 4); sampler_type = LLVMStructTypeInContext(lc, elem_types, - Elements(elem_types), 0); + ARRAY_SIZE(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, min_lod, gallivm->target, sampler_type, @@ -165,7 +166,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) PIPE_MAX_SAMPLERS); context_type = LLVMStructTypeInContext(lc, elem_types, - Elements(elem_types), 0); + ARRAY_SIZE(elem_types), 0); LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, gallivm->target, context_type, @@ -208,12 +209,14 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT]; LLVMTypeRef thread_data_type; + elem_types[LP_JIT_THREAD_DATA_CACHE] = + LLVMPointerType(lp_build_format_cache_type(gallivm), 0); elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = LLVMInt32TypeInContext(lc); thread_data_type = LLVMStructTypeInContext(lc, elem_types, - Elements(elem_types), 0); + ARRAY_SIZE(elem_types), 0); lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0); } diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c index c726707c0..9e56c962d 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c @@ -43,6 +43,7 @@ #include "lp_query.h" #include "lp_rast.h" #include "lp_rast_priv.h" +#include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" #include "lp_scene.h" #include "lp_tex_sample.h" @@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task, { task->scene = scene; + /* Clear the cache tags. This should not always be necessary but + simpler for now. */ +#if LP_USE_TEXTURE_CACHE + memset(task->thread_data.cache->cache_tags, 0, + sizeof(task->thread_data.cache->cache_tags)); +#if LP_BUILD_FORMAT_CACHE_DEBUG + task->thread_data.cache->cache_access_total = 0; + task->thread_data.cache->cache_access_miss = 0; +#endif +#endif + if (!task->rast->no_rast && !scene->discard) { /* loop over scene bins, rasterize each */ { @@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task, } +#if LP_BUILD_FORMAT_CACHE_DEBUG + { + uint64_t total, miss; + total = task->thread_data.cache->cache_access_total; + miss = task->thread_data.cache->cache_access_miss; + if (total) { + debug_printf("thread %d cache access %llu miss %llu hit rate %f\n", + task->thread_index, (long long unsigned)total, + (long long unsigned)miss, + (float)(total - miss)/(float)total); + } + } +#endif + if (scene->fence) { lp_fence_signal(scene->fence); } @@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads ) goto no_full_scenes; } - for (i = 0; i < Elements(rast->tasks); i++) { + for (i = 0; i < MAX2(1, num_threads); i++) { struct lp_rasterizer_task *task = &rast->tasks[i]; task->rast = rast; task->thread_index = i; + task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache), + 16); + if (!task->thread_data.cache) { + goto no_thread_data_cache; + } } rast->num_threads = num_threads; @@ -879,12 +910,22 @@ lp_rast_create( unsigned num_threads ) create_rast_threads(rast); /* for synchronizing rasterization threads */ - pipe_barrier_init( &rast->barrier, rast->num_threads ); + if (rast->num_threads > 0) { + pipe_barrier_init( &rast->barrier, rast->num_threads ); + } memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); return rast; +no_thread_data_cache: + for (i = 0; i < MAX2(1, rast->num_threads); i++) { + if (rast->tasks[i].thread_data.cache) { + align_free(rast->tasks[i].thread_data.cache); + } + } + + lp_scene_queue_destroy(rast->full_scenes); no_full_scenes: FREE(rast); no_rast: @@ -923,9 +964,14 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) pipe_semaphore_destroy(&rast->tasks[i].work_ready); pipe_semaphore_destroy(&rast->tasks[i].work_done); } + for (i = 0; i < MAX2(1, rast->num_threads); i++) { + align_free(rast->tasks[i].thread_data.cache); + } /* for synchronizing rasterization threads */ - pipe_barrier_destroy( &rast->barrier ); + if (rast->num_threads > 0) { + pipe_barrier_destroy( &rast->barrier ); + } lp_scene_queue_destroy(rast->full_scenes); diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c index 2441b3c0d..223be931e 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c @@ -337,7 +337,7 @@ lp_scene_new_data_block( struct lp_scene *scene ) } else { struct data_block *block = MALLOC_STRUCT(data_block); - if (block == NULL) + if (!block) return NULL; scene->scene_size += sizeof *block; diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c index 14eeab033..3e4f1ef44 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c @@ -109,6 +109,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) switch (param) { case PIPE_CAP_NPOT_TEXTURES: case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: return 1; @@ -264,6 +265,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_FAKE_SW_MSAA: return 1; case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_TGSI_ARRAY_COMPONENTS: return 1; case PIPE_CAP_VENDOR_ID: @@ -279,6 +281,12 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) if (!os_get_total_physical_memory(&system_memory)) return 0; + if (sizeof(void *) == 4) + /* Cap to 2 GB on 32 bits system. We do this because llvmpipe does + * eat application memory, which is quite limited on 32 bits. App + * shouldn't expect too much available memory. */ + system_memory = MIN2(system_memory, 2048 << 20); + return (int)(system_memory >> 20); } case PIPE_CAP_UMA: @@ -291,11 +299,44 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TEXTURE_FLOAT_LINEAR: case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: return 1; + case PIPE_CAP_CULL_DISTANCE: + return 1; + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + return 1; case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: case PIPE_CAP_DEPTH_BOUNDS_TEST: + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_PCI_GROUP: + case PIPE_CAP_PCI_BUS: + case PIPE_CAP_PCI_DEVICE: + case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: + case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: + case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_MAX_WINDOW_RECTANGLES: + case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED: + case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS: return 0; } /* should only get here on unhandled cases */ @@ -421,19 +462,20 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, if (!format_desc->is_array && !format_desc->is_bitmask && format != PIPE_FORMAT_R11G11B10_FLOAT) return FALSE; + } - /* - * XXX refuse formats known to crash in generate_unswizzled_blend(). - * These include all 3-channel 24bit RGB8 variants, plus 48bit - * (except those using floats) 3-channel RGB16 variants (the latter - * seems to be more of a llvm bug though). - * The mesa state tracker only seems to use these for SINT/UINT formats. + if ((bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) && + ((bind & PIPE_BIND_DISPLAY_TARGET) == 0)) { + /* Disable all 3-channel formats, where channel size != 32 bits. + * In some cases we run into crashes (in generate_unswizzled_blend()), + * for 3-channel RGB16 variants, there was an apparent LLVM bug. + * In any case, disabling the shallower 3-channel formats avoids a + * number of issues with GL_ARB_copy_image support. */ - if (format_desc->is_array && format_desc->nr_channels == 3) { - if (format_desc->block.bits == 24 || (format_desc->block.bits == 48 && - !util_format_is_float(format))) { - return FALSE; - } + if (format_desc->is_array && + format_desc->nr_channels == 3 && + format_desc->block.bits != 96) { + return FALSE; } } @@ -450,12 +492,13 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, return FALSE; /* TODO: Support stencil-only formats */ - if (format_desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { + if (format_desc->swizzle[0] == PIPE_SWIZZLE_NONE) { return FALSE; } } - if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { + if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC || + format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) { /* Software decoding is not hooked up. */ return FALSE; } @@ -537,6 +580,7 @@ llvmpipe_fence_reference(struct pipe_screen *screen, */ static boolean llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, struct pipe_fence_handle *fence_handle, uint64_t timeout) { diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c index 4c8167a9e..768775b29 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c @@ -39,6 +39,7 @@ #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" +#include "util/u_viewport.h" #include "draw/draw_pipe.h" #include "os/os_time.h" #include "lp_context.h" @@ -69,7 +70,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup) assert(setup->scene == NULL); setup->scene_idx++; - setup->scene_idx %= Elements(setup->scenes); + setup->scene_idx %= ARRAY_SIZE(setup->scenes); setup->scene = setup->scenes[setup->scene_idx]; @@ -123,7 +124,7 @@ void lp_setup_reset( struct lp_setup_context *setup ) LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); /* Reset derived state */ - for (i = 0; i < Elements(setup->constants); ++i) { + for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) { setup->constants[i].stored_size = 0; setup->constants[i].stored_data = NULL; } @@ -476,22 +477,30 @@ lp_setup_try_clear_zs(struct lp_setup_context *setup, uint64_t zsvalue = 0; uint32_t zmask32; uint8_t smask8; + enum pipe_format format = setup->fb.zsbuf->format; LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; - zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format, - depth, - stencil); + zsvalue = util_pack64_z_stencil(format, depth, stencil); - zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format, - zmask32, - smask8); + zsmask = util_pack64_mask_z_stencil(format, zmask32, smask8); zsvalue &= zsmask; + if (format == PIPE_FORMAT_Z24X8_UNORM || + format == PIPE_FORMAT_X8Z24_UNORM) { + /* + * Make full mask if there's "X" bits so we can do full + * clear (without rmw). + */ + uint32_t zsmask_full = 0; + zsmask_full = util_pack_mask_z_stencil(format, ~0, ~0); + zsmask |= ~zsmask_full; + } + if (setup->state == SETUP_ACTIVE) { struct lp_scene *scene = setup->scene; @@ -642,12 +651,12 @@ lp_setup_set_fs_constants(struct lp_setup_context *setup, LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers); - assert(num <= Elements(setup->constants)); + assert(num <= ARRAY_SIZE(setup->constants)); for (i = 0; i < num; ++i) { util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]); } - for (; i < Elements(setup->constants); i++) { + for (; i < ARRAY_SIZE(setup->constants); i++) { util_copy_constant_buffer(&setup->constants[i].current, NULL); } setup->dirty |= LP_SETUP_NEW_CONSTANTS; @@ -763,15 +772,8 @@ lp_setup_set_viewports(struct lp_setup_context *setup, for (i = 0; i < num_viewports; i++) { float min_depth; float max_depth; - - if (lp->rasterizer->clip_halfz == 0) { - float half_depth = viewports[i].scale[2]; - min_depth = viewports[i].translate[2] - half_depth; - max_depth = min_depth + half_depth * 2.0f; - } else { - min_depth = viewports[i].translate[2]; - max_depth = min_depth + viewports[i].scale[2]; - } + util_viewport_zmin_zmax(&viewports[i], lp->rasterizer->clip_halfz, + &min_depth, &max_depth); if (setup->viewports[i].min_depth != min_depth || setup->viewports[i].max_depth != max_depth) { @@ -791,13 +793,15 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, unsigned num, struct pipe_sampler_view **views) { - unsigned i; + unsigned i, max_tex_num; LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); - for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { + max_tex_num = MAX2(num, setup->fs.current_tex_num); + + for (i = 0; i < max_tex_num; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; if (view) { @@ -854,10 +858,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->img_stride[j] = lp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (res->target == PIPE_TEXTURE_1D_ARRAY || + res->target == PIPE_TEXTURE_2D_ARRAY || + res->target == PIPE_TEXTURE_CUBE || + res->target == PIPE_TEXTURE_CUBE_ARRAY) { /* * For array textures, we don't have first_layer, instead * adjust last_layer (stored as depth) plus the mip level offsets @@ -879,8 +883,8 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, } else { /* - * For buffers, we don't have first_element, instead adjust - * last_element (stored as width) plus the base pointer. + * For buffers, we don't have "offset", instead adjust + * the size (stored as width) plus the base pointer. */ unsigned view_blocksize = util_format_get_blocksize(view->format); /* probably don't really need to fill that out */ @@ -889,12 +893,10 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, jit_tex->img_stride[0] = 0; /* everything specified in number of elements here. */ - jit_tex->width = view->u.buf.last_element - view->u.buf.first_element + 1; - jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.first_element * - view_blocksize; + jit_tex->width = view->u.buf.size / view_blocksize; + jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.offset; /* XXX Unsure if we need to sanitize parameters? */ - assert(view->u.buf.first_element <= view->u.buf.last_element); - assert(view->u.buf.last_element * view_blocksize < res->width0); + assert(view->u.buf.offset + view->u.buf.size <= res->width0); } } } @@ -917,7 +919,11 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, assert(jit_tex->base); } } + else { + pipe_resource_reference(&setup->fs.current_tex[i], NULL); + } } + setup->fs.current_tex_num = num; setup->dirty |= LP_SETUP_NEW_FS; } @@ -976,7 +982,7 @@ lp_setup_is_resource_referenced( const struct lp_setup_context *setup, } /* check textures referenced by the scene */ - for (i = 0; i < Elements(setup->scenes); i++) { + for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) { if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) { return LP_REFERENCED_FOR_READ; } @@ -1067,7 +1073,7 @@ try_update_scene_state( struct lp_setup_context *setup ) } if (setup->dirty & LP_SETUP_NEW_CONSTANTS) { - for (i = 0; i < Elements(setup->constants); ++i) { + for (i = 0; i < ARRAY_SIZE(setup->constants); ++i) { struct pipe_resource *buffer = setup->constants[i].current.buffer; const unsigned current_size = MIN2(setup->constants[i].current.buffer_size, LP_MAX_TGSI_CONST_BUFFER_SIZE); @@ -1152,7 +1158,7 @@ try_update_scene_state( struct lp_setup_context *setup ) /* The scene now references the textures in the rasterization * state record. Note that now. */ - for (i = 0; i < Elements(setup->fs.current_tex); i++) { + for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) { if (setup->fs.current_tex[i]) { if (!lp_scene_add_resource_reference(scene, setup->fs.current_tex[i], @@ -1207,7 +1213,7 @@ lp_setup_update_state( struct lp_setup_context *setup, /* Will probably need to move this somewhere else, just need * to know about vertex shader point size attribute. */ - setup->psize = lp->psize_slot; + setup->psize_slot = lp->psize_slot; setup->viewport_index_slot = lp->viewport_index_slot; setup->layer_slot = lp->layer_slot; setup->face_slot = lp->face_slot; @@ -1269,16 +1275,16 @@ lp_setup_destroy( struct lp_setup_context *setup ) util_unreference_framebuffer_state(&setup->fb); - for (i = 0; i < Elements(setup->fs.current_tex); i++) { + for (i = 0; i < ARRAY_SIZE(setup->fs.current_tex); i++) { pipe_resource_reference(&setup->fs.current_tex[i], NULL); } - for (i = 0; i < Elements(setup->constants); i++) { + for (i = 0; i < ARRAY_SIZE(setup->constants); i++) { pipe_resource_reference(&setup->constants[i].current.buffer, NULL); } /* free the scenes in the 'empty' queue */ - for (i = 0; i < Elements(setup->scenes); i++) { + for (i = 0; i < ARRAY_SIZE(setup->scenes); i++) { struct lp_scene *scene = setup->scenes[i]; if (scene->fence) diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 98a9d4bc2..98243a12d 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -46,6 +46,9 @@ #if defined(PIPE_ARCH_SSE) #include <emmintrin.h> +#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN) +#include <altivec.h> +#include "util/u_pwr8.h" #endif static inline int @@ -65,11 +68,11 @@ fixed_to_float(int a) struct fixed_position { int32_t x[4]; int32_t y[4]; - int64_t area; int32_t dx01; int32_t dy01; int32_t dx20; int32_t dy20; + int64_t area; }; @@ -91,12 +94,14 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); struct lp_rast_triangle *tri; + STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0); + *tri_size = (sizeof(struct lp_rast_triangle) + 3 * input_array_sz + plane_sz); tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); - if (tri == NULL) + if (!tri) return NULL; tri->inputs.stride = input_array_sz; @@ -276,6 +281,7 @@ do_triangle_ccw(struct lp_setup_context *setup, int nr_planes = 3; unsigned viewport_index = 0; unsigned layer = 0; + const float (*pv)[4]; /* Area should always be positive here */ assert(position->area > 0); @@ -283,18 +289,18 @@ do_triangle_ccw(struct lp_setup_context *setup, if (0) lp_setup_print_triangle(setup, v0, v1, v2); - if (setup->scissor_test) { - nr_planes = 7; - if (setup->viewport_index_slot > 0) { - unsigned *udata = (unsigned*)v0[setup->viewport_index_slot]; - viewport_index = lp_clamp_viewport_idx(*udata); - } + if (setup->flatshade_first) { + pv = v0; } else { - nr_planes = 3; + pv = v2; + } + if (setup->viewport_index_slot > 0) { + unsigned *udata = (unsigned*)pv[setup->viewport_index_slot]; + viewport_index = lp_clamp_viewport_idx(*udata); } if (setup->layer_slot > 0) { - layer = *(unsigned*)v1[setup->layer_slot]; + layer = *(unsigned*)pv[setup->layer_slot]; layer = MIN2(layer, scene->fb_max_layer); } @@ -336,6 +342,18 @@ do_triangle_ccw(struct lp_setup_context *setup, bbox.x0 = MAX2(bbox.x0, 0); bbox.y0 = MAX2(bbox.y0, 0); + nr_planes = 3; + /* + * Determine how many scissor planes we need, that is drop scissor + * edges if the bounding box of the tri is fully inside that edge. + */ + if (setup->scissor_test) { + /* why not just use draw_regions */ + boolean s_planes[4]; + scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]); + nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3]; + } + tri = lp_setup_alloc_triangle(scene, key->num_inputs, nr_planes, @@ -356,13 +374,11 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Setup parameter interpolants: */ - setup->setup.variant->jit_function( v0, - v1, - v2, - frontfacing, - GET_A0(&tri->inputs), - GET_DADX(&tri->inputs), - GET_DADY(&tri->inputs) ); + setup->setup.variant->jit_function(v0, v1, v2, + frontfacing, + GET_A0(&tri->inputs), + GET_DADX(&tri->inputs), + GET_DADY(&tri->inputs)); tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; @@ -372,32 +388,28 @@ do_triangle_ccw(struct lp_setup_context *setup, if (0) lp_dump_setup_coef(&setup->setup.variant->key, - (const float (*)[4])GET_A0(&tri->inputs), - (const float (*)[4])GET_DADX(&tri->inputs), - (const float (*)[4])GET_DADY(&tri->inputs)); + (const float (*)[4])GET_A0(&tri->inputs), + (const float (*)[4])GET_DADX(&tri->inputs), + (const float (*)[4])GET_DADY(&tri->inputs)); plane = GET_PLANES(tri); #if defined(PIPE_ARCH_SSE) - if (setup->fb.width <= MAX_FIXED_LENGTH32 && - setup->fb.height <= MAX_FIXED_LENGTH32 && - (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 && - (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) { + if (1) { __m128i vertx, verty; __m128i shufx, shufy; - __m128i dcdx, dcdy, c; - __m128i unused; + __m128i dcdx, dcdy; + __m128i cdx02, cdx13, cdy02, cdy13, c02, c13; + __m128i c01, c23, unused; __m128i dcdx_neg_mask; __m128i dcdy_neg_mask; __m128i dcdx_zero_mask; - __m128i top_left_flag; - __m128i c_inc_mask, c_inc; + __m128i top_left_flag, c_dec; __m128i eo, p0, p1, p2; __m128i zero = _mm_setzero_si128(); - PIPE_ALIGN_VAR(16) int32_t temp_vec[4]; - vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */ - verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */ + vertx = _mm_load_si128((__m128i *)position->x); /* vertex x coords */ + verty = _mm_load_si128((__m128i *)position->y); /* vertex y coords */ shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); @@ -411,42 +423,161 @@ do_triangle_ccw(struct lp_setup_context *setup, top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0); - c_inc_mask = _mm_or_si128(dcdx_neg_mask, - _mm_and_si128(dcdx_zero_mask, - _mm_xor_si128(dcdy_neg_mask, - top_left_flag))); - - c_inc = _mm_srli_epi32(c_inc_mask, 31); - - c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), - mm_mullo_epi32(dcdy, verty)); + c_dec = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); - c = _mm_add_epi32(c, c_inc); + /* + * 64 bit arithmetic. + * Note we need _signed_ mul (_mm_mul_epi32) which we emulate. + */ + cdx02 = mm_mullohi_epi32(dcdx, vertx, &cdx13); + cdy02 = mm_mullohi_epi32(dcdy, verty, &cdy13); + c02 = _mm_sub_epi64(cdx02, cdy02); + c13 = _mm_sub_epi64(cdx13, cdy13); + c02 = _mm_sub_epi64(c02, _mm_shuffle_epi32(c_dec, + _MM_SHUFFLE(2,2,0,0))); + c13 = _mm_sub_epi64(c13, _mm_shuffle_epi32(c_dec, + _MM_SHUFFLE(3,3,1,1))); + + /* + * Useful for very small fbs/tris (or fewer subpixel bits) only: + * c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + * mm_mullo_epi32(dcdy, verty)); + * + * c = _mm_sub_epi32(c, c_dec); + */ /* Scale up to match c: */ dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); - /* Calculate trivial reject values: + /* + * Calculate trivial reject values: + * Note eo cannot overflow even if dcdx/dcdy would already have + * 31 bits (which they shouldn't have). This is because eo + * is never negative (albeit if we rely on that need to be careful...) */ eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), _mm_and_si128(dcdx_neg_mask, dcdx)); /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + /* + * Pointless transpose which gets undone immediately in + * rasterization. + * It is actually difficult to do away with it - would essentially + * need GET_PLANES_DX, GET_PLANES_DY etc., but the calculations + * for this then would need to depend on the number of planes. + * The transpose is quite special here due to c being 64bit... + * The store has to be unaligned (unless we'd make the plane size + * a multiple of 128), and of course storing eo separately... + */ + c01 = _mm_unpacklo_epi64(c02, c13); + c23 = _mm_unpackhi_epi64(c02, c13); + transpose2_64_2_32(&c01, &c23, &dcdx, &dcdy, + &p0, &p1, &p2, &unused); + _mm_storeu_si128((__m128i *)&plane[0], p0); + plane[0].eo = (uint32_t)_mm_cvtsi128_si32(eo); + _mm_storeu_si128((__m128i *)&plane[1], p1); + eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(3,2,0,1)); + plane[1].eo = (uint32_t)_mm_cvtsi128_si32(eo); + _mm_storeu_si128((__m128i *)&plane[2], p2); + eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2)); + plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo); + } else +#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN) + /* + * XXX this code is effectively disabled for all practical purposes, + * as the allowed fb size is tiny if FIXED_ORDER is 8. + */ + if (setup->fb.width <= MAX_FIXED_LENGTH32 && + setup->fb.height <= MAX_FIXED_LENGTH32 && + (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 && + (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) { + unsigned int bottom_edge; + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i_union vshuf_mask; + __m128i zero = vec_splats((unsigned char) 0); + PIPE_ALIGN_VAR(16) int32_t temp_vec[4]; + +#ifdef PIPE_ARCH_LITTLE_ENDIAN + vshuf_mask.i[0] = 0x07060504; + vshuf_mask.i[1] = 0x0B0A0908; + vshuf_mask.i[2] = 0x03020100; + vshuf_mask.i[3] = 0x0F0E0D0C; +#else + vshuf_mask.i[0] = 0x00010203; + vshuf_mask.i[1] = 0x0C0D0E0F; + vshuf_mask.i[2] = 0x04050607; + vshuf_mask.i[3] = 0x08090A0B; +#endif + + /* vertex x coords */ + vertx = vec_load_si128((const uint32_t *) position->x); + /* vertex y coords */ + verty = vec_load_si128((const uint32_t *) position->y); + + shufx = vec_perm (vertx, vertx, vshuf_mask.m128i); + shufy = vec_perm (verty, verty, vshuf_mask.m128i); + + dcdx = vec_sub_epi32(verty, shufy); + dcdy = vec_sub_epi32(vertx, shufx); + + dcdx_neg_mask = vec_srai_epi32(dcdx, 31); + dcdx_zero_mask = vec_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = vec_srai_epi32(dcdy, 31); + + bottom_edge = (setup->bottom_edge_rule == 0) ? ~0 : 0; + top_left_flag = (__m128i) vec_splats(bottom_edge); + + c_inc_mask = vec_or(dcdx_neg_mask, + vec_and(dcdx_zero_mask, + vec_xor(dcdy_neg_mask, + top_left_flag))); + + c_inc = vec_srli_epi32(c_inc_mask, 31); + + c = vec_sub_epi32(vec_mullo_epi32(dcdx, vertx), + vec_mullo_epi32(dcdy, verty)); + + c = vec_add_epi32(c, c_inc); + + /* Scale up to match c: + */ + dcdx = vec_slli_epi32(dcdx, FIXED_ORDER); + dcdy = vec_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = vec_sub_epi32(vec_andnot_si128(dcdy_neg_mask, dcdy), + vec_and(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + /* Pointless transpose which gets undone immediately in * rasterization: */ transpose4_epi32(&c, &dcdx, &dcdy, &eo, &p0, &p1, &p2, &unused); -#define STORE_PLANE(plane, vec) do { \ - _mm_store_si128((__m128i *)&temp_vec, vec); \ - plane.c = (int64_t)temp_vec[0]; \ - plane.dcdx = temp_vec[1]; \ - plane.dcdy = temp_vec[2]; \ - plane.eo = temp_vec[3]; \ +#define STORE_PLANE(plane, vec) do { \ + vec_store_si128((uint32_t *)&temp_vec, vec); \ + plane.c = (int64_t)temp_vec[0]; \ + plane.dcdx = temp_vec[1]; \ + plane.dcdy = temp_vec[2]; \ + plane.eo = temp_vec[3]; \ } while(0) STORE_PLANE(plane[0], p0); @@ -465,17 +596,17 @@ do_triangle_ccw(struct lp_setup_context *setup, plane[2].dcdx = position->dy20; for (i = 0; i < 3; i++) { - /* half-edge constants, will be interated over the whole render + /* half-edge constants, will be iterated over the whole render * target. */ plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) - - IMUL64(plane[i].dcdy, position->y[i]); + IMUL64(plane[i].dcdy, position->y[i]); /* correct for top-left vs. bottom-left fill convention. - */ + */ if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane[i].c++; + plane[i].c++; } else if (plane[i].dcdx == 0) { if (setup->bottom_edge_rule == 0){ @@ -509,19 +640,19 @@ do_triangle_ccw(struct lp_setup_context *setup, } if (0) { - debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n", + debug_printf("p0: %"PRIx64"/%08x/%08x/%08x\n", plane[0].c, plane[0].dcdx, plane[0].dcdy, plane[0].eo); - - debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n", + + debug_printf("p1: %"PRIx64"/%08x/%08x/%08x\n", plane[1].c, plane[1].dcdx, plane[1].dcdy, plane[1].eo); - - debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n", + + debug_printf("p2: %"PRIx64"/%08x/%08x/%08x\n", plane[2].c, plane[2].dcdx, plane[2].dcdy, @@ -546,29 +677,46 @@ do_triangle_ccw(struct lp_setup_context *setup, * Note that otherwise, the scissor planes only vary in 'C' value, * and even then only on state-changes. Could alternatively store * these planes elsewhere. + * (Or only store the c value together with a bit indicating which + * scissor edge this is, so rasterization would treat them differently + * (easier to evaluate) to ordinary planes.) */ - if (nr_planes == 7) { + if (nr_planes > 3) { + /* why not just use draw_regions */ const struct u_rect *scissor = &setup->scissors[viewport_index]; - - plane[3].dcdx = -1; - plane[3].dcdy = 0; - plane[3].c = 1-scissor->x0; - plane[3].eo = 1; - - plane[4].dcdx = 1; - plane[4].dcdy = 0; - plane[4].c = scissor->x1+1; - plane[4].eo = 0; - - plane[5].dcdx = 0; - plane[5].dcdy = 1; - plane[5].c = 1-scissor->y0; - plane[5].eo = 1; - - plane[6].dcdx = 0; - plane[6].dcdy = -1; - plane[6].c = scissor->y1+1; - plane[6].eo = 0; + struct lp_rast_plane *plane_s = &plane[3]; + boolean s_planes[4]; + scissor_planes_needed(s_planes, &bbox, scissor); + + if (s_planes[0]) { + plane_s->dcdx = -1 << 8; + plane_s->dcdy = 0; + plane_s->c = (1-scissor->x0) << 8; + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[1]) { + plane_s->dcdx = 1 << 8; + plane_s->dcdy = 0; + plane_s->c = (scissor->x1+1) << 8; + plane_s->eo = 0 << 8; + plane_s++; + } + if (s_planes[2]) { + plane_s->dcdx = 0; + plane_s->dcdy = 1 << 8; + plane_s->c = (1-scissor->y0) << 8; + plane_s->eo = 1 << 8; + plane_s++; + } + if (s_planes[3]) { + plane_s->dcdx = 0; + plane_s->dcdy = -1 << 8; + plane_s->c = (scissor->y1+1) << 8; + plane_s->eo = 0; + plane_s++; + } + assert(plane_s == &plane[nr_planes]); } return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index); @@ -582,7 +730,7 @@ do_triangle_ccw(struct lp_setup_context *setup, static inline uint32_t floor_pot(uint32_t n) { -#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) +#if defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) if (n == 0) return 0; @@ -730,9 +878,9 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, ei[i] = (plane[i].dcdy - plane[i].dcdx - - plane[i].eo) << TILE_ORDER; + (int64_t)plane[i].eo) << TILE_ORDER; - eo[i] = plane[i].eo << TILE_ORDER; + eo[i] = (int64_t)plane[i].eo << TILE_ORDER; xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER); ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER; } @@ -840,29 +988,70 @@ static void retry_triangle_ccw( struct lp_setup_context *setup, /** * Calculate fixed position data for a triangle + * It is unfortunate we need to do that here (as we need area + * calculated in fixed point), as there's quite some code duplication + * to what is done in the jit setup prog. */ static inline void -calc_fixed_position( struct lp_setup_context *setup, - struct fixed_position* position, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) +calc_fixed_position(struct lp_setup_context *setup, + struct fixed_position* position, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { + /* + * The rounding may not be quite the same with PIPE_ARCH_SSE + * (util_iround right now only does nearest/even on x87, + * otherwise nearest/away-from-zero). + * Both should be acceptable, I think. + */ +#if defined(PIPE_ARCH_SSE) + __m128 v0r, v1r; + __m128 vxy0xy2, vxy1xy0; + __m128i vxy0xy2i, vxy1xy0i; + __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120; + __m128 pix_offset = _mm_set1_ps(setup->pixel_offset); + __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE); + v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0])); + vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]); + v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0])); + vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2); + vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset); + vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset); + vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one); + vxy1xy0 = _mm_mul_ps(vxy1xy0, fixed_one); + vxy0xy2i = _mm_cvtps_epi32(vxy0xy2); + vxy1xy0i = _mm_cvtps_epi32(vxy1xy0); + dxdy0120 = _mm_sub_epi32(vxy0xy2i, vxy1xy0i); + _mm_store_si128((__m128i *)&position->dx01, dxdy0120); + /* + * For the mul, would need some more shuffles, plus emulation + * for the signed mul (without sse41), so don't bother. + */ + x0x2y0y2 = _mm_shuffle_epi32(vxy0xy2i, _MM_SHUFFLE(3,1,2,0)); + x1x0y1y0 = _mm_shuffle_epi32(vxy1xy0i, _MM_SHUFFLE(3,1,2,0)); + x0120 = _mm_unpacklo_epi32(x0x2y0y2, x1x0y1y0); + y0120 = _mm_unpackhi_epi32(x0x2y0y2, x1x0y1y0); + _mm_store_si128((__m128i *)&position->x[0], x0120); + _mm_store_si128((__m128i *)&position->y[0], y0120); + +#else position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); - position->x[3] = 0; + position->x[3] = 0; // should be unused position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - position->y[3] = 0; + position->y[3] = 0; // should be unused position->dx01 = position->x[0] - position->x[1]; position->dy01 = position->y[0] - position->y[1]; position->dx20 = position->x[2] - position->x[0]; position->dy20 = position->y[2] - position->y[0]; +#endif position->area = IMUL64(position->dx01, position->dy20) - IMUL64(position->dx20, position->dy01); @@ -924,12 +1113,12 @@ rotate_fixed_position_12( struct fixed_position* position ) /** * Draw triangle if it's CW, cull otherwise. */ -static void triangle_cw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void triangle_cw(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { - struct fixed_position position; + PIPE_ALIGN_VAR(16) struct fixed_position position; calc_fixed_position(setup, &position, v0, v1, v2); @@ -945,12 +1134,12 @@ static void triangle_cw( struct lp_setup_context *setup, } -static void triangle_ccw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) +static void triangle_ccw(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { - struct fixed_position position; + PIPE_ALIGN_VAR(16) struct fixed_position position; calc_fixed_position(setup, &position, v0, v1, v2); @@ -961,12 +1150,12 @@ static void triangle_ccw( struct lp_setup_context *setup, /** * Draw triangle whether it's CW or CCW. */ -static void triangle_both( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void triangle_both(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { - struct fixed_position position; + PIPE_ALIGN_VAR(16) struct fixed_position position; struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; if (lp_context->active_statistics_queries && diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c index fd6c49aac..3428eed4e 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -134,7 +134,7 @@ generate_quad_mask(struct gallivm_state *gallivm, * XXX: We'll need a different path for 16 x u8 */ assert(fs_type.width == 32); - assert(fs_type.length <= Elements(bits)); + assert(fs_type.length <= ARRAY_SIZE(bits)); mask_type = lp_int_type(fs_type); /* @@ -238,6 +238,54 @@ lp_llvm_viewport(LLVMValueRef context_ptr, } +static LLVMValueRef +lp_build_depth_clamp(struct gallivm_state *gallivm, + LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef context_ptr, + LLVMValueRef thread_data_ptr, + LLVMValueRef z) +{ + LLVMValueRef viewport, min_depth, max_depth; + LLVMValueRef viewport_index; + struct lp_build_context f32_bld; + + assert(type.floating); + lp_build_context_init(&f32_bld, gallivm, type); + + /* + * Assumes clamping of the viewport index will occur in setup/gs. Value + * is passed through the rasterization stage via lp_rast_shader_inputs. + * + * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping + * semantics. + */ + viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm, + thread_data_ptr); + + /* + * Load the min and max depth from the lp_jit_context.viewports + * array of lp_jit_viewport structures. + */ + viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index); + + /* viewports[viewport_index].min_depth */ + min_depth = LLVMBuildExtractElement(builder, viewport, + lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH), ""); + min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth); + + /* viewports[viewport_index].max_depth */ + max_depth = LLVMBuildExtractElement(builder, viewport, + lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH), ""); + max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth); + + /* + * Clamp to the min and max depth values for the given viewport. + */ + return lp_build_clamp(&f32_bld, z, min_depth, max_depth); +} + + /** * Generate the fragment shader, depth/stencil test, and alpha tests. */ @@ -383,6 +431,13 @@ generate_fs_loop(struct gallivm_state *gallivm, z = interp->pos[2]; if (depth_mode & EARLY_DEPTH_TEST) { + /* + * Clamp according to ARB_depth_clamp semantics. + */ + if (key->depth_clamp) { + z = lp_build_depth_clamp(gallivm, builder, type, context_ptr, + thread_data_ptr, z); + } lp_build_depth_stencil_load_swizzled(gallivm, type, zs_format_desc, key->resource_1d, depth_ptr, depth_stride, @@ -421,7 +476,7 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, num_consts_ptr, &system_values, interp->inputs, - outputs, context_ptr, + outputs, context_ptr, thread_data_ptr, sampler, &shader->info.base, NULL); /* Alpha test */ @@ -471,51 +526,13 @@ generate_fs_loop(struct gallivm_state *gallivm, 0); if (pos0 != -1 && outputs[pos0][2]) { z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); - - /* - * Clamp according to ARB_depth_clamp semantics. - */ - if (key->depth_clamp) { - LLVMValueRef viewport, min_depth, max_depth; - LLVMValueRef viewport_index; - struct lp_build_context f32_bld; - - assert(type.floating); - lp_build_context_init(&f32_bld, gallivm, type); - - /* - * Assumes clamping of the viewport index will occur in setup/gs. Value - * is passed through the rasterization stage via lp_rast_shader_inputs. - * - * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping - * semantics. - */ - viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm, - thread_data_ptr); - - /* - * Load the min and max depth from the lp_jit_context.viewports - * array of lp_jit_viewport structures. - */ - viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index); - - /* viewports[viewport_index].min_depth */ - min_depth = LLVMBuildExtractElement(builder, viewport, - lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH), - ""); - min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth); - - /* viewports[viewport_index].max_depth */ - max_depth = LLVMBuildExtractElement(builder, viewport, - lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH), - ""); - max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth); - - /* - * Clamp to the min and max depth values for the given viewport. - */ - z = lp_build_clamp(&f32_bld, z, min_depth, max_depth); - } + } + /* + * Clamp according to ARB_depth_clamp semantics. + */ + if (key->depth_clamp) { + z = lp_build_depth_clamp(gallivm, builder, type, context_ptr, + thread_data_ptr, z); } if (s_out != -1 && outputs[s_out][1]) { @@ -646,7 +663,7 @@ generate_fs_twiddle(struct gallivm_state *gallivm, src_count = num_fs * src_channels; assert(pixels == 2 || pixels == 1); - assert(num_fs * src_channels <= Elements(src)); + assert(num_fs * src_channels <= ARRAY_SIZE(src)); /* * Transpose from SoA -> AoS @@ -786,7 +803,7 @@ load_unswizzled_block(struct gallivm_state *gallivm, dst[i] = LLVMBuildLoad(builder, dst_ptr, ""); - lp_set_load_alignment(dst[i], dst_alignment); + LLVMSetAlignment(dst[i], dst_alignment); } } @@ -830,7 +847,7 @@ store_unswizzled_block(struct gallivm_state *gallivm, src_ptr = LLVMBuildStore(builder, src[i], src_ptr); - lp_set_store_alignment(src_ptr, src_alignment); + LLVMSetAlignment(src_ptr, src_alignment); } } @@ -1601,7 +1618,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm, LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS]; LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS]; LLVMValueRef src_alpha[4 * 4]; - LLVMValueRef src1_alpha[4 * 4]; + LLVMValueRef src1_alpha[4 * 4] = { NULL }; LLVMValueRef src_mask[4 * 4]; LLVMValueRef src[4 * 4]; LLVMValueRef src1[4 * 4]; @@ -2267,7 +2284,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[12] = int32_type; /* depth_stride */ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), - arg_types, Elements(arg_types), 0); + arg_types, ARRAY_SIZE(arg_types), 0); function = LLVMAddFunction(gallivm->module, func_name, func_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); @@ -2277,7 +2294,7 @@ generate_fragment(struct llvmpipe_context *lp, /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ - for(i = 0; i < Elements(arg_types); ++i) + for(i = 0; i < ARRAY_SIZE(arg_types); ++i) if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); @@ -2303,8 +2320,8 @@ generate_fragment(struct llvmpipe_context *lp, lp_build_name(dady_ptr, "dady"); lp_build_name(color_ptr_ptr, "color_ptr_ptr"); lp_build_name(depth_ptr, "depth"); - lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(mask_input, "mask_input"); + lp_build_name(thread_data_ptr, "thread_data"); lp_build_name(stride_ptr, "stride_ptr"); lp_build_name(depth_stride, "depth_stride"); @@ -2344,6 +2361,7 @@ generate_fragment(struct llvmpipe_context *lp, shader->info.base.num_inputs, inputs, pixel_center_integer, + key->depth_clamp, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); @@ -2563,7 +2581,7 @@ generate_variant(struct llvmpipe_context *lp, char module_name[64]; variant = CALLOC_STRUCT(lp_fragment_shader_variant); - if(!variant) + if (!variant) return NULL; util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u", @@ -2695,34 +2713,35 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, switch (shader->info.base.input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: - shader->inputs[i].interp = LP_INTERP_CONSTANT; - break; + shader->inputs[i].interp = LP_INTERP_CONSTANT; + break; case TGSI_INTERPOLATE_LINEAR: - shader->inputs[i].interp = LP_INTERP_LINEAR; - break; + shader->inputs[i].interp = LP_INTERP_LINEAR; + break; case TGSI_INTERPOLATE_PERSPECTIVE: - shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; + shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; case TGSI_INTERPOLATE_COLOR: - shader->inputs[i].interp = LP_INTERP_COLOR; - break; + shader->inputs[i].interp = LP_INTERP_COLOR; + break; default: - assert(0); - break; + assert(0); + break; } switch (shader->info.base.input_semantic_name[i]) { case TGSI_SEMANTIC_FACE: - shader->inputs[i].interp = LP_INTERP_FACING; - break; + shader->inputs[i].interp = LP_INTERP_FACING; + break; case TGSI_SEMANTIC_POSITION: - /* Position was already emitted above - */ - shader->inputs[i].interp = LP_INTERP_POSITION; - shader->inputs[i].src_index = 0; - continue; + /* Position was already emitted above + */ + shader->inputs[i].interp = LP_INTERP_POSITION; + shader->inputs[i].src_index = 0; + continue; } + /* XXX this is a completely pointless index map... */ shader->inputs[i].src_index = i+1; } @@ -2835,17 +2854,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) static void llvmpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_constant_buffer *cb) + const struct pipe_constant_buffer *cb) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); struct pipe_resource *constants = cb ? cb->buffer : NULL; assert(shader < PIPE_SHADER_TYPES); - assert(index < Elements(llvmpipe->constants[shader])); + assert(index < ARRAY_SIZE(llvmpipe->constants[shader])); /* note: reference counting */ util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb); + if (constants) { + if (!(constants->bind & PIPE_BIND_CONSTANT_BUFFER)) { + debug_printf("Illegal set constant without bind flag\n"); + constants->bind |= PIPE_BIND_CONSTANT_BUFFER; + } + } + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { /* Pass the constants to the 'draw' module */ @@ -2868,8 +2894,9 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, draw_set_mapped_constant_buffer(llvmpipe->draw, shader, index, data, size); } - - llvmpipe->dirty |= LP_NEW_CONSTANTS; + else { + llvmpipe->dirty |= LP_NEW_FS_CONSTANTS; + } if (cb && cb->user_buffer) { pipe_resource_reference(&constants, NULL); @@ -2940,6 +2967,13 @@ make_variant_key(struct llvmpipe_context *lp, * depth_clip == 0 implies depth clamping is enabled. * * When clip_halfz is enabled, then always clamp the depth values. + * + * XXX: This is incorrect for GL, but correct for d3d10 (depth + * clamp is always active in d3d10, regardless if depth clip is + * enabled or not). + * (GL has an always-on [0,1] clamp on fs depth output instead + * to ensure the depth values stay in range. Doesn't look like + * we do that, though...) */ if (lp->rasterizer->clip_halfz) { key->depth_clamp = 1; @@ -3026,7 +3060,7 @@ make_variant_key(struct llvmpipe_context *lp, * Also, force rgb/alpha func/factors match, to make AoS blending * easier. */ - if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W || + if (format_desc->swizzle[3] > PIPE_SWIZZLE_W || format_desc->swizzle[3] == format_desc->swizzle[0]) { /* Doesn't cover mixed snorm/unorm but can't render to them anyway */ boolean clamped_zero = !util_format_is_float(format) && diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c index b205f02fd..01af05211 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -66,7 +66,7 @@ llvmpipe_create_sampler_state(struct pipe_context *pipe, static void llvmpipe_bind_sampler_states(struct pipe_context *pipe, - unsigned shader, + enum pipe_shader_type shader, unsigned start, unsigned num, void **samplers) @@ -75,7 +75,7 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, unsigned i; assert(shader < PIPE_SHADER_TYPES); - assert(start + num <= Elements(llvmpipe->samplers[shader])); + assert(start + num <= ARRAY_SIZE(llvmpipe->samplers[shader])); draw_flush(llvmpipe->draw); @@ -98,14 +98,15 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe, llvmpipe->samplers[shader], llvmpipe->num_samplers[shader]); } - - llvmpipe->dirty |= LP_NEW_SAMPLER; + else { + llvmpipe->dirty |= LP_NEW_SAMPLER; + } } static void llvmpipe_set_sampler_views(struct pipe_context *pipe, - unsigned shader, + enum pipe_shader_type shader, unsigned start, unsigned num, struct pipe_sampler_view **views) @@ -116,7 +117,7 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); assert(shader < PIPE_SHADER_TYPES); - assert(start + num <= Elements(llvmpipe->sampler_views[shader])); + assert(start + num <= ARRAY_SIZE(llvmpipe->sampler_views[shader])); draw_flush(llvmpipe->draw); @@ -128,6 +129,15 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, */ pipe_sampler_view_release(pipe, &llvmpipe->sampler_views[shader][start + i]); + /* + * Warn if someone tries to set a view created in a different context + * (which is why we need the hack above in the first place). + * An assert would be better but st/mesa relies on it... + */ + if (views[i] && views[i]->context != pipe) { + debug_printf("Illegal setting of sampler_view %d created in another " + "context\n", i); + } pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], views[i]); } @@ -146,8 +156,9 @@ llvmpipe_set_sampler_views(struct pipe_context *pipe, llvmpipe->sampler_views[shader], llvmpipe->num_sampler_views[shader]); } - - llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; + else { + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; + } } @@ -158,11 +169,13 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe, { struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); /* - * XXX we REALLY want to see the correct bind flag here but the OpenGL - * state tracker can't guarantee that at least for texture buffer objects. + * XXX: bind flags from OpenGL state tracker are notoriously unreliable. + * This looks unfixable, so fix the bind flags instead when it happens. */ - if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW)) + if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW)) { debug_printf("Illegal sampler view creation without bind flag\n"); + texture->bind |= PIPE_BIND_SAMPLER_VIEW; + } if (view) { *view = *templ; @@ -228,8 +241,7 @@ prepare_shader_sampling( struct llvmpipe_context *lp, unsigned num, struct pipe_sampler_view **views, - unsigned shader_type, - struct pipe_resource *mapped_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS]) + unsigned shader_type) { unsigned i; @@ -242,7 +254,7 @@ prepare_shader_sampling( if (!num) return; - for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { + for (i = 0; i < num; i++) { struct pipe_sampler_view *view = i < num ? views[i] : NULL; if (view) { @@ -253,11 +265,6 @@ prepare_shader_sampling( unsigned first_level = 0; unsigned last_level = 0; - /* We're referencing the texture's internal data, so save a - * reference to it. - */ - pipe_resource_reference(&mapped_tex[i], tex); - if (!lp_tex->dt) { /* regular texture - setup array of mipmap level offsets */ struct pipe_resource *res = view->texture; @@ -275,10 +282,10 @@ prepare_shader_sampling( row_stride[j] = lp_tex->row_stride[j]; img_stride[j] = lp_tex->img_stride[j]; } - if (view->target == PIPE_TEXTURE_1D_ARRAY || - view->target == PIPE_TEXTURE_2D_ARRAY || - view->target == PIPE_TEXTURE_CUBE || - view->target == PIPE_TEXTURE_CUBE_ARRAY) { + if (tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY) { num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1; for (j = first_level; j <= last_level; j++) { mip_offsets[j] += view->u.tex.first_layer * @@ -301,11 +308,9 @@ prepare_shader_sampling( img_stride[0] = 0; /* everything specified in number of elements here. */ - width0 = view->u.buf.last_element - view->u.buf.first_element + 1; - addr = (uint8_t *)addr + view->u.buf.first_element * - view_blocksize; - assert(view->u.buf.first_element <= view->u.buf.last_element); - assert(view->u.buf.last_element * view_blocksize < res->width0); + width0 = view->u.buf.size / view_blocksize; + addr = (uint8_t *)addr + view->u.buf.offset; + assert(view->u.buf.offset + view->u.buf.size <= res->width0); } } else { @@ -335,47 +340,28 @@ prepare_shader_sampling( /** - * Called during state validation when LP_NEW_SAMPLER_VIEW is set. + * Called whenever we're about to draw (no dirty flag, FIXME?). */ void llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, unsigned num, struct pipe_sampler_view **views) { - prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX, - lp->mapped_vs_tex); -} - -void -llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx) -{ - unsigned i; - for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) { - pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL); - } + prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX); } /** - * Called during state validation when LP_NEW_SAMPLER_VIEW is set. + * Called whenever we're about to draw (no dirty flag, FIXME?). */ void llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp, unsigned num, struct pipe_sampler_view **views) { - prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY, - lp->mapped_gs_tex); + prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY); } -void -llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx) -{ - unsigned i; - for (i = 0; i < Elements(ctx->mapped_gs_tex); i++) { - pipe_resource_reference(&ctx->mapped_gs_tex[i], NULL); - } -} void llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c index 6397b5196..a57e2f04b 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -135,8 +135,8 @@ emit_facing_coef(struct gallivm_state *gallivm, LLVMValueRef a0_0 = args->facing; LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); LLVMValueRef a0, face_val; - const unsigned char swizzles[4] = { PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ZERO, - PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ZERO }; + const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, + PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 }; /* Our face val is either 1 or 0 so we do * face = (val * 2) - 1 * to make it 1 or -1 @@ -723,7 +723,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, goto fail; variant = CALLOC_STRUCT(lp_setup_variant); - if (variant == NULL) + if (!variant) goto fail; variant->no = setup_no++; @@ -760,7 +760,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), - arg_types, Elements(arg_types), 0); + arg_types, ARRAY_SIZE(arg_types), 0); variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); if (!variant->function) @@ -791,7 +791,7 @@ generate_setup_variant(struct lp_setup_variant_key *key, variant->function, "entry"); LLVMPositionBuilderAtEnd(builder, block); - set_noalias(builder, variant->function, arg_types, Elements(arg_types)); + set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types)); init_args(gallivm, &variant->key, &args); emit_tri_coef(gallivm, &variant->key, &args); @@ -848,14 +848,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp, key->size = Offset(struct lp_setup_variant_key, inputs[key->num_inputs]); - key->color_slot = lp->color_slot [0]; + key->color_slot = lp->color_slot[0]; key->bcolor_slot = lp->bcolor_slot[0]; - key->spec_slot = lp->color_slot [1]; - key->bspec_slot = lp->bcolor_slot[1]; - assert(key->color_slot == lp->color_slot [0]); - assert(key->bcolor_slot == lp->bcolor_slot[0]); - assert(key->spec_slot == lp->color_slot [1]); - assert(key->bspec_slot == lp->bcolor_slot[1]); + key->spec_slot = lp->color_slot[1]; + key->bspec_slot = lp->bcolor_slot[1]; /* * If depth is floating point, depth bias is calculated with respect diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c index 96f8ed82c..784db7f73 100644 --- a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c @@ -131,8 +131,15 @@ llvmpipe_create_surface(struct pipe_context *pipe, { struct pipe_surface *ps; - if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET))) + if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET))) { debug_printf("Illegal surface creation without bind flag\n"); + if (util_format_is_depth_or_stencil(surf_tmpl->format)) { + pt->bind |= PIPE_BIND_DEPTH_STENCIL; + } + else { + pt->bind |= PIPE_BIND_RENDER_TARGET; + } + } ps = CALLOC_STRUCT(pipe_surface); if (ps) { @@ -183,11 +190,12 @@ llvmpipe_clear_render_target(struct pipe_context *pipe, struct pipe_surface *dst, const union pipe_color_union *color, unsigned dstx, unsigned dsty, - unsigned width, unsigned height) + unsigned width, unsigned height, + bool render_condition_enabled) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - if (!llvmpipe_check_render_cond(llvmpipe)) + if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe)) return; util_clear_render_target(pipe, dst, color, @@ -202,11 +210,12 @@ llvmpipe_clear_depth_stencil(struct pipe_context *pipe, double depth, unsigned stencil, unsigned dstx, unsigned dsty, - unsigned width, unsigned height) + unsigned width, unsigned height, + bool render_condition_enabled) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - if (!llvmpipe_check_render_cond(llvmpipe)) + if (render_condition_enabled && !llvmpipe_check_render_cond(llvmpipe)) return; util_clear_depth_stencil(pipe, dst, clear_flags, |