summaryrefslogtreecommitdiff
path: root/lib/mesa/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc7
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am79
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in1529
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources71
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/SConscript49
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c95
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h59
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c223
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h110
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c423
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c109
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c1118
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h105
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c819
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h137
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c63
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h44
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c226
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h179
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h88
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c169
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c127
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h95
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c131
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h55
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c246
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h263
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h96
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c36
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h40
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c110
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h91
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h18
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c332
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h60
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c935
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h324
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c438
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h347
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c558
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h380
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c564
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h412
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c124
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h53
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c623
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h73
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c1493
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h168
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h208
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c748
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c541
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c1027
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c602
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h145
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c206
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c105
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c244
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c3217
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h157
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c116
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c154
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c390
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c1021
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h85
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c93
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c91
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c117
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c96
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c229
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h42
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h140
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c484
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c737
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c453
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c384
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c418
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c139
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c321
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h46
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c815
-rw-r--r--lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h239
82 files changed, 27704 insertions, 0 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc b/lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc
new file mode 100644
index 000000000..0a0aa34e7
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc
@@ -0,0 +1,7 @@
+if HAVE_GALLIUM_LLVMPIPE
+
+TARGET_CPPFLAGS += -DGALLIUM_LLVMPIPE
+TARGET_LIB_DEPS += \
+ $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la
+
+endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
new file mode 100644
index 000000000..1d3853e41
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am
@@ -0,0 +1,79 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(LLVM_CFLAGS) \
+ $(MSVC2008_COMPAT_CFLAGS)
+AM_CXXFLAGS= \
+ $(GALLIUM_DRIVER_CXXFLAGS) \
+ $(LLVM_CXXFLAGS) \
+ $(MSVC2008_COMPAT_CXXFLAGS)
+
+noinst_LTLIBRARIES = libllvmpipe.la
+
+libllvmpipe_la_SOURCES = $(C_SOURCES)
+
+libllvmpipe_la_LDFLAGS = $(LLVM_LDFLAGS)
+
+noinst_HEADERS = lp_test.h
+
+check_PROGRAMS = \
+ lp_test_format \
+ lp_test_arit \
+ lp_test_blend \
+ lp_test_conv \
+ lp_test_printf
+TESTS = $(check_PROGRAMS)
+
+TEST_LIBS = \
+ libllvmpipe.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(LLVM_LIBS) \
+ $(DLOPEN_LIBS) \
+ $(PTHREAD_LIBS)
+
+lp_test_format_SOURCES = lp_test_format.c lp_test_main.c
+lp_test_format_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_format_SOURCES = dummy.cpp
+
+lp_test_arit_SOURCES = lp_test_arit.c lp_test_main.c
+lp_test_arit_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_arit_SOURCES = dummy.cpp
+
+lp_test_blend_SOURCES = lp_test_blend.c lp_test_main.c
+lp_test_blend_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_blend_SOURCES = dummy.cpp
+
+lp_test_conv_SOURCES = lp_test_conv.c lp_test_main.c
+lp_test_conv_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_conv_SOURCES = dummy.cpp
+
+lp_test_printf_SOURCES = lp_test_printf.c lp_test_main.c
+lp_test_printf_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_printf_SOURCES = dummy.cpp
+
+EXTRA_DIST = SConscript
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
new file mode 100644
index 000000000..0274f7e87
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in
@@ -0,0 +1,1529 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+@HAVE_DRISW_TRUE@am__append_1 = \
+@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
+
+check_PROGRAMS = lp_test_format$(EXEEXT) lp_test_arit$(EXEEXT) \
+ lp_test_blend$(EXEEXT) lp_test_conv$(EXEEXT) \
+ lp_test_printf$(EXEEXT)
+subdir = src/gallium/drivers/llvmpipe
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
+ $(top_srcdir)/m4/ax_check_python_mako_module.m4 \
+ $(top_srcdir)/m4/ax_gcc_builtin.m4 \
+ $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
+ $(top_srcdir)/m4/ax_prog_bison.m4 \
+ $(top_srcdir)/m4/ax_prog_flex.m4 \
+ $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/VERSION $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libllvmpipe_la_LIBADD =
+am__objects_1 = lp_bld_alpha.lo lp_bld_blend_aos.lo lp_bld_blend.lo \
+ lp_bld_blend_logicop.lo lp_bld_depth.lo lp_bld_interp.lo \
+ lp_clear.lo lp_context.lo lp_draw_arrays.lo lp_fence.lo \
+ lp_flush.lo lp_jit.lo lp_memory.lo lp_perf.lo lp_query.lo \
+ lp_rast.lo lp_rast_debug.lo lp_rast_tri.lo lp_scene.lo \
+ lp_scene_queue.lo lp_screen.lo lp_setup.lo lp_setup_line.lo \
+ lp_setup_point.lo lp_setup_tri.lo lp_setup_vbuf.lo \
+ lp_state_blend.lo lp_state_clip.lo lp_state_derived.lo \
+ lp_state_fs.lo lp_state_gs.lo lp_state_rasterizer.lo \
+ lp_state_sampler.lo lp_state_setup.lo lp_state_so.lo \
+ lp_state_surface.lo lp_state_vertex.lo lp_state_vs.lo \
+ lp_surface.lo lp_tex_sample.lo lp_texture.lo
+am_libllvmpipe_la_OBJECTS = $(am__objects_1)
+libllvmpipe_la_OBJECTS = $(am_libllvmpipe_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+libllvmpipe_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(AM_CFLAGS) $(CFLAGS) $(libllvmpipe_la_LDFLAGS) $(LDFLAGS) -o \
+ $@
+am_lp_test_arit_OBJECTS = lp_test_arit.$(OBJEXT) \
+ lp_test_main.$(OBJEXT)
+lp_test_arit_OBJECTS = $(am_lp_test_arit_OBJECTS)
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = libllvmpipe.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+lp_test_arit_DEPENDENCIES = $(am__DEPENDENCIES_2)
+am_lp_test_blend_OBJECTS = lp_test_blend.$(OBJEXT) \
+ lp_test_main.$(OBJEXT)
+lp_test_blend_OBJECTS = $(am_lp_test_blend_OBJECTS)
+lp_test_blend_DEPENDENCIES = $(am__DEPENDENCIES_2)
+am_lp_test_conv_OBJECTS = lp_test_conv.$(OBJEXT) \
+ lp_test_main.$(OBJEXT)
+lp_test_conv_OBJECTS = $(am_lp_test_conv_OBJECTS)
+lp_test_conv_DEPENDENCIES = $(am__DEPENDENCIES_2)
+am_lp_test_format_OBJECTS = lp_test_format.$(OBJEXT) \
+ lp_test_main.$(OBJEXT)
+lp_test_format_OBJECTS = $(am_lp_test_format_OBJECTS)
+lp_test_format_DEPENDENCIES = $(am__DEPENDENCIES_2)
+am_lp_test_printf_OBJECTS = lp_test_printf.$(OBJEXT) \
+ lp_test_main.$(OBJEXT)
+lp_test_printf_OBJECTS = $(am_lp_test_printf_OBJECTS)
+lp_test_printf_DEPENDENCIES = $(am__DEPENDENCIES_2)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo " CXX " $@;
+am__v_CXX_1 =
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo " CXXLD " $@;
+am__v_CXXLD_1 =
+SOURCES = $(libllvmpipe_la_SOURCES) $(lp_test_arit_SOURCES) \
+ $(nodist_EXTRA_lp_test_arit_SOURCES) $(lp_test_blend_SOURCES) \
+ $(nodist_EXTRA_lp_test_blend_SOURCES) $(lp_test_conv_SOURCES) \
+ $(nodist_EXTRA_lp_test_conv_SOURCES) $(lp_test_format_SOURCES) \
+ $(nodist_EXTRA_lp_test_format_SOURCES) \
+ $(lp_test_printf_SOURCES) \
+ $(nodist_EXTRA_lp_test_printf_SOURCES)
+DIST_SOURCES = $(libllvmpipe_la_SOURCES) $(lp_test_arit_SOURCES) \
+ $(lp_test_blend_SOURCES) $(lp_test_conv_SOURCES) \
+ $(lp_test_format_SOURCES) $(lp_test_printf_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+HEADERS = $(noinst_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__tty_colors_dummy = \
+ mgn= red= grn= lgn= blu= brg= std=; \
+ am__color_tests=no
+am__tty_colors = { \
+ $(am__tty_colors_dummy); \
+ if test "X$(AM_COLOR_TESTS)" = Xno; then \
+ am__color_tests=no; \
+ elif test "X$(AM_COLOR_TESTS)" = Xalways; then \
+ am__color_tests=yes; \
+ elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \
+ am__color_tests=yes; \
+ fi; \
+ if test $$am__color_tests = yes; then \
+ red=''; \
+ grn=''; \
+ lgn=''; \
+ blu=''; \
+ mgn=''; \
+ brg=''; \
+ std=''; \
+ fi; \
+}
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__recheck_rx = ^[ ]*:recheck:[ ]*
+am__global_test_result_rx = ^[ ]*:global-test-result:[ ]*
+am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]*
+# A command that, given a newline-separated list of test names on the
+# standard input, print the name of the tests that are to be re-run
+# upon "make recheck".
+am__list_recheck_tests = $(AWK) '{ \
+ recheck = 1; \
+ while ((rc = (getline line < ($$0 ".trs"))) != 0) \
+ { \
+ if (rc < 0) \
+ { \
+ if ((getline line2 < ($$0 ".log")) < 0) \
+ recheck = 0; \
+ break; \
+ } \
+ else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \
+ { \
+ recheck = 0; \
+ break; \
+ } \
+ else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \
+ { \
+ break; \
+ } \
+ }; \
+ if (recheck) \
+ print $$0; \
+ close ($$0 ".trs"); \
+ close ($$0 ".log"); \
+}'
+# A command that, given a newline-separated list of test names on the
+# standard input, create the global log from their .trs and .log files.
+am__create_global_log = $(AWK) ' \
+function fatal(msg) \
+{ \
+ print "fatal: making $@: " msg | "cat >&2"; \
+ exit 1; \
+} \
+function rst_section(header) \
+{ \
+ print header; \
+ len = length(header); \
+ for (i = 1; i <= len; i = i + 1) \
+ printf "="; \
+ printf "\n\n"; \
+} \
+{ \
+ copy_in_global_log = 1; \
+ global_test_result = "RUN"; \
+ while ((rc = (getline line < ($$0 ".trs"))) != 0) \
+ { \
+ if (rc < 0) \
+ fatal("failed to read from " $$0 ".trs"); \
+ if (line ~ /$(am__global_test_result_rx)/) \
+ { \
+ sub("$(am__global_test_result_rx)", "", line); \
+ sub("[ ]*$$", "", line); \
+ global_test_result = line; \
+ } \
+ else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \
+ copy_in_global_log = 0; \
+ }; \
+ if (copy_in_global_log) \
+ { \
+ rst_section(global_test_result ": " $$0); \
+ while ((rc = (getline line < ($$0 ".log"))) != 0) \
+ { \
+ if (rc < 0) \
+ fatal("failed to read from " $$0 ".log"); \
+ print line; \
+ }; \
+ printf "\n"; \
+ }; \
+ close ($$0 ".trs"); \
+ close ($$0 ".log"); \
+}'
+# Restructured Text title.
+am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; }
+# Solaris 10 'make', and several other traditional 'make' implementations,
+# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it
+# by disabling -e (using the XSI extension "set +e") if it's set.
+am__sh_e_setup = case $$- in *e*) set +e;; esac
+# Default flags passed to test drivers.
+am__common_driver_flags = \
+ --color-tests "$$am__color_tests" \
+ --enable-hard-errors "$$am__enable_hard_errors" \
+ --expect-failure "$$am__expect_failure"
+# To be inserted before the command running the test. Creates the
+# directory for the log if needed. Stores in $dir the directory
+# containing $f, in $tst the test, in $log the log. Executes the
+# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and
+# passes TESTS_ENVIRONMENT. Set up options for the wrapper that
+# will run the test scripts (or their associated LOG_COMPILER, if
+# thy have one).
+am__check_pre = \
+$(am__sh_e_setup); \
+$(am__vpath_adj_setup) $(am__vpath_adj) \
+$(am__tty_colors); \
+srcdir=$(srcdir); export srcdir; \
+case "$@" in \
+ */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \
+ *) am__odir=.;; \
+esac; \
+test "x$$am__odir" = x"." || test -d "$$am__odir" \
+ || $(MKDIR_P) "$$am__odir" || exit $$?; \
+if test -f "./$$f"; then dir=./; \
+elif test -f "$$f"; then dir=; \
+else dir="$(srcdir)/"; fi; \
+tst=$$dir$$f; log='$@'; \
+if test -n '$(DISABLE_HARD_ERRORS)'; then \
+ am__enable_hard_errors=no; \
+else \
+ am__enable_hard_errors=yes; \
+fi; \
+case " $(XFAIL_TESTS) " in \
+ *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \
+ am__expect_failure=yes;; \
+ *) \
+ am__expect_failure=no;; \
+esac; \
+$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT)
+# A shell command to get the names of the tests scripts with any registered
+# extension removed (i.e., equivalently, the names of the test logs, with
+# the '.log' extension removed). The result is saved in the shell variable
+# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly,
+# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)",
+# since that might cause problem with VPATH rewrites for suffix-less tests.
+# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'.
+am__set_TESTS_bases = \
+ bases='$(TEST_LOGS)'; \
+ bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
+ bases=`echo $$bases`
+RECHECK_LOGS = $(TEST_LOGS)
+AM_RECURSIVE_TARGETS = check recheck
+TEST_SUITE_LOG = test-suite.log
+TEST_EXTENSIONS = @EXEEXT@ .test
+LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
+LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS)
+am__set_b = \
+ case '$@' in \
+ */*) \
+ case '$*' in \
+ */*) b='$*';; \
+ *) b=`echo '$@' | sed 's/\.log$$//'`; \
+ esac;; \
+ *) \
+ b='$*';; \
+ esac
+am__test_logs1 = $(TESTS:=.log)
+am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log)
+TEST_LOGS = $(am__test_logs2:.test.log=.log)
+TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
+TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \
+ $(TEST_LOG_FLAGS)
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
+ $(top_srcdir)/bin/depcomp $(top_srcdir)/bin/test-driver \
+ $(top_srcdir)/src/gallium/Automake.inc
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMDGPU_CFLAGS = @AMDGPU_CFLAGS@
+AMDGPU_LIBS = @AMDGPU_LIBS@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BSYMBOLIC = @BSYMBOLIC@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
+CLOCK_LIB = @CLOCK_LIB@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
+DEFINES = @DEFINES@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DLOPEN_LIBS = @DLOPEN_LIBS@
+DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
+DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
+DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
+DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
+DRIGL_CFLAGS = @DRIGL_CFLAGS@
+DRIGL_LIBS = @DRIGL_LIBS@
+DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
+DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
+DRI_LIB_DEPS = @DRI_LIB_DEPS@
+DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGL_CFLAGS = @EGL_CFLAGS@
+EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
+EGL_LIB_DEPS = @EGL_LIB_DEPS@
+EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
+EGREP = @EGREP@
+ELF_LIB = @ELF_LIB@
+EXEEXT = @EXEEXT@
+EXPAT_CFLAGS = @EXPAT_CFLAGS@
+EXPAT_LIBS = @EXPAT_LIBS@
+FGREP = @FGREP@
+FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
+FREEDRENO_LIBS = @FREEDRENO_LIBS@
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
+GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
+GC_SECTIONS = @GC_SECTIONS@
+GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
+GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
+GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
+GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
+GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
+GLPROTO_LIBS = @GLPROTO_LIBS@
+GLX_TLS = @GLX_TLS@
+GL_LIB = @GL_LIB@
+GL_LIB_DEPS = @GL_LIB_DEPS@
+GL_PC_CFLAGS = @GL_PC_CFLAGS@
+GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
+GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
+GREP = @GREP@
+HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+INDENT = @INDENT@
+INDENT_FLAGS = @INDENT_FLAGS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTEL_CFLAGS = @INTEL_CFLAGS@
+INTEL_LIBS = @INTEL_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
+LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
+LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
+LIBDRM_LIBS = @LIBDRM_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
+LIBUDEV_LIBS = @LIBUDEV_LIBS@
+LIB_DIR = @LIB_DIR@
+LIB_EXT = @LIB_EXT@
+LIPO = @LIPO@
+LLVM_BINDIR = @LLVM_BINDIR@
+LLVM_CFLAGS = @LLVM_CFLAGS@
+LLVM_CONFIG = @LLVM_CONFIG@
+LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
+LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
+LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
+LLVM_LDFLAGS = @LLVM_LDFLAGS@
+LLVM_LIBDIR = @LLVM_LIBDIR@
+LLVM_LIBS = @LLVM_LIBS@
+LLVM_VERSION = @LLVM_VERSION@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MESA_LLVM = @MESA_LLVM@
+MKDIR_P = @MKDIR_P@
+MSVC2008_COMPAT_CFLAGS = @MSVC2008_COMPAT_CFLAGS@
+MSVC2008_COMPAT_CXXFLAGS = @MSVC2008_COMPAT_CXXFLAGS@
+MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
+MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
+NINE_MAJOR = @NINE_MAJOR@
+NINE_MINOR = @NINE_MINOR@
+NINE_TINY = @NINE_TINY@
+NINE_VERSION = @NINE_VERSION@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
+NOUVEAU_LIBS = @NOUVEAU_LIBS@
+NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@
+NVVIEUX_LIBS = @NVVIEUX_LIBS@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OMX_CFLAGS = @OMX_CFLAGS@
+OMX_LIBS = @OMX_LIBS@
+OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
+OPENCL_LIBNAME = @OPENCL_LIBNAME@
+OPENCL_VERSION = @OPENCL_VERSION@
+OSMESA_LIB = @OSMESA_LIB@
+OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
+OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
+OSMESA_PC_REQ = @OSMESA_PC_REQ@
+OSMESA_VERSION = @OSMESA_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSIX_SHELL = @POSIX_SHELL@
+PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
+PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+PYTHON2 = @PYTHON2@
+RADEON_CFLAGS = @RADEON_CFLAGS@
+RADEON_LIBS = @RADEON_LIBS@
+RANLIB = @RANLIB@
+RM = @RM@
+SED = @SED@
+SELINUX_CFLAGS = @SELINUX_CFLAGS@
+SELINUX_LIBS = @SELINUX_LIBS@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SSE41_CFLAGS = @SSE41_CFLAGS@
+STRIP = @STRIP@
+VA_CFLAGS = @VA_CFLAGS@
+VA_LIBS = @VA_LIBS@
+VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
+VA_MAJOR = @VA_MAJOR@
+VA_MINOR = @VA_MINOR@
+VDPAU_CFLAGS = @VDPAU_CFLAGS@
+VDPAU_LIBS = @VDPAU_LIBS@
+VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
+VDPAU_MAJOR = @VDPAU_MAJOR@
+VDPAU_MINOR = @VDPAU_MINOR@
+VERSION = @VERSION@
+VG_LIB_DEPS = @VG_LIB_DEPS@
+VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
+VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
+VL_CFLAGS = @VL_CFLAGS@
+VL_LIBS = @VL_LIBS@
+WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
+WAYLAND_LIBS = @WAYLAND_LIBS@
+WAYLAND_SCANNER = @WAYLAND_SCANNER@
+WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
+WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
+X11_INCLUDES = @X11_INCLUDES@
+XA_MAJOR = @XA_MAJOR@
+XA_MINOR = @XA_MINOR@
+XA_TINY = @XA_TINY@
+XA_VERSION = @XA_VERSION@
+XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
+XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
+XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
+XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
+XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
+XLIBGL_LIBS = @XLIBGL_LIBS@
+XVMC_CFLAGS = @XVMC_CFLAGS@
+XVMC_LIBS = @XVMC_LIBS@
+XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
+XVMC_MAJOR = @XVMC_MAJOR@
+XVMC_MINOR = @XVMC_MINOR@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acv_mako_found = @acv_mako_found@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+ax_pthread_config = @ax_pthread_config@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+ifGNUmake = @ifGNUmake@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+C_SOURCES := \
+ lp_bld_alpha.c \
+ lp_bld_alpha.h \
+ lp_bld_blend_aos.c \
+ lp_bld_blend.c \
+ lp_bld_blend.h \
+ lp_bld_blend_logicop.c \
+ lp_bld_depth.c \
+ lp_bld_depth.h \
+ lp_bld_interp.c \
+ lp_bld_interp.h \
+ lp_clear.c \
+ lp_clear.h \
+ lp_context.c \
+ lp_context.h \
+ lp_debug.h \
+ lp_draw_arrays.c \
+ lp_fence.c \
+ lp_fence.h \
+ lp_flush.c \
+ lp_flush.h \
+ lp_jit.c \
+ lp_jit.h \
+ lp_limits.h \
+ lp_memory.c \
+ lp_memory.h \
+ lp_perf.c \
+ lp_perf.h \
+ lp_public.h \
+ lp_query.c \
+ lp_query.h \
+ lp_rast.c \
+ lp_rast_debug.c \
+ lp_rast.h \
+ lp_rast_priv.h \
+ lp_rast_tri.c \
+ lp_rast_tri_tmp.h \
+ lp_scene.c \
+ lp_scene.h \
+ lp_scene_queue.c \
+ lp_scene_queue.h \
+ lp_screen.c \
+ lp_screen.h \
+ lp_setup.c \
+ lp_setup_context.h \
+ lp_setup.h \
+ lp_setup_line.c \
+ lp_setup_point.c \
+ lp_setup_tri.c \
+ lp_setup_vbuf.c \
+ lp_state_blend.c \
+ lp_state_clip.c \
+ lp_state_derived.c \
+ lp_state_fs.c \
+ lp_state_fs.h \
+ lp_state_gs.c \
+ lp_state.h \
+ lp_state_rasterizer.c \
+ lp_state_sampler.c \
+ lp_state_setup.c \
+ lp_state_setup.h \
+ lp_state_so.c \
+ lp_state_surface.c \
+ lp_state_vertex.c \
+ lp_state_vs.c \
+ lp_surface.c \
+ lp_surface.h \
+ lp_tex_sample.c \
+ lp_tex_sample.h \
+ lp_texture.c \
+ lp_texture.h
+
+GALLIUM_CFLAGS = \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES)
+
+
+# src/gallium/auxiliary must appear before src/gallium/drivers
+# because there are stupidly two rbug_context.h files in
+# different directories, and which one is included by the
+# preprocessor is determined by the ordering of the -I flags.
+GALLIUM_DRIVER_CFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_DRIVER_CXXFLAGS = \
+ -I$(srcdir)/include \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(VISIBILITY_CXXFLAGS)
+
+GALLIUM_TARGET_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/loader \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ -I$(top_srcdir)/src/gallium/drivers \
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(DEFINES) \
+ $(PTHREAD_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_COMMON_LIB_DEPS = \
+ -lm \
+ $(CLOCK_LIB) \
+ $(PTHREAD_LIBS) \
+ $(DLOPEN_LIBS)
+
+GALLIUM_WINSYS_CFLAGS = \
+ -I$(top_srcdir)/src \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/gallium/include \
+ -I$(top_srcdir)/src/gallium/auxiliary \
+ $(DEFINES) \
+ $(VISIBILITY_CFLAGS)
+
+GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
+ $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
+ $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
+ $(am__append_1)
+AM_CFLAGS = \
+ $(GALLIUM_DRIVER_CFLAGS) \
+ $(LLVM_CFLAGS) \
+ $(MSVC2008_COMPAT_CFLAGS)
+
+AM_CXXFLAGS = \
+ $(GALLIUM_DRIVER_CXXFLAGS) \
+ $(LLVM_CXXFLAGS) \
+ $(MSVC2008_COMPAT_CXXFLAGS)
+
+noinst_LTLIBRARIES = libllvmpipe.la
+libllvmpipe_la_SOURCES = $(C_SOURCES)
+libllvmpipe_la_LDFLAGS = $(LLVM_LDFLAGS)
+noinst_HEADERS = lp_test.h
+TESTS = $(check_PROGRAMS)
+TEST_LIBS = \
+ libllvmpipe.la \
+ $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+ $(top_builddir)/src/util/libmesautil.la \
+ $(LLVM_LIBS) \
+ $(DLOPEN_LIBS) \
+ $(PTHREAD_LIBS)
+
+lp_test_format_SOURCES = lp_test_format.c lp_test_main.c
+lp_test_format_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_format_SOURCES = dummy.cpp
+lp_test_arit_SOURCES = lp_test_arit.c lp_test_main.c
+lp_test_arit_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_arit_SOURCES = dummy.cpp
+lp_test_blend_SOURCES = lp_test_blend.c lp_test_main.c
+lp_test_blend_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_blend_SOURCES = dummy.cpp
+lp_test_conv_SOURCES = lp_test_conv.c lp_test_main.c
+lp_test_conv_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_conv_SOURCES = dummy.cpp
+lp_test_printf_SOURCES = lp_test_printf.c lp_test_main.c
+lp_test_printf_LDADD = $(TEST_LIBS)
+nodist_EXTRA_lp_test_printf_SOURCES = dummy.cpp
+EXTRA_DIST = SConscript
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .lo .log .o .obj .test .test$(EXEEXT) .trs
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/llvmpipe/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/gallium/drivers/llvmpipe/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libllvmpipe.la: $(libllvmpipe_la_OBJECTS) $(libllvmpipe_la_DEPENDENCIES) $(EXTRA_libllvmpipe_la_DEPENDENCIES)
+ $(AM_V_CCLD)$(libllvmpipe_la_LINK) $(libllvmpipe_la_OBJECTS) $(libllvmpipe_la_LIBADD) $(LIBS)
+
+clean-checkPROGRAMS:
+ @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+
+lp_test_arit$(EXEEXT): $(lp_test_arit_OBJECTS) $(lp_test_arit_DEPENDENCIES) $(EXTRA_lp_test_arit_DEPENDENCIES)
+ @rm -f lp_test_arit$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(lp_test_arit_OBJECTS) $(lp_test_arit_LDADD) $(LIBS)
+
+lp_test_blend$(EXEEXT): $(lp_test_blend_OBJECTS) $(lp_test_blend_DEPENDENCIES) $(EXTRA_lp_test_blend_DEPENDENCIES)
+ @rm -f lp_test_blend$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(lp_test_blend_OBJECTS) $(lp_test_blend_LDADD) $(LIBS)
+
+lp_test_conv$(EXEEXT): $(lp_test_conv_OBJECTS) $(lp_test_conv_DEPENDENCIES) $(EXTRA_lp_test_conv_DEPENDENCIES)
+ @rm -f lp_test_conv$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(lp_test_conv_OBJECTS) $(lp_test_conv_LDADD) $(LIBS)
+
+lp_test_format$(EXEEXT): $(lp_test_format_OBJECTS) $(lp_test_format_DEPENDENCIES) $(EXTRA_lp_test_format_DEPENDENCIES)
+ @rm -f lp_test_format$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(lp_test_format_OBJECTS) $(lp_test_format_LDADD) $(LIBS)
+
+lp_test_printf$(EXEEXT): $(lp_test_printf_OBJECTS) $(lp_test_printf_DEPENDENCIES) $(EXTRA_lp_test_printf_DEPENDENCIES)
+ @rm -f lp_test_printf$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(lp_test_printf_OBJECTS) $(lp_test_printf_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dummy.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_alpha.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_blend.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_blend_aos.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_blend_logicop.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_depth.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_interp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_clear.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_context.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_draw_arrays.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_fence.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_flush.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_jit.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_memory.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_perf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_query.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_rast.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_rast_debug.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_rast_tri.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_scene.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_scene_queue.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_screen.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_line.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_point.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_tri.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_vbuf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_blend.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_clip.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_derived.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_fs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_gs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_rasterizer.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_sampler.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_setup.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_so.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_surface.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_vertex.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_vs.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_surface.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_arit.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_blend.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_conv.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_format.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_main.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_printf.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_tex_sample.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_texture.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+.cpp.o:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cpp.obj:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cpp.lo:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+# Recover from deleted '.trs' file; this should ensure that
+# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create
+# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells
+# to avoid problems with "make -n".
+.log.trs:
+ rm -f $< $@
+ $(MAKE) $(AM_MAKEFLAGS) $<
+
+# Leading 'am--fnord' is there to ensure the list of targets does not
+# expand to empty, as could happen e.g. with make check TESTS=''.
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck)
+am--force-recheck:
+ @:
+
+$(TEST_SUITE_LOG): $(TEST_LOGS)
+ @$(am__set_TESTS_bases); \
+ am__f_ok () { test -f "$$1" && test -r "$$1"; }; \
+ redo_bases=`for i in $$bases; do \
+ am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \
+ done`; \
+ if test -n "$$redo_bases"; then \
+ redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \
+ redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \
+ if $(am__make_dryrun); then :; else \
+ rm -f $$redo_logs && rm -f $$redo_results || exit 1; \
+ fi; \
+ fi; \
+ if test -n "$$am__remaking_logs"; then \
+ echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \
+ "recursion detected" >&2; \
+ elif test -n "$$redo_logs"; then \
+ am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \
+ fi; \
+ if $(am__make_dryrun); then :; else \
+ st=0; \
+ errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \
+ for i in $$redo_bases; do \
+ test -f $$i.trs && test -r $$i.trs \
+ || { echo "$$errmsg $$i.trs" >&2; st=1; }; \
+ test -f $$i.log && test -r $$i.log \
+ || { echo "$$errmsg $$i.log" >&2; st=1; }; \
+ done; \
+ test $$st -eq 0 || exit 1; \
+ fi
+ @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \
+ ws='[ ]'; \
+ results=`for b in $$bases; do echo $$b.trs; done`; \
+ test -n "$$results" || results=/dev/null; \
+ all=` grep "^$$ws*:test-result:" $$results | wc -l`; \
+ pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \
+ fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \
+ skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \
+ xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \
+ xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \
+ error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \
+ if test `expr $$fail + $$xpass + $$error` -eq 0; then \
+ success=true; \
+ else \
+ success=false; \
+ fi; \
+ br='==================='; br=$$br$$br$$br$$br; \
+ result_count () \
+ { \
+ if test x"$$1" = x"--maybe-color"; then \
+ maybe_colorize=yes; \
+ elif test x"$$1" = x"--no-color"; then \
+ maybe_colorize=no; \
+ else \
+ echo "$@: invalid 'result_count' usage" >&2; exit 4; \
+ fi; \
+ shift; \
+ desc=$$1 count=$$2; \
+ if test $$maybe_colorize = yes && test $$count -gt 0; then \
+ color_start=$$3 color_end=$$std; \
+ else \
+ color_start= color_end=; \
+ fi; \
+ echo "$${color_start}# $$desc $$count$${color_end}"; \
+ }; \
+ create_testsuite_report () \
+ { \
+ result_count $$1 "TOTAL:" $$all "$$brg"; \
+ result_count $$1 "PASS: " $$pass "$$grn"; \
+ result_count $$1 "SKIP: " $$skip "$$blu"; \
+ result_count $$1 "XFAIL:" $$xfail "$$lgn"; \
+ result_count $$1 "FAIL: " $$fail "$$red"; \
+ result_count $$1 "XPASS:" $$xpass "$$red"; \
+ result_count $$1 "ERROR:" $$error "$$mgn"; \
+ }; \
+ { \
+ echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \
+ $(am__rst_title); \
+ create_testsuite_report --no-color; \
+ echo; \
+ echo ".. contents:: :depth: 2"; \
+ echo; \
+ for b in $$bases; do echo $$b; done \
+ | $(am__create_global_log); \
+ } >$(TEST_SUITE_LOG).tmp || exit 1; \
+ mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \
+ if $$success; then \
+ col="$$grn"; \
+ else \
+ col="$$red"; \
+ test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
+ fi; \
+ echo "$${col}$$br$${std}"; \
+ echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
+ echo "$${col}$$br$${std}"; \
+ create_testsuite_report --maybe-color; \
+ echo "$$col$$br$$std"; \
+ if $$success; then :; else \
+ echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \
+ if test -n "$(PACKAGE_BUGREPORT)"; then \
+ echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \
+ fi; \
+ echo "$$col$$br$$std"; \
+ fi; \
+ $$success || exit 1
+
+check-TESTS:
+ @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list
+ @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list
+ @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
+ @set +e; $(am__set_TESTS_bases); \
+ log_list=`for i in $$bases; do echo $$i.log; done`; \
+ trs_list=`for i in $$bases; do echo $$i.trs; done`; \
+ log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \
+ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \
+ exit $$?;
+recheck: all $(check_PROGRAMS)
+ @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
+ @set +e; $(am__set_TESTS_bases); \
+ bases=`for i in $$bases; do echo $$i; done \
+ | $(am__list_recheck_tests)` || exit 1; \
+ log_list=`for i in $$bases; do echo $$i.log; done`; \
+ log_list=`echo $$log_list`; \
+ $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \
+ am__force_recheck=am--force-recheck \
+ TEST_LOGS="$$log_list"; \
+ exit $$?
+lp_test_format.log: lp_test_format$(EXEEXT)
+ @p='lp_test_format$(EXEEXT)'; \
+ b='lp_test_format'; \
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
+lp_test_arit.log: lp_test_arit$(EXEEXT)
+ @p='lp_test_arit$(EXEEXT)'; \
+ b='lp_test_arit'; \
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
+lp_test_blend.log: lp_test_blend$(EXEEXT)
+ @p='lp_test_blend$(EXEEXT)'; \
+ b='lp_test_blend'; \
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
+lp_test_conv.log: lp_test_conv$(EXEEXT)
+ @p='lp_test_conv$(EXEEXT)'; \
+ b='lp_test_conv'; \
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
+lp_test_printf.log: lp_test_printf$(EXEEXT)
+ @p='lp_test_printf$(EXEEXT)'; \
+ b='lp_test_printf'; \
+ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
+.test.log:
+ @p='$<'; \
+ $(am__set_b); \
+ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \
+ --log-file $$b.log --trs-file $$b.trs \
+ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
+ "$$tst" $(AM_TESTS_FD_REDIRECT)
+@am__EXEEXT_TRUE@.test$(EXEEXT).log:
+@am__EXEEXT_TRUE@ @p='$<'; \
+@am__EXEEXT_TRUE@ $(am__set_b); \
+@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \
+@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \
+@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
+@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT)
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+ $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
+ $(MAKE) $(AM_MAKEFLAGS) check-TESTS
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(HEADERS)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+ -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS)
+ -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs)
+ -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
+ clean-noinstLTLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: check-am install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-TESTS check-am clean \
+ clean-checkPROGRAMS clean-generic clean-libtool \
+ clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am recheck tags tags-am uninstall \
+ uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources
new file mode 100644
index 000000000..d928ccba4
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources
@@ -0,0 +1,71 @@
+C_SOURCES := \
+ lp_bld_alpha.c \
+ lp_bld_alpha.h \
+ lp_bld_blend_aos.c \
+ lp_bld_blend.c \
+ lp_bld_blend.h \
+ lp_bld_blend_logicop.c \
+ lp_bld_depth.c \
+ lp_bld_depth.h \
+ lp_bld_interp.c \
+ lp_bld_interp.h \
+ lp_clear.c \
+ lp_clear.h \
+ lp_context.c \
+ lp_context.h \
+ lp_debug.h \
+ lp_draw_arrays.c \
+ lp_fence.c \
+ lp_fence.h \
+ lp_flush.c \
+ lp_flush.h \
+ lp_jit.c \
+ lp_jit.h \
+ lp_limits.h \
+ lp_memory.c \
+ lp_memory.h \
+ lp_perf.c \
+ lp_perf.h \
+ lp_public.h \
+ lp_query.c \
+ lp_query.h \
+ lp_rast.c \
+ lp_rast_debug.c \
+ lp_rast.h \
+ lp_rast_priv.h \
+ lp_rast_tri.c \
+ lp_rast_tri_tmp.h \
+ lp_scene.c \
+ lp_scene.h \
+ lp_scene_queue.c \
+ lp_scene_queue.h \
+ lp_screen.c \
+ lp_screen.h \
+ lp_setup.c \
+ lp_setup_context.h \
+ lp_setup.h \
+ lp_setup_line.c \
+ lp_setup_point.c \
+ lp_setup_tri.c \
+ lp_setup_vbuf.c \
+ lp_state_blend.c \
+ lp_state_clip.c \
+ lp_state_derived.c \
+ lp_state_fs.c \
+ lp_state_fs.h \
+ lp_state_gs.c \
+ lp_state.h \
+ lp_state_rasterizer.c \
+ lp_state_sampler.c \
+ lp_state_setup.c \
+ lp_state_setup.h \
+ lp_state_so.c \
+ lp_state_surface.c \
+ lp_state_vertex.c \
+ lp_state_vs.c \
+ lp_surface.c \
+ lp_surface.h \
+ lp_tex_sample.c \
+ lp_tex_sample.h \
+ lp_texture.c \
+ lp_texture.h
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/SConscript b/lib/mesa/src/gallium/drivers/llvmpipe/SConscript
new file mode 100644
index 000000000..3a51efcd5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/SConscript
@@ -0,0 +1,49 @@
+from sys import executable as python_cmd
+import distutils.version
+
+Import('*')
+
+if not env['llvm']:
+ print 'warning: LLVM disabled: not building llvmpipe'
+ Return()
+
+env = env.Clone()
+
+env.MSVC2008Compat()
+
+llvmpipe = env.ConvenienceLibrary(
+ target = 'llvmpipe',
+ source = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
+ )
+
+env.Alias('llvmpipe', llvmpipe)
+
+
+if not env['embedded']:
+ env = env.Clone()
+
+ env.Prepend(LIBS = [llvmpipe, gallium, mesautil])
+
+ tests = [
+ 'format',
+ 'blend',
+ 'conv',
+ 'printf',
+ ]
+
+ if not env['msvc']:
+ tests.append('arit')
+
+ for test in tests:
+ testname = 'lp_test_' + test
+ target = env.Program(
+ target = testname,
+ source = [testname + '.c', 'lp_test_main.c'],
+ )
+ env.InstallProgram(target)
+
+ # http://www.scons.org/wiki/UnitTests
+ alias = env.Alias(testname, [target], target[0].abspath)
+ AlwaysBuild(alias)
+
+Export('llvmpipe')
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
new file mode 100644
index 000000000..6e2d0376d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Alpha testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_conv.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "lp_bld_alpha.h"
+
+
+void
+lp_build_alpha_test(struct gallivm_state *gallivm,
+ unsigned func,
+ struct lp_type type,
+ const struct util_format_description *cbuf_format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ LLVMValueRef ref,
+ boolean do_branch)
+{
+ struct lp_build_context bld;
+ LLVMValueRef test;
+
+ lp_build_context_init(&bld, gallivm, type);
+
+ /*
+ * Alpha testing needs to be done in the color buffer precision.
+ *
+ * TODO: Ideally, instead of duplicating the color conversion code, we would do
+ * alpha testing after converting the output colors, but that's not very
+ * convenient, because it needs to be done before depth testing. Hopefully
+ * LLVM will detect and remove the duplicate expression.
+ *
+ * FIXME: This should be generalized to formats other than rgba8 variants.
+ */
+ if (type.floating &&
+ util_format_is_rgba8_variant(cbuf_format_desc)) {
+ const unsigned dst_width = 8;
+
+ alpha = lp_build_clamp(&bld, alpha, bld.zero, bld.one);
+ ref = lp_build_clamp(&bld, ref, bld.zero, bld.one);
+
+ alpha = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, alpha);
+ ref = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, ref);
+
+ type.floating = 0;
+ lp_build_context_init(&bld, gallivm, type);
+ }
+
+ test = lp_build_cmp(&bld, func, alpha, ref);
+
+ lp_build_name(test, "alpha_mask");
+
+ lp_build_mask_update(mask, test);
+
+ if (do_branch)
+ lp_build_mask_check(mask);
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
new file mode 100644
index 000000000..15f1284c5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Alpha testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_ALPHA_H
+#define LP_BLD_ALPHA_H
+
+#include "pipe/p_compiler.h"
+
+#include "gallivm/lp_bld.h"
+
+struct pipe_alpha_state;
+struct util_format_description;
+struct gallivm_state;
+struct lp_type;
+struct lp_build_mask_context;
+
+
+void
+lp_build_alpha_test(struct gallivm_state *gallivm,
+ unsigned func,
+ struct lp_type type,
+ const struct util_format_description *cbuf_format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ LLVMValueRef ref,
+ boolean do_branch);
+
+
+#endif /* !LP_BLD_ALPHA_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c
new file mode 100644
index 000000000..1feb415c9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -0,0 +1,223 @@
+/**************************************************************************
+ *
+ * Copyright 2012 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "lp_bld_blend.h"
+
+/**
+ * Is (a OP b) == (b OP a)?
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ case PIPE_BLEND_MIN:
+ case PIPE_BLEND_MAX:
+ return TRUE;
+ case PIPE_BLEND_SUBTRACT:
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return FALSE;
+ default:
+ assert(0);
+ return TRUE;
+ }
+}
+
+
+/**
+ * Whether the blending functions are the reverse of each other.
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
+{
+ if(rgb_func == alpha_func)
+ return FALSE;
+ if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
+ return TRUE;
+ if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
+ return TRUE;
+ return FALSE;
+}
+
+
+/**
+ * Whether the blending factors are complementary of each other.
+ */
+static inline boolean
+lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
+{
+ return dst_factor == (src_factor ^ 0x10);
+}
+
+
+/**
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef term1,
+ LLVMValueRef term2)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return lp_build_add(bld, term1, term2);
+ case PIPE_BLEND_SUBTRACT:
+ return lp_build_sub(bld, term1, term2);
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return lp_build_sub(bld, term2, term1);
+ case PIPE_BLEND_MIN:
+ return lp_build_min(bld, term1, term2);
+ case PIPE_BLEND_MAX:
+ return lp_build_max(bld, term1, term2);
+ default:
+ assert(0);
+ return bld->zero;
+ }
+}
+
+
+/**
+ * Performs optimisations and blending independent of SoA/AoS
+ *
+ * @param func the blend function
+ * @param factor_src PIPE_BLENDFACTOR_xxx
+ * @param factor_dst PIPE_BLENDFACTOR_xxx
+ * @param src source rgba
+ * @param dst dest rgba
+ * @param src_factor src factor computed value
+ * @param dst_factor dst factor computed value
+ * @param not_alpha_dependent same factors across all channels of src/dst
+ *
+ * not_alpha_dependent should be:
+ * SoA: always true as it is only one channel at a time
+ * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor
+ *
+ * Note that pretty much every possible optimisation can only be done on non-unorm targets
+ * due to unorm values not going above 1.0 meaning factorisation can change results.
+ * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1.
+ */
+LLVMValueRef
+lp_build_blend(struct lp_build_context *bld,
+ unsigned func,
+ unsigned factor_src,
+ unsigned factor_dst,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef src_factor,
+ LLVMValueRef dst_factor,
+ boolean not_alpha_dependent,
+ boolean optimise_only)
+{
+ LLVMValueRef result, src_term, dst_term;
+
+ /* If we are not alpha dependent we can mess with the src/dst factors */
+ if (not_alpha_dependent) {
+ if (lp_build_blend_factor_complementary(factor_src, factor_dst)) {
+ if (func == PIPE_BLEND_ADD) {
+ if (factor_src < factor_dst) {
+ return lp_build_lerp(bld, src_factor, dst, src, 0);
+ } else {
+ return lp_build_lerp(bld, dst_factor, src, dst, 0);
+ }
+ } else if(bld->type.floating && func == PIPE_BLEND_SUBTRACT) {
+ result = lp_build_add(bld, src, dst);
+
+ if (factor_src < factor_dst) {
+ result = lp_build_mul(bld, result, src_factor);
+ return lp_build_sub(bld, result, dst);
+ } else {
+ result = lp_build_mul(bld, result, dst_factor);
+ return lp_build_sub(bld, src, result);
+ }
+ } else if(bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) {
+ result = lp_build_add(bld, src, dst);
+
+ if (factor_src < factor_dst) {
+ result = lp_build_mul(bld, result, src_factor);
+ return lp_build_sub(bld, dst, result);
+ } else {
+ result = lp_build_mul(bld, result, dst_factor);
+ return lp_build_sub(bld, result, src);
+ }
+ }
+ }
+
+ if (bld->type.floating && factor_src == factor_dst) {
+ if (func == PIPE_BLEND_ADD ||
+ func == PIPE_BLEND_SUBTRACT ||
+ func == PIPE_BLEND_REVERSE_SUBTRACT) {
+ LLVMValueRef result;
+ result = lp_build_blend_func(bld, func, src, dst);
+ return lp_build_mul(bld, result, src_factor);
+ }
+ }
+ }
+
+ if (optimise_only)
+ return NULL;
+
+ src_term = lp_build_mul(bld, src, src_factor);
+ dst_term = lp_build_mul(bld, dst, dst_factor);
+ return lp_build_blend_func(bld, func, src_term, dst_term);
+}
+
+void
+lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ boolean do_branch)
+{
+ struct lp_build_context bld;
+ LLVMValueRef test;
+ LLVMValueRef alpha_ref_value;
+
+ lp_build_context_init(&bld, gallivm, type);
+
+ alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5);
+
+ test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value);
+
+ lp_build_name(test, "alpha_to_coverage");
+
+ lp_build_mask_update(mask, test);
+
+ if (do_branch)
+ lp_build_mask_check(mask);
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h
new file mode 100644
index 000000000..adfab85dc
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -0,0 +1,110 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_BLEND_H
+#define LP_BLD_BLEND_H
+
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
+
+#include "pipe/p_format.h"
+
+
+struct pipe_blend_state;
+struct lp_type;
+struct lp_build_context;
+struct lp_build_mask_context;
+
+
+LLVMValueRef
+lp_build_blend(struct lp_build_context *bld,
+ unsigned func,
+ unsigned factor_src,
+ unsigned factor_dst,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef src_factor,
+ LLVMValueRef dst_factor,
+ boolean not_alpha_dependent,
+ boolean optimise_only);
+
+
+LLVMValueRef
+lp_build_blend_aos(struct gallivm_state *gallivm,
+ const struct pipe_blend_state *blend,
+ enum pipe_format cbuf_format,
+ struct lp_type type,
+ unsigned rt,
+ LLVMValueRef src,
+ LLVMValueRef src_alpha,
+ LLVMValueRef src1,
+ LLVMValueRef src1_alpha,
+ LLVMValueRef dst,
+ LLVMValueRef mask,
+ LLVMValueRef const_,
+ LLVMValueRef const_alpha,
+ const unsigned char swizzle[4],
+ int nr_channels);
+
+
+/**
+ * Apply a logic op.
+ *
+ * src/dst parameters are packed values. It should work regardless the inputs
+ * are scalars, or a vector.
+ */
+LLVMValueRef
+lp_build_logicop(LLVMBuilderRef builder,
+ unsigned logicop_func,
+ LLVMValueRef src,
+ LLVMValueRef dst);
+
+
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef term1,
+ LLVMValueRef term2);
+
+
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func,
+ unsigned alpha_func);
+
+
+boolean
+lp_build_blend_func_commutative(unsigned func);
+
+void
+lp_build_alpha_to_coverage(struct gallivm_state *gallivm,
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ boolean do_branch);
+
+#endif /* !LP_BLD_BLEND_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
new file mode 100644
index 000000000..564e19a15
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -0,0 +1,423 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- AoS layout.
+ *
+ * AoS blending is in general much slower than SoA, but there are some cases
+ * where it might be faster. In particular, if a pixel is rendered only once
+ * then the overhead of tiling and untiling will dominate over the speedup that
+ * SoA gives. So we might want to detect such cases and fallback to AoS in the
+ * future, but for now this function is here for historical/benchmarking
+ * purposes.
+ *
+ * Run lp_blend_test after any change to this file.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_format.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_bitarit.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "lp_bld_blend.h"
+
+
+/**
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ */
+struct lp_build_blend_aos_context
+{
+ struct lp_build_context base;
+
+ LLVMValueRef src;
+ LLVMValueRef src_alpha;
+ LLVMValueRef src1;
+ LLVMValueRef src1_alpha;
+ LLVMValueRef dst;
+ LLVMValueRef const_;
+ LLVMValueRef const_alpha;
+
+ LLVMValueRef inv_src;
+ LLVMValueRef inv_src_alpha;
+ LLVMValueRef inv_dst;
+ LLVMValueRef inv_const;
+ LLVMValueRef inv_const_alpha;
+ LLVMValueRef saturate;
+
+ LLVMValueRef rgb_src_factor;
+ LLVMValueRef alpha_src_factor;
+ LLVMValueRef rgb_dst_factor;
+ LLVMValueRef alpha_dst_factor;
+};
+
+
+static LLVMValueRef
+lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
+ unsigned factor,
+ boolean alpha)
+{
+ LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src;
+ LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1;
+ LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_;
+
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_ONE:
+ return bld->base.one;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return bld->src;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return src_alpha;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return bld->dst;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ if(alpha)
+ return bld->base.one;
+ else {
+ /*
+ * if there's separate src_alpha there's no dst alpha hence the complement
+ * is zero but for unclamped float inputs min can be non-zero (negative).
+ */
+ if (bld->src_alpha) {
+ if (!bld->saturate)
+ bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
+ }
+ else {
+ if(!bld->inv_dst)
+ bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
+ if(!bld->saturate)
+ bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
+ }
+ return bld->saturate;
+ }
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return bld->const_;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return const_alpha;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ return bld->src1;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ return src1_alpha;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ if(!bld->inv_src)
+ bld->inv_src = lp_build_comp(&bld->base, bld->src);
+ return bld->inv_src;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ if(!bld->inv_src_alpha)
+ bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha);
+ return bld->inv_src_alpha;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ if(!bld->inv_dst)
+ bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
+ return bld->inv_dst;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ if(!bld->inv_const)
+ bld->inv_const = lp_build_comp(&bld->base, bld->const_);
+ return bld->inv_const;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ if(!bld->inv_const_alpha)
+ bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha);
+ return bld->inv_const_alpha;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return lp_build_comp(&bld->base, bld->src1);
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ return lp_build_comp(&bld->base, src1_alpha);
+ default:
+ assert(0);
+ return bld->base.zero;
+ }
+}
+
+
+enum lp_build_blend_swizzle {
+ LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
+ LP_BUILD_BLEND_SWIZZLE_AAAA = 1
+};
+
+
+/**
+ * How should we shuffle the base factor.
+ */
+static enum lp_build_blend_swizzle
+lp_build_blend_factor_swizzle(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ case PIPE_BLENDFACTOR_ZERO:
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return LP_BUILD_BLEND_SWIZZLE_RGBA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ return LP_BUILD_BLEND_SWIZZLE_AAAA;
+ default:
+ assert(0);
+ return LP_BUILD_BLEND_SWIZZLE_RGBA;
+ }
+}
+
+
+static LLVMValueRef
+lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
+ LLVMValueRef rgb,
+ LLVMValueRef alpha,
+ enum lp_build_blend_swizzle rgb_swizzle,
+ unsigned alpha_swizzle,
+ unsigned num_channels)
+{
+ LLVMValueRef swizzled_rgb;
+
+ switch (rgb_swizzle) {
+ case LP_BUILD_BLEND_SWIZZLE_RGBA:
+ swizzled_rgb = rgb;
+ break;
+ case LP_BUILD_BLEND_SWIZZLE_AAAA:
+ swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels);
+ break;
+ default:
+ assert(0);
+ swizzled_rgb = bld->base.undef;
+ }
+
+ if (rgb != alpha) {
+ swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle,
+ alpha, swizzled_rgb,
+ num_channels);
+ }
+
+ return swizzled_rgb;
+}
+
+/**
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
+ */
+static LLVMValueRef
+lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
+ unsigned rgb_factor,
+ unsigned alpha_factor,
+ unsigned alpha_swizzle,
+ unsigned num_channels)
+{
+ LLVMValueRef rgb_factor_, alpha_factor_;
+ enum lp_build_blend_swizzle rgb_swizzle;
+
+ if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) {
+ return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
+ }
+
+ rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
+
+ if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
+ rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
+ alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
+ return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
+ } else {
+ return rgb_factor_;
+ }
+}
+
+
+/**
+ * Performs blending of src and dst pixels
+ *
+ * @param blend the blend state of the shader variant
+ * @param cbuf_format format of the colour buffer
+ * @param type data type of the pixel vector
+ * @param rt render target index
+ * @param src blend src
+ * @param src_alpha blend src alpha (if not included in src)
+ * @param src1 second blend src (for dual source blend)
+ * @param src1_alpha second blend src alpha (if not included in src1)
+ * @param dst blend dst
+ * @param mask optional mask to apply to the blending result
+ * @param const_ const blend color
+ * @param const_alpha const blend color alpha (if not included in const_)
+ * @param swizzle swizzle values for RGBA
+ *
+ * @return the result of blending src and dst
+ */
+LLVMValueRef
+lp_build_blend_aos(struct gallivm_state *gallivm,
+ const struct pipe_blend_state *blend,
+ enum pipe_format cbuf_format,
+ struct lp_type type,
+ unsigned rt,
+ LLVMValueRef src,
+ LLVMValueRef src_alpha,
+ LLVMValueRef src1,
+ LLVMValueRef src1_alpha,
+ LLVMValueRef dst,
+ LLVMValueRef mask,
+ LLVMValueRef const_,
+ LLVMValueRef const_alpha,
+ const unsigned char swizzle[4],
+ int nr_channels)
+{
+ const struct pipe_rt_blend_state * state = &blend->rt[rt];
+ const struct util_format_description * desc;
+ struct lp_build_blend_aos_context bld;
+ LLVMValueRef src_factor, dst_factor;
+ LLVMValueRef result;
+ unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
+ unsigned i;
+
+ desc = util_format_description(cbuf_format);
+
+ /* Setup build context */
+ memset(&bld, 0, sizeof bld);
+ lp_build_context_init(&bld.base, gallivm, type);
+ bld.src = src;
+ bld.src1 = src1;
+ bld.dst = dst;
+ bld.const_ = const_;
+ bld.src_alpha = src_alpha;
+ bld.src1_alpha = src1_alpha;
+ bld.const_alpha = const_alpha;
+
+ /* Find the alpha channel if not provided separately */
+ if (!src_alpha) {
+ for (i = 0; i < 4; ++i) {
+ if (swizzle[i] == 3) {
+ alpha_swizzle = i;
+ }
+ }
+ }
+
+ if (blend->logicop_enable) {
+ if(!type.floating) {
+ result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
+ }
+ else {
+ result = src;
+ }
+ } else if (!state->blend_enable) {
+ result = src;
+ } else {
+ boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
+
+ src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
+ state->alpha_src_factor,
+ alpha_swizzle,
+ nr_channels);
+
+ dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
+ state->alpha_dst_factor,
+ alpha_swizzle,
+ nr_channels);
+
+ result = lp_build_blend(&bld.base,
+ state->rgb_func,
+ state->rgb_src_factor,
+ state->rgb_dst_factor,
+ src,
+ dst,
+ src_factor,
+ dst_factor,
+ rgb_alpha_same,
+ false);
+
+ if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
+ LLVMValueRef alpha;
+
+ alpha = lp_build_blend(&bld.base,
+ state->alpha_func,
+ state->alpha_src_factor,
+ state->alpha_dst_factor,
+ src,
+ dst,
+ src_factor,
+ dst_factor,
+ rgb_alpha_same,
+ false);
+
+ result = lp_build_blend_swizzle(&bld,
+ result,
+ alpha,
+ LP_BUILD_BLEND_SWIZZLE_RGBA,
+ alpha_swizzle,
+ nr_channels);
+ }
+ }
+
+ /* Check if color mask is necessary */
+ if (!util_format_colormask_full(desc, state->colormask)) {
+ LLVMValueRef color_mask;
+
+ color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
+ lp_build_name(color_mask, "color_mask");
+
+ /* Combine with input mask if necessary */
+ if (mask) {
+ /* We can be blending floating values but masks are always integer... */
+ unsigned floating = bld.base.type.floating;
+ bld.base.type.floating = 0;
+
+ mask = lp_build_and(&bld.base, color_mask, mask);
+
+ bld.base.type.floating = floating;
+ } else {
+ mask = color_mask;
+ }
+ }
+
+ /* Apply mask, if one exists */
+ if (mask) {
+ result = lp_build_select(&bld.base, mask, result, dst);
+ }
+
+ return result;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
new file mode 100644
index 000000000..1eac0a5c8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
@@ -0,0 +1,109 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- logic ops.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+
+#include "lp_bld_blend.h"
+
+
+LLVMValueRef
+lp_build_logicop(LLVMBuilderRef builder,
+ unsigned logicop_func,
+ LLVMValueRef src,
+ LLVMValueRef dst)
+{
+ LLVMTypeRef type;
+ LLVMValueRef res;
+
+ type = LLVMTypeOf(src);
+
+ switch (logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ res = LLVMConstNull(type);
+ break;
+ case PIPE_LOGICOP_NOR:
+ res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ res = LLVMBuildNot(builder, src, "");
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_INVERT:
+ res = LLVMBuildNot(builder, dst, "");
+ break;
+ case PIPE_LOGICOP_XOR:
+ res = LLVMBuildXor(builder, src, dst, "");
+ break;
+ case PIPE_LOGICOP_NAND:
+ res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_AND:
+ res = LLVMBuildAnd(builder, src, dst, "");
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_NOOP:
+ res = dst;
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
+ break;
+ case PIPE_LOGICOP_COPY:
+ res = src;
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_OR:
+ res = LLVMBuildOr(builder, src, dst, "");
+ break;
+ case PIPE_LOGICOP_SET:
+ res = LLVMConstAllOnes(type);
+ break;
+ default:
+ assert(0);
+ res = src;
+ }
+
+ return res;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
new file mode 100644
index 000000000..b25e04137
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -0,0 +1,1118 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Depth/stencil testing to LLVM IR translation.
+ *
+ * To be done accurately/efficiently the depth/stencil test must be done with
+ * the same type/format of the depth/stencil buffer, which implies massaging
+ * the incoming depths to fit into place. Using a more straightforward
+ * type/format for depth/stencil values internally and only convert when
+ * flushing would avoid this, but it would most likely result in depth fighting
+ * artifacts.
+ *
+ * Since we're using linear layout for everything, but we need to deal with
+ * 2x2 quads, we need to load/store multiple values and swizzle them into
+ * place (we could avoid this by doing depth/stencil testing in linear format,
+ * which would be easy for late depth/stencil test as we could do that after
+ * the fragment shader loop just as we do for color buffers, but more tricky
+ * for early depth test as we'd need both masks and interpolated depth in
+ * linear format).
+ *
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_pack.h"
+
+#include "lp_bld_depth.h"
+
+
+/** Used to select fields from pipe_stencil_state */
+enum stencil_op {
+ S_FAIL_OP,  /* selects pipe_stencil_state::fail_op */
+ Z_FAIL_OP,  /* selects pipe_stencil_state::zfail_op */
+ Z_PASS_OP   /* selects pipe_stencil_state::zpass_op */
+};
+
+
+
+/**
+ * Do the stencil test comparison (compare FB stencil values against ref value).
+ * This will be used twice when generating two-sided stencil code.
+ * \param stencil the front/back stencil state
+ * \param stencilRef the stencil reference value, replicated as a vector
+ * \param stencilVals vector of stencil values from framebuffer
+ * \return vector mask of pass/fail values (~0 or 0)
+ */
+static LLVMValueRef
+lp_build_stencil_test_single(struct lp_build_context *bld,
+ const struct pipe_stencil_state *stencil,
+ LLVMValueRef stencilRef,
+ LLVMValueRef stencilVals)
+{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ const unsigned stencilMax = 255; /* XXX fix */
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ /*
+ * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
+ * are between 0..255 so ensure we generate the fastest comparisons for
+ * wider elements.
+ */
+ if (type.width <= 8) {
+ assert(!type.sign);
+ } else {
+ assert(type.sign);
+ }
+
+ assert(stencil->enabled);
+
+ /* Apply the valuemask to both operands before comparing, but only when
+ * it actually masks out bits (255 == all stencil bits kept).
+ */
+ if (stencil->valuemask != stencilMax) {
+ /* compute stencilRef = stencilRef & valuemask */
+ LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
+ stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
+ /* compute stencilVals = stencilVals & valuemask */
+ stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
+ }
+
+ /* res[i] = (stencilRef func stencilVals[i]) ? ~0 : 0 */
+ res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
+
+ return res;
+}
+
+
+/**
+ * Do the one or two-sided stencil test comparison.
+ * \sa lp_build_stencil_test_single
+ * \param front_facing an integer vector mask, indicating front (~0) or back
+ * (0) facing polygon. If NULL, assume front-facing.
+ */
+static LLVMValueRef
+lp_build_stencil_test(struct lp_build_context *bld,
+ const struct pipe_stencil_state stencil[2],
+ LLVMValueRef stencilRefs[2],
+ LLVMValueRef stencilVals,
+ LLVMValueRef front_facing)
+{
+ LLVMValueRef res;
+
+ assert(stencil[0].enabled);
+
+ /* do front face test */
+ res = lp_build_stencil_test_single(bld, &stencil[0],
+ stencilRefs[0], stencilVals);
+
+ if (stencil[1].enabled && front_facing != NULL) {
+ /* do back face test */
+ LLVMValueRef back_res;
+
+ back_res = lp_build_stencil_test_single(bld, &stencil[1],
+ stencilRefs[1], stencilVals);
+
+ /* per-element: front_facing ? front result : back result */
+ res = lp_build_select(bld, front_facing, res, back_res);
+ }
+
+ return res;
+}
+
+
+/**
+ * Apply the stencil operator (add/sub/keep/etc) to the given vector
+ * of stencil values.
+ * \return new stencil values vector
+ */
+static LLVMValueRef
+lp_build_stencil_op_single(struct lp_build_context *bld,
+ const struct pipe_stencil_state *stencil,
+ enum stencil_op op,
+ LLVMValueRef stencilRef,
+ LLVMValueRef stencilVals)
+
+{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+ /* 0xff serves both as the saturation limit (INCR/DECR clamp) and as the
+ * 8-bit wrap mask (INCR_WRAP/DECR_WRAP/INVERT).
+ */
+ LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
+ unsigned stencil_op;
+
+ assert(type.sign);
+
+ /* Map the generic op selector to the concrete PIPE_STENCIL_OP_* value
+ * from the front or back stencil state.
+ */
+ switch (op) {
+ case S_FAIL_OP:
+ stencil_op = stencil->fail_op;
+ break;
+ case Z_FAIL_OP:
+ stencil_op = stencil->zfail_op;
+ break;
+ case Z_PASS_OP:
+ stencil_op = stencil->zpass_op;
+ break;
+ default:
+ assert(0 && "Invalid stencil_op mode");
+ stencil_op = PIPE_STENCIL_OP_KEEP;
+ }
+
+ switch (stencil_op) {
+ case PIPE_STENCIL_OP_KEEP:
+ res = stencilVals;
+ /* we can return early for this case */
+ return res;
+ case PIPE_STENCIL_OP_ZERO:
+ res = bld->zero;
+ break;
+ case PIPE_STENCIL_OP_REPLACE:
+ res = stencilRef;
+ break;
+ case PIPE_STENCIL_OP_INCR:
+ /* saturating increment: clamp at 0xff */
+ res = lp_build_add(bld, stencilVals, bld->one);
+ res = lp_build_min(bld, res, max);
+ break;
+ case PIPE_STENCIL_OP_DECR:
+ /* saturating decrement: clamp at 0 */
+ res = lp_build_sub(bld, stencilVals, bld->one);
+ res = lp_build_max(bld, res, bld->zero);
+ break;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ res = lp_build_add(bld, stencilVals, bld->one);
+ res = LLVMBuildAnd(builder, res, max, "");
+ break;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ res = lp_build_sub(bld, stencilVals, bld->one);
+ res = LLVMBuildAnd(builder, res, max, "");
+ break;
+ case PIPE_STENCIL_OP_INVERT:
+ /* bitwise complement, masked back down to 8 bits */
+ res = LLVMBuildNot(builder, stencilVals, "");
+ res = LLVMBuildAnd(builder, res, max, "");
+ break;
+ default:
+ assert(0 && "bad stencil op mode");
+ res = bld->undef;
+ }
+
+ return res;
+}
+
+
+/**
+ * Do the one or two-sided stencil test op/update.
+ */
+static LLVMValueRef
+lp_build_stencil_op(struct lp_build_context *bld,
+ const struct pipe_stencil_state stencil[2],
+ enum stencil_op op,
+ LLVMValueRef stencilRefs[2],
+ LLVMValueRef stencilVals,
+ LLVMValueRef mask,
+ LLVMValueRef front_facing)
+
+{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ LLVMValueRef res;
+
+ assert(stencil[0].enabled);
+
+ /* do front face op */
+ res = lp_build_stencil_op_single(bld, &stencil[0], op,
+ stencilRefs[0], stencilVals);
+
+ if (stencil[1].enabled && front_facing != NULL) {
+ /* do back face op */
+ LLVMValueRef back_res;
+
+ back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
+ stencilRefs[1], stencilVals);
+
+ res = lp_build_select(bld, front_facing, res, back_res);
+ }
+
+ /* If any writemask keeps some bits, the merge must be per-bit rather
+ * than per-element, so unwritten bits of the old value survive.
+ */
+ if (stencil[0].writemask != 0xff ||
+ (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
+ /* mask &= stencil[0].writemask */
+ LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
+ stencil[0].writemask);
+ if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
+ LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
+ stencil[1].writemask);
+ writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
+ }
+
+ mask = LLVMBuildAnd(builder, mask, writemask, "");
+ /* res = (res & mask) | (stencilVals & ~mask) */
+ res = lp_build_select_bitwise(bld, mask, res, stencilVals);
+ }
+ else {
+ /* res = mask ? res : stencilVals */
+ res = lp_build_select(bld, mask, res, stencilVals);
+ }
+
+ return res;
+}
+
+
+
+/**
+ * Return a type that matches the depth/stencil format.
+ */
+struct lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+ unsigned length)
+{
+ struct lp_type type;
+ unsigned z_swizzle;
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ memset(&type, 0, sizeof type);
+ /* element width covers the whole combined depth/stencil pixel */
+ type.width = format_desc->block.bits;
+
+ z_swizzle = format_desc->swizzle[0];
+ if (z_swizzle < 4) {
+ if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
+ type.floating = TRUE;
+ assert(z_swizzle == 0);
+ assert(format_desc->channel[z_swizzle].size == 32);
+ }
+ else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ assert(format_desc->block.bits <= 32);
+ assert(format_desc->channel[z_swizzle].normalized);
+ if (format_desc->channel[z_swizzle].size < format_desc->block.bits) {
+ /* Prefer signed integers when possible, as SSE has less support
+ * for unsigned comparison;
+ */
+ type.sign = TRUE;
+ }
+ }
+ else
+ assert(0);
+ }
+
+ type.length = length;
+
+ return type;
+}
+
+
+/**
+ * Compute bitmask and bit shift to apply to the incoming fragment Z values
+ * and the Z buffer values needed before doing the Z comparison.
+ *
+ * Note that we leave the Z bits in the position that we find them
+ * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
+ * get by with fewer bit twiddling steps.
+ */
+static boolean
+get_z_shift_and_mask(const struct util_format_description *format_desc,
+ unsigned *shift, unsigned *width, unsigned *mask)
+{
+ unsigned total_bits;
+ unsigned z_swizzle;
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ /* 64bit d/s format is special already extracted 32 bits */
+ total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;
+
+ z_swizzle = format_desc->swizzle[0];
+
+ /* no depth channel at all (stencil-only format) */
+ if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ return FALSE;
+
+ *width = format_desc->channel[z_swizzle].size;
+ /* & 31 is for the same reason as the 32-bit limit above */
+ *shift = format_desc->channel[z_swizzle].shift & 31;
+
+ if (*width == total_bits) {
+ /* Z occupies the whole word; avoid the 1<<32 overflow below */
+ *mask = 0xffffffff;
+ } else {
+ *mask = ((1 << *width) - 1) << *shift;
+ }
+
+ return TRUE;
+}
+
+
+/**
+ * Compute bitmask and bit shift to apply to the framebuffer pixel values
+ * to put the stencil bits in the least significant position.
+ * (i.e. 0x000000ff)
+ */
+static boolean
+get_s_shift_and_mask(const struct util_format_description *format_desc,
+ unsigned *shift, unsigned *mask)
+{
+ unsigned s_swizzle;
+ unsigned sz;
+
+ s_swizzle = format_desc->swizzle[1];
+
+ /* no stencil channel at all (depth-only format) */
+ if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ return FALSE;
+
+ /* just special case 64bit d/s format */
+ if (format_desc->block.bits > 32) {
+ /* XXX big-endian? */
+ assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+ *shift = 0;
+ *mask = 0xff;
+ return TRUE;
+ }
+
+ *shift = format_desc->channel[s_swizzle].shift;
+ sz = format_desc->channel[s_swizzle].size;
+ /* mask of 'sz' low bits, applied after shifting stencil to the LSBs */
+ *mask = (1U << sz) - 1U;
+
+ return TRUE;
+}
+
+
+/**
+ * Perform the occlusion test and increase the counter.
+ * Test the depth mask. Add the number of channel which has none zero mask
+ * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
+ * The counter will add 4.
+ * TODO: could get that out of the fs loop.
+ *
+ * \param type holds element type of the mask vector.
+ * \param maskvalue is the depth test mask.
+ * \param counter is a pointer of the uint32 counter.
+ */
+void
+lp_build_occlusion_count(struct gallivm_state *gallivm,
+ struct lp_type type,
+ LLVMValueRef maskvalue,
+ LLVMValueRef counter)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMContextRef context = gallivm->context;
+ LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
+ LLVMValueRef count, newcount;
+
+ assert(type.length <= 16);
+ assert(type.floating);
+
+ /* Fast path: movmskps packs the per-element sign bits into a scalar,
+ * then ctpop counts how many lanes were ~0 (i.e. passed).
+ */
+ if(util_cpu_caps.has_sse && type.length == 4) {
+ const char *movmskintr = "llvm.x86.sse.movmsk.ps";
+ const char *popcntintr = "llvm.ctpop.i32";
+ LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
+ lp_build_vec_type(gallivm, type), "");
+ bits = lp_build_intrinsic_unary(builder, movmskintr,
+ LLVMInt32TypeInContext(context), bits);
+ count = lp_build_intrinsic_unary(builder, popcntintr,
+ LLVMInt32TypeInContext(context), bits);
+ count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
+ }
+ else if(util_cpu_caps.has_avx && type.length == 8) {
+ /* same approach, 256-bit variant */
+ const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
+ const char *popcntintr = "llvm.ctpop.i32";
+ LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
+ lp_build_vec_type(gallivm, type), "");
+ bits = lp_build_intrinsic_unary(builder, movmskintr,
+ LLVMInt32TypeInContext(context), bits);
+ count = lp_build_intrinsic_unary(builder, popcntintr,
+ LLVMInt32TypeInContext(context), bits);
+ count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
+ }
+ else {
+ /* Generic path: isolate bit 0 of each lane, gather one byte per lane
+ * via a shuffle, reinterpret as a scalar integer and popcount it.
+ */
+ unsigned i;
+ LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
+ LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
+ LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
+ LLVMValueRef shufflev, countd;
+ LLVMValueRef shuffles[16];
+ const char *popcntintr = NULL;
+
+ countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
+
+ for (i = 0; i < type.length; i++) {
+ /* pick the low byte of each 32-bit lane */
+ shuffles[i] = lp_build_const_int32(gallivm, 4*i);
+ }
+
+ shufflev = LLVMConstVector(shuffles, type.length);
+ countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
+ countd = LLVMBuildBitCast(builder, countd, counttype, "countd");
+
+ /*
+ * XXX FIXME
+ * this is bad on cpus without popcount (on x86 supported by intel
+ * nehalem, amd barcelona, and up - not tied to sse42).
+ * Would be much faster to just sum the 4 elements of the vector with
+ * some horizontal add (shuffle/add/shuffle/add after the initial and).
+ */
+ switch (type.length) {
+ case 4:
+ popcntintr = "llvm.ctpop.i32";
+ break;
+ case 8:
+ popcntintr = "llvm.ctpop.i64";
+ break;
+ case 16:
+ popcntintr = "llvm.ctpop.i128";
+ break;
+ default:
+ assert(0);
+ }
+ count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
+
+ /* normalize the count to a 64-bit scalar to match the counter */
+ if (type.length > 8) {
+ count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), "");
+ }
+ else if (type.length < 8) {
+ count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), "");
+ }
+ }
+ /* *counter += count */
+ newcount = LLVMBuildLoad(builder, counter, "origcount");
+ newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
+ LLVMBuildStore(builder, newcount, counter);
+}
+
+
+/**
+ * Load depth/stencil values.
+ * The stored values are linear, swizzle them.
+ *
+ * \param type the data type of the fragment depth/stencil values
+ * \param format_desc description of the depth/stencil surface
+ * \param is_1d whether this resource has only one dimension
+ * \param loop_counter the current loop iteration
+ * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride stride of the depth/stencil buffer
+ * \param z_fb contains z values loaded from fb (may include padding)
+ * \param s_fb contains s values loaded from fb (may include padding)
+ */
+void
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ boolean is_1d,
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef *z_fb,
+ LLVMValueRef *s_fb,
+ LLVMValueRef loop_counter)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMValueRef zs_dst1, zs_dst2;
+ LLVMValueRef zs_dst_ptr;
+ LLVMValueRef depth_offset1, depth_offset2;
+ LLVMTypeRef load_ptr_type;
+ unsigned depth_bytes = format_desc->block.bits / 8;
+ struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+ struct lp_type zs_load_type = zs_type;
+
+ /* each of the two loads covers half the final vector (one fb row each) */
+ zs_load_type.length = zs_load_type.length / 2;
+ load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
+
+ if (z_src_type.length == 4) {
+ unsigned i;
+ /* loop_counter bit 0 selects left/right 2-pixel pair, bit 1 selects
+ * the row pair within the 4x4 block.
+ */
+ LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 2), "");
+ LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+ depth_stride, "");
+ depth_offset1 = LLVMBuildMul(builder, looplsb,
+ lp_build_const_int32(gallivm, depth_bytes * 2), "");
+ depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+ /* just concatenate the loaded 2x2 values into 4-wide vector */
+ for (i = 0; i < 4; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, i);
+ }
+ }
+ else {
+ unsigned i;
+ LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ assert(z_src_type.length == 8);
+ depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+ /*
+ * We load 2x4 values, and need to swizzle them (order
+ * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
+ */
+ for (i = 0; i < 8; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+ }
+ }
+
+ depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+ /* Load current z/stencil values from z/stencil buffer */
+ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
+ zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ if (is_1d) {
+ /* 1D resource has no second row; use an undef half instead of loading */
+ zs_dst2 = lp_build_undef(gallivm, zs_load_type);
+ }
+ else {
+ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
+ zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ }
+
+ /* combine the two half-vectors, swizzling into quad order */
+ *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+ LLVMConstVector(shuffles, zs_type.length), "");
+ /* for packed <= 32-bit formats z and s share the same loaded word */
+ *s_fb = *z_fb;
+
+ if (format_desc->block.bits < z_src_type.width) {
+ /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
+ *z_fb = LLVMBuildZExt(builder, *z_fb,
+ lp_build_int_vec_type(gallivm, z_src_type), "");
+ }
+
+ else if (format_desc->block.bits > 32) {
+ /* rely on llvm to handle too wide vector we have here nicely */
+ unsigned i;
+ struct lp_type typex2 = zs_type;
+ struct lp_type s_type = zs_type;
+ LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMValueRef tmp;
+
+ /* view the 64-bit z/s pairs as twice as many 32-bit elements */
+ typex2.width = typex2.width / 2;
+ typex2.length = typex2.length * 2;
+ s_type.width = s_type.width / 2;
+ s_type.floating = 0;
+
+ tmp = LLVMBuildBitCast(builder, *z_fb,
+ lp_build_vec_type(gallivm, typex2), "");
+
+ /* de-interleave: even elements are z, odd elements are stencil */
+ for (i = 0; i < zs_type.length; i++) {
+ shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
+ shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+ }
+ *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+ LLVMConstVector(shuffles1, zs_type.length), "");
+ *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+ LLVMConstVector(shuffles2, zs_type.length), "");
+ *s_fb = LLVMBuildBitCast(builder, *s_fb,
+ lp_build_vec_type(gallivm, s_type), "");
+ lp_build_name(*s_fb, "s_dst");
+ }
+
+ lp_build_name(*z_fb, "z_dst");
+ /* NOTE(review): the two calls below repeat names already assigned above
+ * (s_dst is also set in the 64-bit branch) - harmless but redundant.
+ */
+ lp_build_name(*s_fb, "s_dst");
+ lp_build_name(*z_fb, "z_dst");
+}
+
+/**
+ * Store depth/stencil values.
+ * Incoming values are swizzled (typically n 2x2 quads), stored linear.
+ * If there's a mask it will do select/store otherwise just store.
+ *
+ * \param type the data type of the fragment depth/stencil values
+ * \param format_desc description of the depth/stencil surface
+ * \param is_1d whether this resource has only one dimension
+ * \param mask the alive/dead pixel mask for the quad (vector)
+ * \param z_fb z values read from fb (with padding)
+ * \param s_fb s values read from fb (with padding)
+ * \param loop_counter the current loop iteration
+ * \param depth_ptr pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride stride of the depth/stencil buffer
+ * \param z_value the depth values to store (with padding)
+ * \param s_value the stencil values to store (with padding)
+ */
+void
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ boolean is_1d,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef z_fb,
+ LLVMValueRef s_fb,
+ LLVMValueRef loop_counter,
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef z_value,
+ LLVMValueRef s_value)
+{
+ struct lp_build_context z_bld;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef mask_value = NULL;
+ LLVMValueRef zs_dst1, zs_dst2;
+ LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
+ LLVMValueRef depth_offset1, depth_offset2;
+ LLVMTypeRef load_ptr_type;
+ unsigned depth_bytes = format_desc->block.bits / 8;
+ struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+ struct lp_type z_type = zs_type;
+ struct lp_type zs_load_type = zs_type;
+
+ /* the two stores each cover half the final vector (one fb row each) */
+ zs_load_type.length = zs_load_type.length / 2;
+ load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
+
+ z_type.width = z_src_type.width;
+
+ lp_build_context_init(&z_bld, gallivm, z_type);
+
+ /*
+ * This is far from ideal, at least for late depth write we should do this
+ * outside the fs loop to avoid all the swizzle stuff.
+ */
+ if (z_src_type.length == 4) {
+ /* loop_counter bit 0 selects the 2-pixel pair, bit 1 the row pair
+ * (mirrors the addressing in lp_build_depth_stencil_load_swizzled).
+ */
+ LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+ lp_build_const_int32(gallivm, 2), "");
+ LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+ depth_stride, "");
+ depth_offset1 = LLVMBuildMul(builder, looplsb,
+ lp_build_const_int32(gallivm, depth_bytes * 2), "");
+ depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+ }
+ else {
+ unsigned i;
+ LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+ lp_build_const_int32(gallivm, 1), "");
+ assert(z_src_type.length == 8);
+ depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+ /*
+ * We load 2x4 values, and need to swizzle them (order
+ * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
+ */
+ for (i = 0; i < 8; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+ }
+ }
+
+ depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+ zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+ zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
+ zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+ zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");
+
+ if (format_desc->block.bits > 32) {
+ /* stencil needs z's vector type so select/interleave below work */
+ s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
+ }
+
+ if (mask) {
+ /* keep old fb values for dead fragments: new = mask ? new : old */
+ mask_value = lp_build_mask_value(mask);
+ z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
+ if (format_desc->block.bits > 32) {
+ s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
+ s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
+ }
+ }
+
+ if (zs_type.width < z_src_type.width) {
+ /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
+ z_value = LLVMBuildTrunc(builder, z_value,
+ lp_build_int_vec_type(gallivm, zs_type), "");
+ }
+
+ if (format_desc->block.bits <= 32) {
+ /* z_value already holds the merged z/s word; just split it in halves */
+ if (z_src_type.length == 4) {
+ zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
+ zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
+ }
+ else {
+ assert(z_src_type.length == 8);
+ zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
+ LLVMConstVector(&shuffles[0],
+ zs_load_type.length), "");
+ zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
+ LLVMConstVector(&shuffles[4],
+ zs_load_type.length), "");
+ }
+ }
+ else {
+ /* 64-bit format: re-interleave z and s back into z,s,z,s,... order */
+ if (z_src_type.length == 4) {
+ zs_dst1 = lp_build_interleave2(gallivm, z_type,
+ z_value, s_value, 0);
+ zs_dst2 = lp_build_interleave2(gallivm, z_type,
+ z_value, s_value, 1);
+ }
+ else {
+ unsigned i;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
+ assert(z_src_type.length == 8);
+ for (i = 0; i < 8; i++) {
+ shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+ shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
+ z_src_type.length);
+ }
+ zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
+ LLVMConstVector(&shuffles[0],
+ z_src_type.length), "");
+ zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
+ LLVMConstVector(&shuffles[8],
+ z_src_type.length), "");
+ }
+ zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
+ lp_build_vec_type(gallivm, zs_load_type), "");
+ zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
+ lp_build_vec_type(gallivm, zs_load_type), "");
+ }
+
+ LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
+ if (!is_1d) {
+ /* 1D resources have a single row; skip the second store */
+ LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+ }
+}
+
+/**
+ * Generate code for performing depth and/or stencil tests.
+ * We operate on a vector of values (typically n 2x2 quads).
+ *
+ * \param depth the depth test state
+ * \param stencil the front/back stencil state
+ * \param type the data type of the fragment depth/stencil values
+ * \param format_desc description of the depth/stencil surface
+ * \param mask the alive/dead pixel mask for the quad (vector)
+ * \param stencil_refs the front/back stencil ref values (scalar)
+ * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32)
+ * \param zs_dst the depth/stencil values in framebuffer
+ * \param face contains boolean value indicating front/back facing polygon
+ */
+void
+lp_build_depth_stencil_test(struct gallivm_state *gallivm,
+ const struct pipe_depth_state *depth,
+ const struct pipe_stencil_state stencil[2],
+ struct lp_type z_src_type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef stencil_refs[2],
+ LLVMValueRef z_src,
+ LLVMValueRef z_fb,
+ LLVMValueRef s_fb,
+ LLVMValueRef face,
+ LLVMValueRef *z_value,
+ LLVMValueRef *s_value,
+ boolean do_branch)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_type z_type;
+ struct lp_build_context z_bld;
+ struct lp_build_context s_bld;
+ struct lp_type s_type;
+ unsigned z_shift = 0, z_width = 0, z_mask = 0;
+ LLVMValueRef z_dst = NULL;
+ LLVMValueRef stencil_vals = NULL;
+ LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
+ LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
+ LLVMValueRef current_mask = lp_build_mask_value(mask);
+ LLVMValueRef front_facing = NULL;
+ boolean have_z, have_s;
+
+ /*
+ * Depths are expected to be between 0 and 1, even if they are stored in
+ * floats. Setting these bits here will ensure that the lp_build_conv() call
+ * below won't try to unnecessarily clamp the incoming values.
+ */
+ if(z_src_type.floating) {
+ z_src_type.sign = FALSE;
+ z_src_type.norm = TRUE;
+ }
+ else {
+ assert(!z_src_type.sign);
+ assert(z_src_type.norm);
+ }
+
+ /* Pick the type matching the depth-stencil format. */
+ z_type = lp_depth_type(format_desc, z_src_type.length);
+
+ /* Pick the intermediate type for depth operations. */
+ z_type.width = z_src_type.width;
+ assert(z_type.length == z_src_type.length);
+
+ /* FIXME: for non-float depth/stencil might generate better code
+ * if we'd always split it up to use 128bit operations.
+ * For stencil we'd almost certainly want to pack to 8xi16 values,
+ * for z just run twice.
+ */
+
+ /* Sanity checking */
+ {
+ const unsigned z_swizzle = format_desc->swizzle[0];
+ const unsigned s_swizzle = format_desc->swizzle[1];
+
+ assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
+ s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+
+ assert(depth->enabled || stencil[0].enabled);
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ if (stencil[0].enabled) {
+ assert(s_swizzle < 4);
+ assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
+ assert(format_desc->channel[s_swizzle].pure_integer);
+ assert(!format_desc->channel[s_swizzle].normalized);
+ assert(format_desc->channel[s_swizzle].size == 8);
+ }
+
+ if (depth->enabled) {
+ assert(z_swizzle < 4);
+ if (z_type.floating) {
+ assert(z_swizzle == 0);
+ assert(format_desc->channel[z_swizzle].type ==
+ UTIL_FORMAT_TYPE_FLOAT);
+ assert(format_desc->channel[z_swizzle].size == 32);
+ }
+ else {
+ assert(format_desc->channel[z_swizzle].type ==
+ UTIL_FORMAT_TYPE_UNSIGNED);
+ assert(format_desc->channel[z_swizzle].normalized);
+ assert(!z_type.fixed);
+ }
+ }
+ }
+
+
+ /* Setup build context for Z vals */
+ lp_build_context_init(&z_bld, gallivm, z_type);
+
+ /* Setup build context for stencil vals */
+ s_type = lp_int_type(z_type);
+ lp_build_context_init(&s_bld, gallivm, s_type);
+
+ /* Compute and apply the Z/stencil bitmasks and shifts.
+ */
+ {
+ unsigned s_shift, s_mask;
+
+ z_dst = z_fb;
+ stencil_vals = s_fb;
+
+ have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+ have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
+
+ if (have_z) {
+ if (z_mask != 0xffffffff) {
+ z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
+ }
+
+ /*
+ * Align the framebuffer Z 's LSB to the right.
+ */
+ if (z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
+ z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
+ } else if (z_bitmask) {
+ z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
+ } else {
+ lp_build_name(z_dst, "z_dst");
+ }
+ }
+
+ if (have_s) {
+ /* move the stencil bits down to the LSBs */
+ if (s_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
+ stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
+ stencil_shift = shift; /* used below */
+ }
+
+ if (s_mask != 0xffffffff) {
+ LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
+ stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
+ }
+
+ lp_build_name(stencil_vals, "s_dst");
+ }
+ }
+
+ if (stencil[0].enabled) {
+
+ if (face) {
+ LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+
+ /* front_facing = face != 0 ? ~0 : 0 */
+ front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
+ /* broadcast the scalar facing flag to a full-width vector mask */
+ front_facing = LLVMBuildSExt(builder, front_facing,
+ LLVMIntTypeInContext(gallivm->context,
+ s_bld.type.length*s_bld.type.width),
+ "");
+ front_facing = LLVMBuildBitCast(builder, front_facing,
+ s_bld.int_vec_type, "");
+ }
+
+ s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
+ stencil_refs, stencil_vals,
+ front_facing);
+
+ /* apply stencil-fail operator */
+ {
+ /* only elements alive in current_mask that failed the stencil test */
+ LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
+ stencil_refs, stencil_vals,
+ s_fail_mask, front_facing);
+ }
+ }
+
+ if (depth->enabled) {
+ /*
+ * Convert fragment Z to the desired type, aligning the LSB to the right.
+ */
+
+ assert(z_type.width == z_src_type.width);
+ assert(z_type.length == z_src_type.length);
+ assert(lp_check_value(z_src_type, z_src));
+ if (z_src_type.floating) {
+ /*
+ * Convert from floating point values
+ */
+
+ if (!z_type.floating) {
+ z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
+ z_src_type,
+ z_width,
+ z_src);
+ }
+ } else {
+ /*
+ * Convert from unsigned normalized values.
+ */
+
+ assert(!z_src_type.sign);
+ assert(!z_src_type.fixed);
+ assert(z_src_type.norm);
+ assert(!z_type.floating);
+ if (z_src_type.width > z_width) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
+ z_src_type.width - z_width);
+ z_src = LLVMBuildLShr(builder, z_src, shift, "");
+ }
+ }
+ assert(lp_check_value(z_type, z_src));
+
+ lp_build_name(z_src, "z_src");
+
+ /* compare src Z to dst Z, returning 'pass' mask */
+ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
+
+ /* mask off bits that failed stencil test */
+ if (s_pass_mask) {
+ current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+ }
+
+ if (!stencil[0].enabled) {
+ /* We can potentially skip all remaining operations here, but only
+ * if stencil is disabled because we still need to update the stencil
+ * buffer values. Don't need to update Z buffer values.
+ */
+ lp_build_mask_update(mask, z_pass);
+
+ if (do_branch) {
+ lp_build_mask_check(mask);
+ }
+ }
+
+ if (depth->writemask) {
+ LLVMValueRef z_pass_mask;
+
+ /* mask off bits that failed Z test */
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
+
+ /* Mix the old and new Z buffer values.
+ * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
+ */
+ z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst);
+ }
+
+ if (stencil[0].enabled) {
+ /* update stencil buffer values according to z pass/fail result */
+ LLVMValueRef z_fail_mask, z_pass_mask;
+
+ /* apply Z-fail operator */
+ z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass);
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
+ stencil_refs, stencil_vals,
+ z_fail_mask, front_facing);
+
+ /* apply Z-pass operator */
+ z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
+ stencil_refs, stencil_vals,
+ z_pass_mask, front_facing);
+ }
+ }
+ else {
+ /* No depth test: apply Z-pass operator to stencil buffer values which
+ * passed the stencil test.
+ */
+ s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, "");
+ stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
+ stencil_refs, stencil_vals,
+ s_pass_mask, front_facing);
+ }
+
+ /* Put Z and stencil bits in the right place */
+ if (have_z && z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
+ z_dst = LLVMBuildShl(builder, z_dst, shift, "");
+ }
+ if (stencil_vals && stencil_shift)
+ stencil_vals = LLVMBuildShl(builder, stencil_vals,
+ stencil_shift, "");
+
+ /* Finally, merge the z/stencil values */
+ if (format_desc->block.bits <= 32) {
+ /* packed format: z and s share one output word */
+ if (have_z && have_s)
+ *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+ else if (have_z)
+ *z_value = z_dst;
+ else
+ *z_value = stencil_vals;
+ *s_value = *z_value;
+ }
+ else {
+ /* 64-bit format: z and s are returned separately */
+ *z_value = z_dst;
+ *s_value = stencil_vals;
+ }
+
+ if (s_pass_mask)
+ lp_build_mask_update(mask, s_pass_mask);
+
+ if (depth->enabled && stencil[0].enabled)
+ lp_build_mask_update(mask, z_pass);
+}
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h
new file mode 100644
index 000000000..d169c8967
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -0,0 +1,105 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Depth/stencil testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_DEPTH_H
+#define LP_BLD_DEPTH_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "gallivm/lp_bld.h"
+
+
+struct pipe_depth_state;
+struct gallivm_state;
+struct util_format_description;
+struct lp_type;
+struct lp_build_mask_context;
+
+
+/**
+ * Choose the intermediate vector type used by the generated
+ * depth/stencil test code, based on the z/stencil buffer format and
+ * the desired vector length.
+ */
+struct lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+              unsigned length);
+
+
+/**
+ * Generate code for the depth/stencil test.
+ *
+ * Updates *mask with the combined pass/fail result and returns the new
+ * z/stencil data to be written back through *z_value / *s_value.
+ *
+ * \param z_src        interpolated fragment depth values
+ * \param z_fb, s_fb   depth / stencil values read from the framebuffer
+ * \param face         front/back-facing value for two-sided stencil
+ * \param do_branch    presumably emits an early-out branch when all
+ *                     fragments are killed -- definition not visible
+ *                     here, confirm in lp_bld_depth.c
+ */
+void
+lp_build_depth_stencil_test(struct gallivm_state *gallivm,
+                            const struct pipe_depth_state *depth,
+                            const struct pipe_stencil_state stencil[2],
+                            struct lp_type z_src_type,
+                            const struct util_format_description *format_desc,
+                            struct lp_build_mask_context *mask,
+                            LLVMValueRef stencil_refs[2],
+                            LLVMValueRef z_src,
+                            LLVMValueRef z_fb,
+                            LLVMValueRef s_fb,
+                            LLVMValueRef face,
+                            LLVMValueRef *z_value,
+                            LLVMValueRef *s_value,
+                            boolean do_branch);
+
+/**
+ * Load z/stencil values from the depth buffer into *z_fb / *s_fb
+ * (swizzled layout; see definition for the exact access pattern).
+ */
+void
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+                                     struct lp_type z_src_type,
+                                     const struct util_format_description *format_desc,
+                                     boolean is_1d,
+                                     LLVMValueRef depth_ptr,
+                                     LLVMValueRef depth_stride,
+                                     LLVMValueRef *z_fb,
+                                     LLVMValueRef *s_fb,
+                                     LLVMValueRef loop_counter);
+
+/**
+ * Write updated z/stencil values back to the depth buffer, honoring
+ * the execution mask.
+ */
+void
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+                                      struct lp_type z_src_type,
+                                      const struct util_format_description *format_desc,
+                                      boolean is_1d,
+                                      struct lp_build_mask_context *mask,
+                                      LLVMValueRef z_fb,
+                                      LLVMValueRef s_fb,
+                                      LLVMValueRef loop_counter,
+                                      LLVMValueRef depth_ptr,
+                                      LLVMValueRef depth_stride,
+                                      LLVMValueRef z_value,
+                                      LLVMValueRef s_value);
+
+
+/**
+ * Update the occlusion-query counter from the given mask value
+ * (NOTE(review): name suggests it counts unmasked fragments; the
+ * definition is not visible in this chunk -- confirm there).
+ */
+void
+lp_build_occlusion_count(struct gallivm_state *gallivm,
+                         struct lp_type type,
+                         LLVMValueRef maskvalue,
+                         LLVMValueRef counter);
+
+#endif /* !LP_BLD_DEPTH_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c
new file mode 100644
index 000000000..ceac86abe
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -0,0 +1,819 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_scan.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
+#include "lp_bld_interp.h"
+
+
+/*
+ * The shader JIT function operates on blocks of quads.
+ * Each block has 2x2 quads and each quad has 2x2 pixels.
+ *
+ * We iterate over the quads in order 0, 1, 2, 3:
+ *
+ * #################
+ * # | # | #
+ * #---0---#---1---#
+ * # | # | #
+ * #################
+ * # | # | #
+ * #---2---#---3---#
+ * # | # | #
+ * #################
+ *
+ * If we iterate over multiple quads at once, quads 01 and 23 are processed
+ * together.
+ *
+ * Within each quad, we have four pixels which are represented in SOA
+ * order:
+ *
+ * #########
+ * # 0 | 1 #
+ * #---+---#
+ * # 2 | 3 #
+ * #########
+ *
+ * So the green channel (for example) of the four pixels is stored in
+ * a single vector register: {g0, g1, g2, g3}.
+ * The order stays the same even with multiple quads:
+ * 0 1 4 5
+ * 2 3 6 7
+ * is stored as g0..g7
+ */
+
+
+/**
+ * Do one perspective divide per quad.
+ *
+ * For perspective interpolation, the final attribute value is given
+ *
+ * a' = a/w = a * oow
+ *
+ * where
+ *
+ * a = a0 + dadx*x + dady*y
+ * w = w0 + dwdx*x + dwdy*y
+ * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
+ *
+ * Instead of computing the division per pixel, with this macro we compute the
+ * division on the upper left pixel of each quad, and use a linear
+ * approximation in the remaining pixels, given by:
+ *
+ * da'dx = (dadx - dwdx*a)*oow
+ * da'dy = (dady - dwdy*a)*oow
+ *
+ * Ironically, this actually makes things slower -- probably because the
+ * divide hardware unit is rarely used, whereas the multiply unit is typically
+ * already saturated.
+ */
+#define PERSPECTIVE_DIVIDE_PER_QUAD 0
+
+
+/* Per-pixel x/y offsets within a 4x4 block, indexed by SoA element.
+ * Each quad is 2x2 pixels and quads are visited in the order described
+ * in the file comment above: elements 0-3 are quad 0, 4-7 quad 1, etc.
+ */
+static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};
+static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};
+
+
+/**
+ * Attach a debug name to an LLVM value for readable IR dumps.
+ * Attrib 0 is the position ("pos.x" etc.); higher attribs are shader
+ * inputs, numbered from 0 in the name (hence "attrib - 1").
+ */
+static void
+attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
+{
+   if(attrib == 0)
+      lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
+   else
+      lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
+}
+
+/**
+ * Build constant per-pixel x/y offset vectors for the group of quads
+ * starting at quad_start_index, using the static quad_offset tables.
+ * Bit 0 of quad_start_index shifts x by 2 pixels, bit 1 shifts y by 2.
+ */
+static void
+calc_offsets(struct lp_build_context *coeff_bld,
+             unsigned quad_start_index,
+             LLVMValueRef *pixoffx,
+             LLVMValueRef *pixoffy)
+{
+   unsigned i;
+   unsigned num_pix = coeff_bld->type.length;
+   struct gallivm_state *gallivm = coeff_bld->gallivm;
+   LLVMBuilderRef builder = coeff_bld->gallivm->builder;
+   LLVMValueRef nr, pixxf, pixyf;
+
+   *pixoffx = coeff_bld->undef;
+   *pixoffy = coeff_bld->undef;
+
+   /* Insert one constant float offset per vector element. */
+   for (i = 0; i < num_pix; i++) {
+      nr = lp_build_const_int32(gallivm, i);
+      pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
+                                   (quad_start_index & 1) * 2);
+      pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
+                                   (quad_start_index & 2));
+      *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
+      *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
+   }
+}
+
+
+/* Much easier, and significantly less instructions in the per-stamp
+ * part (less than half) but overall more instructions so a loss if
+ * most quads are active. Might be a win though with larger vectors.
+ * No ability to do per-quad divide (doable but not implemented)
+ * Could be made to work with passed in pixel offsets (i.e. active quad merging).
+ */
+/**
+ * Fetch the interpolation coefficients (a0, dadx, dady) for every
+ * attribute from the arrays passed into the JIT function and keep them
+ * in AoS form in bld->a0aos / dadxaos / dadyaos.  The per-pixel
+ * evaluation is done later by attribs_update_simple().
+ */
+static void
+coeffs_init_simple(struct lp_build_interp_soa_context *bld,
+                   LLVMValueRef a0_ptr,
+                   LLVMValueRef dadx_ptr,
+                   LLVMValueRef dady_ptr)
+{
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   struct lp_build_context *setup_bld = &bld->setup_bld;
+   struct gallivm_state *gallivm = coeff_bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned attrib;
+
+   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      /*
+       * always fetch all 4 values for performance/simplicity
+       * Note: we do that here because it seems to generate better
+       * code. It generates a lot of moves initially but less
+       * moves later. As far as I can tell this looks like a
+       * llvm issue, instead of simply reloading the values from
+       * the passed in pointers it if it runs out of registers
+       * it spills/reloads them. Maybe some optimization passes
+       * would help.
+       * Might want to investigate this again later.
+       */
+      const unsigned interp = bld->interp[attrib];
+      /* Each attribute occupies TGSI_NUM_CHANNELS consecutive floats. */
+      LLVMValueRef index = lp_build_const_int32(gallivm,
+                                attrib * TGSI_NUM_CHANNELS);
+      LLVMValueRef ptr;
+      LLVMValueRef dadxaos = setup_bld->zero;
+      LLVMValueRef dadyaos = setup_bld->zero;
+      LLVMValueRef a0aos = setup_bld->zero;
+
+      switch (interp) {
+      case LP_INTERP_PERSPECTIVE:
+         /* fall-through */
+
+      case LP_INTERP_LINEAR:
+         /* Load the 4-channel dadx/dady vectors for this attribute. */
+         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+                                LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadxaos = LLVMBuildLoad(builder, ptr, "");
+
+         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+                                LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadyaos = LLVMBuildLoad(builder, ptr, "");
+
+         attrib_name(dadxaos, attrib, 0, ".dadxaos");
+         attrib_name(dadyaos, attrib, 0, ".dadyaos");
+         /* fall-through */
+
+      case LP_INTERP_CONSTANT:
+      case LP_INTERP_FACING:
+         /* All modes except POSITION need the a0 base value. */
+         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+                                LLVMPointerType(setup_bld->vec_type, 0), "");
+         a0aos = LLVMBuildLoad(builder, ptr, "");
+         attrib_name(a0aos, attrib, 0, ".a0aos");
+         break;
+
+      case LP_INTERP_POSITION:
+         /* Nothing to do as the position coeffs are already setup in slot 0 */
+         continue;
+
+      default:
+         assert(0);
+         break;
+      }
+      bld->a0aos[attrib] = a0aos;
+      bld->dadxaos[attrib] = dadxaos;
+      bld->dadyaos[attrib] = dadyaos;
+   }
+}
+
+/**
+ * Interpolate the shader input attribute values.
+ * This is called for each (group of) quad(s).
+ *
+ * Loads the precomputed pixel offsets for the current loop iteration
+ * from bld->xoffset_store / yoffset_store, adds the block origin
+ * (bld->x, bld->y), then evaluates a = a0 + x*dadx + y*dady for every
+ * enabled channel of attribs [start, end), with a 1/w multiply for
+ * perspective-interpolated attributes.  Results go to bld->attribs.
+ */
+static void
+attribs_update_simple(struct lp_build_interp_soa_context *bld,
+                      struct gallivm_state *gallivm,
+                      LLVMValueRef loop_iter,
+                      int start,
+                      int end)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   struct lp_build_context *setup_bld = &bld->setup_bld;
+   LLVMValueRef oow = NULL;   /* 1/w, computed lazily once per call */
+   unsigned attrib;
+   LLVMValueRef pixoffx;
+   LLVMValueRef pixoffy;
+   LLVMValueRef ptr;
+
+   /* could do this with code-generated passed in pixel offsets too */
+
+   assert(loop_iter);
+   ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
+   pixoffx = LLVMBuildLoad(builder, ptr, "");
+   ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
+   pixoffy = LLVMBuildLoad(builder, ptr, "");
+
+   /* Absolute pixel coordinates = block origin + per-pixel offsets. */
+   pixoffx = LLVMBuildFAdd(builder, pixoffx,
+                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
+   pixoffy = LLVMBuildFAdd(builder, pixoffy,
+                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
+
+   for (attrib = start; attrib < end; attrib++) {
+      const unsigned mask = bld->mask[attrib];
+      const unsigned interp = bld->interp[attrib];
+      unsigned chan;
+
+      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+         if (mask & (1 << chan)) {
+            LLVMValueRef index;
+            LLVMValueRef dadx = coeff_bld->zero;
+            LLVMValueRef dady = coeff_bld->zero;
+            LLVMValueRef a = coeff_bld->zero;
+
+            index = lp_build_const_int32(gallivm, chan);
+            switch (interp) {
+            case LP_INTERP_PERSPECTIVE:
+               /* fall-through */
+
+            case LP_INTERP_LINEAR:
+               /* Position x/y in attrib 0 have trivial coefficients:
+                * a = pos_offset (pixel center), da/dx resp. da/dy = 1.
+                */
+               if (attrib == 0 && chan == 0) {
+                  dadx = coeff_bld->one;
+                  if (bld->pos_offset) {
+                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
+                  }
+               }
+               else if (attrib == 0 && chan == 1) {
+                  dady = coeff_bld->one;
+                  if (bld->pos_offset) {
+                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
+                  }
+               }
+               else {
+                  /* Broadcast this channel's coefficients out of the
+                   * AoS vectors fetched by coeffs_init_simple().
+                   */
+                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                    coeff_bld->type, bld->dadxaos[attrib],
+                                                    index);
+                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                    coeff_bld->type, bld->dadyaos[attrib],
+                                                    index);
+                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                 coeff_bld->type, bld->a0aos[attrib],
+                                                 index);
+               }
+               /*
+                * a = a0 + (x * dadx + y * dady)
+                */
+               dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
+               dady = LLVMBuildFMul(builder, dady, pixoffy, "");
+               a = LLVMBuildFAdd(builder, a, dadx, "");
+               a = LLVMBuildFAdd(builder, a, dady, "");
+
+               if (interp == LP_INTERP_PERSPECTIVE) {
+                  /* Divide by w; w itself is linearly interpolated as
+                   * attrib 0 chan 3 (position.w), which must already
+                   * have been updated -- hence attrib != 0.
+                   */
+                  if (oow == NULL) {
+                     LLVMValueRef w = bld->attribs[0][3];
+                     assert(attrib != 0);
+                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
+                     oow = lp_build_rcp(coeff_bld, w);
+                  }
+                  a = lp_build_mul(coeff_bld, a, oow);
+               }
+               break;
+
+            case LP_INTERP_CONSTANT:
+            case LP_INTERP_FACING:
+               /* Constant across the stamp: just broadcast a0. */
+               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                              coeff_bld->type, bld->a0aos[attrib],
+                                              index);
+               break;
+
+            case LP_INTERP_POSITION:
+               /* Alias the already-interpolated position in slot 0. */
+               assert(attrib > 0);
+               a = bld->attribs[0][chan];
+               break;
+
+            default:
+               assert(0);
+               break;
+            }
+
+            if ((attrib == 0) && (chan == 2)){
+               /* FIXME: Depth values can exceed 1.0, due to the fact that
+                * setup interpolation coefficients refer to (0,0) which causes
+                * precision loss. So we must clamp to 1.0 here to avoid artifacts
+                */
+               a = lp_build_min(coeff_bld, a, coeff_bld->one);
+            }
+            bld->attribs[attrib][chan] = a;
+         }
+      }
+   }
+}
+
+/**
+ * Initialize the bld->a, dadq fields. This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ *
+ * Used by the non-simple (incremental) interpolation path: per
+ * attribute/channel it precomputes the value at the first quad's
+ * upper-left corner (stored via an alloca in bld->a) and the per-quad
+ * delta vector dadq (stored in bld->dadq) that attribs_update() uses.
+ */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr)
+{
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   struct lp_build_context *setup_bld = &bld->setup_bld;
+   struct gallivm_state *gallivm = coeff_bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef pixoffx, pixoffy;
+   unsigned attrib;
+   unsigned chan;
+   unsigned i;
+
+   /* Constant pixel-offset vectors for the first quad group. */
+   pixoffx = coeff_bld->undef;
+   pixoffy = coeff_bld->undef;
+   for (i = 0; i < coeff_bld->type.length; i++) {
+      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
+      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
+      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
+      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
+      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+   }
+
+
+   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      const unsigned mask = bld->mask[attrib];
+      const unsigned interp = bld->interp[attrib];
+      LLVMValueRef index = lp_build_const_int32(gallivm,
+                                attrib * TGSI_NUM_CHANNELS);
+      LLVMValueRef ptr;
+      LLVMValueRef dadxaos = setup_bld->zero;
+      LLVMValueRef dadyaos = setup_bld->zero;
+      LLVMValueRef a0aos = setup_bld->zero;
+
+      /* always fetch all 4 values for performance/simplicity */
+      switch (interp) {
+      case LP_INTERP_PERSPECTIVE:
+         /* fall-through */
+
+      case LP_INTERP_LINEAR:
+         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+                                LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadxaos = LLVMBuildLoad(builder, ptr, "");
+
+         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+                                LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadyaos = LLVMBuildLoad(builder, ptr, "");
+
+         attrib_name(dadxaos, attrib, 0, ".dadxaos");
+         attrib_name(dadyaos, attrib, 0, ".dadyaos");
+         /* fall-through */
+
+      case LP_INTERP_CONSTANT:
+      case LP_INTERP_FACING:
+         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+                                LLVMPointerType(setup_bld->vec_type, 0), "");
+         a0aos = LLVMBuildLoad(builder, ptr, "");
+         attrib_name(a0aos, attrib, 0, ".a0aos");
+         break;
+
+      case LP_INTERP_POSITION:
+         /* Nothing to do as the position coeffs are already setup in slot 0 */
+         continue;
+
+      default:
+         assert(0);
+         break;
+      }
+
+      /*
+       * a = a0 + (x * dadx + y * dady)
+       * a0aos is the attrib value at top left corner of stamp
+       */
+      if (interp != LP_INTERP_CONSTANT &&
+          interp != LP_INTERP_FACING) {
+         LLVMValueRef axaos, ayaos;
+         axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
+                               dadxaos, "");
+         ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
+                               dadyaos, "");
+         a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
+         a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
+      }
+
+      /*
+       * dadq = {0, dadx, dady, dadx + dady}
+       * for two quads (side by side) this is:
+       * {0, dadx, dady, dadx+dady, 2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}
+       * (follows from the quad_offset_x/y tables above)
+       */
+      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+         /* this generates a CRAPLOAD of shuffles... */
+         if (mask & (1 << chan)) {
+            LLVMValueRef dadx, dady;
+            LLVMValueRef dadq, dadq2;
+            LLVMValueRef a;
+            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);
+
+            /* Position x/y have implicit coefficients (slope 1). */
+            if (attrib == 0 && chan == 0) {
+               a = bld->x;
+               if (bld->pos_offset) {
+                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
+               }
+               a = lp_build_broadcast_scalar(coeff_bld, a);
+               dadx = coeff_bld->one;
+               dady = coeff_bld->zero;
+            }
+            else if (attrib == 0 && chan == 1) {
+               a = bld->y;
+               if (bld->pos_offset) {
+                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
+               }
+               a = lp_build_broadcast_scalar(coeff_bld, a);
+               dady = coeff_bld->one;
+               dadx = coeff_bld->zero;
+            }
+            else {
+               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                 coeff_bld->type, dadxaos, chan_index);
+               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                 coeff_bld->type, dadyaos, chan_index);
+
+               /*
+                * a = {a, a, a, a}
+                */
+               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                              coeff_bld->type, a0aos, chan_index);
+            }
+
+            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
+            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
+            dadq = LLVMBuildFAdd(builder, dadx, dady, "");
+
+            /*
+             * Compute the attrib values on the upper-left corner of each
+             * group of quads.
+             * Note that if we process 2 quads at once this doesn't
+             * correspond exactly to what we want.
+             * We need to access elem 0 and 2 respectively later if we process
+             * 2 quads at once.
+             */
+
+            if (interp != LP_INTERP_CONSTANT &&
+                interp != LP_INTERP_FACING) {
+               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
+               a = LLVMBuildFAdd(builder, a, dadq2, "");
+            }
+
+#if PERSPECTIVE_DIVIDE_PER_QUAD
+            /*
+             * a *= 1 / w
+             */
+
+            /*
+             * XXX since we're only going to access elements 0,2 out of 8
+             * if we have 8-wide vectors we should do the division only 4-wide.
+             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
+             * in this case.
+             */
+            if (interp == LP_INTERP_PERSPECTIVE) {
+               LLVMValueRef w = bld->a[0][3];
+               assert(attrib != 0);
+               assert(bld->mask[0] & TGSI_WRITEMASK_W);
+               if (!bld->oow) {
+                  bld->oow = lp_build_rcp(coeff_bld, w);
+                  lp_build_name(bld->oow, "oow");
+               }
+               a = lp_build_mul(coeff_bld, a, bld->oow);
+            }
+#endif
+
+            attrib_name(a, attrib, chan, ".a");
+            attrib_name(dadq, attrib, chan, ".dadq");
+
+            /* Stored via alloca so attribs_update() can reload it as
+             * scalar floats indexed by the loop counter.
+             */
+            bld->a[attrib][chan] = lp_build_alloca(gallivm,
+                                                   LLVMTypeOf(a), "");
+            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
+            bld->dadq[attrib][chan] = dadq;
+         }
+      }
+   }
+}
+
+
+/**
+ * Increment the shader input attribute values.
+ * This is called when we move from one quad to the next.
+ *
+ * For each enabled channel of attribs [start, end): reload the base
+ * value for the current quad from the alloca filled by coeffs_init(),
+ * broadcast it, add the precomputed per-quad delta (dadq), and for
+ * perspective attributes multiply by 1/w.  Results go to bld->attribs.
+ */
+static void
+attribs_update(struct lp_build_interp_soa_context *bld,
+               struct gallivm_state *gallivm,
+               LLVMValueRef loop_iter,
+               int start,
+               int end)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   LLVMValueRef oow = NULL;   /* 1/w, computed lazily once per call */
+   unsigned attrib;
+   unsigned chan;
+
+   for(attrib = start; attrib < end; ++attrib) {
+      const unsigned mask = bld->mask[attrib];
+      const unsigned interp = bld->interp[attrib];
+      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+         if(mask & (1 << chan)) {
+            LLVMValueRef a;
+            if (interp == LP_INTERP_CONSTANT ||
+                interp == LP_INTERP_FACING) {
+               /* Constant across quads: just reload the stored value. */
+               a = LLVMBuildLoad(builder, bld->a[attrib][chan], "");
+            }
+            else if (interp == LP_INTERP_POSITION) {
+               /* Alias the already-interpolated position in slot 0. */
+               assert(attrib > 0);
+               a = bld->attribs[0][chan];
+            }
+            else {
+               LLVMValueRef dadq;
+
+               a = bld->a[attrib][chan];
+
+               /*
+                * Broadcast the attribute value for this quad into all elements
+                */
+
+               {
+                  /* stored as vector load as float */
+                  LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
+                        gallivm->context), 0);
+                  LLVMValueRef ptr;
+                  /* Reinterpret the vector alloca as a float array and
+                   * pick the element for the current loop iteration.
+                   */
+                  a = LLVMBuildBitCast(builder, a, ptr_type, "");
+                  ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
+                  a = LLVMBuildLoad(builder, ptr, "");
+                  a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
+               }
+
+               /*
+                * Get the derivatives.
+                */
+
+               dadq = bld->dadq[attrib][chan];
+
+#if PERSPECTIVE_DIVIDE_PER_QUAD
+               /* NOTE(review): this conditionally-compiled path refers
+                * to an undeclared variable 'shuffle' and would not
+                * build if PERSPECTIVE_DIVIDE_PER_QUAD were enabled --
+                * confirm before flipping the macro.
+                */
+               if (interp == LP_INTERP_PERSPECTIVE) {
+                  LLVMValueRef dwdq = bld->dadq[0][3];
+
+                  if (oow == NULL) {
+                     assert(bld->oow);
+                     oow = LLVMBuildShuffleVector(coeff_bld->builder,
+                                                  bld->oow, coeff_bld->undef,
+                                                  shuffle, "");
+                  }
+
+                  dadq = lp_build_sub(coeff_bld,
+                                      dadq,
+                                      lp_build_mul(coeff_bld, a, dwdq));
+                  dadq = lp_build_mul(coeff_bld, dadq, oow);
+               }
+#endif
+
+               /*
+                * Add the derivatives
+                */
+
+               a = lp_build_add(coeff_bld, a, dadq);
+
+#if !PERSPECTIVE_DIVIDE_PER_QUAD
+               if (interp == LP_INTERP_PERSPECTIVE) {
+                  /* Per-pixel divide: w is position.w in slot 0. */
+                  if (oow == NULL) {
+                     LLVMValueRef w = bld->attribs[0][3];
+                     assert(attrib != 0);
+                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
+                     oow = lp_build_rcp(coeff_bld, w);
+                  }
+                  a = lp_build_mul(coeff_bld, a, oow);
+               }
+#endif
+
+               if (attrib == 0 && chan == 2) {
+                  /* FIXME: Depth values can exceed 1.0, due to the fact that
+                   * setup interpolation coefficients refer to (0,0) which causes
+                   * precision loss. So we must clamp to 1.0 here to avoid artifacts
+                   */
+                  a = lp_build_min(coeff_bld, a, coeff_bld->one);
+               }
+
+               attrib_name(a, attrib, chan, "");
+            }
+            bld->attribs[attrib][chan] = a;
+         }
+      }
+   }
+}
+
+
+/**
+ * Generate the position vectors.
+ *
+ * Parameter x0, y0 are the integer values with upper left coordinates.
+ * They are converted to scalar floats and kept in bld->x / bld->y for
+ * later use by the coefficient and update functions.
+ */
+static void
+pos_init(struct lp_build_interp_soa_context *bld,
+         LLVMValueRef x0,
+         LLVMValueRef y0)
+{
+   LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+
+   bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
+   bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
+}
+
+
+/**
+ * Initialize fragment shader input attribute info.
+ *
+ * Sets up the coefficient/setup build contexts, records per-attribute
+ * interpolation mode and write mask (position in slot 0, shader inputs
+ * in slots 1..num_inputs), chooses the pixel-center offset, and
+ * precomputes either the simple-path offset tables + AoS coefficients
+ * (coeffs_init_simple) or the incremental-path data (coeffs_init).
+ */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         struct gallivm_state *gallivm,
+                         unsigned num_inputs,
+                         const struct lp_shader_input *inputs,
+                         boolean pixel_center_integer,
+                         LLVMBuilderRef builder,
+                         struct lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x0,
+                         LLVMValueRef y0)
+{
+   struct lp_type coeff_type;
+   struct lp_type setup_type;
+   unsigned attrib;
+   unsigned chan;
+
+   memset(bld, 0, sizeof *bld);
+
+   /* Per-pixel vectors: float32 x type.length. */
+   memset(&coeff_type, 0, sizeof coeff_type);
+   coeff_type.floating = TRUE;
+   coeff_type.sign = TRUE;
+   coeff_type.width = 32;
+   coeff_type.length = type.length;
+
+   /* Setup (AoS coefficient) vectors: float32 x 4 channels. */
+   memset(&setup_type, 0, sizeof setup_type);
+   setup_type.floating = TRUE;
+   setup_type.sign = TRUE;
+   setup_type.width = 32;
+   setup_type.length = TGSI_NUM_CHANNELS;
+
+
+   /* XXX: we don't support interpolating into any other types */
+   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
+
+   lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
+   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
+
+   /* For convenience */
+   bld->pos = bld->attribs[0];
+   bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
+
+   /* Position */
+   bld->mask[0] = TGSI_WRITEMASK_XYZW;
+   bld->interp[0] = LP_INTERP_LINEAR;
+
+   /* Inputs */
+   for (attrib = 0; attrib < num_inputs; ++attrib) {
+      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
+      bld->interp[1 + attrib] = inputs[attrib].interp;
+   }
+   bld->num_attribs = 1 + num_inputs;
+
+   /* Ensure all masked out input channels have a valid value */
+   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
+      }
+   }
+
+   /* Half-pixel offset to sample at pixel centers, unless the state
+    * requests integer pixel centers.
+    */
+   if (pixel_center_integer) {
+      bld->pos_offset = 0.0;
+   } else {
+      bld->pos_offset = 0.5;
+   }
+
+   pos_init(bld, x0, y0);
+
+   /*
+    * Simple method (single step interpolation) may be slower if vector length
+    * is just 4, but the results are different (generally less accurate) with
+    * the other method, so always use more accurate version.
+    */
+   if (1) {
+      bld->simple_interp = TRUE;
+      {
+         /* XXX this should use a global static table */
+         unsigned i;
+         unsigned num_loops = 16 / type.length;
+         LLVMValueRef pixoffx, pixoffy, index;
+         LLVMValueRef ptr;
+
+         /* Precompute the pixel offsets for every loop iteration of a
+          * 4x4 block and stash them in allocas for later reloads.
+          */
+         bld->xoffset_store = lp_build_array_alloca(gallivm,
+                                                    lp_build_vec_type(gallivm, type),
+                                                    lp_build_const_int32(gallivm, num_loops),
+                                                    "");
+         bld->yoffset_store = lp_build_array_alloca(gallivm,
+                                                    lp_build_vec_type(gallivm, type),
+                                                    lp_build_const_int32(gallivm, num_loops),
+                                                    "");
+         for (i = 0; i < num_loops; i++) {
+            index = lp_build_const_int32(gallivm, i);
+            calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
+            ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
+            LLVMBuildStore(builder, pixoffx, ptr);
+            ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
+            LLVMBuildStore(builder, pixoffy, ptr);
+         }
+      }
+      coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
+   }
+   else {
+      /* Dead branch kept for reference: incremental interpolation. */
+      bld->simple_interp = FALSE;
+      coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+   }
+
+}
+
+
+/*
+ * Advance the position and inputs to the given quad within the block.
+ */
+
+/**
+ * Interpolate all shader inputs (attribs 1..num_attribs-1) for the
+ * quad group selected by quad_start_index, dispatching to the simple
+ * or incremental path chosen at init time.
+ */
+void
+lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
+                                      struct gallivm_state *gallivm,
+                                      LLVMValueRef quad_start_index)
+{
+   if (bld->simple_interp) {
+      attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+   }
+   else {
+      attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+   }
+}
+
+/**
+ * Interpolate only the position (attrib 0) for the quad group selected
+ * by quad_start_index.  Must run before update_inputs_dyn so that
+ * position.w is available for perspective division.
+ */
+void
+lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
+                                   struct gallivm_state *gallivm,
+                                   LLVMValueRef quad_start_index)
+{
+   if (bld->simple_interp) {
+      attribs_update_simple(bld, gallivm, quad_start_index, 0, 1);
+   }
+   else {
+      attribs_update(bld, gallivm, quad_start_index, 0, 1);
+   }
+}
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h
new file mode 100644
index 000000000..9029d2a41
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -0,0 +1,137 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * Special attention is given to the interpolation of side by side quads.
+ * Multiplications are made only for the first quad. Interpolation of
+ * inputs for posterior quads are done exclusively with additions, and
+ * perspective divide if necessary.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_INTERP_H
+#define LP_BLD_INTERP_H
+
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_type.h"
+
+#include "tgsi/tgsi_exec.h"
+
+/**
+ * Describes how to compute the interpolation coefficients (a0, dadx, dady)
+ * from the vertices passed into our triangle/line/point functions by the
+ * draw module.
+ *
+ * Vertices are treated as an array of float[4] values, indexed by
+ * src_index.
+ *
+ * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or
+ * PERSPECTIVE depending on flatshade state.
+ */
+enum lp_interp {
+   LP_INTERP_CONSTANT,     /* flat: a0 only */
+   LP_INTERP_COLOR,        /* resolved to CONSTANT or PERSPECTIVE (see above) */
+   LP_INTERP_LINEAR,       /* a0 + x*dadx + y*dady */
+   LP_INTERP_PERSPECTIVE,  /* linear, then divided by interpolated w */
+   LP_INTERP_POSITION,     /* aliases the position in attrib slot 0 */
+   LP_INTERP_FACING        /* front/back facing value, fetched like CONSTANT */
+};
+
+/** Per fragment-shader-input interpolation description. */
+struct lp_shader_input {
+   uint interp:4;       /* enum lp_interp */
+   uint usage_mask:4;   /* bitmask of TGSI_WRITEMASK_x flags */
+   uint src_index:8;    /* where to find values in incoming vertices */
+   uint cyl_wrap:4;     /* TGSI_CYLINDRICAL_WRAP_x flags */
+   uint padding:12;
+};
+
+
+struct lp_build_interp_soa_context
+{
+   /* TGSI_QUAD_SIZE x float */
+   struct lp_build_context coeff_bld;   /* per-pixel vector build context */
+   struct lp_build_context setup_bld;   /* 4-channel AoS coefficient context */
+
+   unsigned num_attribs;                /* position + shader inputs */
+   unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */
+   enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS];
+   boolean simple_interp;               /* single-step vs incremental path */
+
+   double pos_offset;                   /* 0.5 for pixel centers, 0.0 for integer */
+
+   LLVMValueRef x;                      /* block origin, as scalar floats */
+   LLVMValueRef y;
+
+   /* Incremental path state (coeffs_init/attribs_update). */
+   LLVMValueRef a[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+   /* Simple path state (coeffs_init_simple/attribs_update_simple). */
+   LLVMValueRef a0aos[1 + PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef dadxaos[1 + PIPE_MAX_SHADER_INPUTS];
+   LLVMValueRef dadyaos[1 + PIPE_MAX_SHADER_INPUTS];
+
+   /* Interpolated values for the current quad group. */
+   LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+
+   /* Precomputed per-iteration pixel offsets (simple path allocas). */
+   LLVMValueRef xoffset_store;
+   LLVMValueRef yoffset_store;
+
+   /*
+    * Convenience pointers. Callers may access this one.
+    */
+   const LLVMValueRef *pos;                              /* == attribs[0] */
+   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];      /* == attribs[1..] */
+};
+
+
+/**
+ * Set up the interpolation context: records interpolation mode and
+ * usage mask per attribute and precomputes coefficient data from the
+ * a0/dadx/dady arrays passed to the JIT function.
+ */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+                         struct gallivm_state *gallivm,
+                         unsigned num_inputs,
+                         const struct lp_shader_input *inputs,
+                         boolean pixel_center_integer,
+                         LLVMBuilderRef builder,
+                         struct lp_type type,
+                         LLVMValueRef a0_ptr,
+                         LLVMValueRef dadx_ptr,
+                         LLVMValueRef dady_ptr,
+                         LLVMValueRef x,
+                         LLVMValueRef y);
+
+/** Interpolate all shader inputs for the given quad group. */
+void
+lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
+                                      struct gallivm_state *gallivm,
+                                      LLVMValueRef quad_start_index);
+
+/** Interpolate the position (attrib 0) for the given quad group. */
+void
+lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
+                                   struct gallivm_state *gallivm,
+                                   LLVMValueRef quad_start_index);
+
+#endif /* LP_BLD_INTERP_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c
new file mode 100644
index 000000000..064206fc2
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ * Michel Dänzer
+ */
+
+
+#include "pipe/p_defines.h"
+#include "lp_clear.h"
+#include "lp_context.h"
+#include "lp_setup.h"
+#include "lp_query.h"
+#include "lp_debug.h"
+
+
+/**
+ * Clear the given buffers to the specified values.
+ * No masking, no scissor (clear entire buffer).
+ */
+void
+llvmpipe_clear(struct pipe_context *pipe,
+ unsigned buffers,
+ const union pipe_color_union *color,
+ double depth,
+ unsigned stencil)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (!llvmpipe_check_render_cond(llvmpipe))
+ return;
+
+ if (LP_PERF & PERF_NO_DEPTH)
+ buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
+
+ lp_setup_clear( llvmpipe->setup, color, depth, stencil, buffers );
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h
new file mode 100644
index 000000000..7249929cb
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h
@@ -0,0 +1,44 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ */
+
+#ifndef LP_CLEAR_H
+#define LP_CLEAR_H
+
+#include "pipe/p_state.h"
+struct pipe_context;
+
+extern void
+llvmpipe_clear(struct pipe_context *pipe, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+
+
+#endif /* LP_CLEAR_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
new file mode 100644
index 000000000..80cb6578b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c
@@ -0,0 +1,226 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keithw@vmware.com>
+ */
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/simple_list.h"
+#include "lp_clear.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_perf.h"
+#include "lp_state.h"
+#include "lp_surface.h"
+#include "lp_query.h"
+#include "lp_setup.h"
+
+/* This is only safe if there's just one concurrent context */
+#ifdef PIPE_SUBSYSTEM_EMBEDDED
+#define USE_GLOBAL_LLVM_CONTEXT
+#endif
+
+static void llvmpipe_destroy( struct pipe_context *pipe )
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+ uint i, j;
+
+ lp_print_counters();
+
+ if (llvmpipe->blitter) {
+ util_blitter_destroy(llvmpipe->blitter);
+ }
+
+ /* This will also destroy llvmpipe->setup:
+ */
+ if (llvmpipe->draw)
+ draw_destroy( llvmpipe->draw );
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
+
+ for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+ pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);
+ }
+
+ for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+ pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL);
+ }
+
+ for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) {
+ pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL);
+ }
+
+ for (i = 0; i < Elements(llvmpipe->constants); i++) {
+ for (j = 0; j < Elements(llvmpipe->constants[i]); j++) {
+ pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL);
+ }
+ }
+
+ for (i = 0; i < llvmpipe->num_vertex_buffers; i++) {
+ pipe_resource_reference(&llvmpipe->vertex_buffer[i].buffer, NULL);
+ }
+
+ lp_delete_setup_variants(llvmpipe);
+
+#ifndef USE_GLOBAL_LLVM_CONTEXT
+ LLVMContextDispose(llvmpipe->context);
+#endif
+ llvmpipe->context = NULL;
+
+ align_free( llvmpipe );
+}
+
+static void
+do_flush( struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ llvmpipe_flush(pipe, fence, __FUNCTION__);
+}
+
+
+static void
+llvmpipe_render_condition ( struct pipe_context *pipe,
+ struct pipe_query *query,
+ boolean condition,
+ uint mode )
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+
+ llvmpipe->render_cond_query = query;
+ llvmpipe->render_cond_mode = mode;
+ llvmpipe->render_cond_cond = condition;
+}
+
+struct pipe_context *
+llvmpipe_create_context( struct pipe_screen *screen, void *priv )
+{
+ struct llvmpipe_context *llvmpipe;
+
+ llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16);
+ if (!llvmpipe)
+ return NULL;
+
+ util_init_math();
+
+ memset(llvmpipe, 0, sizeof *llvmpipe);
+
+ make_empty_list(&llvmpipe->fs_variants_list);
+
+ make_empty_list(&llvmpipe->setup_variants_list);
+
+
+ llvmpipe->pipe.screen = screen;
+ llvmpipe->pipe.priv = priv;
+
+ /* Init the pipe context methods */
+ llvmpipe->pipe.destroy = llvmpipe_destroy;
+ llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
+ llvmpipe->pipe.clear = llvmpipe_clear;
+ llvmpipe->pipe.flush = do_flush;
+
+ llvmpipe->pipe.render_condition = llvmpipe_render_condition;
+
+ llvmpipe_init_blend_funcs(llvmpipe);
+ llvmpipe_init_clip_funcs(llvmpipe);
+ llvmpipe_init_draw_funcs(llvmpipe);
+ llvmpipe_init_sampler_funcs(llvmpipe);
+ llvmpipe_init_query_funcs( llvmpipe );
+ llvmpipe_init_vertex_funcs(llvmpipe);
+ llvmpipe_init_so_funcs(llvmpipe);
+ llvmpipe_init_fs_funcs(llvmpipe);
+ llvmpipe_init_vs_funcs(llvmpipe);
+ llvmpipe_init_gs_funcs(llvmpipe);
+ llvmpipe_init_rasterizer_funcs(llvmpipe);
+ llvmpipe_init_context_resource_funcs( &llvmpipe->pipe );
+ llvmpipe_init_surface_functions(llvmpipe);
+
+#ifdef USE_GLOBAL_LLVM_CONTEXT
+ llvmpipe->context = LLVMGetGlobalContext();
+#else
+ llvmpipe->context = LLVMContextCreate();
+#endif
+
+ if (!llvmpipe->context)
+ goto fail;
+
+ /*
+ * Create drawing context and plug our rendering stage into it.
+ */
+ llvmpipe->draw = draw_create_with_llvm_context(&llvmpipe->pipe,
+ llvmpipe->context);
+ if (!llvmpipe->draw)
+ goto fail;
+
+ /* FIXME: devise alternative to draw_texture_samplers */
+
+ llvmpipe->setup = lp_setup_create( &llvmpipe->pipe,
+ llvmpipe->draw );
+ if (!llvmpipe->setup)
+ goto fail;
+
+ llvmpipe->blitter = util_blitter_create(&llvmpipe->pipe);
+ if (!llvmpipe->blitter) {
+ goto fail;
+ }
+
+ /* must be done before installing Draw stages */
+ util_blitter_cache_all_shaders(llvmpipe->blitter);
+
+ /* plug in AA line/point stages */
+ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe);
+ draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe);
+ draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe);
+
+ /* convert points and lines into triangles:
+ * (otherwise, draw points and lines natively)
+ */
+ draw_wide_point_sprites(llvmpipe->draw, FALSE);
+ draw_enable_point_sprites(llvmpipe->draw, FALSE);
+ draw_wide_point_threshold(llvmpipe->draw, 10000.0);
+ draw_wide_line_threshold(llvmpipe->draw, 10000.0);
+
+ lp_reset_counters();
+
+ return &llvmpipe->pipe;
+
+ fail:
+ llvmpipe_destroy(&llvmpipe->pipe);
+ return NULL;
+}
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
new file mode 100644
index 000000000..c273b25f0
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h
@@ -0,0 +1,179 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+
+#ifndef LP_CONTEXT_H
+#define LP_CONTEXT_H
+
+#include "pipe/p_context.h"
+
+#include "draw/draw_vertex.h"
+#include "util/u_blitter.h"
+
+#include "lp_tex_sample.h"
+#include "lp_jit.h"
+#include "lp_setup.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+
+
+struct llvmpipe_vbuf_render;
+struct draw_context;
+struct draw_stage;
+struct draw_vertex_shader;
+struct lp_fragment_shader;
+struct lp_blend_state;
+struct lp_setup_context;
+struct lp_setup_variant;
+struct lp_velems_state;
+
+struct llvmpipe_context {
+ struct pipe_context pipe; /**< base class */
+
+ /** Constant state objects */
+ const struct pipe_blend_state *blend;
+ struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_rasterizer_state *rasterizer;
+ struct lp_fragment_shader *fs;
+ struct draw_vertex_shader *vs;
+ const struct lp_geometry_shader *gs;
+ const struct lp_velems_state *velems;
+ const struct lp_so_state *so;
+
+ /** Other rendering state */
+ unsigned sample_mask;
+ struct pipe_blend_color blend_color;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_clip_state clip;
+ struct pipe_constant_buffer constants[PIPE_SHADER_TYPES][LP_MAX_TGSI_CONST_BUFFERS];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
+ struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+ struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct pipe_index_buffer index_buffer;
+ struct pipe_resource *mapped_vs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ struct pipe_resource *mapped_gs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+ unsigned num_samplers[PIPE_SHADER_TYPES];
+ unsigned num_sampler_views[PIPE_SHADER_TYPES];
+
+ unsigned num_vertex_buffers;
+
+ struct draw_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
+ int num_so_targets;
+ struct pipe_query_data_so_statistics so_stats;
+
+ struct pipe_query_data_pipeline_statistics pipeline_statistics;
+ unsigned active_statistics_queries;
+
+ unsigned active_occlusion_queries;
+
+ unsigned dirty; /**< Mask of LP_NEW_x flags */
+
+ /** Mapped vertex buffers */
+ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
+
+ /** Vertex format */
+ struct vertex_info vertex_info;
+
+ /** Which vertex shader output slot contains color */
+ int color_slot[2];
+
+ /** Which vertex shader output slot contains bcolor */
+ int bcolor_slot[2];
+
+ /** Which vertex shader output slot contains point size */
+ int psize_slot;
+
+ /** Which vertex shader output slot contains viewport index */
+ int viewport_index_slot;
+
+ /** Which geometry shader output slot contains layer */
+ int layer_slot;
+
+ /** A fake frontface output for unfilled primitives */
+ int face_slot;
+
+ /** Depth format and bias settings. */
+ boolean floating_point_depth;
+ double mrd; /**< minimum resolvable depth value, for polygon offset */
+
+ /** The tiling engine */
+ struct lp_setup_context *setup;
+ struct lp_setup_variant setup_variant;
+
+ /** The primitive drawing context */
+ struct draw_context *draw;
+
+ struct blitter_context *blitter;
+
+ unsigned tex_timestamp;
+ boolean no_rast;
+
+ /** List of all fragment shader variants */
+ struct lp_fs_variant_list_item fs_variants_list;
+ unsigned nr_fs_variants;
+ unsigned nr_fs_instrs;
+
+ struct lp_setup_variant_list_item setup_variants_list;
+ unsigned nr_setup_variants;
+
+ /** Conditional query object and mode */
+ struct pipe_query *render_cond_query;
+ uint render_cond_mode;
+ boolean render_cond_cond;
+
+ /** The LLVMContext to use for LLVM related work */
+ LLVMContextRef context;
+};
+
+
+struct pipe_context *
+llvmpipe_create_context( struct pipe_screen *screen, void *priv );
+
+struct pipe_resource *
+llvmpipe_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned bind_flags);
+
+
+static inline struct llvmpipe_context *
+llvmpipe_context( struct pipe_context *pipe )
+{
+ return (struct llvmpipe_context *)pipe;
+}
+
+#endif /* LP_CONTEXT_H */
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h
new file mode 100644
index 000000000..1038c5fe1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -0,0 +1,88 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_DEBUG_H
+#define LP_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+extern void
+st_print_current(void);
+
+
+#define DEBUG_PIPE 0x1
+#define DEBUG_TGSI 0x2
+#define DEBUG_TEX 0x4
+#define DEBUG_SETUP 0x10
+#define DEBUG_RAST 0x20
+#define DEBUG_QUERY 0x40
+#define DEBUG_SCREEN 0x80
+#define DEBUG_COUNTERS 0x800
+#define DEBUG_SCENE 0x1000
+#define DEBUG_FENCE 0x2000
+#define DEBUG_MEM 0x4000
+#define DEBUG_FS 0x8000
+
+/* Performance flags. These are active even on release builds.
+ */
+#define PERF_TEX_MEM 0x1 /* minimize texture cache footprint */
+#define PERF_NO_MIP_LINEAR 0x2 /* MIP_FILTER_LINEAR ==> _NEAREST */
+#define PERF_NO_MIPMAPS 0x4 /* MIP_FILTER_NONE always */
+#define PERF_NO_LINEAR 0x8 /* FILTER_NEAREST always */
+#define PERF_NO_TEX 0x10 /* sample white always */
+#define PERF_NO_BLEND 0x20 /* disable blending */
+#define PERF_NO_DEPTH 0x40 /* disable depth buffering entirely */
+#define PERF_NO_ALPHATEST 0x80 /* disable alpha testing */
+
+
+extern int LP_PERF;
+
+#ifdef DEBUG
+extern int LP_DEBUG;
+#else
+#define LP_DEBUG 0
+#endif
+
+void st_debug_init( void );
+
+static inline void
+LP_DBG( unsigned flag, const char *fmt, ... )
+{
+ if (LP_DEBUG & flag)
+ {
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+ }
+}
+
+
+#endif /* LP_DEBUG_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
new file mode 100644
index 000000000..edfb20409
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -0,0 +1,169 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "util/u_draw.h"
+#include "util/u_prim.h"
+
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_query.h"
+
+#include "draw/draw_context.h"
+
+
+
+/**
+ * Draw vertex arrays, with optional indexing, optional instancing.
+ * All the other drawing functions are implemented in terms of this function.
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off
+ * the drawing to the 'draw' module.
+ */
+static void
+llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+ struct draw_context *draw = lp->draw;
+ const void *mapped_indices = NULL;
+ unsigned i;
+
+ if (!llvmpipe_check_render_cond(lp))
+ return;
+
+ if (info->indirect) {
+ util_draw_indirect(pipe, info);
+ return;
+ }
+
+ if (lp->dirty)
+ llvmpipe_update_derived( lp );
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < lp->num_vertex_buffers; i++) {
+ const void *buf = lp->vertex_buffer[i].user_buffer;
+ size_t size = ~0;
+ if (!buf) {
+ if (!lp->vertex_buffer[i].buffer) {
+ continue;
+ }
+ buf = llvmpipe_resource_data(lp->vertex_buffer[i].buffer);
+ size = lp->vertex_buffer[i].buffer->width0;
+ }
+ draw_set_mapped_vertex_buffer(draw, i, buf, size);
+ }
+
+ /* Map index buffer, if present */
+ if (info->indexed) {
+ unsigned available_space = ~0;
+ mapped_indices = lp->index_buffer.user_buffer;
+ if (!mapped_indices) {
+ mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer);
+ if (lp->index_buffer.buffer->width0 > lp->index_buffer.offset)
+ available_space =
+ (lp->index_buffer.buffer->width0 - lp->index_buffer.offset);
+ else
+ available_space = 0;
+ }
+ draw_set_indexes(draw,
+ (ubyte *) mapped_indices + lp->index_buffer.offset,
+ lp->index_buffer.index_size, available_space);
+ }
+
+ for (i = 0; i < lp->num_so_targets; i++) {
+ void *buf = 0;
+ if (lp->so_targets[i]) {
+ buf = llvmpipe_resource(lp->so_targets[i]->target.buffer)->data;
+ lp->so_targets[i]->mapping = buf;
+ }
+ }
+ draw_set_mapped_so_targets(draw, lp->num_so_targets,
+ lp->so_targets);
+
+ llvmpipe_prepare_vertex_sampling(lp,
+ lp->num_sampler_views[PIPE_SHADER_VERTEX],
+ lp->sampler_views[PIPE_SHADER_VERTEX]);
+ llvmpipe_prepare_geometry_sampling(lp,
+ lp->num_sampler_views[PIPE_SHADER_GEOMETRY],
+ lp->sampler_views[PIPE_SHADER_GEOMETRY]);
+ if (lp->gs && lp->gs->no_tokens) {
+ /* we have an empty geometry shader with stream output, so
+ attach the stream output info to the current vertex shader */
+ if (lp->vs) {
+ draw_vs_attach_so(lp->vs, &lp->gs->stream_output);
+ }
+ }
+ draw_collect_pipeline_statistics(draw,
+ lp->active_statistics_queries > 0);
+
+ /* draw! */
+ draw_vbo(draw, info);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < lp->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL, 0);
+ }
+ if (mapped_indices) {
+ draw_set_indexes(draw, NULL, 0, 0);
+ }
+ draw_set_mapped_so_targets(draw, 0, NULL);
+
+ if (lp->gs && lp->gs->no_tokens) {
+ /* we have attached stream output to the vs for rendering,
+ now lets reset it */
+ if (lp->vs) {
+ draw_vs_reset_so(lp->vs);
+ }
+ }
+
+ llvmpipe_cleanup_vertex_sampling(lp);
+ llvmpipe_cleanup_geometry_sampling(lp);
+
+ /*
+ * TODO: Flush only when a user vertex/index buffer is present
+ * (or even better, modify draw module to do this
+ * internally when this condition is seen?)
+ */
+ draw_flush(draw);
+}
+
+
+void
+llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c
new file mode 100644
index 000000000..a21a3c744
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -0,0 +1,127 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "lp_debug.h"
+#include "lp_fence.h"
+
+
+/**
+ * Create a new fence object.
+ *
+ * The rank will be the number of bins in the scene. Whenever a rendering
+ * thread hits a fence command, it'll increment the fence counter. When
+ * the counter == the rank, the fence is finished.
+ *
+ * \param rank the expected finished value of the fence counter.
+ */
+struct lp_fence *
+lp_fence_create(unsigned rank)
+{
+ static int fence_id;
+ struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
+
+ if (!fence)
+ return NULL;
+
+ pipe_reference_init(&fence->reference, 1);
+
+ pipe_mutex_init(fence->mutex);
+ pipe_condvar_init(fence->signalled);
+
+ fence->id = fence_id++;
+ fence->rank = rank;
+
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
+ return fence;
+}
+
+
+/** Destroy a fence. Called when refcount hits zero. */
+void
+lp_fence_destroy(struct lp_fence *fence)
+{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
+ pipe_mutex_destroy(fence->mutex);
+ pipe_condvar_destroy(fence->signalled);
+ FREE(fence);
+}
+
+
+/**
+ * Called by the rendering threads to increment the fence counter.
+ * When the counter == the rank, the fence is finished.
+ */
+void
+lp_fence_signal(struct lp_fence *fence)
+{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, fence->id);
+
+ pipe_mutex_lock(fence->mutex);
+
+ fence->count++;
+ assert(fence->count <= fence->rank);
+
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s count=%u rank=%u\n", __FUNCTION__,
+ fence->count, fence->rank);
+
+ /* Wakeup all threads waiting on the mutex:
+ */
+ pipe_condvar_broadcast(fence->signalled);
+
+ pipe_mutex_unlock(fence->mutex);
+}
+
+boolean
+lp_fence_signalled(struct lp_fence *f)
+{
+ return f->count == f->rank;
+}
+
+void
+lp_fence_wait(struct lp_fence *f)
+{
+ if (LP_DEBUG & DEBUG_FENCE)
+ debug_printf("%s %d\n", __FUNCTION__, f->id);
+
+ pipe_mutex_lock(f->mutex);
+ assert(f->issued);
+ while (f->count < f->rank) {
+ pipe_condvar_wait(f->signalled, f->mutex);
+ }
+ pipe_mutex_unlock(f->mutex);
+}
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h
new file mode 100644
index 000000000..d7f0c153e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_FENCE_H
+#define LP_FENCE_H
+
+
+#include "os/os_thread.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+
+struct pipe_screen;
+
+
+struct lp_fence
+{
+ struct pipe_reference reference;
+ unsigned id;
+
+ pipe_mutex mutex;
+ pipe_condvar signalled;
+
+ boolean issued;
+ unsigned rank;
+ unsigned count;
+};
+
+
+struct lp_fence *
+lp_fence_create(unsigned rank);
+
+
+void
+lp_fence_signal(struct lp_fence *fence);
+
+boolean
+lp_fence_signalled(struct lp_fence *fence);
+
+void
+lp_fence_wait(struct lp_fence *fence);
+
+void
+llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
+
+
+void
+lp_fence_destroy(struct lp_fence *fence);
+
+static inline void
+lp_fence_reference(struct lp_fence **ptr,
+ struct lp_fence *f)
+{
+ struct lp_fence *old = *ptr;
+
+ if (pipe_reference(&old->reference, &f->reference)) {
+ lp_fence_destroy(old);
+ }
+
+ *ptr = f;
+}
+
+static inline boolean
+lp_fence_issued(const struct lp_fence *fence)
+{
+ return fence->issued;
+}
+
+
+#endif /* LP_FENCE_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c
new file mode 100644
index 000000000..268aab26c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -0,0 +1,131 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keithw@vmware.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "util/u_string.h"
+#include "draw/draw_context.h"
+#include "lp_flush.h"
+#include "lp_context.h"
+#include "lp_setup.h"
+
+
+/**
+ * \param fence if non-null, returns pointer to a fence which can be waited on
+ */
+void
+llvmpipe_flush( struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ const char *reason)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ draw_flush(llvmpipe->draw);
+
+ /* ask the setup module to flush */
+ lp_setup_flush(llvmpipe->setup, fence, reason);
+
+ /* Enable to dump BMPs of the color/depth buffers each frame */
+ if (0) {
+ static unsigned frame_no = 1;
+ char filename[256];
+ unsigned i;
+
+ for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
+ util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]);
+ }
+
+ if (0) {
+ util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf);
+ }
+
+ ++frame_no;
+ }
+}
+
+void
+llvmpipe_finish( struct pipe_context *pipe,
+ const char *reason )
+{
+ struct pipe_fence_handle *fence = NULL;
+ llvmpipe_flush(pipe, &fence, reason);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, PIPE_TIMEOUT_INFINITE);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+}
+
+/**
+ * Flush context if necessary.
+ *
+ * Returns FALSE if it would have block, but do_not_block was set, TRUE
+ * otherwise.
+ *
+ * TODO: move this logic to an auxiliary library?
+ */
+boolean
+llvmpipe_flush_resource(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ boolean read_only,
+ boolean cpu_access,
+ boolean do_not_block,
+ const char *reason)
+{
+ unsigned referenced;
+
+ referenced = llvmpipe_is_resource_referenced(pipe, resource, level);
+
+ if ((referenced & LP_REFERENCED_FOR_WRITE) ||
+ ((referenced & LP_REFERENCED_FOR_READ) && !read_only)) {
+
+ if (cpu_access) {
+ /*
+ * Flush and wait.
+ */
+ if (do_not_block)
+ return FALSE;
+
+ llvmpipe_finish(pipe, reason);
+ } else {
+ /*
+ * Just flush.
+ */
+
+ llvmpipe_flush(pipe, NULL, reason);
+ }
+ }
+
+ return TRUE;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h
new file mode 100644
index 000000000..68f513028
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_FLUSH_H
+#define LP_FLUSH_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_context;
+struct pipe_fence_handle;
+struct pipe_resource;
+
+void
+llvmpipe_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ const char *reason);
+
+void
+llvmpipe_finish( struct pipe_context *pipe,
+ const char *reason );
+
+boolean
+llvmpipe_flush_resource(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned level,
+ boolean read_only,
+ boolean cpu_access,
+ boolean do_not_block,
+ const char *reason);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
new file mode 100644
index 000000000..9acde4f1b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -0,0 +1,246 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * C - JIT interfaces
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_memory.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_context.h"
+#include "lp_jit.h"
+
+
+static void
+lp_jit_create_types(struct lp_fragment_shader_variant *lp)
+{
+ struct gallivm_state *gallivm = lp->gallivm;
+ LLVMContextRef lc = gallivm->context;
+ LLVMTypeRef viewport_type, texture_type, sampler_type;
+
+ /* struct lp_jit_viewport */
+ {
+ LLVMTypeRef elem_types[LP_JIT_VIEWPORT_NUM_FIELDS];
+
+ elem_types[LP_JIT_VIEWPORT_MIN_DEPTH] =
+ elem_types[LP_JIT_VIEWPORT_MAX_DEPTH] = LLVMFloatTypeInContext(lc);
+
+ viewport_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, min_depth,
+ gallivm->target, viewport_type,
+ LP_JIT_VIEWPORT_MIN_DEPTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, max_depth,
+ gallivm->target, viewport_type,
+ LP_JIT_VIEWPORT_MAX_DEPTH);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_viewport,
+ gallivm->target, viewport_type);
+ }
+
+ /* struct lp_jit_texture */
+ {
+ LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS];
+
+ elem_types[LP_JIT_TEXTURE_WIDTH] =
+ elem_types[LP_JIT_TEXTURE_HEIGHT] =
+ elem_types[LP_JIT_TEXTURE_DEPTH] =
+ elem_types[LP_JIT_TEXTURE_FIRST_LEVEL] =
+ elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc);
+ elem_types[LP_JIT_TEXTURE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+ elem_types[LP_JIT_TEXTURE_ROW_STRIDE] =
+ elem_types[LP_JIT_TEXTURE_IMG_STRIDE] =
+ elem_types[LP_JIT_TEXTURE_MIP_OFFSETS] =
+ LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS);
+
+ texture_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_DEPTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, first_level,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_FIRST_LEVEL);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_LAST_LEVEL);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, base,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_BASE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_ROW_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_IMG_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, mip_offsets,
+ gallivm->target, texture_type,
+ LP_JIT_TEXTURE_MIP_OFFSETS);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+ gallivm->target, texture_type);
+ }
+
+ /* struct lp_jit_sampler */
+ {
+ LLVMTypeRef elem_types[LP_JIT_SAMPLER_NUM_FIELDS];
+ elem_types[LP_JIT_SAMPLER_MIN_LOD] =
+ elem_types[LP_JIT_SAMPLER_MAX_LOD] =
+ elem_types[LP_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(lc);
+ elem_types[LP_JIT_SAMPLER_BORDER_COLOR] =
+ LLVMArrayType(LLVMFloatTypeInContext(lc), 4);
+
+ sampler_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, min_lod,
+ gallivm->target, sampler_type,
+ LP_JIT_SAMPLER_MIN_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, max_lod,
+ gallivm->target, sampler_type,
+ LP_JIT_SAMPLER_MAX_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, lod_bias,
+ gallivm->target, sampler_type,
+ LP_JIT_SAMPLER_LOD_BIAS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, border_color,
+ gallivm->target, sampler_type,
+ LP_JIT_SAMPLER_BORDER_COLOR);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_sampler,
+ gallivm->target, sampler_type);
+ }
+
+ /* struct lp_jit_context */
+ {
+ LLVMTypeRef elem_types[LP_JIT_CTX_COUNT];
+ LLVMTypeRef context_type;
+
+ elem_types[LP_JIT_CTX_CONSTANTS] =
+ LLVMArrayType(LLVMPointerType(LLVMFloatTypeInContext(lc), 0), LP_MAX_TGSI_CONST_BUFFERS);
+ elem_types[LP_JIT_CTX_NUM_CONSTANTS] =
+ LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TGSI_CONST_BUFFERS);
+ elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc);
+ elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] =
+ elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc);
+ elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0);
+ elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0);
+ elem_types[LP_JIT_CTX_VIEWPORTS] = LLVMPointerType(viewport_type, 0);
+ elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
+ PIPE_MAX_SHADER_SAMPLER_VIEWS);
+ elem_types[LP_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type,
+ PIPE_MAX_SAMPLERS);
+
+ context_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
+ gallivm->target, context_type,
+ LP_JIT_CTX_CONSTANTS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, num_constants,
+ gallivm->target, context_type,
+ LP_JIT_CTX_NUM_CONSTANTS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
+ gallivm->target, context_type,
+ LP_JIT_CTX_ALPHA_REF);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front,
+ gallivm->target, context_type,
+ LP_JIT_CTX_STENCIL_REF_FRONT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
+ gallivm->target, context_type,
+ LP_JIT_CTX_STENCIL_REF_BACK);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, u8_blend_color,
+ gallivm->target, context_type,
+ LP_JIT_CTX_U8_BLEND_COLOR);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, f_blend_color,
+ gallivm->target, context_type,
+ LP_JIT_CTX_F_BLEND_COLOR);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, viewports,
+ gallivm->target, context_type,
+ LP_JIT_CTX_VIEWPORTS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+ gallivm->target, context_type,
+ LP_JIT_CTX_TEXTURES);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers,
+ gallivm->target, context_type,
+ LP_JIT_CTX_SAMPLERS);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
+ gallivm->target, context_type);
+
+ lp->jit_context_ptr_type = LLVMPointerType(context_type, 0);
+ }
+
+ /* struct lp_jit_thread_data */
+ {
+ LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
+ LLVMTypeRef thread_data_type;
+
+ elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
+ elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
+ LLVMInt32TypeInContext(lc);
+
+ thread_data_type = LLVMStructTypeInContext(lc, elem_types,
+ Elements(elem_types), 0);
+
+ lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0);
+ }
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ LLVMDumpModule(gallivm->module);
+ }
+}
+
+
+void
+lp_jit_screen_cleanup(struct llvmpipe_screen *screen)
+{
+ /* nothing */
+}
+
+
+boolean
+lp_jit_screen_init(struct llvmpipe_screen *screen)
+{
+ return lp_build_init();
+}
+
+
+void
+lp_jit_init_types(struct lp_fragment_shader_variant *lp)
+{
+ if (!lp->jit_context_ptr_type)
+ lp_jit_create_types(lp);
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h
new file mode 100644
index 000000000..097fa7dce
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -0,0 +1,263 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * C - JIT interfaces
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_JIT_H
+#define LP_JIT_H
+
+
+#include "gallivm/lp_bld_struct.h"
+#include "gallivm/lp_bld_limits.h"
+
+#include "pipe/p_state.h"
+#include "lp_texture.h"
+
+
+struct lp_fragment_shader_variant;
+struct llvmpipe_screen;
+
+
+struct lp_jit_texture
+{
+ uint32_t width; /* same as number of elements */
+ uint32_t height;
+ uint32_t depth; /* doubles as array size */
+ uint32_t first_level;
+ uint32_t last_level;
+ const void *base;
+ uint32_t row_stride[LP_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[LP_MAX_TEXTURE_LEVELS];
+ uint32_t mip_offsets[LP_MAX_TEXTURE_LEVELS];
+};
+
+
+struct lp_jit_sampler
+{
+ float min_lod;
+ float max_lod;
+ float lod_bias;
+ float border_color[4];
+};
+
+
+struct lp_jit_viewport
+{
+ float min_depth;
+ float max_depth;
+};
+
+
+enum {
+ LP_JIT_TEXTURE_WIDTH = 0,
+ LP_JIT_TEXTURE_HEIGHT,
+ LP_JIT_TEXTURE_DEPTH,
+ LP_JIT_TEXTURE_FIRST_LEVEL,
+ LP_JIT_TEXTURE_LAST_LEVEL,
+ LP_JIT_TEXTURE_BASE,
+ LP_JIT_TEXTURE_ROW_STRIDE,
+ LP_JIT_TEXTURE_IMG_STRIDE,
+ LP_JIT_TEXTURE_MIP_OFFSETS,
+ LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
+};
+
+
+enum {
+ LP_JIT_SAMPLER_MIN_LOD,
+ LP_JIT_SAMPLER_MAX_LOD,
+ LP_JIT_SAMPLER_LOD_BIAS,
+ LP_JIT_SAMPLER_BORDER_COLOR,
+ LP_JIT_SAMPLER_NUM_FIELDS /* number of fields above */
+};
+
+
+enum {
+ LP_JIT_VIEWPORT_MIN_DEPTH,
+ LP_JIT_VIEWPORT_MAX_DEPTH,
+ LP_JIT_VIEWPORT_NUM_FIELDS /* number of fields above */
+};
+
+
+/**
+ * This structure is passed directly to the generated fragment shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the lp_jit_context_* macros and
+ * lp_jit_init_types function. Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct lp_jit_context
+{
+ const float *constants[LP_MAX_TGSI_CONST_BUFFERS];
+ int num_constants[LP_MAX_TGSI_CONST_BUFFERS];
+
+ float alpha_ref_value;
+
+ uint32_t stencil_ref_front, stencil_ref_back;
+
+ uint8_t *u8_blend_color;
+ float *f_blend_color;
+
+ struct lp_jit_viewport *viewports;
+
+ struct lp_jit_texture textures[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ struct lp_jit_sampler samplers[PIPE_MAX_SAMPLERS];
+};
+
+
+/**
+ * These enum values must match the position of the fields in the
+ * lp_jit_context struct above.
+ */
+enum {
+ LP_JIT_CTX_CONSTANTS = 0,
+ LP_JIT_CTX_NUM_CONSTANTS,
+ LP_JIT_CTX_ALPHA_REF,
+ LP_JIT_CTX_STENCIL_REF_FRONT,
+ LP_JIT_CTX_STENCIL_REF_BACK,
+ LP_JIT_CTX_U8_BLEND_COLOR,
+ LP_JIT_CTX_F_BLEND_COLOR,
+ LP_JIT_CTX_VIEWPORTS,
+ LP_JIT_CTX_TEXTURES,
+ LP_JIT_CTX_SAMPLERS,
+ LP_JIT_CTX_COUNT
+};
+
+
+#define lp_jit_context_constants(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_CONSTANTS, "constants")
+
+#define lp_jit_context_num_constants(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_NUM_CONSTANTS, "num_constants")
+
+#define lp_jit_context_alpha_ref_value(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value")
+
+#define lp_jit_context_stencil_ref_front_value(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front")
+
+#define lp_jit_context_stencil_ref_back_value(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
+
+#define lp_jit_context_u8_blend_color(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_U8_BLEND_COLOR, "u8_blend_color")
+
+#define lp_jit_context_f_blend_color(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_F_BLEND_COLOR, "f_blend_color")
+
+#define lp_jit_context_viewports(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_VIEWPORTS, "viewports")
+
+#define lp_jit_context_textures(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_TEXTURES, "textures")
+
+#define lp_jit_context_samplers(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_SAMPLERS, "samplers")
+
+
+struct lp_jit_thread_data
+{
+ uint64_t vis_counter;
+
+ /*
+ * Non-interpolated rasterizer state passed through to the fragment shader.
+ */
+ struct {
+ uint32_t viewport_index;
+ } raster_state;
+};
+
+
+enum {
+ LP_JIT_THREAD_DATA_COUNTER = 0,
+ LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
+ LP_JIT_THREAD_DATA_COUNT
+};
+
+
+#define lp_jit_thread_data_counter(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter")
+
+#define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, \
+ LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \
+ "raster_state.viewport_index")
+
+/**
+ * typedef for fragment shader function
+ *
+ * @param context jit context
+ * @param x block start x
+ * @param y block start y
+ * @param facing is front facing
+ * @param a0 shader input a0
+ * @param dadx shader input dadx
+ * @param dady shader input dady
+ * @param color color buffer
+ * @param depth depth buffer
+ * @param mask mask of visible pixels in block
+ * @param thread_data task thread data
+ * @param stride color buffer row stride in bytes
+ * @param depth_stride depth buffer row stride in bytes
+ */
+typedef void
+(*lp_jit_frag_func)(const struct lp_jit_context *context,
+ uint32_t x,
+ uint32_t y,
+ uint32_t facing,
+ const void *a0,
+ const void *dadx,
+ const void *dady,
+ uint8_t **color,
+ uint8_t *depth,
+ uint32_t mask,
+ struct lp_jit_thread_data *thread_data,
+ unsigned *stride,
+ unsigned depth_stride);
+
+
+void
+lp_jit_screen_cleanup(struct llvmpipe_screen *screen);
+
+
+boolean
+lp_jit_screen_init(struct llvmpipe_screen *screen);
+
+
+void
+lp_jit_init_types(struct lp_fragment_shader_variant *lp);
+
+
+#endif /* LP_JIT_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h
new file mode 100644
index 000000000..5294ced3c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Implementation limits for LLVMpipe driver.
+ */
+
+#ifndef LP_LIMITS_H
+#define LP_LIMITS_H
+
+
+/**
+ * Tile size (width and height). This needs to be a power of two.
+ */
+#define TILE_ORDER 6
+#define TILE_SIZE (1 << TILE_ORDER)
+
+
+/**
+ * Max texture sizes
+ */
+#define LP_MAX_TEXTURE_SIZE (1 * 1024 * 1024 * 1024ULL) /* 1GB for now */
+#define LP_MAX_TEXTURE_2D_LEVELS 14 /* 8K x 8K for now */
+#define LP_MAX_TEXTURE_3D_LEVELS 12 /* 2K x 2K x 2K for now */
+#define LP_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */
+#define LP_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
+
+
+/** This must be the larger of LP_MAX_TEXTURE_2D/3D_LEVELS */
+#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS
+
+
+/**
+ * Max drawing surface size is the max texture size
+ */
+#define LP_MAX_HEIGHT (1 << (LP_MAX_TEXTURE_LEVELS - 1))
+#define LP_MAX_WIDTH (1 << (LP_MAX_TEXTURE_LEVELS - 1))
+
+
+#define LP_MAX_THREADS 16
+
+
+/**
+ * Max bytes per scene. This may be replaced by a runtime parameter.
+ */
+#define LP_MAX_SCENE_SIZE (512 * 1024 * 1024)
+
+/**
+ * Max number of shader variants (for all shaders combined,
+ * per context) that will be kept around.
+ */
+#define LP_MAX_SHADER_VARIANTS 1024
+
+/**
+ * Max number of instructions (for all fragment shaders combined per context)
+ * that will be kept around (counted in terms of llvm ir).
+ * Note: the definition looks odd, but there's branches which use a different
+ * number of max shader variants.
+ */
+#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS)
+
+/**
+ * Max number of setup variants that will be kept around.
+ *
+ * These are determined by the combination of the fragment shader
+ * input signature and a small amount of rasterization state (eg
+ * flatshading). It is likely that many active fragment shaders will
+ * share the same setup variant.
+ */
+#define LP_MAX_SETUP_VARIANTS 64
+
+#endif /* LP_LIMITS_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c
new file mode 100644
index 000000000..712e28ea3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -0,0 +1,36 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+#include "lp_limits.h"
+#include "lp_memory.h"
+
+/* A single dummy tile used in a couple of out-of-memory situations.
+ */
+PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN)
+uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h
new file mode 100644
index 000000000..0acd4e6b8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -0,0 +1,40 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_MEMORY_H
+#define LP_MEMORY_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "lp_limits.h"
+#include "gallivm/lp_bld_type.h"
+
+extern PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN)
+uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+
+#endif /* LP_MEMORY_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c
new file mode 100644
index 000000000..a4548bccf
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -0,0 +1,110 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "lp_debug.h"
+#include "lp_perf.h"
+
+
+
+struct lp_counters lp_count;
+
+
+void
+lp_reset_counters(void)
+{
+ memset(&lp_count, 0, sizeof(lp_count));
+}
+
+
+void
+lp_print_counters(void)
+{
+ if (LP_DEBUG & DEBUG_COUNTERS) {
+ unsigned total_64, total_16, total_4;
+ float p1, p2, p3, p4, p5, p6;
+
+ debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
+ debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
+
+ total_64 = (lp_count.nr_empty_64 +
+ lp_count.nr_fully_covered_64 +
+ lp_count.nr_partially_covered_64);
+
+ p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
+ p5 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
+ p6 = 100.0 * (float) lp_count.nr_shade_64 / (float) total_64;
+
+ debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
+ debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
+ debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p5, total_64);
+ debug_printf("llvmpipe: nr_pure_shade_opaque: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_opaque_64, 0.0, lp_count.nr_shade_opaque_64);
+ debug_printf("llvmpipe: nr_shade_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_64, p6, total_64);
+ debug_printf("llvmpipe: nr_pure_shade: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_64, 0.0, lp_count.nr_shade_64);
+ debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+ debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
+
+ total_16 = (lp_count.nr_empty_16 +
+ lp_count.nr_fully_covered_16 +
+ lp_count.nr_partially_covered_16);
+
+ p1 = 100.0 * (float) lp_count.nr_empty_16 / (float) total_16;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16;
+
+ debug_printf("llvmpipe: nr_16x16: %9u\n", total_16);
+ debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
+ debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+ debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
+
+ total_4 = (lp_count.nr_empty_4 +
+ lp_count.nr_fully_covered_4 +
+ lp_count.nr_partially_covered_4);
+
+ p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_4 / (float) total_4;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_4 / (float) total_4;
+ p4 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
+
+ debug_printf("llvmpipe: nr_tri_4x4: %9u\n", total_4);
+ debug_printf("llvmpipe: nr_fully_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_4, p2, total_4);
+ debug_printf("llvmpipe: nr_partially_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_4, p3, total_4);
+ debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
+ debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p4, total_4);
+
+ debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
+ debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
+ debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
+
+ debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
+ debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
+
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h
new file mode 100644
index 000000000..455adf7d6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -0,0 +1,91 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Performance / statistic counters, etc.
+ */
+
+
+#ifndef LP_PERF_H
+#define LP_PERF_H
+
+#include "pipe/p_compiler.h"
+
+/**
+ * Various counters
+ *
+ * Updated through the LP_COUNT / LP_COUNT_ADD macros below (debug builds
+ * only) and reported by lp_print_counters().  The _64/_16/_4 suffixes
+ * classify coverage by 64x64 tile, 16x16 block and 4x4 block respectively
+ * (see the corresponding printouts in lp_print_counters()).
+ */
+struct lp_counters
+{
+   unsigned nr_tris;
+   unsigned nr_culled_tris;
+   /* 64x64 tile classification */
+   unsigned nr_empty_64;
+   unsigned nr_fully_covered_64;
+   unsigned nr_partially_covered_64;
+   unsigned nr_pure_shade_opaque_64;
+   unsigned nr_pure_shade_64;
+   unsigned nr_shade_64;
+   unsigned nr_shade_opaque_64;
+   /* 16x16 block classification */
+   unsigned nr_empty_16;
+   unsigned nr_fully_covered_16;
+   unsigned nr_partially_covered_16;
+   /* 4x4 block classification */
+   unsigned nr_empty_4;
+   unsigned nr_fully_covered_4;
+   unsigned nr_partially_covered_4;
+   unsigned nr_non_empty_4;
+   /* JIT compilation statistics */
+   unsigned nr_llvm_compiles;
+   int64_t llvm_compile_time;  /**< total, in microseconds */
+
+   /* color tile traffic */
+   unsigned nr_color_tile_clear;
+   unsigned nr_color_tile_load;
+   unsigned nr_color_tile_store;
+};
+
+
+extern struct lp_counters lp_count;
+
+
+/** Increment the named counter (only for debug builds) */
+#ifdef DEBUG
+#define LP_COUNT(counter) lp_count.counter++
+#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr)
+#define LP_COUNT_GET(counter) (lp_count.counter)
+#else
+/* Release builds: counting compiles away.  LP_COUNT_ADD still evaluates
+ * its 'incr' argument (cast to void), and LP_COUNT_GET reads as constant 0.
+ */
+#define LP_COUNT(counter)
+#define LP_COUNT_ADD(counter, incr) (void)(incr)
+#define LP_COUNT_GET(counter) 0
+#endif
+
+
+extern void
+lp_reset_counters(void);
+
+
+extern void
+lp_print_counters(void);
+
+
+#endif /* LP_PERF_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h
new file mode 100644
index 000000000..27ab1baef
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h
@@ -0,0 +1,18 @@
+#ifndef LP_PUBLIC_H
+#define LP_PUBLIC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_screen;
+struct sw_winsys;
+
+struct pipe_screen *
+llvmpipe_create_screen(struct sw_winsys *winsys);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c
new file mode 100644
index 000000000..fc5936706
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c
@@ -0,0 +1,332 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Keith Whitwell, Qicheng Christopher Li, Brian Paul
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_fence.h"
+#include "lp_query.h"
+#include "lp_screen.h"
+#include "lp_state.h"
+#include "lp_rast.h"
+
+
+/** Cast wrapper: gallium pipe_query -> llvmpipe's private query struct. */
+static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p )
+{
+   return (struct llvmpipe_query *)p;
+}
+
+/**
+ * Allocate a new query object (pipe_context::create_query hook).
+ * Returns NULL on allocation failure.  'index' is accepted but unused.
+ */
+static struct pipe_query *
+llvmpipe_create_query(struct pipe_context *pipe,
+                      unsigned type,
+                      unsigned index)
+{
+   struct llvmpipe_query *pq;
+
+   assert(type < PIPE_QUERY_TYPES);
+
+   /* zeroed allocation: counters and fence pointer start at 0/NULL */
+   pq = CALLOC_STRUCT( llvmpipe_query );
+
+   if (pq) {
+      pq->type = type;
+   }
+
+   return (struct pipe_query *) pq;
+}
+
+
+/**
+ * Free a query object (pipe_context::destroy_query hook).
+ * If the query was binned in a scene, flush/wait on that scene's fence
+ * first so the rasterizer threads cannot touch the freed memory.
+ */
+static void
+llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct llvmpipe_query *pq = llvmpipe_query(q);
+
+   /* Ideally we would refcount queries & not get destroyed until the
+    * last scene had finished with us.
+    */
+   if (pq->fence) {
+      /* submit the scene if it hasn't even been issued yet */
+      if (!lp_fence_issued(pq->fence))
+         llvmpipe_flush(pipe, NULL, __FUNCTION__);
+
+      if (!lp_fence_signalled(pq->fence))
+         lp_fence_wait(pq->fence);
+
+      lp_fence_reference(&pq->fence, NULL);
+   }
+
+   FREE(pq);
+}
+
+
+/**
+ * Fetch a query's result (pipe_context::get_query_result hook).
+ * May flush and/or block on the fence of the last scene the query was
+ * binned in.  Returns FALSE if 'wait' is false and the result is not
+ * ready yet, TRUE otherwise.
+ */
+static boolean
+llvmpipe_get_query_result(struct pipe_context *pipe,
+                          struct pipe_query *q,
+                          boolean wait,
+                          union pipe_query_result *vresult)
+{
+   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+   unsigned num_threads = MAX2(1, screen->num_threads);
+   struct llvmpipe_query *pq = llvmpipe_query(q);
+   uint64_t *result = (uint64_t *)vresult;
+   int i;
+
+   if (pq->fence) {
+      /* only have a fence if there was a scene */
+      if (!lp_fence_signalled(pq->fence)) {
+         if (!lp_fence_issued(pq->fence))
+            llvmpipe_flush(pipe, NULL, __FUNCTION__);
+
+         if (!wait)
+            return FALSE;
+
+         lp_fence_wait(pq->fence);
+      }
+   }
+
+   /* Sum the results from each of the threads:
+    */
+   *result = 0;
+
+   switch (pq->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      for (i = 0; i < num_threads; i++) {
+         *result += pq->end[i];
+      }
+      break;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      for (i = 0; i < num_threads; i++) {
+         /* safer (still not guaranteed) when there's an overflow */
+         vresult->b = vresult->b || pq->end[i];
+      }
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      /* report the latest timestamp recorded by any thread */
+      for (i = 0; i < num_threads; i++) {
+         if (pq->end[i] > *result) {
+            *result = pq->end[i];
+         }
+      }
+      break;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+      struct pipe_query_data_timestamp_disjoint *td =
+         (struct pipe_query_data_timestamp_disjoint *)vresult;
+      /* os_time_get_nano() returns nanoseconds, so frequency is 1 GHz */
+      td->frequency = UINT64_C(1000000000);
+      td->disjoint = FALSE;
+   }
+      break;
+   case PIPE_QUERY_GPU_FINISHED:
+      /* the fence handling above already guaranteed completion */
+      vresult->b = TRUE;
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      *result = pq->num_primitives_generated;
+      break;
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      *result = pq->num_primitives_written;
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      /* overflow means streamout generated more than it could store */
+      vresult->b = pq->num_primitives_generated > pq->num_primitives_written;
+      break;
+   case PIPE_QUERY_SO_STATISTICS: {
+      struct pipe_query_data_so_statistics *stats =
+         (struct pipe_query_data_so_statistics *)vresult;
+      stats->num_primitives_written = pq->num_primitives_written;
+      stats->primitives_storage_needed = pq->num_primitives_generated;
+   }
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS: {
+      struct pipe_query_data_pipeline_statistics *stats =
+         (struct pipe_query_data_pipeline_statistics *)vresult;
+      /* only ps_invocations come from binned query */
+      for (i = 0; i < num_threads; i++) {
+         pq->stats.ps_invocations += pq->end[i];
+      }
+      /* binned counters are per block; scale up to per-pixel invocations
+       * (see lp_rast_shade_quads_mask which counts one per 4x4 block) */
+      pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE;
+      *stats = pq->stats;
+   }
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Start a query (pipe_context::begin_query hook).
+ * Resets the per-thread counters and snapshots the current streamout /
+ * pipeline statistics so end_query can later compute deltas.
+ */
+static boolean
+llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+   struct llvmpipe_query *pq = llvmpipe_query(q);
+
+   /* Check if the query is already in the scene.  If so, we need to
+    * flush the scene now.  Real apps shouldn't re-use a query in a
+    * frame of rendering.
+    */
+   if (pq->fence && !lp_fence_issued(pq->fence)) {
+      llvmpipe_finish(pipe, __FUNCTION__);
+   }
+
+
+   memset(pq->start, 0, sizeof(pq->start));
+   memset(pq->end, 0, sizeof(pq->end));
+   lp_setup_begin_query(llvmpipe->setup, pq);
+
+   /* snapshot current counter values; end_query subtracts them */
+   switch (pq->type) {
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written;
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+      pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written;
+      pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written;
+      pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      /* reset our cache */
+      if (llvmpipe->active_statistics_queries == 0) {
+         memset(&llvmpipe->pipeline_statistics, 0,
+                sizeof(llvmpipe->pipeline_statistics));
+      }
+      memcpy(&pq->stats, &llvmpipe->pipeline_statistics, sizeof(pq->stats));
+      llvmpipe->active_statistics_queries++;
+      break;
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      /* flag state dirty so downstream code sees occlusion counting on */
+      llvmpipe->active_occlusion_queries++;
+      llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
+      break;
+   default:
+      break;
+   }
+   return true;
+}
+
+
+/**
+ * Finish a query (pipe_context::end_query hook).
+ * Converts the begin_query() snapshots into deltas covering just the
+ * begin/end interval.
+ */
+static void
+llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+   struct llvmpipe_query *pq = llvmpipe_query(q);
+
+   lp_setup_end_query(llvmpipe->setup, pq);
+
+   switch (pq->type) {
+
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      pq->num_primitives_written =
+         llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written;
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      pq->num_primitives_generated =
+         llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated;
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+      pq->num_primitives_written =
+         llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written;
+      pq->num_primitives_generated =
+         llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated;
+      break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      pq->num_primitives_written =
+         llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written;
+      pq->num_primitives_generated =
+         llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      /* each field becomes (current context total) - (begin snapshot) */
+      pq->stats.ia_vertices =
+         llvmpipe->pipeline_statistics.ia_vertices - pq->stats.ia_vertices;
+      pq->stats.ia_primitives =
+         llvmpipe->pipeline_statistics.ia_primitives - pq->stats.ia_primitives;
+      pq->stats.vs_invocations =
+         llvmpipe->pipeline_statistics.vs_invocations - pq->stats.vs_invocations;
+      pq->stats.gs_invocations =
+         llvmpipe->pipeline_statistics.gs_invocations - pq->stats.gs_invocations;
+      pq->stats.gs_primitives =
+         llvmpipe->pipeline_statistics.gs_primitives - pq->stats.gs_primitives;
+      pq->stats.c_invocations =
+         llvmpipe->pipeline_statistics.c_invocations - pq->stats.c_invocations;
+      pq->stats.c_primitives =
+         llvmpipe->pipeline_statistics.c_primitives - pq->stats.c_primitives;
+      pq->stats.ps_invocations =
+         llvmpipe->pipeline_statistics.ps_invocations - pq->stats.ps_invocations;
+
+      llvmpipe->active_statistics_queries--;
+      break;
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      assert(llvmpipe->active_occlusion_queries);
+      llvmpipe->active_occlusion_queries--;
+      llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY;
+      break;
+   default:
+      break;
+   }
+}
+
+/**
+ * Evaluate the conditional-rendering predicate.
+ * Returns TRUE if drawing should proceed: no render-condition query is
+ * set, the query result satisfies the condition, or the result isn't
+ * available yet (and the mode doesn't require waiting).
+ */
+boolean
+llvmpipe_check_render_cond(struct llvmpipe_context *lp)
+{
+   struct pipe_context *pipe = &lp->pipe;
+   boolean b, wait;
+   uint64_t result;
+
+   if (!lp->render_cond_query)
+      return TRUE; /* no query predicate, draw normally */
+
+   wait = (lp->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+           lp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+   b = pipe->get_query_result(pipe, lp->render_cond_query, wait, (void*)&result);
+   if (b)
+      /* compare zero-ness of the result against the requested condition */
+      return ((!result) == lp->render_cond_cond);
+   else
+      return TRUE;
+}
+
+/** Plug the query implementations into the context's function table. */
+void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe )
+{
+   llvmpipe->pipe.create_query = llvmpipe_create_query;
+   llvmpipe->pipe.destroy_query = llvmpipe_destroy_query;
+   llvmpipe->pipe.begin_query = llvmpipe_begin_query;
+   llvmpipe->pipe.end_query = llvmpipe_end_query;
+   llvmpipe->pipe.get_query_result = llvmpipe_get_query_result;
+}
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h
new file mode 100644
index 000000000..797375c88
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h
@@ -0,0 +1,60 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Keith Whitwell, Qicheng Christopher Li, Brian Paul
+ */
+
+#ifndef LP_QUERY_H
+#define LP_QUERY_H
+
+#include <limits.h>
+#include "os/os_thread.h"
+#include "lp_limits.h"
+
+
+struct llvmpipe_context;
+
+
+/** Private query object; one start/end counter pair per rasterizer thread. */
+struct llvmpipe_query {
+   uint64_t start[LP_MAX_THREADS];  /* start count value for each thread */
+   uint64_t end[LP_MAX_THREADS];    /* end count value for each thread */
+   struct lp_fence *fence;          /* fence from last scene this was binned in */
+   unsigned type;                   /* PIPE_QUERY_* */
+   unsigned num_primitives_generated; /* streamout: storage needed */
+   unsigned num_primitives_written;   /* streamout: actually emitted */
+
+   /* snapshot at begin_query, delta after end_query (PIPELINE_STATISTICS) */
+   struct pipe_query_data_pipeline_statistics stats;
+};
+
+
+extern void llvmpipe_init_query_funcs(struct llvmpipe_context * );
+
+extern boolean llvmpipe_check_render_cond(struct llvmpipe_context *);
+
+#endif /* LP_QUERY_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
new file mode 100644
index 000000000..c726707c0
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -0,0 +1,935 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <limits.h>
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_rect.h"
+#include "util/u_surface.h"
+#include "util/u_pack_color.h"
+#include "util/u_string.h"
+
+#include "os/os_time.h"
+
+#include "lp_scene_queue.h"
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_fence.h"
+#include "lp_perf.h"
+#include "lp_query.h"
+#include "lp_rast.h"
+#include "lp_rast_priv.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_scene.h"
+#include "lp_tex_sample.h"
+
+
+#ifdef DEBUG
+int jit_line = 0;
+const struct lp_rast_state *jit_state = NULL;
+const struct lp_rasterizer_task *jit_task = NULL;
+#endif
+
+
+/**
+ * Begin rasterizing a scene.
+ * Called once per scene by one thread.
+ */
+static void
+lp_rast_begin( struct lp_rasterizer *rast,
+               struct lp_scene *scene )
+{
+   rast->curr_scene = scene;
+
+   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+   /* prepare the scene and (re)start its bin iterator (consumed later
+    * via lp_scene_bin_iter_next() in rasterize_scene()) */
+   lp_scene_begin_rasterization( scene );
+   lp_scene_bin_iter_begin( scene );
+}
+
+
+/**
+ * Finish rasterizing the current scene.
+ * Counterpart of lp_rast_begin(); clears rast->curr_scene.
+ */
+static void
+lp_rast_end( struct lp_rasterizer *rast )
+{
+   lp_scene_end_rasterization( rast->curr_scene );
+
+   rast->curr_scene = NULL;
+}
+
+
+/**
+ * Beginning rasterization of a tile.
+ * \param x tile X coordinate (in tiles; converted to pixels via TILE_SIZE)
+ * \param y tile Y coordinate (in tiles; converted to pixels via TILE_SIZE)
+ */
+static void
+lp_rast_tile_begin(struct lp_rasterizer_task *task,
+                   const struct cmd_bin *bin,
+                   int x, int y)
+{
+   unsigned i;
+   struct lp_scene *scene = task->scene;
+
+   LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
+
+   task->bin = bin;
+   task->x = x * TILE_SIZE;
+   task->y = y * TILE_SIZE;
+   /* clamp the tile extent against the framebuffer edge */
+   task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
+                 task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
+   task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
+                  task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
+
+   /* reset per-tile counters (occlusion + fragment-shader invocations) */
+   task->thread_data.vis_counter = 0;
+   task->ps_invocations = 0;
+
+   /* precompute base pointers of this tile into the mapped surfaces */
+   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
+      if (task->scene->fb.cbufs[i]) {
+         task->color_tiles[i] = scene->cbufs[i].map +
+                                scene->cbufs[i].stride * task->y +
+                                scene->cbufs[i].format_bytes * task->x;
+      }
+   }
+   if (task->scene->fb.zsbuf) {
+      task->depth_tile = scene->zsbuf.map +
+                         scene->zsbuf.stride * task->y +
+                         scene->zsbuf.format_bytes * task->x;
+   }
+}
+
+
+/**
+ * Clear the rasterizer's current color tile.
+ * This is a bin command called during bin processing.
+ * Clear commands always clear all bound layers.
+ */
+static void
+lp_rast_clear_color(struct lp_rasterizer_task *task,
+                    const union lp_rast_cmd_arg arg)
+{
+   const struct lp_scene *scene = task->scene;
+   unsigned cbuf = arg.clear_rb->cbuf;
+   union util_color uc;
+   enum pipe_format format;
+
+   /* we never bin clear commands for non-existing buffers */
+   assert(cbuf < scene->fb.nr_cbufs);
+   assert(scene->fb.cbufs[cbuf]);
+
+   format = scene->fb.cbufs[cbuf]->format;
+   uc = arg.clear_rb->color_val;
+
+   /*
+    * this is pretty rough since we have target format (bunch of bytes...) here.
+    * dump it as raw 4 dwords.
+    */
+   LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
+          __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]);
+
+
+   /* fill this tile's rectangle in every bound layer of the surface */
+   util_fill_box(scene->cbufs[cbuf].map,
+                 format,
+                 scene->cbufs[cbuf].stride,
+                 scene->cbufs[cbuf].layer_stride,
+                 task->x,
+                 task->y,
+                 0,
+                 task->width,
+                 task->height,
+                 scene->fb_max_layer + 1,
+                 &uc);
+
+   /* this will increase for each rb which probably doesn't mean much */
+   LP_COUNT(nr_color_tile_clear);
+}
+
+
+/**
+ * Clear the rasterizer's current z/stencil tile.
+ * This is a bin command called during bin processing.
+ * Clear commands always clear all bound layers.
+ */
+static void
+lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
+                       const union lp_rast_cmd_arg arg)
+{
+   const struct lp_scene *scene = task->scene;
+   uint64_t clear_value64 = arg.clear_zstencil.value;
+   uint64_t clear_mask64 = arg.clear_zstencil.mask;
+   uint32_t clear_value = (uint32_t) clear_value64;
+   uint32_t clear_mask = (uint32_t) clear_mask64;
+   const unsigned height = task->height;
+   const unsigned width = task->width;
+   const unsigned dst_stride = scene->zsbuf.stride;
+   uint8_t *dst;
+   unsigned i, j;
+   unsigned block_size;
+
+   LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n",
+          __FUNCTION__, clear_value, clear_mask);
+
+   /*
+    * Clear the area of the depth/stencil buffer matching this tile.
+    */
+
+   if (scene->fb.zsbuf) {
+      unsigned layer;
+      uint8_t *dst_layer = task->depth_tile;
+      block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
+
+      clear_value &= clear_mask;
+
+      for (layer = 0; layer <= scene->fb_max_layer; layer++) {
+         dst = dst_layer;
+
+         /* Per pixel size: fast row fill when the mask covers the whole
+          * pixel, otherwise read-modify-write to preserve unmasked bits
+          * (e.g. clearing depth but keeping stencil).
+          */
+         switch (block_size) {
+         case 1:
+            assert(clear_mask == 0xff);
+            memset(dst, (uint8_t) clear_value, height * width);
+            break;
+         case 2:
+            if (clear_mask == 0xffff) {
+               for (i = 0; i < height; i++) {
+                  uint16_t *row = (uint16_t *)dst;
+                  for (j = 0; j < width; j++)
+                     *row++ = (uint16_t) clear_value;
+                  dst += dst_stride;
+               }
+            }
+            else {
+               for (i = 0; i < height; i++) {
+                  uint16_t *row = (uint16_t *)dst;
+                  for (j = 0; j < width; j++) {
+                     uint16_t tmp = ~clear_mask & *row;
+                     *row++ = clear_value | tmp;
+                  }
+                  dst += dst_stride;
+               }
+            }
+            break;
+         case 4:
+            if (clear_mask == 0xffffffff) {
+               for (i = 0; i < height; i++) {
+                  uint32_t *row = (uint32_t *)dst;
+                  for (j = 0; j < width; j++)
+                     *row++ = clear_value;
+                  dst += dst_stride;
+               }
+            }
+            else {
+               for (i = 0; i < height; i++) {
+                  uint32_t *row = (uint32_t *)dst;
+                  for (j = 0; j < width; j++) {
+                     uint32_t tmp = ~clear_mask & *row;
+                     *row++ = clear_value | tmp;
+                  }
+                  dst += dst_stride;
+               }
+            }
+            break;
+         case 8:
+            clear_value64 &= clear_mask64;
+            /* NOTE(review): 0xffffffffffULL has 40 set bits - presumably
+             * the full mask for a Z32_FLOAT_S8X24-style 8-byte format
+             * (32 depth + 8 stencil bits); confirm against the formats
+             * that can reach this path.
+             */
+            if (clear_mask64 == 0xffffffffffULL) {
+               for (i = 0; i < height; i++) {
+                  uint64_t *row = (uint64_t *)dst;
+                  for (j = 0; j < width; j++)
+                     *row++ = clear_value64;
+                  dst += dst_stride;
+               }
+            }
+            else {
+               for (i = 0; i < height; i++) {
+                  uint64_t *row = (uint64_t *)dst;
+                  for (j = 0; j < width; j++) {
+                     uint64_t tmp = ~clear_mask64 & *row;
+                     *row++ = clear_value64 | tmp;
+                  }
+                  dst += dst_stride;
+               }
+            }
+            break;
+
+         default:
+            assert(0);
+            break;
+         }
+         /* advance to the next bound layer */
+         dst_layer += scene->zsbuf.layer_stride;
+      }
+   }
+}
+
+
+
+/**
+ * Run the shader on all blocks in a tile.  This is used when a tile is
+ * completely contained inside a triangle.
+ * This is a bin command called during bin processing.
+ */
+static void
+lp_rast_shade_tile(struct lp_rasterizer_task *task,
+                   const union lp_rast_cmd_arg arg)
+{
+   const struct lp_scene *scene = task->scene;
+   const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+   const struct lp_rast_state *state;
+   struct lp_fragment_shader_variant *variant;
+   const unsigned tile_x = task->x, tile_y = task->y;
+   unsigned x, y;
+
+   if (inputs->disable) {
+      /* This command was partially binned and has been disabled */
+      return;
+   }
+
+   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+   state = task->state;
+   assert(state);
+   if (!state) {
+      /* defensive: no state bound (the assert above fires in debug) */
+      return;
+   }
+   variant = state->variant;
+
+   /* render the whole 64x64 tile in 4x4 chunks */
+   for (y = 0; y < task->height; y += 4){
+      for (x = 0; x < task->width; x += 4) {
+         uint8_t *color[PIPE_MAX_COLOR_BUFS];
+         unsigned stride[PIPE_MAX_COLOR_BUFS];
+         uint8_t *depth = NULL;
+         unsigned depth_stride = 0;
+         unsigned i;
+
+         /* color buffer */
+         for (i = 0; i < scene->fb.nr_cbufs; i++){
+            if (scene->fb.cbufs[i]) {
+               stride[i] = scene->cbufs[i].stride;
+               color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x,
+                                                          tile_y + y, inputs->layer);
+            }
+            else {
+               stride[i] = 0;
+               color[i] = NULL;
+            }
+         }
+
+         /* depth buffer */
+         if (scene->zsbuf.map) {
+            depth = lp_rast_get_depth_block_pointer(task, tile_x + x,
+                                                    tile_y + y, inputs->layer);
+            depth_stride = scene->zsbuf.stride;
+         }
+
+         /* Propagate non-interpolated raster state. */
+         task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+
+         /* run shader on 4x4 block with a full coverage mask (0xffff) */
+         BEGIN_JIT_CALL(state, task);
+         variant->jit_function[RAST_WHOLE]( &state->jit_context,
+                                            tile_x + x, tile_y + y,
+                                            inputs->frontfacing,
+                                            GET_A0(inputs),
+                                            GET_DADX(inputs),
+                                            GET_DADY(inputs),
+                                            color,
+                                            depth,
+                                            0xffff,
+                                            &task->thread_data,
+                                            stride,
+                                            depth_stride);
+         END_JIT_CALL();
+      }
+   }
+}
+
+
+/**
+ * Run the shader on all blocks in a tile.  This is used when a tile is
+ * completely contained inside a triangle, and the shader is opaque.
+ * This is a bin command called during bin processing.
+ * Currently just forwards to lp_rast_shade_tile() after validating state.
+ */
+static void
+lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
+                          const union lp_rast_cmd_arg arg)
+{
+   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+   assert(task->state);
+   if (!task->state) {
+      /* defensive: no state bound (the assert above fires in debug) */
+      return;
+   }
+
+   lp_rast_shade_tile(task, arg);
+}
+
+
+/**
+ * Compute shading for a 4x4 block of pixels inside a triangle.
+ * This is a bin command called during bin processing.
+ * \param x X position of quad in window coords
+ * \param y Y position of quad in window coords
+ * \param mask coverage bitmask for the 4x4 block
+ */
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+                         const struct lp_rast_shader_inputs *inputs,
+                         unsigned x, unsigned y,
+                         unsigned mask)
+{
+   const struct lp_rast_state *state = task->state;
+   struct lp_fragment_shader_variant *variant = state->variant;
+   const struct lp_scene *scene = task->scene;
+   uint8_t *color[PIPE_MAX_COLOR_BUFS];
+   unsigned stride[PIPE_MAX_COLOR_BUFS];
+   uint8_t *depth = NULL;
+   unsigned depth_stride = 0;
+   unsigned i;
+
+   assert(state);
+
+   /* Sanity checks */
+   assert(x < scene->tiles_x * TILE_SIZE);
+   assert(y < scene->tiles_y * TILE_SIZE);
+   assert(x % TILE_VECTOR_WIDTH == 0);
+   assert(y % TILE_VECTOR_HEIGHT == 0);
+
+   assert((x % 4) == 0);
+   assert((y % 4) == 0);
+
+   /* color buffer: per-buffer block pointer and stride (NULL if unbound) */
+   for (i = 0; i < scene->fb.nr_cbufs; i++) {
+      if (scene->fb.cbufs[i]) {
+         stride[i] = scene->cbufs[i].stride;
+         color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
+                                                    inputs->layer);
+      }
+      else {
+         stride[i] = 0;
+         color[i] = NULL;
+      }
+   }
+
+   /* depth buffer */
+   if (scene->zsbuf.map) {
+      depth_stride = scene->zsbuf.stride;
+      depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer);
+   }
+
+   assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
+
+   /*
+    * The rasterizer may produce fragments outside our
+    * allocated 4x4 blocks hence need to filter them out here.
+    */
+   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+      /* not very accurate would need a popcount on the mask */
+      /* always count this not worth bothering? */
+      task->ps_invocations += 1 * variant->ps_inv_multiplier;
+
+      /* Propagate non-interpolated raster state. */
+      task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+
+      /* run shader on 4x4 block */
+      BEGIN_JIT_CALL(state, task);
+      variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
+                                            x, y,
+                                            inputs->frontfacing,
+                                            GET_A0(inputs),
+                                            GET_DADX(inputs),
+                                            GET_DADY(inputs),
+                                            color,
+                                            depth,
+                                            mask,
+                                            &task->thread_data,
+                                            stride,
+                                            depth_stride);
+      END_JIT_CALL();
+   }
+}
+
+
+
+/**
+ * Begin a new query (occlusion or pipeline statistics).
+ * This is a bin command put in all bins.
+ * Called per thread; snapshots the thread's running counter so
+ * lp_rast_end_query() can accumulate the delta.
+ */
+static void
+lp_rast_begin_query(struct lp_rasterizer_task *task,
+                    const union lp_rast_cmd_arg arg)
+{
+   struct llvmpipe_query *pq = arg.query_obj;
+
+   switch (pq->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      pq->start[task->thread_index] = task->thread_data.vis_counter;
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      pq->start[task->thread_index] = task->ps_invocations;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+
+
+/**
+ * End the current query.
+ * This is a bin command put in all bins.
+ * Called per thread; accumulates the delta since the matching
+ * lp_rast_begin_query() snapshot into pq->end[] (timestamps just record
+ * the current time).
+ */
+static void
+lp_rast_end_query(struct lp_rasterizer_task *task,
+                  const union lp_rast_cmd_arg arg)
+{
+   struct llvmpipe_query *pq = arg.query_obj;
+
+   switch (pq->type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      pq->end[task->thread_index] +=
+         task->thread_data.vis_counter - pq->start[task->thread_index];
+      pq->start[task->thread_index] = 0;
+      break;
+   case PIPE_QUERY_TIMESTAMP:
+      pq->end[task->thread_index] = os_time_get_nano();
+      break;
+   case PIPE_QUERY_PIPELINE_STATISTICS:
+      pq->end[task->thread_index] +=
+         task->ps_invocations - pq->start[task->thread_index];
+      pq->start[task->thread_index] = 0;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+
+
+/**
+ * Bin command: make arg.state the current rasterizer state for subsequent
+ * commands in this bin.
+ */
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+                  const union lp_rast_cmd_arg arg)
+{
+   task->state = arg.state;
+}
+
+
+
+/**
+ * Called when we're done writing to a color tile.
+ */
+static void
+lp_rast_tile_end(struct lp_rasterizer_task *task)
+{
+   unsigned i;
+
+   /* close out every query still active in this scene so its per-thread
+    * delta includes this tile's work */
+   for (i = 0; i < task->scene->num_active_queries; ++i) {
+      lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
+   }
+
+   /* debug */
+   memset(task->color_tiles, 0, sizeof(task->color_tiles));
+   task->depth_tile = NULL;
+
+   task->bin = NULL;
+}
+
+/* Bin-command dispatch table, indexed by the LP_RAST_OP_* opcode stored
+ * with each binned command (see do_rasterize_bin()).  The entry order
+ * must therefore match the LP_RAST_OP_* values exactly.
+ */
+static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
+{
+   lp_rast_clear_color,
+   lp_rast_clear_zstencil,
+   lp_rast_triangle_1,
+   lp_rast_triangle_2,
+   lp_rast_triangle_3,
+   lp_rast_triangle_4,
+   lp_rast_triangle_5,
+   lp_rast_triangle_6,
+   lp_rast_triangle_7,
+   lp_rast_triangle_8,
+   lp_rast_triangle_3_4,
+   lp_rast_triangle_3_16,
+   lp_rast_triangle_4_16,
+   lp_rast_shade_tile,
+   lp_rast_shade_tile_opaque,
+   lp_rast_begin_query,
+   lp_rast_end_query,
+   lp_rast_set_state,
+   lp_rast_triangle_32_1,
+   lp_rast_triangle_32_2,
+   lp_rast_triangle_32_3,
+   lp_rast_triangle_32_4,
+   lp_rast_triangle_32_5,
+   lp_rast_triangle_32_6,
+   lp_rast_triangle_32_7,
+   lp_rast_triangle_32_8,
+   lp_rast_triangle_32_3_4,
+   lp_rast_triangle_32_3_16,
+   lp_rast_triangle_32_4_16
+};
+
+
+/**
+ * Execute every binned command in one bin, in submission order,
+ * via the dispatch[] table above.
+ */
+static void
+do_rasterize_bin(struct lp_rasterizer_task *task,
+                 const struct cmd_bin *bin,
+                 int x, int y)
+{
+   const struct cmd_block *block;
+   unsigned k;
+
+   if (0)
+      lp_debug_bin(bin, x, y);
+
+   /* walk the bin's linked list of command blocks */
+   for (block = bin->head; block; block = block->next) {
+      for (k = 0; k < block->count; k++) {
+         dispatch[block->cmd[k]]( task, block->arg[k] );
+      }
+   }
+}
+
+
+
+/**
+ * Rasterize commands for a single bin.
+ * \param x, y position of the bin's tile in the framebuffer
+ * Must be called between lp_rast_begin() and lp_rast_end().
+ * Called per thread.
+ */
+static void
+rasterize_bin(struct lp_rasterizer_task *task,
+              const struct cmd_bin *bin, int x, int y )
+{
+   lp_rast_tile_begin( task, bin, x, y );
+
+   do_rasterize_bin(task, bin, x, y);
+
+   lp_rast_tile_end(task);
+
+
+   /* Debug/Perf flags:
+    * (bin->head is non-NULL here; empty bins are filtered out by the
+    * caller via is_empty_bin())
+    */
+   if (bin->head->count == 1) {
+      if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
+         LP_COUNT(nr_pure_shade_opaque_64);
+      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
+         LP_COUNT(nr_pure_shade_64);
+   }
+}
+
+
+/* An empty bin holds no commands at all: executing it would only load
+ * the tile's contents and store them back unchanged. This typically
+ * happens after a mid-frame flush or during incremental updates to a
+ * render target, so callers skip such bins to avoid pointless work.
+ */
+static boolean
+is_empty_bin( const struct cmd_bin *bin )
+{
+ if (bin->head)
+ return FALSE;
+ return TRUE;
+}
+
+
+/**
+ * Rasterize/execute all bins within a scene.
+ * Called per thread.
+ */
+static void
+rasterize_scene(struct lp_rasterizer_task *task,
+ struct lp_scene *scene)
+{
+ task->scene = scene;
+
+ /* LP_NO_RAST and discarded scenes skip the actual rasterization. */
+ if (!task->rast->no_rast && !scene->discard) {
+ /* loop over scene bins, rasterize each */
+ {
+ struct cmd_bin *bin;
+ int i, j;
+
+ assert(scene);
+ while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
+ if (!is_empty_bin( bin ))
+ rasterize_bin(task, bin, i, j);
+ }
+ }
+ }
+
+
+ /* Signal the fence even when rasterization was skipped above, so
+ * that anyone waiting on it is still released. */
+ if (scene->fence) {
+ lp_fence_signal(scene->fence);
+ }
+
+ task->scene = NULL;
+}
+
+
+/**
+ * Called by setup module when it has something for us to render.
+ */
+void
+lp_rast_queue_scene( struct lp_rasterizer *rast,
+ struct lp_scene *scene)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ if (rast->num_threads == 0) {
+ /* no threading: rasterize synchronously on the calling thread */
+ unsigned fpstate = util_fpstate_get();
+
+ /* Make sure that denorms are treated like zeros. This is
+ * the behavior required by D3D10. OpenGL doesn't care.
+ */
+ util_fpstate_set_denorms_to_zero(fpstate);
+
+ lp_rast_begin( rast, scene );
+
+ rasterize_scene( &rast->tasks[0], scene );
+
+ lp_rast_end( rast );
+
+ /* restore the caller's FPU state */
+ util_fpstate_set(fpstate);
+
+ rast->curr_scene = NULL;
+ }
+ else {
+ /* threaded rendering! */
+ unsigned i;
+
+ lp_scene_enqueue( rast->full_scenes, scene );
+
+ /* signal the threads that there's work to do; thread 0 dequeues
+ * the scene inside thread_function() */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_signal(&rast->tasks[i].work_ready);
+ }
+ }
+
+ LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
+}
+
+
+/* Block until every rasterizer thread has signalled completion of its
+ * current work. In the zero-thread (synchronous) configuration there
+ * is nothing to wait for.
+ */
+void
+lp_rast_finish( struct lp_rasterizer *rast )
+{
+ unsigned i;
+
+ if (rast->num_threads == 0)
+ return;
+
+ /* wait for each worker to post its "work done" semaphore */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_wait(&rast->tasks[i].work_done);
+ }
+}
+
+
+/**
+ * This is the thread's main entrypoint.
+ * It's a simple loop:
+ * 1. wait for work
+ * 2. do work
+ * 3. signal that we're done
+ */
+static PIPE_THREAD_ROUTINE( thread_function, init_data )
+{
+ struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
+ struct lp_rasterizer *rast = task->rast;
+ boolean debug = false;
+ char thread_name[16];
+ unsigned fpstate;
+
+ util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index);
+ pipe_thread_setname(thread_name);
+
+ /* Make sure that denorms are treated like zeros. This is
+ * the behavior required by D3D10. OpenGL doesn't care.
+ */
+ fpstate = util_fpstate_get();
+ util_fpstate_set_denorms_to_zero(fpstate);
+
+ while (1) {
+ /* wait for work */
+ if (debug)
+ debug_printf("thread %d waiting for work\n", task->thread_index);
+ pipe_semaphore_wait(&task->work_ready);
+
+ /* lp_rast_destroy() sets exit_flag and signals work_ready to
+ * shut us down */
+ if (rast->exit_flag)
+ break;
+
+ if (task->thread_index == 0) {
+ /* thread[0]:
+ * - get next scene to rasterize
+ * - map the framebuffer surfaces
+ */
+ lp_rast_begin( rast,
+ lp_scene_dequeue( rast->full_scenes, TRUE ) );
+ }
+
+ /* Wait for all threads to get here so that threads[1+] don't
+ * get a null rast->curr_scene pointer.
+ */
+ pipe_barrier_wait( &rast->barrier );
+
+ /* do work */
+ if (debug)
+ debug_printf("thread %d doing work\n", task->thread_index);
+
+ rasterize_scene(task,
+ rast->curr_scene);
+
+ /* wait for all threads to finish with this scene */
+ pipe_barrier_wait( &rast->barrier );
+
+ /* XXX: shouldn't be necessary:
+ */
+ if (task->thread_index == 0) {
+ lp_rast_end( rast );
+ }
+
+ /* signal done with work */
+ if (debug)
+ debug_printf("thread %d done working\n", task->thread_index);
+
+ pipe_semaphore_signal(&task->work_done);
+ }
+
+#ifdef _WIN32
+ /* Signal once more on exit: lp_rast_destroy() waits on work_done
+ * instead of joining the thread on Windows (see bug 76252). */
+ pipe_semaphore_signal(&task->work_done);
+#endif
+
+ return 0;
+}
+
+
+/**
+ * Initialize semaphores and spawn the threads.
+ */
+static void
+create_rast_threads(struct lp_rasterizer *rast)
+{
+ unsigned i;
+
+ /* NOTE: if num_threads is zero, we won't use any threads */
+ /* NOTE(review): the pipe_thread_create() return value is not
+ * checked; a failed spawn leaves a dead handle in threads[i] --
+ * verify callers tolerate that. */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
+ pipe_semaphore_init(&rast->tasks[i].work_done, 0);
+ rast->threads[i] = pipe_thread_create(thread_function,
+ (void *) &rast->tasks[i]);
+ }
+}
+
+
+
+/**
+ * Create new lp_rasterizer. If num_threads is zero, don't create any
+ * new threads, do rendering synchronously.
+ * \param num_threads number of rasterizer threads to create
+ * \return the new rasterizer, or NULL on allocation failure
+ */
+struct lp_rasterizer *
+lp_rast_create( unsigned num_threads )
+{
+ struct lp_rasterizer *rast;
+ unsigned i;
+
+ rast = CALLOC_STRUCT(lp_rasterizer);
+ if (!rast) {
+ goto no_rast;
+ }
+
+ rast->full_scenes = lp_scene_queue_create();
+ if (!rast->full_scenes) {
+ goto no_full_scenes;
+ }
+
+ /* give each task its back pointer and index before any thread starts */
+ for (i = 0; i < Elements(rast->tasks); i++) {
+ struct lp_rasterizer_task *task = &rast->tasks[i];
+ task->rast = rast;
+ task->thread_index = i;
+ }
+
+ rast->num_threads = num_threads;
+
+ rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);
+
+ create_rast_threads(rast);
+
+ /* for synchronizing rasterization threads */
+ pipe_barrier_init( &rast->barrier, rast->num_threads );
+
+ /* lp_dummy_tile is shared scratch storage -- presumably used when no
+ * real buffer is bound; defined elsewhere (lp_memory) -- verify. */
+ memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
+
+ return rast;
+
+ /* error unwinding, in reverse order of acquisition */
+no_full_scenes:
+ FREE(rast);
+no_rast:
+ return NULL;
+}
+
+
+/* Shutdown: stop the worker threads, then release all rasterizer
+ * resources (per-task semaphores, barrier, scene queue, the object).
+ */
+void lp_rast_destroy( struct lp_rasterizer *rast )
+{
+ unsigned i;
+
+ /* Set exit_flag and signal each thread's work_ready semaphore.
+ * Each thread will be woken up, notice that the exit_flag is set and
+ * break out of its main loop. The thread will then exit.
+ */
+ rast->exit_flag = TRUE;
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_signal(&rast->tasks[i].work_ready);
+ }
+
+ /* Wait for threads to terminate before cleaning up per-thread data.
+ * We don't actually call pipe_thread_wait to avoid dead lock on Windows
+ * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */
+ for (i = 0; i < rast->num_threads; i++) {
+#ifdef _WIN32
+ /* matched by the extra work_done signal at thread_function exit */
+ pipe_semaphore_wait(&rast->tasks[i].work_done);
+#else
+ pipe_thread_wait(rast->threads[i]);
+#endif
+ }
+
+ /* Clean up per-thread data */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_destroy(&rast->tasks[i].work_ready);
+ pipe_semaphore_destroy(&rast->tasks[i].work_done);
+ }
+
+ /* for synchronizing rasterization threads */
+ pipe_barrier_destroy( &rast->barrier );
+
+ lp_scene_queue_destroy(rast->full_scenes);
+
+ FREE(rast);
+}
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h
new file mode 100644
index 000000000..c19f93180
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -0,0 +1,324 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * The rast code is concerned with rasterization of command bins.
+ * Each screen tile has a bin associated with it. To render the
+ * scene we iterate over the tile bins and execute the commands
+ * in each bin.
+ * We'll do that with multiple threads...
+ */
+
+
+#ifndef LP_RAST_H
+#define LP_RAST_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_pack_color.h"
+#include "lp_jit.h"
+
+
+struct lp_rasterizer;
+struct lp_scene;
+struct lp_fence;
+struct cmd_bin;
+
+#define FIXED_TYPE_WIDTH 64
+/** For sub-pixel positioning */
+#define FIXED_ORDER 8
+#define FIXED_ONE (1<<FIXED_ORDER)
+#define FIXED_SHIFT (FIXED_TYPE_WIDTH - 1)
+/** Maximum length of an edge in a primitive in pixels.
+ * If the framebuffer is large we have to think about fixed-point
+ * integer overflow. Coordinates need ((FIXED_TYPE_WIDTH/2) - 1) bits
+ * to be able to fit product of two such coordinates inside
+ * FIXED_TYPE_WIDTH, any larger and we could overflow a
+ * FIXED_TYPE_WIDTH_-bit int.
+ */
+#define MAX_FIXED_LENGTH (1 << (((FIXED_TYPE_WIDTH/2) - 1) - FIXED_ORDER))
+
+#define MAX_FIXED_LENGTH32 (1 << (((32/2) - 1) - FIXED_ORDER))
+
+/* Rasterizer output size going to jit fs, width/height */
+#define LP_RASTER_BLOCK_SIZE 4
+
+#define LP_MAX_ACTIVE_BINNED_QUERIES 64
+
+#define IMUL64(a, b) (((int64_t)(a)) * ((int64_t)(b)))
+
+struct lp_rasterizer_task;
+
+
+/**
+ * Rasterization state.
+ * Objects of this type are put into the shared data bin and pointed
+ * to by commands in the per-tile bins.
+ */
+struct lp_rast_state {
+ /* State for the shader. This also contains state which feeds into
+ * the fragment shader, such as blend color and alpha ref value.
+ */
+ struct lp_jit_context jit_context;
+
+ /* The shader itself. Probably we also need to pass a pointer to
+ * the tile color/z/stencil data somehow
+ */
+ struct lp_fragment_shader_variant *variant;
+};
+
+
+/**
+ * Coefficients necessary to run the shader at a given location.
+ * First coefficient is position.
+ * These pointers point into the bin data buffer.
+ */
+struct lp_rast_shader_inputs {
+ unsigned frontfacing:1; /**< True for front-facing */
+ unsigned disable:1; /**< Partially binned, disable this command */
+ unsigned opaque:1; /**< Is opaque */
+ unsigned pad0:29; /* wasted space */
+ unsigned stride; /* how much to advance data between a0, dadx, dady */
+ unsigned layer; /* the layer to render to (from gs, already clamped) */
+ unsigned viewport_index; /* the active viewport index (from gs, already clamped) */
+ /* followed by a0, dadx, dady and planes[] (see GET_A0/GET_DADX/
+ * GET_DADY/GET_PLANES below for the exact layout) */
+};
+
+/** One triangle edge-function plane, stepped across the tile. */
+struct lp_rast_plane {
+ /* edge function value -- presumably evaluated at (minx,miny); the
+ * original "??" suggests this was never confirmed -- verify in setup */
+ int64_t c;
+
+ int32_t dcdx;
+ int32_t dcdy;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ int64_t eo;
+};
+
+/**
+ * Rasterization information for a triangle known to be in this bin,
+ * plus inputs to run the shader:
+ * These fields are tile- and bin-independent.
+ * Objects of this type are put into the lp_setup_context::data buffer.
+ */
+struct lp_rast_triangle {
+#ifdef DEBUG
+ /* debug-only copy of the three screen-space vertex positions */
+ float v[3][2];
+ float pad0;
+ float pad1;
+#endif
+
+ /* inputs for the shader */
+ struct lp_rast_shader_inputs inputs;
+ /* planes are also allocated here */
+};
+
+
+/** Binned clear-color argument: packed clear value + target cbuf index. */
+struct lp_rast_clear_rb {
+ union util_color color_val;
+ unsigned cbuf;
+};
+
+
+/* Accessors for the data trailing lp_rast_shader_inputs: the a0, dadx
+ * and dady coefficient arrays follow the struct at successive 'stride'
+ * offsets, and the planes[] array follows all three. */
+#define GET_A0(inputs) ((float (*)[4])((inputs)+1))
+#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride))
+#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride))
+#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride))
+
+
+
+struct lp_rasterizer *
+lp_rast_create( unsigned num_threads );
+
+void
+lp_rast_destroy( struct lp_rasterizer * );
+
+void
+lp_rast_queue_scene( struct lp_rasterizer *rast,
+ struct lp_scene *scene );
+
+void
+lp_rast_finish( struct lp_rasterizer *rast );
+
+
+/** Argument payload stored next to each binned opcode; the opcode
+ * determines which member is valid. */
+union lp_rast_cmd_arg {
+ const struct lp_rast_shader_inputs *shade_tile;
+ struct {
+ const struct lp_rast_triangle *tri;
+ unsigned plane_mask;
+ } triangle;
+ const struct lp_rast_state *set_state;
+ const struct lp_rast_clear_rb *clear_rb;
+ struct {
+ uint64_t value;
+ uint64_t mask;
+ } clear_zstencil;
+ const struct lp_rast_state *state;
+ struct lp_fence *fence;
+ struct llvmpipe_query *query_obj;
+};
+
+
+/* Cast wrappers. Hopefully these compile to noops!
+ */
+
+/** Wrap shader inputs for a shade-tile command. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
+{
+ union lp_rast_cmd_arg arg;
+ arg.shade_tile = shade_tile;
+ return arg;
+}
+
+/** Wrap a triangle plus the mask of planes still to be tested. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
+ unsigned plane_mask)
+{
+ union lp_rast_cmd_arg arg;
+ arg.triangle.tri = triangle;
+ arg.triangle.plane_mask = plane_mask;
+ return arg;
+}
+
+/**
+ * Build argument for a contained triangle.
+ *
+ * All planes are enabled, so instead of the plane mask we pass the upper
+ * left coordinates of the a block that fully encloses the triangle.
+ */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle,
+ unsigned x, unsigned y)
+{
+ union lp_rast_cmd_arg arg;
+ arg.triangle.tri = triangle;
+ /* x in low byte, y in the next byte -- NOT a plane mask */
+ arg.triangle.plane_mask = x | (y << 8);
+ return arg;
+}
+
+/** Wrap a state object for a set-state command. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_state( const struct lp_rast_state *state )
+{
+ union lp_rast_cmd_arg arg;
+ arg.set_state = state;
+ return arg;
+}
+
+/** Wrap a fence object. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_fence( struct lp_fence *fence )
+{
+ union lp_rast_cmd_arg arg;
+ arg.fence = fence;
+ return arg;
+}
+
+
+/** Wrap a depth/stencil clear value and write mask. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
+{
+ union lp_rast_cmd_arg arg;
+ arg.clear_zstencil.value = value;
+ arg.clear_zstencil.mask = mask;
+ return arg;
+}
+
+
+/** Wrap a query object for begin/end-query commands. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_query( struct llvmpipe_query *pq )
+{
+ union lp_rast_cmd_arg arg;
+ arg.query_obj = pq;
+ return arg;
+}
+
+/** Empty argument for commands that take none. */
+static inline union lp_rast_cmd_arg
+lp_rast_arg_null( void )
+{
+ union lp_rast_cmd_arg arg;
+ arg.set_state = NULL;
+ return arg;
+}
+
+
+/**
+ * Binnable Commands.
+ * These get put into bins by the setup code and are called when
+ * the bins are executed.
+ */
+#define LP_RAST_OP_CLEAR_COLOR 0x0
+#define LP_RAST_OP_CLEAR_ZSTENCIL 0x1
+#define LP_RAST_OP_TRIANGLE_1 0x2
+#define LP_RAST_OP_TRIANGLE_2 0x3
+#define LP_RAST_OP_TRIANGLE_3 0x4
+#define LP_RAST_OP_TRIANGLE_4 0x5
+#define LP_RAST_OP_TRIANGLE_5 0x6
+#define LP_RAST_OP_TRIANGLE_6 0x7
+#define LP_RAST_OP_TRIANGLE_7 0x8
+#define LP_RAST_OP_TRIANGLE_8 0x9
+#define LP_RAST_OP_TRIANGLE_3_4 0xa
+#define LP_RAST_OP_TRIANGLE_3_16 0xb
+#define LP_RAST_OP_TRIANGLE_4_16 0xc
+#define LP_RAST_OP_SHADE_TILE 0xd
+#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe
+#define LP_RAST_OP_BEGIN_QUERY 0xf
+#define LP_RAST_OP_END_QUERY 0x10
+#define LP_RAST_OP_SET_STATE 0x11
+#define LP_RAST_OP_TRIANGLE_32_1 0x12
+#define LP_RAST_OP_TRIANGLE_32_2 0x13
+#define LP_RAST_OP_TRIANGLE_32_3 0x14
+#define LP_RAST_OP_TRIANGLE_32_4 0x15
+#define LP_RAST_OP_TRIANGLE_32_5 0x16
+#define LP_RAST_OP_TRIANGLE_32_6 0x17
+#define LP_RAST_OP_TRIANGLE_32_7 0x18
+#define LP_RAST_OP_TRIANGLE_32_8 0x19
+#define LP_RAST_OP_TRIANGLE_32_3_4 0x1a
+#define LP_RAST_OP_TRIANGLE_32_3_16 0x1b
+#define LP_RAST_OP_TRIANGLE_32_4_16 0x1c
+
+#define LP_RAST_OP_MAX 0x1d
+#define LP_RAST_OP_MASK 0xff
+
+void
+lp_debug_bins( struct lp_scene *scene );
+void
+lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene );
+void
+lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
+
+
+#ifdef PIPE_ARCH_SSE
+#include <emmintrin.h>
+#include "util/u_sse.h"
+
+/* Pack one plane's (c, dcdx, dcdy, eo) into a single SSE register.
+ * NOTE(review): the 64-bit c and eo are deliberately truncated to
+ * 32 bits -- presumably only valid on the 32-bit triangle paths
+ * (LP_RAST_OP_TRIANGLE_32_*); confirm against the callers. */
+static inline __m128i
+lp_plane_to_m128i(const struct lp_rast_plane *plane)
+{
+ return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
+ (int32_t)plane->dcdy, (int32_t)plane->eo);
+}
+
+#endif
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c
new file mode 100644
index 000000000..b5ae9dadf
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c
@@ -0,0 +1,438 @@
+#include <inttypes.h> /* for PRIu64 macro */
+#include "util/u_math.h"
+#include "lp_rast_priv.h"
+#include "lp_state_fs.h"
+
+/* Accumulated per-tile debug info: ASCII coverage map plus counters. */
+struct tile {
+ int coverage; /* pixels written at least once */
+ int overdraw; /* additional writes to already-covered pixels */
+ const struct lp_rast_state *state;
+ char data[TILE_SIZE][TILE_SIZE];
+};
+
+/* Map command index 0..61 to '0'-'9','a'-'z','A'-'Z'; '?' past that. */
+static char get_label( int i )
+{
+ static const char *cmd_labels = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ unsigned max_label = (2*26+10);
+
+ if (i < max_label)
+ return cmd_labels[i];
+ else
+ return '?';
+}
+
+
+
+/* Human-readable names, indexed by opcode: order MUST match the
+ * LP_RAST_OP_* values in lp_rast.h. */
+static const char *cmd_names[LP_RAST_OP_MAX] =
+{
+ "clear_color",
+ "clear_zstencil",
+ "triangle_1",
+ "triangle_2",
+ "triangle_3",
+ "triangle_4",
+ "triangle_5",
+ "triangle_6",
+ "triangle_7",
+ "triangle_8",
+ "triangle_3_4",
+ "triangle_3_16",
+ "triangle_4_16",
+ "shade_tile",
+ "shade_tile_opaque",
+ "begin_query",
+ "end_query",
+ "set_state",
+ "triangle_32_1",
+ "triangle_32_2",
+ "triangle_32_3",
+ "triangle_32_4",
+ "triangle_32_5",
+ "triangle_32_6",
+ "triangle_32_7",
+ "triangle_32_8",
+ "triangle_32_3_4",
+ "triangle_32_3_16",
+ "triangle_32_4_16",
+};
+
+/* Opcode -> name, asserting the opcode is in range. */
+static const char *cmd_name(unsigned cmd)
+{
+ assert(Elements(cmd_names) > cmd);
+ return cmd_names[cmd];
+}
+
+/* Return the shader variant that command block->cmd[k] would run under
+ * 'state', or NULL when the command runs no fragment shader.
+ * NOTE(review): only SHADE_TILE(_OPAQUE) and TRIANGLE_1..7 are mapped;
+ * triangle_8, the *_3_4/*_16 specializations and all 32-bit triangle
+ * ops fall through to NULL (and thus report as non-blended below) --
+ * verify whether that is intentional for this debug helper. */
+static const struct lp_fragment_shader_variant *
+get_variant( const struct lp_rast_state *state,
+ const struct cmd_block *block,
+ int k )
+{
+ if (!state)
+ return NULL;
+
+ if (block->cmd[k] == LP_RAST_OP_SHADE_TILE ||
+ block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_1 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_2 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_3 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_4 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_5 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_6 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_7)
+ return state->variant;
+
+ return NULL;
+}
+
+
+/* Would command block->cmd[k] blend into render target 0? */
+static boolean
+is_blend( const struct lp_rast_state *state,
+ const struct cmd_block *block,
+ int k )
+{
+ const struct lp_fragment_shader_variant *variant = get_variant(state, block, k);
+
+ if (variant)
+ return variant->key.blend.rt[0].blend_enable;
+
+ return FALSE;
+}
+
+
+
+/* Print one bin's command list, tracking SET_STATE so that each
+ * command can be annotated with its blend status. */
+static void
+debug_bin( const struct cmd_bin *bin, int x, int y )
+{
+ const struct lp_rast_state *state = NULL;
+ const struct cmd_block *head = bin->head;
+ int i, j = 0;
+
+ debug_printf("bin %d,%d:\n", x, y);
+
+ while (head) {
+ for (i = 0; i < head->count; i++, j++) {
+ if (head->cmd[i] == LP_RAST_OP_SET_STATE)
+ state = head->arg[i].state;
+
+ debug_printf("%d: %s %s\n", j,
+ cmd_name(head->cmd[i]),
+ is_blend(state, head, i) ? "blended" : "");
+ }
+ head = head->next;
+ }
+}
+
+
+/* Record one pixel into the tile map: first write counts as coverage,
+ * later writes as overdraw. The 'blend' parameter is currently unused. */
+static void plot(struct tile *tile,
+ int x, int y,
+ char val,
+ boolean blend)
+{
+ if (tile->data[x][y] == ' ')
+ tile->coverage++;
+ else
+ tile->overdraw++;
+
+ tile->data[x][y] = val;
+}
+
+
+
+
+
+
+/* Simulate a whole-tile shade command: mark every pixel of the tile
+ * with 'val'. Returns the number of pixels touched (0 if there is no
+ * current state or the command was disabled by partial binning). */
+static int
+debug_shade_tile(int x, int y,
+ const union lp_rast_cmd_arg arg,
+ struct tile *tile,
+ char val)
+{
+ const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ boolean blend;
+ unsigned i,j;
+
+ if (!tile->state)
+ return 0;
+
+ blend = tile->state->variant->key.blend.rt[0].blend_enable;
+
+ if (inputs->disable)
+ return 0;
+
+ for (i = 0; i < TILE_SIZE; i++)
+ for (j = 0; j < TILE_SIZE; j++)
+ plot(tile, i, j, val, blend);
+
+ return TILE_SIZE * TILE_SIZE;
+}
+
+/* Simulate a clear command: mark every pixel of the tile with 'val'. */
+static int
+debug_clear_tile(int x, int y,
+ const union lp_rast_cmd_arg arg,
+ struct tile *tile,
+ char val)
+{
+ unsigned i,j;
+
+ for (i = 0; i < TILE_SIZE; i++)
+ for (j = 0; j < TILE_SIZE; j++)
+ plot(tile, i, j, val, FALSE);
+
+ return TILE_SIZE * TILE_SIZE;
+
+}
+
+
+/* Simulate rasterization of one binned triangle: step its edge planes
+ * across the tile and mark every in-triangle pixel with 'val'.
+ * Returns the number of pixels covered. Only called for the
+ * plane-mask triangle opcodes (TRIANGLE_1..7), never for "contained"
+ * triangles whose plane_mask field encodes coordinates instead. */
+static int
+debug_triangle(int tilex, int tiley,
+ const union lp_rast_cmd_arg arg,
+ struct tile *tile,
+ char val)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ unsigned plane_mask = arg.triangle.plane_mask;
+ const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
+ struct lp_rast_plane plane[8];
+ int x, y;
+ int count = 0;
+ unsigned i, nr_planes = 0;
+ boolean blend;
+
+ /* Bug fix: guard against a triangle binned before any SET_STATE
+ * command -- tile->state starts out NULL in do_debug_bin() and
+ * debug_shade_tile() guards against exactly this case. */
+ if (!tile->state)
+ return 0;
+
+ blend = tile->state->variant->key.blend.rt[0].blend_enable;
+
+ if (tri->inputs.disable) {
+ /* This triangle was partially binned and has been disabled */
+ return 0;
+ }
+
+ /* translate the plane constants to this tile's origin */
+ while (plane_mask) {
+ plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)];
+ plane[nr_planes].c = (plane[nr_planes].c +
+ IMUL64(plane[nr_planes].dcdy, tiley) -
+ IMUL64(plane[nr_planes].dcdx, tilex));
+ nr_planes++;
+ }
+
+ for(y = 0; y < TILE_SIZE; y++)
+ {
+ for(x = 0; x < TILE_SIZE; x++)
+ {
+ /* the pixel is inside only if every plane is positive */
+ for (i = 0; i < nr_planes; i++)
+ if (plane[i].c <= 0)
+ goto out;
+
+ plot(tile, x, y, val, blend);
+ count++;
+
+ out:
+ for (i = 0; i < nr_planes; i++)
+ plane[i].c -= plane[i].dcdx;
+ }
+
+ /* undo the row's x steps and advance one row in y */
+ for (i = 0; i < nr_planes; i++) {
+ plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE);
+ plane[i].c += plane[i].dcdy;
+ }
+ }
+ return count;
+}
+
+
+
+
+
+/* Replay one bin's commands into 'tile', accumulating the ASCII
+ * coverage map and coverage/overdraw counters; optionally print each
+ * command with its pixel count as it is replayed. */
+static void
+do_debug_bin( struct tile *tile,
+ const struct cmd_bin *bin,
+ int x, int y,
+ boolean print_cmds)
+{
+ unsigned k, j = 0;
+ const struct cmd_block *block;
+
+ int tx = x * TILE_SIZE;
+ int ty = y * TILE_SIZE;
+
+ memset(tile->data, ' ', sizeof tile->data);
+ tile->coverage = 0;
+ tile->overdraw = 0;
+ tile->state = NULL;
+
+ for (block = bin->head; block; block = block->next) {
+ for (k = 0; k < block->count; k++, j++) {
+ boolean blend = is_blend(tile->state, block, k);
+ char val = get_label(j);
+ int count = 0;
+
+ if (print_cmds)
+ debug_printf("%c: %15s", val, cmd_name(block->cmd[k]));
+
+ if (block->cmd[k] == LP_RAST_OP_SET_STATE)
+ tile->state = block->arg[k].state;
+
+ if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR ||
+ block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL)
+ count = debug_clear_tile(tx, ty, block->arg[k], tile, val);
+
+ if (block->cmd[k] == LP_RAST_OP_SHADE_TILE ||
+ block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE)
+ count = debug_shade_tile(tx, ty, block->arg[k], tile, val);
+
+ /* only the plane-mask triangle ops are simulated here */
+ if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_2 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_3 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_4 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_5 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_6 ||
+ block->cmd[k] == LP_RAST_OP_TRIANGLE_7)
+ count = debug_triangle(tx, ty, block->arg[k], tile, val);
+
+ if (print_cmds) {
+ debug_printf(" % 5d", count);
+
+ if (blend)
+ debug_printf(" blended");
+
+ debug_printf("\n");
+ }
+ }
+ }
+}
+
+/* Dump one bin: its command list, an ASCII picture of the covered
+ * pixels, and the average overdraw per covered pixel.
+ * NOTE(review): this prints tile.data[y][x] while plot() stores into
+ * data[x][y], so the picture appears transposed relative to plot's
+ * coordinates -- confirm whether that is intended. */
+void
+lp_debug_bin( const struct cmd_bin *bin, int i, int j)
+{
+ struct tile tile;
+ int x,y;
+
+ if (bin->head) {
+ do_debug_bin(&tile, bin, i, j, TRUE);
+
+ debug_printf("------------------------------------------------------------------\n");
+ for (y = 0; y < TILE_SIZE; y++) {
+ for (x = 0; x < TILE_SIZE; x++) {
+ debug_printf("%c", tile.data[y][x]);
+ }
+ debug_printf("|\n");
+ }
+ debug_printf("------------------------------------------------------------------\n");
+
+ debug_printf("each pixel drawn avg %f times\n",
+ ((float)tile.overdraw + tile.coverage)/(float)tile.coverage);
+ }
+}
+
+
+
+
+
+
+/** Return number of bytes used for a single bin:
+ * one opcode byte plus one cmd-arg union per binned command. */
+static unsigned
+lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y )
+{
+ struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y);
+ const struct cmd_block *cmd;
+ unsigned size = 0;
+ for (cmd = bin->head; cmd; cmd = cmd->next) {
+ size += (cmd->count *
+ (sizeof(uint8_t) + sizeof(union lp_rast_cmd_arg)));
+ }
+ return size;
+}
+
+
+
+/* Print an ASCII map of the scene: one character per bin, '*' for
+ * fully covered, '0'-'9' for partial coverage deciles, '?' for bins
+ * with commands but no coverage, ' ' for empty bins; followed by
+ * per-scene and running coverage percentages.
+ * Assumes TILE_SIZE == 64 (the 64*64 constants below) -- TODO confirm.
+ * The static running totals make this debug-only and not thread-safe. */
+void
+lp_debug_draw_bins_by_coverage( struct lp_scene *scene )
+{
+ unsigned x, y;
+ unsigned total = 0;
+ unsigned possible = 0;
+ static uint64_t _total = 0;
+ static uint64_t _possible = 0;
+
+ for (x = 0; x < scene->tiles_x; x++)
+ debug_printf("-");
+ debug_printf("\n");
+
+ for (y = 0; y < scene->tiles_y; y++) {
+ for (x = 0; x < scene->tiles_x; x++) {
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ const char *bits = "0123456789";
+ struct tile tile;
+
+ if (bin->head) {
+ do_debug_bin(&tile, bin, x, y, FALSE);
+
+ total += tile.coverage;
+ possible += 64*64;
+
+ if (tile.coverage == 64*64)
+ debug_printf("*");
+ else if (tile.coverage) {
+ int bit = tile.coverage/(64.0*64.0)*10;
+ /* Bug fix: clamp to 9, the last digit -- index 10 would
+ * print the string's NUL terminator. */
+ debug_printf("%c", bits[MIN2(bit,9)]);
+ }
+ else
+ debug_printf("?");
+ }
+ else {
+ debug_printf(" ");
+ }
+ }
+ debug_printf("|\n");
+ }
+
+ for (x = 0; x < scene->tiles_x; x++)
+ debug_printf("-");
+ debug_printf("\n");
+
+ debug_printf("this tile total: %u possible %u: percentage: %f\n",
+ total,
+ possible,
+ total * 100.0 / (float)possible);
+
+ _total += total;
+ _possible += possible;
+
+
+ debug_printf("overall total: %" PRIu64
+ " possible %" PRIu64 ": percentage: %f\n",
+ _total,
+ _possible,
+ (double) _total * 100.0 / (double)_possible);
+}
+
+
+/* Print an ASCII map of the scene where each bin's character encodes
+ * log2 of its command-data size. The glyph string holds 35 chars, so
+ * the MIN2(sz2,32) clamp stays in bounds. */
+void
+lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene )
+{
+ unsigned x, y;
+
+ for (y = 0; y < scene->tiles_y; y++) {
+ for (x = 0; x < scene->tiles_x; x++) {
+ const char *bits = " ...,-~:;=o+xaw*#XAWWWWWWWWWWWWWWWW";
+ unsigned sz = lp_scene_bin_size(scene, x, y);
+ unsigned sz2 = util_logbase2(sz);
+ debug_printf("%c", bits[MIN2(sz2,32)]);
+ }
+ debug_printf("\n");
+ }
+}
+
+
+/* Dump the command list of every non-empty bin in the scene. */
+void
+lp_debug_bins( struct lp_scene *scene )
+{
+ unsigned x, y;
+
+ for (y = 0; y < scene->tiles_y; y++) {
+ for (x = 0; x < scene->tiles_x; x++) {
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ if (bin->head) {
+ debug_bin(bin, x, y);
+ }
+ }
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
new file mode 100644
index 000000000..9aa7e8746
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -0,0 +1,347 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_RAST_PRIV_H
+#define LP_RAST_PRIV_H
+
+#include "os/os_thread.h"
+#include "util/u_format.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_memory.h"
+#include "lp_rast.h"
+#include "lp_scene.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+#include "lp_limits.h"
+
+
+#define TILE_VECTOR_HEIGHT 4
+#define TILE_VECTOR_WIDTH 4
+
+/* If we crash in a jitted function, we can examine jit_line and jit_state
+ * to get some info. This is not thread-safe, however.
+ */
+#ifdef DEBUG
+
+struct lp_rasterizer_task;
+extern int jit_line;
+extern const struct lp_rast_state *jit_state;
+extern const struct lp_rasterizer_task *jit_task;
+
+#define BEGIN_JIT_CALL(state, task) \
+ do { \
+ jit_line = __LINE__; \
+ jit_state = state; \
+ jit_task = task; \
+ } while (0)
+
+#define END_JIT_CALL() \
+ do { \
+ jit_line = 0; \
+ jit_state = NULL; \
+ } while (0)
+
+#else
+
+#define BEGIN_JIT_CALL(X, Y)
+#define END_JIT_CALL()
+
+#endif
+
+
+struct lp_rasterizer;
+struct cmd_bin;
+
+/**
+ * Per-thread rasterization state
+ */
+struct lp_rasterizer_task
+{
+ /* bin currently being executed; cleared in lp_rast_tile_end() */
+ const struct cmd_bin *bin;
+ /* most recent LP_RAST_OP_SET_STATE state */
+ const struct lp_rast_state *state;
+
+ struct lp_scene *scene;
+ unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
+ unsigned width, height; /**< width, height of current tile, in pixels */
+
+ /* Per-tile pointers into the color/depth buffers -- presumably set
+ * at tile begin (cleared at tile end); confirm in lp_rast_tile_begin. */
+ uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
+ uint8_t *depth_tile;
+
+ /** "back" pointer */
+ struct lp_rasterizer *rast;
+
+ /** "my" index */
+ unsigned thread_index;
+
+ /** Non-interpolated passthru state and occlude counter for visible pixels */
+ struct lp_jit_thread_data thread_data;
+ uint64_t ps_invocations;
+ uint8_t ps_inv_multiplier;
+
+ /* signalled by lp_rast_queue_scene() when work is available */
+ pipe_semaphore work_ready;
+ /* signalled by the worker when a scene is finished */
+ pipe_semaphore work_done;
+};
+
+
+/**
+ * This is the state required while rasterizing tiles.
+ * Note that this contains per-thread information too.
+ * The tile size is TILE_SIZE x TILE_SIZE pixels.
+ */
+struct lp_rasterizer
+{
+ boolean exit_flag; /* set by lp_rast_destroy() to stop the workers */
+ boolean no_rast; /**< For debugging/profiling (LP_NO_RAST) */
+
+ /** The incoming queue of scenes ready to rasterize */
+ struct lp_scene_queue *full_scenes;
+
+ /** The scene currently being rasterized by the threads */
+ struct lp_scene *curr_scene;
+
+ /** A task object for each rasterization thread */
+ struct lp_rasterizer_task tasks[LP_MAX_THREADS];
+
+ unsigned num_threads;
+ pipe_thread threads[LP_MAX_THREADS];
+
+ /** For synchronizing the rasterization threads */
+ pipe_barrier barrier;
+};
+
+
+void
+lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ unsigned mask);
+
+
+/**
+ * Get the pointer to a 4x4 color block (within a 64x64 tile).
+ * \param x, y location of 4x4 block in window coords
+ * \param buf color buffer index (< fb.nr_cbufs)
+ * \param layer layer of the render target to address (0 = first)
+ */
+static inline uint8_t *
+lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
+ unsigned buf, unsigned x, unsigned y,
+ unsigned layer)
+{
+ unsigned px, py, pixel_offset;
+ uint8_t *color;
+
+ assert(x < task->scene->tiles_x * TILE_SIZE);
+ assert(y < task->scene->tiles_y * TILE_SIZE);
+ assert((x % TILE_VECTOR_WIDTH) == 0);
+ assert((y % TILE_VECTOR_HEIGHT) == 0);
+ assert(buf < task->scene->fb.nr_cbufs);
+
+ assert(task->color_tiles[buf]);
+
+ /*
+ * We don't actually benefit from having per tile cbuf/zsbuf pointers,
+ * it's just extra work - the mul/add would be exactly the same anyway.
+ * Fortunately the extra work (modulo) here is very cheap at least...
+ */
+ px = x % TILE_SIZE;
+ py = y % TILE_SIZE;
+
+ pixel_offset = px * task->scene->cbufs[buf].format_bytes +
+ py * task->scene->cbufs[buf].stride;
+ color = task->color_tiles[buf] + pixel_offset;
+
+ /* step into the requested array layer / 3D slice */
+ if (layer) {
+ color += layer * task->scene->cbufs[buf].layer_stride;
+ }
+
+ assert(lp_check_alignment(color, llvmpipe_get_format_alignment(task->scene->fb.cbufs[buf]->format)));
+ return color;
+}
+
+
+/**
+ * Get the pointer to a 4x4 depth block (within a 64x64 tile).
+ * \param x, y location of 4x4 block in window coords
+ * \param layer layer of the depth/stencil target (0 = first)
+ */
+static inline uint8_t *
+lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
+ unsigned x, unsigned y, unsigned layer)
+{
+ unsigned px, py, pixel_offset;
+ uint8_t *depth;
+
+ assert(x < task->scene->tiles_x * TILE_SIZE);
+ assert(y < task->scene->tiles_y * TILE_SIZE);
+ assert((x % TILE_VECTOR_WIDTH) == 0);
+ assert((y % TILE_VECTOR_HEIGHT) == 0);
+
+ assert(task->depth_tile);
+
+ /* offset within the tile, mirroring the color-block computation above */
+ px = x % TILE_SIZE;
+ py = y % TILE_SIZE;
+
+ pixel_offset = px * task->scene->zsbuf.format_bytes +
+ py * task->scene->zsbuf.stride;
+ depth = task->depth_tile + pixel_offset;
+
+ /* step into the requested array layer / 3D slice */
+ if (layer) {
+ depth += layer * task->scene->zsbuf.layer_stride;
+ }
+
+ assert(lp_check_alignment(depth, llvmpipe_get_format_alignment(task->scene->fb.zsbuf->format)));
+ return depth;
+}
+
+
+
+/**
+ * Shade all pixels in a 4x4 block. The fragment code omits the
+ * triangle in/out tests.
+ * \param x, y location of 4x4 block in window coords
+ */
+static inline void
+lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y )
+{
+ const struct lp_scene *scene = task->scene;
+ const struct lp_rast_state *state = task->state;
+ struct lp_fragment_shader_variant *variant = state->variant;
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ unsigned stride[PIPE_MAX_COLOR_BUFS];
+ uint8_t *depth = NULL;
+ unsigned depth_stride = 0;
+ unsigned i;
+
+ /* color buffer */
+ for (i = 0; i < scene->fb.nr_cbufs; i++) {
+ if (scene->fb.cbufs[i]) {
+ stride[i] = scene->cbufs[i].stride;
+ color[i] = lp_rast_get_color_block_pointer(task, i, x, y,
+ inputs->layer);
+ }
+ else {
+ stride[i] = 0;
+ color[i] = NULL;
+ }
+ }
+
+ if (scene->zsbuf.map) {
+ depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer);
+ depth_stride = scene->zsbuf.stride;
+ }
+
+ /*
+ * The rasterizer may produce fragments outside our
+ * allocated 4x4 blocks hence need to filter them out here.
+ */
+ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+ /* not very accurate would need a popcount on the mask */
+ /* always count this not worth bothering? */
+ task->ps_invocations += 1 * variant->ps_inv_multiplier;
+
+ /* Propagate non-interpolated raster state. */
+ task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+
+ /* run shader on 4x4 block */
+ BEGIN_JIT_CALL(state, task);
+ variant->jit_function[RAST_WHOLE]( &state->jit_context,
+ x, y,
+ inputs->frontfacing,
+ GET_A0(inputs),
+ GET_DADX(inputs),
+ GET_DADY(inputs),
+ color,
+ depth,
+ 0xffff,
+ &task->thread_data,
+ stride,
+ depth_stride);
+ END_JIT_CALL();
+ }
+}
+
+/*
+ * Binned-triangle rasterization entry points, specialized by the number
+ * of active edge planes (1..8).  Default (64-bit plane math) variants:
+ */
+void lp_rast_triangle_1( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_2( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_3( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_4( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_5( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_6( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_7( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_8( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+/* Specializations for small (4x4 / 16x16) fully-binned blocks: */
+void lp_rast_triangle_3_4(struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_3_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_4_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+
+/* Variants using 32-bit plane math (SSE-accelerated when available): */
+void lp_rast_triangle_32_1( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_2( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_3( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_4( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_5( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_6( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_7( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+void lp_rast_triangle_32_8( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_32_3_4(struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle_32_4_16( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+/* Bin command: make 'arg' the task's current rasterizer state. */
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg);
+
+/* Debug helper: dump the commands in one bin. */
+void
+lp_debug_bin( const struct cmd_bin *bin, int x, int y );
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c
new file mode 100644
index 000000000..c9b9221d8
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -0,0 +1,558 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Rasterization for binned triangles within a tile
+ */
+
+#include <limits.h>
+#include "util/u_math.h"
+#include "lp_debug.h"
+#include "lp_perf.h"
+#include "lp_rast_priv.h"
+
+/**
+ * Shade all pixels in a 4x4 block.
+ */
+static void
+block_full_4(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ int x, int y)
+{
+ lp_rast_shade_quads_all(task, &tri->inputs, x, y);
+}
+
+
+/**
+ * Shade all pixels in a 16x16 block.
+ */
+static void
+block_full_16(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ int x, int y)
+{
+ unsigned ix, iy;
+ assert(x % 16 == 0);
+ assert(y % 16 == 0);
+ for (iy = 0; iy < 16; iy += 4)
+ for (ix = 0; ix < 16; ix += 4)
+ block_full_4(task, tri, x + ix, y + iy);
+}
+
+/**
+ * Evaluate one edge function c + i*dcdx + j*dcdy over a 4x4 pixel block
+ * and return a 16-bit mask with bit (j*4 + i) set for each pixel where
+ * the value is negative (i.e. outside the edge).
+ *
+ * NOTE(review): relies on arithmetic right shift of negative values and
+ * on FIXED_SHIFT placing the sign information into the tested bit
+ * positions -- confirm against the FIXED_SHIFT definition in lp_rast.h.
+ */
+static inline unsigned
+build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
+{
+ unsigned mask = 0;
+
+ /* edge value at the start of each of the four rows */
+ int64_t c0 = c;
+ int64_t c1 = c0 + dcdy;
+ int64_t c2 = c1 + dcdy;
+ int64_t c3 = c2 + dcdy;
+
+ mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
+ mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
+ mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
+ mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
+ mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
+ mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
+ mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
+ mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
+ mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
+ mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
+ mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
+ mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
+ mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
+ mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
+ mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
+ mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15);
+
+ return mask;
+}
+
+
+/**
+ * Accumulate trivial-reject and partial-coverage masks for a 4x4 grid
+ * of sub-blocks: *outmask gets the sign bits of the edge values at 'c',
+ * *partmask the sign bits with 'cdiff' added (the inner/outer corner
+ * offset difference computed by the callers).
+ */
+static inline void
+build_masks(int64_t c,
+ int64_t cdiff,
+ int64_t dcdx,
+ int64_t dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ *outmask |= build_mask_linear(c, dcdx, dcdy);
+ *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
+}
+
+/*
+ * Small-block wrappers: forward to the generic NR_PLANES-specialized
+ * rasterizer with all planes enabled ((1 << nplanes) - 1).
+ */
+void
+lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<3)-1;
+ lp_rast_triangle_3(task, arg2);
+}
+
+/* The 4x4 case simply reuses the 16x16 path. */
+void
+lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ lp_rast_triangle_3_16(task, arg);
+}
+
+void
+lp_rast_triangle_4_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<4)-1;
+ lp_rast_triangle_4(task, arg2);
+}
+
+#if !defined(PIPE_ARCH_SSE)
+
+/*
+ * Fallback 32-bit small-block entry points used when no SSE
+ * specializations are compiled in: enable all planes and defer to the
+ * generic 32-bit rasterizers.
+ */
+void
+lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<3)-1;
+ lp_rast_triangle_32_3(task, arg2);
+}
+
+void
+lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ union lp_rast_cmd_arg arg2;
+ arg2.triangle.tri = arg.triangle.tri;
+ arg2.triangle.plane_mask = (1<<4)-1;
+ lp_rast_triangle_32_4(task, arg2);
+}
+
+/* The 4x4 case simply reuses the 16x16 path. */
+void
+lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ lp_rast_triangle_32_3_16(task, arg);
+}
+
+#else
+#include <emmintrin.h>
+#include "util/u_sse.h"
+
+
+/**
+ * SSE version of build_masks() for 32-bit plane coefficients.
+ *
+ * Evaluates the edge function over a 4x4 grid starting at 'c' and ORs
+ * the 16 per-pixel sign bits into *outmask; repeats with 'cdiff' added
+ * for *partmask.  The saturating epi32 -> epi16 -> epi8 packs preserve
+ * each value's sign, so _mm_movemask_epi8 yields one bit per pixel.
+ */
+static inline void
+build_masks_32(int c,
+ int cdiff,
+ int dcdx,
+ int dcdy,
+ unsigned *outmask,
+ unsigned *partmask)
+{
+ __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ {
+ __m128i cstep01, cstep23, result;
+
+ cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ result = _mm_packs_epi16(cstep01, cstep23);
+
+ *outmask |= _mm_movemask_epi8(result);
+ }
+
+
+ {
+ /* Repeat with the cdiff offset applied for the partial mask. */
+ __m128i cio4 = _mm_set1_epi32(cdiff);
+ __m128i cstep01, cstep23, result;
+
+ cstep0 = _mm_add_epi32(cstep0, cio4);
+ cstep1 = _mm_add_epi32(cstep1, cio4);
+ cstep2 = _mm_add_epi32(cstep2, cio4);
+ cstep3 = _mm_add_epi32(cstep3, cio4);
+
+ cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ result = _mm_packs_epi16(cstep01, cstep23);
+
+ *partmask |= _mm_movemask_epi8(result);
+ }
+}
+
+
+/**
+ * SSE version of build_mask_linear() for 32-bit plane coefficients:
+ * returns a 16-bit mask with one bit per pixel of a 4x4 block, set
+ * where c + i*dcdx + j*dcdy is negative.
+ */
+static inline unsigned
+build_mask_linear_32(int c, int dcdx, int dcdy)
+{
+ __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* Get values across the quad
+ */
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ /* pack pairs of results into epi16
+ */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+
+ /* pack into epi8, preserving sign bits
+ */
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* extract sign bits to create mask
+ */
+ return _mm_movemask_epi8(result);
+}
+
+/**
+ * Given four row vectors of pre-stepped edge values, add the scalar
+ * offset 'cdiff' to all 16 values and return their 16 sign bits
+ * (one bit per pixel of the 4x4 block).
+ */
+static inline unsigned
+sign_bits4(const __m128i *cstep, int cdiff)
+{
+
+ /* Adjust the step values
+ */
+ __m128i cio4 = _mm_set1_epi32(cdiff);
+ __m128i cstep0 = _mm_add_epi32(cstep[0], cio4);
+ __m128i cstep1 = _mm_add_epi32(cstep[1], cio4);
+ __m128i cstep2 = _mm_add_epi32(cstep[2], cio4);
+ __m128i cstep3 = _mm_add_epi32(cstep[3], cio4);
+
+ /* Pack down to epi8
+ */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* Extract the sign bits
+ */
+ return _mm_movemask_epi8(result);
+}
+
+
+#define NR_PLANES 3
+
+
+
+
+
+
+
+/**
+ * SSE rasterization of a 16x16 block for a 3-plane triangle using
+ * 32-bit plane math.
+ *
+ * Here arg.triangle.plane_mask is repurposed to carry the block's
+ * position within the tile: low byte = x offset, next byte = y offset.
+ * Partially-covered 4x4 sub-blocks are collected in out[] and shaded
+ * at the end.
+ */
+void
+lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ int x = (arg.triangle.plane_mask & 0xff) + task->x;
+ int y = (arg.triangle.plane_mask >> 8) + task->y;
+ unsigned i, j;
+
+ /* partially covered 4x4 sub-blocks, recorded for a second pass */
+ struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
+ unsigned nr = 0;
+
+ __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = _mm_setzero_si128();
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+ __m128i rej4;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ /* Transpose so each lane of c/dcdx/dcdy/rej4 holds one plane's value. */
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &rej4);
+
+ /* Adjust dcdx;
+ */
+ dcdx = _mm_sub_epi32(zero, dcdx);
+
+ /* Evaluate all three edge functions at the block origin (x, y). */
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
+ rej4 = _mm_slli_epi32(rej4, 2); /* eo * 4: reject corner for 4x4 steps */
+
+ /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
+ c = _mm_sub_epi32(c, _mm_set1_epi32(1));
+ rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1));
+
+ dcdx2 = _mm_add_epi32(dcdx, dcdx);
+ dcdx3 = _mm_add_epi32(dcdx2, dcdx);
+
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
+
+ /* Walk the 16x16 region as a 4x4 grid of 4x4 sub-blocks: cx steps by
+ * 4*dcdx per column, c by 4*dcdy per row.
+ */
+ for (i = 0; i < 4; i++) {
+ __m128i cx = c;
+
+ for (j = 0; j < 4; j++) {
+ __m128i c4rej = _mm_add_epi32(cx, rej4);
+ __m128i rej_masks = _mm_srai_epi32(c4rej, 31);
+
+ /* if (is_zero(rej_masks)) */
+ if (_mm_movemask_epi8(rej_masks) == 0) {
+ /* Not trivially rejected: compute per-pixel coverage for the
+ * sub-block, row by row, ORing the three planes together.
+ */
+ __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0);
+ __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1);
+ __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2);
+
+ __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
+
+ __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
+ __m128i c_01 = _mm_packs_epi32(c_0, c_1);
+
+ __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
+
+ __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
+ __m128i c_23 = _mm_packs_epi32(c_2, c_3);
+ __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
+
+ /* set bits mark pixels OUTSIDE some plane */
+ unsigned mask = _mm_movemask_epi8(c_0123);
+
+ out[nr].i = i;
+ out[nr].j = j;
+ out[nr].mask = mask;
+ /* keep the entry only if at least one pixel is inside */
+ if (mask != 0xffff)
+ nr++;
+ }
+ cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2));
+ }
+
+ c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2));
+ }
+
+ /* Shade the recorded sub-blocks; invert to get the coverage mask. */
+ for (i = 0; i < nr; i++)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x + 4 * out[i].j,
+ y + 4 * out[i].i,
+ 0xffff & ~out[i].mask);
+}
+
+
+
+
+
+/**
+ * SSE rasterization of a single 4x4 block for a 3-plane triangle using
+ * 32-bit plane math.  As in the 16x16 variant, arg.triangle.plane_mask
+ * carries the block position within the tile (low byte = x offset,
+ * next byte = y offset).
+ */
+void
+lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
+ unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
+
+ __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
+ __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
+ __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ __m128i zero = _mm_setzero_si128();
+
+ __m128i c;
+ __m128i dcdx;
+ __m128i dcdy;
+
+ __m128i dcdx2;
+ __m128i dcdx3;
+
+ __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
+ __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
+ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
+ __m128i unused;
+
+ /* Transpose so each lane of c/dcdx/dcdy holds one plane's value
+ * (the eo column is not needed here -- no trivial-reject test).
+ */
+ transpose4_epi32(&p0, &p1, &p2, &zero,
+ &c, &dcdx, &dcdy, &unused);
+
+ /* Adjust dcdx;
+ */
+ dcdx = _mm_sub_epi32(zero, dcdx);
+
+ /* Evaluate all three edge functions at the block origin (x, y). */
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
+ c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
+
+ /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
+ c = _mm_sub_epi32(c, _mm_set1_epi32(1));
+
+ dcdx2 = _mm_add_epi32(dcdx, dcdx);
+ dcdx3 = _mm_add_epi32(dcdx2, dcdx);
+
+ transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3,
+ &span_0, &span_1, &span_2, &unused);
+
+
+ {
+ /* Per-pixel edge values for the 4 rows, ORing the three planes. */
+ __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0);
+ __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1);
+ __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2);
+
+ __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0);
+
+ __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1);
+ __m128i c_01 = _mm_packs_epi32(c_0, c_1);
+
+ __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2);
+
+ __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0));
+ __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1));
+ __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2));
+
+ __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3);
+ __m128i c_23 = _mm_packs_epi32(c_2, c_3);
+ __m128i c_0123 = _mm_packs_epi16(c_01, c_23);
+
+ /* set bits mark pixels OUTSIDE some plane */
+ unsigned mask = _mm_movemask_epi8(c_0123);
+
+ /* shade only if at least one pixel is inside; invert for coverage */
+ if (mask != 0xffff)
+ lp_rast_shade_quads_mask(task,
+ &tri->inputs,
+ x,
+ y,
+ 0xffff & ~mask);
+ }
+}
+
+#undef NR_PLANES
+#endif
+
+
+#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks(c, cdiff, dcdx, dcdy, omask, pmask)
+#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear(c, dcdx, dcdy)
+
+#define TAG(x) x##_1
+#define NR_PLANES 1
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_2
+#define NR_PLANES 2
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_3
+#define NR_PLANES 3
+/*#define TRI_4 lp_rast_triangle_3_4*/
+/*#define TRI_16 lp_rast_triangle_3_16*/
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_4
+#define NR_PLANES 4
+/*#define TRI_16 lp_rast_triangle_4_16*/
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_5
+#define NR_PLANES 5
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_6
+#define NR_PLANES 6
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_7
+#define NR_PLANES 7
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_8
+#define NR_PLANES 8
+#include "lp_rast_tri_tmp.h"
+
+#ifdef PIPE_ARCH_SSE
+#undef BUILD_MASKS
+#undef BUILD_MASK_LINEAR
+#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask)
+#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear_32((int)c, dcdx, dcdy)
+#endif
+
+#define TAG(x) x##_32_1
+#define NR_PLANES 1
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_2
+#define NR_PLANES 2
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_3
+#define NR_PLANES 3
+/*#define TRI_4 lp_rast_triangle_3_4*/
+/*#define TRI_16 lp_rast_triangle_3_16*/
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_4
+#define NR_PLANES 4
+#ifdef PIPE_ARCH_SSE
+#define TRI_16 lp_rast_triangle_32_4_16
+#endif
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_5
+#define NR_PLANES 5
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_6
+#define NR_PLANES 6
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_7
+#define NR_PLANES 7
+#include "lp_rast_tri_tmp.h"
+
+#define TAG(x) x##_32_8
+#define NR_PLANES 8
+#include "lp_rast_tri_tmp.h"
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
new file mode 100644
index 000000000..52f6e9996
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
@@ -0,0 +1,380 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Rasterization for binned triangles within a tile
+ */
+
+
+
+/**
+ * Prototype for a 8 plane rasterizer function. Will codegenerate
+ * several of these.
+ *
+ * XXX: Varients for more/fewer planes.
+ * XXX: Need ways of dropping planes as we descend.
+ * XXX: SIMD
+ */
+static void
+TAG(do_block_4)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int64_t *c)
+{
+ unsigned mask = 0xffff;
+ int j;
+
+ for (j = 0; j < NR_PLANES; j++) {
+ mask &= ~BUILD_MASK_LINEAR(c[j] - 1,
+ -plane[j].dcdx,
+ plane[j].dcdy);
+ }
+
+ /* Now pass to the shader:
+ */
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+}
+
+/**
+ * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
+ * of the triangle's bounds.
+ */
+static void
+TAG(do_block_16)(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ const struct lp_rast_plane *plane,
+ int x, int y,
+ const int64_t *c)
+{
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned j;
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int64_t dcdx = -IMUL64(plane[j].dcdx, 4);
+ const int64_t dcdy = IMUL64(plane[j].dcdy, 4);
+ const int64_t cox = IMUL64(plane[j].eo, 4);
+ const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int64_t cio = IMUL64(ei, 4) - 1;
+
+ BUILD_MASKS(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask)));
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ int64_t cx[NR_PLANES];
+
+ partial_mask &= ~(1 << i);
+
+ LP_COUNT(nr_partially_covered_4);
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = (c[j]
+ - IMUL64(plane[j].dcdx, ix)
+ + IMUL64(plane[j].dcdy, iy));
+
+ TAG(do_block_4)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+
+ inmask &= ~(1 << i);
+
+ LP_COUNT(nr_fully_covered_4);
+ block_full_4(task, tri, px, py);
+ }
+}
+
+
+/**
+ * Scan the tile in chunks and figure out which pixels to rasterize
+ * for this triangle.
+ */
+void
+TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ unsigned plane_mask = arg.triangle.plane_mask;
+ const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
+ const int x = task->x, y = task->y;
+ struct lp_rast_plane plane[NR_PLANES];
+ int64_t c[NR_PLANES];
+ unsigned outmask, inmask, partmask, partial_mask;
+ unsigned j = 0;
+
+ if (tri->inputs.disable) {
+ /* This triangle was partially binned and has been disabled */
+ return;
+ }
+
+ outmask = 0; /* outside one or more trivial reject planes */
+ partmask = 0; /* outside one or more trivial accept planes */
+
+ while (plane_mask) {
+ int i = ffs(plane_mask) - 1;
+ plane[j] = tri_plane[i];
+ plane_mask &= ~(1 << i);
+ c[j] = plane[j].c + IMUL64(plane[j].dcdy, y) - IMUL64(plane[j].dcdx, x);
+
+ {
+ const int64_t dcdx = -IMUL64(plane[j].dcdx, 16);
+ const int64_t dcdy = IMUL64(plane[j].dcdy, 16);
+ const int64_t cox = IMUL64(plane[j].eo, 16);
+ const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo;
+ const int64_t cio = IMUL64(ei, 16) - 1;
+
+ BUILD_MASKS(c[j] + cox,
+ cio - cox,
+ dcdx, dcdy,
+ &outmask, /* sign bits from c[i][0..15] + cox */
+ &partmask); /* sign bits from c[i][0..15] + cio */
+ }
+
+ j++;
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Mask of sub-blocks which are inside all trivial accept planes:
+ */
+ inmask = ~partmask & 0xffff;
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = partmask & ~outmask;
+
+ assert((partial_mask & inmask) == 0);
+
+ LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask)));
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 16;
+ int iy = (i >> 2) * 16;
+ int px = x + ix;
+ int py = y + iy;
+ int64_t cx[NR_PLANES];
+
+ for (j = 0; j < NR_PLANES; j++)
+ cx[j] = (c[j]
+ - IMUL64(plane[j].dcdx, ix)
+ + IMUL64(plane[j].dcdy, iy));
+
+ partial_mask &= ~(1 << i);
+
+ LP_COUNT(nr_partially_covered_16);
+ TAG(do_block_16)(task, tri, plane, px, py, cx);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int ix = (i & 3) * 16;
+ int iy = (i >> 2) * 16;
+ int px = x + ix;
+ int py = y + iy;
+
+ inmask &= ~(1 << i);
+
+ LP_COUNT(nr_fully_covered_16);
+ block_full_16(task, tri, px, py);
+ }
+}
+
+#if defined(PIPE_ARCH_SSE) && defined(TRI_16)
+/* XXX: special case this when intersection is not required.
+ * - tile completely within bbox,
+ * - bbox completely within tile.
+ */
+/**
+ * SSE fast path for a triangle fully binned to one 16x16 block.
+ * arg.triangle.plane_mask carries the block position within the tile
+ * (low byte = x offset, next byte = y offset).  Per-plane 4x4 step
+ * vectors (cstep4) are precomputed once and reused for every sub-block.
+ */
+void
+TRI_16(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned mask = arg.triangle.plane_mask;
+ unsigned outmask, partial_mask;
+ unsigned j;
+ __m128i cstep4[NR_PLANES][4];
+
+ int x = (mask & 0xff);
+ int y = (mask >> 8);
+
+ outmask = 0; /* outside one or more trivial reject planes */
+
+ x += task->x;
+ y += task->y;
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int dcdx = -plane[j].dcdx * 4;
+ const int dcdy = plane[j].dcdy * 4;
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3);
+ cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy);
+ cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy);
+ cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy);
+
+ {
+ const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
+ const int cox = plane[j].eo * 4;
+
+ outmask |= sign_bits4(cstep4[j], c + cox);
+ }
+ }
+
+ if (outmask == 0xffff)
+ return;
+
+
+ /* Mask of sub-blocks which are inside all trivial reject planes,
+ * but outside at least one trivial accept plane:
+ */
+ partial_mask = 0xffff & ~outmask;
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int ix = (i & 3) * 4;
+ int iy = (i >> 2) * 4;
+ int px = x + ix;
+ int py = y + iy;
+ /* NOTE(review): this 'mask' shadows the outer plane_mask variable
+ * of the same name; consider renaming for clarity.
+ */
+ unsigned mask = 0xffff;
+
+ partial_mask &= ~(1 << i);
+
+ for (j = 0; j < NR_PLANES; j++) {
+ const int cx = (plane[j].c - 1
+ - plane[j].dcdx * px
+ + plane[j].dcdy * py) * 4;
+
+ mask &= ~sign_bits4(cstep4[j], cx);
+ }
+
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask);
+ }
+}
+#endif
+
+#if defined(PIPE_ARCH_SSE) && defined(TRI_4)
+/**
+ * SSE fast path for a triangle fully binned to one 4x4 block.
+ * arg.triangle.plane_mask carries the block position within the tile
+ * (low byte = x offset, next byte = y offset).
+ */
+void
+TRI_4(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle.tri;
+ const struct lp_rast_plane *plane = GET_PLANES(tri);
+ unsigned mask = arg.triangle.plane_mask;
+ const int x = task->x + (mask & 0xff);
+ const int y = task->y + (mask >> 8);
+ unsigned j;
+
+ /* Iterate over partials:
+ */
+ {
+ /* NOTE(review): this 'mask' shadows the outer position-encoding
+ * variable of the same name; consider renaming for clarity.
+ */
+ unsigned mask = 0xffff;
+
+ for (j = 0; j < NR_PLANES; j++) {
+ /* plane value at the block origin */
+ const int cx = (plane[j].c
+ - plane[j].dcdx * x
+ + plane[j].dcdy * y);
+
+ const int dcdx = -plane[j].dcdx;
+ const int dcdy = plane[j].dcdy;
+ __m128i xdcdy = _mm_set1_epi32(dcdy);
+
+ /* step across the 4 columns and 4 rows of the block */
+ __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3);
+ __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy);
+ __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy);
+ __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy);
+
+ /* pack to epi8, preserving the sign of each value */
+ __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1);
+ __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3);
+ __m128i result = _mm_packs_epi16(cstep01, cstep23);
+
+ /* Extract the sign bits
+ */
+ mask &= ~_mm_movemask_epi8(result);
+ }
+
+ if (mask)
+ lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
+ }
+}
+#endif
+
+
+
+#undef TAG
+#undef TRI_4
+#undef TRI_16
+#undef NR_PLANES
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
new file mode 100644
index 000000000..2441b3c0d
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -0,0 +1,564 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_framebuffer.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/simple_list.h"
+#include "util/u_format.h"
+#include "lp_scene.h"
+#include "lp_fence.h"
+#include "lp_debug.h"
+
+
/** Number of resource pointers held per resource_ref block */
#define RESOURCE_REF_SZ 32

/** List of resource references (a singly-linked chain of fixed-size blocks) */
struct resource_ref {
   struct pipe_resource *resource[RESOURCE_REF_SZ];
   int count;                    /**< number of valid entries in resource[] */
   struct resource_ref *next;    /**< next block in the chain */
};
+
+
+/**
+ * Create a new scene object.
+ * \param queue the queue to put newly rendered/emptied scenes into
+ */
+struct lp_scene *
+lp_scene_create( struct pipe_context *pipe )
+{
+ struct lp_scene *scene = CALLOC_STRUCT(lp_scene);
+ if (!scene)
+ return NULL;
+
+ scene->pipe = pipe;
+
+ scene->data.head =
+ CALLOC_STRUCT(data_block);
+
+ pipe_mutex_init(scene->mutex);
+
+#ifdef DEBUG
+ /* Do some scene limit sanity checks here */
+ {
+ size_t maxBins = TILES_X * TILES_Y;
+ size_t maxCommandBytes = sizeof(struct cmd_block) * maxBins;
+ size_t maxCommandPlusData = maxCommandBytes + DATA_BLOCK_SIZE;
+ /* We'll need at least one command block per bin. Make sure that's
+ * less than the max allowed scene size.
+ */
+ assert(maxCommandBytes < LP_SCENE_MAX_SIZE);
+ /* We'll also need space for at least one other data block */
+ assert(maxCommandPlusData <= LP_SCENE_MAX_SIZE);
+ }
+#endif
+
+ return scene;
+}
+
+
/**
 * Free all data associated with the given scene, and the scene itself.
 * The scene must already have been through lp_scene_end_rasterization()
 * (or never binned), so only the head data block remains.
 */
void
lp_scene_destroy(struct lp_scene *scene)
{
   /* drop any fence still attached to the scene */
   lp_fence_reference(&scene->fence, NULL);
   pipe_mutex_destroy(scene->mutex);
   /* all dynamically-added data blocks should have been freed already */
   assert(scene->data.head->next == NULL);
   FREE(scene->data.head);
   FREE(scene);
}
+
+
+/**
+ * Check if the scene's bins are all empty.
+ * For debugging purposes.
+ */
+boolean
+lp_scene_is_empty(struct lp_scene *scene )
+{
+ unsigned x, y;
+
+ for (y = 0; y < TILES_Y; y++) {
+ for (x = 0; x < TILES_X; x++) {
+ const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ if (bin->head) {
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+
/* Returns true if there has ever been a failed allocation attempt in
 * this scene.  Used in triangle emit to avoid having to check success
 * at each bin.  The flag is set by lp_scene_new_data_block() and
 * cleared in lp_scene_end_rasterization().
 */
boolean
lp_scene_is_oom(struct lp_scene *scene)
{
   return scene->alloc_failed;
}
+
+
+/* Remove all commands from a bin. Tries to reuse some of the memory
+ * allocated to the bin, however.
+ */
+void
+lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y)
+{
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+
+ bin->last_state = NULL;
+ bin->head = bin->tail;
+ if (bin->tail) {
+ bin->tail->next = NULL;
+ bin->tail->count = 0;
+ }
+}
+
+
/**
 * Map all framebuffer attachments and record their strides/layouts so
 * the rasterizer threads can write to them directly.  Valid until
 * lp_scene_end_rasterization() unmaps everything.
 */
void
lp_scene_begin_rasterization(struct lp_scene *scene)
{
   const struct pipe_framebuffer_state *fb = &scene->fb;
   int i;

   //LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   for (i = 0; i < scene->fb.nr_cbufs; i++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[i];

      if (!cbuf) {
         /* unbound attachment slot */
         scene->cbufs[i].stride = 0;
         scene->cbufs[i].layer_stride = 0;
         scene->cbufs[i].map = NULL;
         continue;
      }

      if (llvmpipe_resource_is_texture(cbuf->texture)) {
         scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture,
                                                           cbuf->u.tex.level);
         scene->cbufs[i].layer_stride = llvmpipe_layer_stride(cbuf->texture,
                                                              cbuf->u.tex.level);

         scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture,
                                                     cbuf->u.tex.level,
                                                     cbuf->u.tex.first_layer,
                                                     LP_TEX_USAGE_READ_WRITE);
         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
      }
      else {
         /* Rendering to a buffer resource: "stride" is the buffer width
          * and the mapping starts at the first bound element.  Note this
          * path does not go through llvmpipe_resource_map().
          */
         struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
         unsigned pixstride = util_format_get_blocksize(cbuf->format);
         scene->cbufs[i].stride = cbuf->texture->width0;
         scene->cbufs[i].layer_stride = 0;
         scene->cbufs[i].map = lpr->data;
         scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
      }
   }

   /* map the depth/stencil buffer, if any */
   if (fb->zsbuf) {
      struct pipe_surface *zsbuf = scene->fb.zsbuf;
      scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level);
      scene->zsbuf.layer_stride = llvmpipe_layer_stride(zsbuf->texture, zsbuf->u.tex.level);

      scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
                                               zsbuf->u.tex.level,
                                               zsbuf->u.tex.first_layer,
                                               LP_TEX_USAGE_READ_WRITE);
      scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
   }
}
+
+
+
+
/**
 * Free all the temporary data in a scene.
 * Unmaps the framebuffer, resets all bins, releases resource references
 * and frees all dynamically-added data blocks, leaving the scene ready
 * for reuse.
 */
void
lp_scene_end_rasterization(struct lp_scene *scene )
{
   int i, j;

   /* Unmap color buffers */
   for (i = 0; i < scene->fb.nr_cbufs; i++) {
      if (scene->cbufs[i].map) {
         struct pipe_surface *cbuf = scene->fb.cbufs[i];
         /* buffer (non-texture) color targets were not mapped through
          * llvmpipe_resource_map(), so there's nothing to unmap for them */
         if (llvmpipe_resource_is_texture(cbuf->texture)) {
            llvmpipe_resource_unmap(cbuf->texture,
                                    cbuf->u.tex.level,
                                    cbuf->u.tex.first_layer);
         }
         scene->cbufs[i].map = NULL;
      }
   }

   /* Unmap z/stencil buffer */
   if (scene->zsbuf.map) {
      struct pipe_surface *zsbuf = scene->fb.zsbuf;
      llvmpipe_resource_unmap(zsbuf->texture,
                              zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);
      scene->zsbuf.map = NULL;
   }

   /* Reset all command lists:
    */
   for (i = 0; i < scene->tiles_x; i++) {
      for (j = 0; j < scene->tiles_y; j++) {
         struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
         bin->head = NULL;
         bin->tail = NULL;
         bin->last_state = NULL;
      }
   }

   /* If there are any bins which weren't cleared by the loop above,
    * they will be caught (on debug builds at least) by this assert:
    */
   assert(lp_scene_is_empty(scene));

   /* Decrement texture ref counts
    */
   {
      struct resource_ref *ref;
      int i, j = 0;   /* NOTE: shadows outer i/j; j counts resources seen */

      for (ref = scene->resources; ref; ref = ref->next) {
         for (i = 0; i < ref->count; i++) {
            if (LP_DEBUG & DEBUG_SETUP)
               debug_printf("resource %d: %p %dx%d sz %d\n",
                            j,
                            (void *) ref->resource[i],
                            ref->resource[i]->width0,
                            ref->resource[i]->height0,
                            llvmpipe_resource_size(ref->resource[i]));
            j++;
            pipe_resource_reference(&ref->resource[i], NULL);
         }
      }

      if (LP_DEBUG & DEBUG_SETUP)
         debug_printf("scene %d resources, sz %d\n",
                      j, scene->resource_reference_size);
   }

   /* Free all scene data blocks:
    */
   {
      struct data_block_list *list = &scene->data;
      struct data_block *block, *tmp;

      /* the head block itself is kept for reuse; it is freed only in
       * lp_scene_destroy() */
      for (block = list->head->next; block; block = tmp) {
         tmp = block->next;
         FREE(block);
      }

      list->head->next = NULL;
      list->head->used = 0;
   }

   lp_fence_reference(&scene->fence, NULL);

   scene->resources = NULL;
   scene->scene_size = 0;
   scene->resource_reference_size = 0;

   scene->alloc_failed = FALSE;

   util_unreference_framebuffer_state( &scene->fb );
}
+
+
+
+
+
+
+struct cmd_block *
+lp_scene_new_cmd_block( struct lp_scene *scene,
+ struct cmd_bin *bin )
+{
+ struct cmd_block *block = lp_scene_alloc(scene, sizeof(struct cmd_block));
+ if (block) {
+ if (bin->tail) {
+ bin->tail->next = block;
+ bin->tail = block;
+ }
+ else {
+ bin->head = block;
+ bin->tail = block;
+ }
+ //memset(block, 0, sizeof *block);
+ block->next = NULL;
+ block->count = 0;
+ }
+ return block;
+}
+
+
+struct data_block *
+lp_scene_new_data_block( struct lp_scene *scene )
+{
+ if (scene->scene_size + DATA_BLOCK_SIZE > LP_SCENE_MAX_SIZE) {
+ if (0) debug_printf("%s: failed\n", __FUNCTION__);
+ scene->alloc_failed = TRUE;
+ return NULL;
+ }
+ else {
+ struct data_block *block = MALLOC_STRUCT(data_block);
+ if (block == NULL)
+ return NULL;
+
+ scene->scene_size += sizeof *block;
+
+ block->used = 0;
+ block->next = scene->data.head;
+ scene->data.head = block;
+
+ return block;
+ }
+}
+
+
+/**
+ * Return number of bytes used for all bin data within a scene.
+ * This does not include resources (textures) referenced by the scene.
+ */
+static unsigned
+lp_scene_data_size( const struct lp_scene *scene )
+{
+ unsigned size = 0;
+ const struct data_block *block;
+ for (block = scene->data.head; block; block = block->next) {
+ size += block->used;
+ }
+ return size;
+}
+
+
+
/**
 * Add a reference to a resource by the scene.
 * \param initializing_scene  TRUE while the scene is still being set up
 *        (the flush heuristic at the end is skipped in that case)
 * \return FALSE on out-of-memory, or when the caller should flush the
 *         scene because too much texture memory is now referenced;
 *         TRUE otherwise.
 */
boolean
lp_scene_add_resource_reference(struct lp_scene *scene,
                                struct pipe_resource *resource,
                                boolean initializing_scene)
{
   struct resource_ref *ref, **last = &scene->resources;
   int i;

   /* Look at existing resource blocks:
    */
   for (ref = scene->resources; ref; ref = ref->next) {
      last = &ref->next;

      /* Search for this resource:
       */
      for (i = 0; i < ref->count; i++)
         if (ref->resource[i] == resource)
            return TRUE;

      if (ref->count < RESOURCE_REF_SZ) {
         /* This block still has free slots; append the reference here.
          */
         break;
      }
   }

   /* Create a new block if no block with free space was found.
    */
   if (!ref) {
      assert(*last == NULL);
      *last = lp_scene_alloc(scene, sizeof *ref);
      if (*last == NULL)
          return FALSE;

      ref = *last;
      memset(ref, 0, sizeof *ref);
   }

   /* Append the reference to the reference block.
    */
   pipe_resource_reference(&ref->resource[ref->count++], resource);
   scene->resource_reference_size += llvmpipe_resource_size(resource);

   /* Heuristic to advise scene flushes.  This isn't helpful in the
    * initial setup of the scene, but after that point flush on the
    * next resource added which exceeds 64MB in referenced texture
    * data.
    */
   if (!initializing_scene &&
       scene->resource_reference_size >= LP_SCENE_MAX_RESOURCE_SIZE)
      return FALSE;

   return TRUE;
}
+
+
+/**
+ * Does this scene have a reference to the given resource?
+ */
+boolean
+lp_scene_is_resource_referenced(const struct lp_scene *scene,
+ const struct pipe_resource *resource)
+{
+ const struct resource_ref *ref;
+ int i;
+
+ for (ref = scene->resources; ref; ref = ref->next) {
+ for (i = 0; i < ref->count; i++)
+ if (ref->resource[i] == resource)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+
+
+
+/** advance curr_x,y to the next bin */
+static boolean
+next_bin(struct lp_scene *scene)
+{
+ scene->curr_x++;
+ if (scene->curr_x >= scene->tiles_x) {
+ scene->curr_x = 0;
+ scene->curr_y++;
+ }
+ if (scene->curr_y >= scene->tiles_y) {
+ /* no more bins */
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
/**
 * Prepare for iteration over the scene's bins: the first call to
 * lp_scene_bin_iter_next() will start at bin (0, 0).
 */
void
lp_scene_bin_iter_begin( struct lp_scene *scene )
{
   /* -1 is a sentinel meaning "iteration not yet started" */
   scene->curr_x = scene->curr_y = -1;
}
+
+
/**
 * Return pointer to next bin to be rendered.
 * The lp_scene::curr_x and ::curr_y fields will be advanced.
 * Multiple rendering threads will call this function to get a chunk
 * of work (a bin) to work on.
 * \param x  receives the bin's tile x coordinate (unchanged when NULL
 *           is returned)
 * \param y  receives the bin's tile y coordinate (likewise)
 * \return the bin, or NULL when all bins have been handed out
 */
struct cmd_bin *
lp_scene_bin_iter_next( struct lp_scene *scene , int *x, int *y)
{
   struct cmd_bin *bin = NULL;

   /* serialize against other rasterizer threads pulling bins */
   pipe_mutex_lock(scene->mutex);

   if (scene->curr_x < 0) {
      /* first bin */
      scene->curr_x = 0;
      scene->curr_y = 0;
   }
   else if (!next_bin(scene)) {
      /* no more bins left */
      goto end;
   }

   bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y);
   *x = scene->curr_x;
   *y = scene->curr_y;

end:
   /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/
   pipe_mutex_unlock(scene->mutex);
   return bin;
}
+
+
/**
 * Begin the binning phase: attach the framebuffer state to the scene,
 * compute the active tile counts, and determine the maximum layer index
 * that may legally be rendered to.
 * \param discard  whether previous framebuffer contents may be discarded
 */
void lp_scene_begin_binning( struct lp_scene *scene,
                             struct pipe_framebuffer_state *fb, boolean discard )
{
   int i;
   unsigned max_layer = ~0;

   assert(lp_scene_is_empty(scene));

   scene->discard = discard;
   util_copy_framebuffer_state(&scene->fb, fb);

   /* number of whole-or-partial tiles covered by the framebuffer */
   scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
   scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
   assert(scene->tiles_x <= TILES_X);
   assert(scene->tiles_y <= TILES_Y);

   /*
    * Determine how many layers the fb has (used for clamping layer value).
    * OpenGL (but not d3d10) permits different amount of layers per rt, however
    * results are undefined if layer exceeds the amount of layers of ANY
    * attachment hence don't need separate per cbuf and zsbuf max.
    */
   for (i = 0; i < scene->fb.nr_cbufs; i++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[i];
      if (cbuf) {
         if (llvmpipe_resource_is_texture(cbuf->texture)) {
            max_layer = MIN2(max_layer,
                             cbuf->u.tex.last_layer - cbuf->u.tex.first_layer);
         }
         else {
            /* a buffer target has a single "layer" */
            max_layer = 0;
         }
      }
   }
   if (fb->zsbuf) {
      struct pipe_surface *zsbuf = scene->fb.zsbuf;
      max_layer = MIN2(max_layer, zsbuf->u.tex.last_layer - zsbuf->u.tex.first_layer);
   }
   scene->fb_max_layer = max_layer;
}
+
+
+void lp_scene_end_binning( struct lp_scene *scene )
+{
+ if (LP_DEBUG & DEBUG_SCENE) {
+ debug_printf("rasterize scene:\n");
+ debug_printf(" scene_size: %u\n",
+ scene->scene_size);
+ debug_printf(" data size: %u\n",
+ lp_scene_data_size(scene));
+
+ if (0)
+ lp_debug_bins( scene );
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
new file mode 100644
index 000000000..b1464bb54
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -0,0 +1,412 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Binner data structures and bin-related functions.
+ * Note: the "setup" code is concerned with building scenes while
+ * The "rast" code is concerned with consuming/executing scenes.
+ */
+
+#ifndef LP_SCENE_H
+#define LP_SCENE_H
+
+#include "os/os_thread.h"
+#include "lp_rast.h"
+#include "lp_debug.h"
+
+struct lp_scene_queue;
+struct lp_rast_state;
+
+/* We're limited to 2K by 2K for 32bit fixed point rasterization.
+ * Will need a 64-bit version for larger framebuffers.
+ */
+#define TILES_X (LP_MAX_WIDTH / TILE_SIZE)
+#define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE)
+
+
+/* Commands per command block (ideally so sizeof(cmd_block) is a power of
+ * two in size.)
+ */
+#define CMD_BLOCK_MAX 29
+
+/* Bytes per data block.
+ */
+#define DATA_BLOCK_SIZE (64 * 1024)
+
+/* Scene temporary storage is clamped to this size:
+ */
+#define LP_SCENE_MAX_SIZE (9*1024*1024)
+
+/* The maximum amount of texture storage referenced by a scene is
+ * clamped to this size:
+ */
+#define LP_SCENE_MAX_RESOURCE_SIZE (64*1024*1024)
+
+
+/* switch to a non-pointer value for this:
+ */
+typedef void (*lp_rast_cmd_func)( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+
/** A fixed-size batch of binned commands; bins chain these together. */
struct cmd_block {
   uint8_t cmd[CMD_BLOCK_MAX];                /**< opcodes (LP_RAST_OP_x) */
   union lp_rast_cmd_arg arg[CMD_BLOCK_MAX];  /**< one argument per opcode */
   unsigned count;                            /**< number of entries in use */
   struct cmd_block *next;                    /**< next block in the bin */
};


/** A chunk of bulk scene storage, carved out by lp_scene_alloc(). */
struct data_block {
   ubyte data[DATA_BLOCK_SIZE];
   unsigned used;               /**< bytes of data[] consumed so far */
   struct data_block *next;     /**< next block in the scene's list */
};
+
+
+
+/**
+ * For each screen tile we have one of these bins.
+ */
+struct cmd_bin {
+ const struct lp_rast_state *last_state; /* most recent state set in bin */
+ struct cmd_block *head;
+ struct cmd_block *tail;
+};
+
+
/**
 * This stores bulk data which is used for all memory allocations
 * within a scene.
 *
 * Examples include triangle data and state data.  The commands in
 * the per-tile bins will point to chunks of data in this structure.
 *
 * Include the first block of data statically to ensure we can always
 * initiate a scene without relying on malloc succeeding.
 * NOTE(review): lp_scene_create() heap-allocates the head block and
 * 'first' appears unused in the visible code -- confirm whether the
 * static block is still wired up anywhere.
 */
struct data_block_list {
   struct data_block first;
   struct data_block *head;   /**< most recently added block */
};
+
+struct resource_ref;
+
+/**
+ * All bins and bin data are contained here.
+ * Per-bin data goes into the 'tile' bins.
+ * Shared data goes into the 'data' buffer.
+ *
+ * When there are multiple threads, will want to double-buffer between
+ * scenes:
+ */
+struct lp_scene {
+ struct pipe_context *pipe;
+ struct lp_fence *fence;
+
+ /* The queries still active at end of scene */
+ struct llvmpipe_query *active_queries[LP_MAX_ACTIVE_BINNED_QUERIES];
+ unsigned num_active_queries;
+ /* If queries were either active or there were begin/end query commands */
+ boolean had_queries;
+
+ /* Framebuffer mappings - valid only between begin_rasterization()
+ * and end_rasterization().
+ */
+ struct {
+ uint8_t *map;
+ unsigned stride;
+ unsigned layer_stride;
+ unsigned format_bytes;
+ } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
+
+ /* The amount of layers in the fb (minimum of all attachments) */
+ unsigned fb_max_layer;
+
+ /** the framebuffer to render the scene into */
+ struct pipe_framebuffer_state fb;
+
+ /** list of resources referenced by the scene commands */
+ struct resource_ref *resources;
+
+ /** Total memory used by the scene (in bytes). This sums all the
+ * data blocks and counts all bins, state, resource references and
+ * other random allocations within the scene.
+ */
+ unsigned scene_size;
+
+ /** Sum of sizes of all resources referenced by the scene. Sums
+ * all the textures read by the scene:
+ */
+ unsigned resource_reference_size;
+
+ boolean alloc_failed;
+ boolean discard;
+ /**
+ * Number of active tiles in each dimension.
+ * This basically the framebuffer size divided by tile size
+ */
+ unsigned tiles_x, tiles_y;
+
+ int curr_x, curr_y; /**< for iterating over bins */
+ pipe_mutex mutex;
+
+ struct cmd_bin tile[TILES_X][TILES_Y];
+ struct data_block_list data;
+};
+
+
+
+struct lp_scene *lp_scene_create(struct pipe_context *pipe);
+
+void lp_scene_destroy(struct lp_scene *scene);
+
+boolean lp_scene_is_empty(struct lp_scene *scene );
+boolean lp_scene_is_oom(struct lp_scene *scene );
+
+
+struct data_block *lp_scene_new_data_block( struct lp_scene *scene );
+
+struct cmd_block *lp_scene_new_cmd_block( struct lp_scene *scene,
+ struct cmd_bin *bin );
+
+boolean lp_scene_add_resource_reference(struct lp_scene *scene,
+ struct pipe_resource *resource,
+ boolean initializing_scene);
+
+boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
+ const struct pipe_resource *resource );
+
+
/**
 * Allocate space for a command/data in the bin's data buffer.
 * Grow the block list if needed.
 * \param size  request size; must be <= DATA_BLOCK_SIZE
 * \return pointer to the space, or NULL when the scene has hit its
 *         size cap (callers must treat this as scene-OOM).
 */
static inline void *
lp_scene_alloc( struct lp_scene *scene, unsigned size)
{
   struct data_block_list *list = &scene->data;
   struct data_block *block = list->head;

   assert(size <= DATA_BLOCK_SIZE);
   assert(block != NULL);

   if (LP_DEBUG & DEBUG_MEM)
      debug_printf("alloc %u block %u/%u tot %u/%u\n",
                   size, block->used, DATA_BLOCK_SIZE,
                   scene->scene_size, LP_SCENE_MAX_SIZE);

   /* start a new block when the current one can't hold the request */
   if (block->used + size > DATA_BLOCK_SIZE) {
      block = lp_scene_new_data_block( scene );
      if (!block) {
         /* out of memory */
         return NULL;
      }
   }

   {
      ubyte *data = block->data + block->used;
      block->used += size;
      return data;
   }
}
+
+
/**
 * As above, but with specific alignment.
 * \param alignment  required alignment in bytes; assumed to be a power
 *                   of two (the mask arithmetic below relies on it).
 */
static inline void *
lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
			unsigned alignment )
{
   struct data_block_list *list = &scene->data;
   struct data_block *block = list->head;

   assert(block != NULL);

   if (LP_DEBUG & DEBUG_MEM)
      debug_printf("alloc %u block %u/%u tot %u/%u\n",
                   size + alignment - 1,
                   block->used, DATA_BLOCK_SIZE,
                   scene->scene_size, LP_SCENE_MAX_SIZE);

   /* reserve worst-case (size + alignment - 1) to guarantee the aligned
    * region fits in the block */
   if (block->used + size + alignment - 1 > DATA_BLOCK_SIZE) {
      block = lp_scene_new_data_block( scene );
      if (!block)
         return NULL;
   }

   {
      ubyte *data = block->data + block->used;
      /* bytes to skip so the returned pointer is 'alignment'-aligned */
      unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data;
      block->used += offset + size;
      return data + offset;
   }
}
+
+
/* Put back data if we decide not to use it, eg. culled triangles.
 * Only the most recent allocation(s) from the current head block can be
 * returned this way.
 */
static inline void
lp_scene_putback_data( struct lp_scene *scene, unsigned size)
{
   struct data_block_list *list = &scene->data;
   assert(list->head && list->head->used >= size);
   list->head->used -= size;
}
+
+
/** Return pointer to a particular tile's bin.
 * No bounds checking: x/y must be < TILES_X / TILES_Y.
 */
static inline struct cmd_bin *
lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
{
   return &scene->tile[x][y];
}
+
+
+/** Remove all commands from a bin */
+void
+lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y);
+
+
+/* Add a command to bin[x][y].
+ */
+static inline boolean
+lp_scene_bin_command( struct lp_scene *scene,
+ unsigned x, unsigned y,
+ unsigned cmd,
+ union lp_rast_cmd_arg arg )
+{
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ struct cmd_block *tail = bin->tail;
+
+ assert(x < scene->tiles_x);
+ assert(y < scene->tiles_y);
+ assert(cmd < LP_RAST_OP_MAX);
+
+ if (tail == NULL || tail->count == CMD_BLOCK_MAX) {
+ tail = lp_scene_new_cmd_block( scene, bin );
+ if (!tail) {
+ return FALSE;
+ }
+ assert(tail->count == 0);
+ }
+
+ {
+ unsigned i = tail->count;
+ tail->cmd[i] = cmd & LP_RAST_OP_MASK;
+ tail->arg[i] = arg;
+ tail->count++;
+ }
+
+ return TRUE;
+}
+
+
+static inline boolean
+lp_scene_bin_cmd_with_state( struct lp_scene *scene,
+ unsigned x, unsigned y,
+ const struct lp_rast_state *state,
+ unsigned cmd,
+ union lp_rast_cmd_arg arg )
+{
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+
+ if (state != bin->last_state) {
+ bin->last_state = state;
+ if (!lp_scene_bin_command(scene, x, y,
+ LP_RAST_OP_SET_STATE,
+ lp_rast_arg_state(state)))
+ return FALSE;
+ }
+
+ if (!lp_scene_bin_command( scene, x, y, cmd, arg ))
+ return FALSE;
+
+ return TRUE;
+}
+
+
+/* Add a command to all active bins.
+ */
+static inline boolean
+lp_scene_bin_everywhere( struct lp_scene *scene,
+ unsigned cmd,
+ const union lp_rast_cmd_arg arg )
+{
+ unsigned i, j;
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
+ if (!lp_scene_bin_command( scene, i, j, cmd, arg ))
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+
/** Total number of active bins (tiles) in the scene. */
static inline unsigned
lp_scene_get_num_bins( const struct lp_scene *scene )
{
   return scene->tiles_x * scene->tiles_y;
}
+
+
+void
+lp_scene_bin_iter_begin( struct lp_scene *scene );
+
+struct cmd_bin *
+lp_scene_bin_iter_next( struct lp_scene *scene, int *x, int *y );
+
+
+
+/* Begin/end binning of a scene
+ */
+void
+lp_scene_begin_binning( struct lp_scene *scene,
+ struct pipe_framebuffer_state *fb,
+ boolean discard );
+
+void
+lp_scene_end_binning( struct lp_scene *scene );
+
+
+/* Begin/end rasterization of a scene
+ */
+void
+lp_scene_begin_rasterization(struct lp_scene *scene);
+
+void
+lp_scene_end_rasterization(struct lp_scene *scene );
+
+
+
+
+
#endif /* LP_SCENE_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c
new file mode 100644
index 000000000..975db43c4
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c
@@ -0,0 +1,124 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Scene queue. We'll use two queues. One contains "full" scenes which
+ * are produced by the "setup" code. The other contains "empty" scenes
+ * which are produced by the "rast" code when it finishes rendering a scene.
+ */
+
+#include "util/u_ringbuffer.h"
+#include "util/u_memory.h"
+#include "lp_scene_queue.h"
+
+
+
+#define MAX_SCENE_QUEUE 4
+
+struct scene_packet {
+ struct util_packet header;
+ struct lp_scene *scene;
+};
+
+/**
+ * A queue of scenes
+ */
+struct lp_scene_queue
+{
+ struct util_ringbuffer *ring;
+};
+
+
+
+/** Allocate a new scene queue */
+struct lp_scene_queue *
+lp_scene_queue_create(void)
+{
+ struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue);
+ if (queue == NULL)
+ return NULL;
+
+ queue->ring = util_ringbuffer_create( MAX_SCENE_QUEUE *
+ sizeof( struct scene_packet ) / 4);
+ if (queue->ring == NULL)
+ goto fail;
+
+ return queue;
+
+fail:
+ FREE(queue);
+ return NULL;
+}
+
+
/** Delete a scene queue.
 * Note: does not free any scenes still held in the ring.
 */
void
lp_scene_queue_destroy(struct lp_scene_queue *queue)
{
   util_ringbuffer_destroy(queue->ring);
   FREE(queue);
}
+
+
+/** Remove first lp_scene from head of queue */
+struct lp_scene *
+lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait)
+{
+ struct scene_packet packet;
+ enum pipe_error ret;
+
+ packet.scene = NULL;
+
+ ret = util_ringbuffer_dequeue(queue->ring,
+ &packet.header,
+ sizeof packet / 4,
+ wait );
+ if (ret != PIPE_OK)
+ return NULL;
+
+ return packet.scene;
+}
+
+
/** Add an lp_scene to tail of queue */
void
lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene)
{
   struct scene_packet packet;

   /* packet sizes for the ring are expressed in dwords */
   packet.header.dwords = sizeof packet / 4;
   packet.header.data24 = 0;
   packet.scene = scene;

   util_ringbuffer_enqueue(queue->ring, &packet.header);
}
+
+
+
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h
new file mode 100644
index 000000000..dd9ab593b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_SCENE_QUEUE
+#define LP_SCENE_QUEUE
+
+#include "pipe/p_compiler.h"
+
+struct lp_scene_queue;
+struct lp_scene;
+
+
+struct lp_scene_queue *
+lp_scene_queue_create(void);
+
+void
+lp_scene_queue_destroy(struct lp_scene_queue *queue);
+
+struct lp_scene *
+lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait);
+
+void
+lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene);
+
+
+
+
#endif /* LP_SCENE_QUEUE */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
new file mode 100644
index 000000000..14eeab033
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -0,0 +1,623 @@
+/**************************************************************************
+ *
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+#include "util/u_format.h"
+#include "util/u_string.h"
+#include "util/u_format_s3tc.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "draw/draw_context.h"
+#include "gallivm/lp_bld_type.h"
+
+#include "os/os_misc.h"
+#include "os/os_time.h"
+#include "lp_texture.h"
+#include "lp_fence.h"
+#include "lp_jit.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_public.h"
+#include "lp_limits.h"
+#include "lp_rast.h"
+
+#include "state_tracker/sw_winsys.h"
+
+#ifdef DEBUG
+int LP_DEBUG = 0;
+
+static const struct debug_named_value lp_debug_flags[] = {
+ { "pipe", DEBUG_PIPE, NULL },
+ { "tgsi", DEBUG_TGSI, NULL },
+ { "tex", DEBUG_TEX, NULL },
+ { "setup", DEBUG_SETUP, NULL },
+ { "rast", DEBUG_RAST, NULL },
+ { "query", DEBUG_QUERY, NULL },
+ { "screen", DEBUG_SCREEN, NULL },
+ { "counters", DEBUG_COUNTERS, NULL },
+ { "scene", DEBUG_SCENE, NULL },
+ { "fence", DEBUG_FENCE, NULL },
+ { "mem", DEBUG_MEM, NULL },
+ { "fs", DEBUG_FS, NULL },
+ DEBUG_NAMED_VALUE_END
+};
+#endif
+
+int LP_PERF = 0;
+static const struct debug_named_value lp_perf_flags[] = {
+ { "texmem", PERF_TEX_MEM, NULL },
+ { "no_mipmap", PERF_NO_MIPMAPS, NULL },
+ { "no_linear", PERF_NO_LINEAR, NULL },
+ { "no_mip_linear", PERF_NO_MIP_LINEAR, NULL },
+ { "no_tex", PERF_NO_TEX, NULL },
+ { "no_blend", PERF_NO_BLEND, NULL },
+ { "no_depth", PERF_NO_DEPTH, NULL },
+ { "no_alphatest", PERF_NO_ALPHATEST, NULL },
+ DEBUG_NAMED_VALUE_END
+};
+
+
+static const char *
+llvmpipe_get_vendor(struct pipe_screen *screen)
+{
+ return "VMware, Inc.";
+}
+
+
+static const char *
+llvmpipe_get_name(struct pipe_screen *screen)
+{
+ static char buf[100];
+ util_snprintf(buf, sizeof(buf), "llvmpipe (LLVM %u.%u, %u bits)",
+ HAVE_LLVM >> 8, HAVE_LLVM & 0xff,
+ lp_native_vector_width );
+ return buf;
+}
+
+
+static int
+llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return PIPE_MAX_SO_BUFFERS;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return PIPE_MAX_COLOR_BUFS;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ return 0;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ return 1;
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ return 0;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return LP_MAX_TEXTURE_2D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return LP_MAX_TEXTURE_3D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return LP_MAX_TEXTURE_CUBE_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return LP_MAX_TEXTURE_ARRAY_LAYERS;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ return 1;
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ return 1;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 1;
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_START_INSTANCE:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ return 0;
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ return 1;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return 1;
+   /* this is a lie; we could support arbitrarily large offsets */
+ case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -32;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 31;
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ return 1;
+ case PIPE_CAP_TEXTURE_BARRIER:
+ return 0;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 16*4;
+ case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+ case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+ return 1024;
+ case PIPE_CAP_MAX_VERTEX_STREAMS:
+ return 1;
+ case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+ return 2048;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return 1;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ return 0;
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 330;
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ return 0;
+ case PIPE_CAP_COMPUTE:
+ return 0;
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ return 1;
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ return 0;
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ return 0;
+ case PIPE_CAP_DRAW_INDIRECT:
+ return 1;
+
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return 0;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return 64;
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 0;
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return PIPE_MAX_VIEWPORTS;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_NATIVE;
+ case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+ return 1;
+ case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+ return 4;
+ case PIPE_CAP_TEXTURE_GATHER_SM5:
+ case PIPE_CAP_TEXTURE_QUERY_LOD:
+ case PIPE_CAP_SAMPLE_SHADING:
+ case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+ return 0;
+ case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+ return 1;
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ return 0;
+ case PIPE_CAP_SAMPLER_VIEW_TARGET:
+ return 1;
+ case PIPE_CAP_FAKE_SW_MSAA:
+ return 1;
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ return 1;
+
+ case PIPE_CAP_VENDOR_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_DEVICE_ID:
+ return 0xFFFFFFFF;
+ case PIPE_CAP_ACCELERATED:
+ return 0;
+ case PIPE_CAP_VIDEO_MEMORY: {
+      /* XXX: Do we want to return the full amount of system memory? */
+ uint64_t system_memory;
+
+ if (!os_get_total_physical_memory(&system_memory))
+ return 0;
+
+ return (int)(system_memory >> 20);
+ }
+ case PIPE_CAP_UMA:
+ return 0;
+ case PIPE_CAP_CLIP_HALFZ:
+ return 1;
+ case PIPE_CAP_VERTEXID_NOBASE:
+ return 0;
+ case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ return 1;
+ case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ return 0;
+ }
+ /* should only get here on unhandled cases */
+ debug_printf("Unexpected PIPE_CAP %d query\n", param);
+ return 0;
+}
+
+static int
+llvmpipe_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
+{
+ switch(shader)
+ {
+ case PIPE_SHADER_FRAGMENT:
+ switch (param) {
+ default:
+ return gallivm_get_shader_param(param);
+ }
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_GEOMETRY:
+ switch (param) {
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ /* At this time, the draw module and llvmpipe driver only
+ * support vertex shader texture lookups when LLVM is enabled in
+ * the draw module.
+ */
+ if (debug_get_bool_option("DRAW_USE_LLVM", TRUE))
+ return PIPE_MAX_SAMPLERS;
+ else
+ return 0;
+ case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+ if (debug_get_bool_option("DRAW_USE_LLVM", TRUE))
+ return PIPE_MAX_SHADER_SAMPLER_VIEWS;
+ else
+ return 0;
+ default:
+ return draw_get_shader_param(shader, param);
+ }
+ default:
+ return 0;
+ }
+}
+
+static float
+llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
+{
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return 16.0; /* not actually significant at this time */
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 16.0; /* arbitrary */
+ case PIPE_CAPF_GUARD_BAND_LEFT:
+ case PIPE_CAPF_GUARD_BAND_TOP:
+ case PIPE_CAPF_GUARD_BAND_RIGHT:
+ case PIPE_CAPF_GUARD_BAND_BOTTOM:
+ return 0.0;
+ }
+ /* should only get here on unhandled cases */
+ debug_printf("Unexpected PIPE_CAP %d query\n", param);
+ return 0.0;
+}
+
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param format the format to test
+ * \param type one of PIPE_TEXTURE, PIPE_SURFACE
+ */
+static boolean
+llvmpipe_is_format_supported( struct pipe_screen *_screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bind)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct sw_winsys *winsys = screen->winsys;
+ const struct util_format_description *format_desc;
+
+ format_desc = util_format_description(format);
+ if (!format_desc)
+ return FALSE;
+
+ assert(target == PIPE_BUFFER ||
+ target == PIPE_TEXTURE_1D ||
+ target == PIPE_TEXTURE_1D_ARRAY ||
+ target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_2D_ARRAY ||
+ target == PIPE_TEXTURE_RECT ||
+ target == PIPE_TEXTURE_3D ||
+ target == PIPE_TEXTURE_CUBE ||
+ target == PIPE_TEXTURE_CUBE_ARRAY);
+
+ if (sample_count > 1)
+ return FALSE;
+
+ if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+         /* this is a lie; other formats COULD actually exist where we would fail */
+ if (format_desc->nr_channels < 3)
+ return FALSE;
+ }
+ else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB)
+ return FALSE;
+
+ if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN &&
+ format != PIPE_FORMAT_R11G11B10_FLOAT)
+ return FALSE;
+
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ if (format_desc->is_mixed)
+ return FALSE;
+
+ if (!format_desc->is_array && !format_desc->is_bitmask &&
+ format != PIPE_FORMAT_R11G11B10_FLOAT)
+ return FALSE;
+
+ /*
+ * XXX refuse formats known to crash in generate_unswizzled_blend().
+ * These include all 3-channel 24bit RGB8 variants, plus 48bit
+ * (except those using floats) 3-channel RGB16 variants (the latter
+ * seems to be more of a llvm bug though).
+ * The mesa state tracker only seems to use these for SINT/UINT formats.
+ */
+ if (format_desc->is_array && format_desc->nr_channels == 3) {
+ if (format_desc->block.bits == 24 || (format_desc->block.bits == 48 &&
+ !util_format_is_float(format))) {
+ return FALSE;
+ }
+ }
+ }
+
+ if (bind & PIPE_BIND_DISPLAY_TARGET) {
+ if(!winsys->is_displaytarget_format_supported(winsys, bind, format))
+ return FALSE;
+ }
+
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return FALSE;
+
+ if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+
+ /* TODO: Support stencil-only formats */
+ if (format_desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+ return FALSE;
+ }
+ }
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+ /* Software decoding is not hooked up. */
+ return FALSE;
+ }
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
+ format != PIPE_FORMAT_ETC1_RGB8)
+ return FALSE;
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ return util_format_s3tc_enabled;
+ }
+
+ /*
+ * Everything can be supported by u_format
+ * (those without fetch_rgba_float might be not but shouldn't hit that)
+ */
+
+ return TRUE;
+}
+
+
+
+
+static void
+llvmpipe_flush_frontbuffer(struct pipe_screen *_screen,
+ struct pipe_resource *resource,
+ unsigned level, unsigned layer,
+ void *context_private,
+ struct pipe_box *sub_box)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct sw_winsys *winsys = screen->winsys;
+ struct llvmpipe_resource *texture = llvmpipe_resource(resource);
+
+ assert(texture->dt);
+ if (texture->dt)
+ winsys->displaytarget_display(winsys, texture->dt, context_private, sub_box);
+}
+
+static void
+llvmpipe_destroy_screen( struct pipe_screen *_screen )
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct sw_winsys *winsys = screen->winsys;
+
+ if (screen->rast)
+ lp_rast_destroy(screen->rast);
+
+ lp_jit_screen_cleanup(screen);
+
+ if(winsys->destroy)
+ winsys->destroy(winsys);
+
+ pipe_mutex_destroy(screen->rast_mutex);
+
+ FREE(screen);
+}
+
+
+
+
+/**
+ * Fence reference counting.
+ */
+static void
+llvmpipe_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct lp_fence **old = (struct lp_fence **) ptr;
+ struct lp_fence *f = (struct lp_fence *) fence;
+
+ lp_fence_reference(old, f);
+}
+
+
+/**
+ * Wait for the fence to finish.
+ */
+static boolean
+llvmpipe_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence_handle,
+ uint64_t timeout)
+{
+ struct lp_fence *f = (struct lp_fence *) fence_handle;
+
+ if (!timeout)
+ return lp_fence_signalled(f);
+
+ lp_fence_wait(f);
+ return TRUE;
+}
+
+static uint64_t
+llvmpipe_get_timestamp(struct pipe_screen *_screen)
+{
+ return os_time_get_nano();
+}
+
+/**
+ * Create a new pipe_screen object
+ * Note: pipe_screen is subclassed here as llvmpipe_screen (see lp_screen.h).
+ */
+struct pipe_screen *
+llvmpipe_create_screen(struct sw_winsys *winsys)
+{
+ struct llvmpipe_screen *screen;
+
+ util_cpu_detect();
+
+#ifdef DEBUG
+ LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
+#endif
+
+ LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 );
+
+ screen = CALLOC_STRUCT(llvmpipe_screen);
+ if (!screen)
+ return NULL;
+
+ if (!lp_jit_screen_init(screen)) {
+ FREE(screen);
+ return NULL;
+ }
+
+ screen->winsys = winsys;
+
+ screen->base.destroy = llvmpipe_destroy_screen;
+
+ screen->base.get_name = llvmpipe_get_name;
+ screen->base.get_vendor = llvmpipe_get_vendor;
+ screen->base.get_device_vendor = llvmpipe_get_vendor; // TODO should be the CPU vendor
+ screen->base.get_param = llvmpipe_get_param;
+ screen->base.get_shader_param = llvmpipe_get_shader_param;
+ screen->base.get_paramf = llvmpipe_get_paramf;
+ screen->base.is_format_supported = llvmpipe_is_format_supported;
+
+ screen->base.context_create = llvmpipe_create_context;
+ screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
+ screen->base.fence_reference = llvmpipe_fence_reference;
+ screen->base.fence_finish = llvmpipe_fence_finish;
+
+ screen->base.get_timestamp = llvmpipe_get_timestamp;
+
+ llvmpipe_init_screen_resource_funcs(&screen->base);
+
+ screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0;
+#ifdef PIPE_SUBSYSTEM_EMBEDDED
+ screen->num_threads = 0;
+#endif
+ screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
+ screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);
+
+ screen->rast = lp_rast_create(screen->num_threads);
+ if (!screen->rast) {
+ lp_jit_screen_cleanup(screen);
+ FREE(screen);
+ return NULL;
+ }
+ pipe_mutex_init(screen->rast_mutex);
+
+ util_format_s3tc_init();
+
+ return &screen->base;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
new file mode 100644
index 000000000..00bf20c8c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Keith Whitwell <keithw@vmware.com>
+ */
+
+#ifndef LP_SCREEN_H
+#define LP_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "os/os_thread.h"
+#include "gallivm/lp_bld.h"
+
+
+struct sw_winsys;
+
+
+struct llvmpipe_screen
+{
+ struct pipe_screen base;
+
+ struct sw_winsys *winsys;
+
+ unsigned num_threads;
+
+ /* Increments whenever textures are modified. Contexts can track this.
+ */
+ unsigned timestamp;
+
+ struct lp_rasterizer *rast;
+ pipe_mutex rast_mutex;
+};
+
+
+
+
+static inline struct llvmpipe_screen *
+llvmpipe_screen( struct pipe_screen *pipe )
+{
+ return (struct llvmpipe_screen *)pipe;
+}
+
+
+
+#endif /* LP_SCREEN_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
new file mode 100644
index 000000000..4c8167a9e
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -0,0 +1,1493 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Tiling engine.
+ *
+ * Builds per-tile display lists and executes them on calls to
+ * lp_setup_flush().
+ */
+
+#include <limits.h>
+
+#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "draw/draw_pipe.h"
+#include "os/os_time.h"
+#include "lp_context.h"
+#include "lp_memory.h"
+#include "lp_scene.h"
+#include "lp_texture.h"
+#include "lp_debug.h"
+#include "lp_fence.h"
+#include "lp_query.h"
+#include "lp_rast.h"
+#include "lp_setup_context.h"
+#include "lp_screen.h"
+#include "lp_state.h"
+#include "state_tracker/sw_winsys.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+
+
+static boolean set_scene_state( struct lp_setup_context *, enum setup_state,
+ const char *reason);
+static boolean try_update_scene_state( struct lp_setup_context *setup );
+
+
+static void
+lp_setup_get_empty_scene(struct lp_setup_context *setup)
+{
+ assert(setup->scene == NULL);
+
+ setup->scene_idx++;
+ setup->scene_idx %= Elements(setup->scenes);
+
+ setup->scene = setup->scenes[setup->scene_idx];
+
+ if (setup->scene->fence) {
+ if (LP_DEBUG & DEBUG_SETUP)
+ debug_printf("%s: wait for scene %d\n",
+ __FUNCTION__, setup->scene->fence->id);
+
+ lp_fence_wait(setup->scene->fence);
+ }
+
+ lp_scene_begin_binning(setup->scene, &setup->fb, setup->rasterizer_discard);
+
+}
+
+
+static void
+first_triangle( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ assert(setup->state == SETUP_ACTIVE);
+ lp_setup_choose_triangle( setup );
+ setup->triangle( setup, v0, v1, v2 );
+}
+
+static void
+first_line( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4])
+{
+ assert(setup->state == SETUP_ACTIVE);
+ lp_setup_choose_line( setup );
+ setup->line( setup, v0, v1 );
+}
+
+static void
+first_point( struct lp_setup_context *setup,
+ const float (*v0)[4])
+{
+ assert(setup->state == SETUP_ACTIVE);
+ lp_setup_choose_point( setup );
+ setup->point( setup, v0 );
+}
+
+void lp_setup_reset( struct lp_setup_context *setup )
+{
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ /* Reset derived state */
+ for (i = 0; i < Elements(setup->constants); ++i) {
+ setup->constants[i].stored_size = 0;
+ setup->constants[i].stored_data = NULL;
+ }
+ setup->fs.stored = NULL;
+ setup->dirty = ~0;
+
+ /* no current bin */
+ setup->scene = NULL;
+
+ /* Reset some state:
+ */
+ memset(&setup->clear, 0, sizeof setup->clear);
+
+ /* Have an explicit "start-binning" call and get rid of this
+ * pointer twiddling?
+ */
+ setup->line = first_line;
+ setup->point = first_point;
+ setup->triangle = first_triangle;
+}
+
+
+/** Rasterize all scene's bins */
+static void
+lp_setup_rasterize_scene( struct lp_setup_context *setup )
+{
+ struct lp_scene *scene = setup->scene;
+ struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen);
+
+ scene->num_active_queries = setup->active_binned_queries;
+ memcpy(scene->active_queries, setup->active_queries,
+ scene->num_active_queries * sizeof(scene->active_queries[0]));
+
+ lp_scene_end_binning(scene);
+
+ lp_fence_reference(&setup->last_fence, scene->fence);
+
+ if (setup->last_fence)
+ setup->last_fence->issued = TRUE;
+
+ pipe_mutex_lock(screen->rast_mutex);
+
+ /* FIXME: We enqueue the scene then wait on the rasterizer to finish.
+ * This means we never actually run any vertex stuff in parallel to
+ * rasterization (not in the same context at least) which is what the
+ * multiple scenes per setup is about - when we get a new empty scene
+ * any old one is already empty again because we waited here for
+ * raster tasks to be finished. Ideally, we shouldn't need to wait here
+ * and rely on fences elsewhere when waiting is necessary.
+ * Certainly, lp_scene_end_rasterization() would need to be deferred too
+ * and there's probably other bits why this doesn't actually work.
+ */
+ lp_rast_queue_scene(screen->rast, scene);
+ lp_rast_finish(screen->rast);
+ pipe_mutex_unlock(screen->rast_mutex);
+
+ lp_scene_end_rasterization(setup->scene);
+ lp_setup_reset( setup );
+
+ LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
+}
+
+
+
+static boolean
+begin_binning( struct lp_setup_context *setup )
+{
+ struct lp_scene *scene = setup->scene;
+ boolean need_zsload = FALSE;
+ boolean ok;
+
+ assert(scene);
+ assert(scene->fence == NULL);
+
+ /* Always create a fence:
+ */
+ scene->fence = lp_fence_create(MAX2(1, setup->num_threads));
+ if (!scene->fence)
+ return FALSE;
+
+ ok = try_update_scene_state(setup);
+ if (!ok)
+ return FALSE;
+
+ if (setup->fb.zsbuf &&
+ ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
+ util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
+ need_zsload = TRUE;
+
+ LP_DBG(DEBUG_SETUP, "%s color clear bufs: %x depth: %s\n", __FUNCTION__,
+ setup->clear.flags >> 2,
+ need_zsload ? "clear": "load");
+
+ if (setup->clear.flags & PIPE_CLEAR_COLOR) {
+ unsigned cbuf;
+ for (cbuf = 0; cbuf < setup->fb.nr_cbufs; cbuf++) {
+ assert(PIPE_CLEAR_COLOR0 == 1 << 2);
+ if (setup->clear.flags & (1 << (2 + cbuf))) {
+ union lp_rast_cmd_arg clearrb_arg;
+ struct lp_rast_clear_rb *cc_scene =
+ (struct lp_rast_clear_rb *)
+ lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb));
+
+ if (!cc_scene) {
+ return FALSE;
+ }
+
+ cc_scene->cbuf = cbuf;
+ cc_scene->color_val = setup->clear.color_val[cbuf];
+ clearrb_arg.clear_rb = cc_scene;
+
+ if (!lp_scene_bin_everywhere(scene,
+ LP_RAST_OP_CLEAR_COLOR,
+ clearrb_arg))
+ return FALSE;
+ }
+ }
+ }
+
+ if (setup->fb.zsbuf) {
+ if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
+ ok = lp_scene_bin_everywhere( scene,
+ LP_RAST_OP_CLEAR_ZSTENCIL,
+ lp_rast_arg_clearzs(
+ setup->clear.zsvalue,
+ setup->clear.zsmask));
+ if (!ok)
+ return FALSE;
+ }
+ }
+
+ setup->clear.flags = 0;
+ setup->clear.zsmask = 0;
+ setup->clear.zsvalue = 0;
+
+ scene->had_queries = !!setup->active_binned_queries;
+
+ LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
+ return TRUE;
+}
+
+
+/* This basically bins and then flushes any outstanding full-screen
+ * clears.
+ *
+ * TODO: fast path for fullscreen clears and no triangles.
+ */
+static boolean
+execute_clears( struct lp_setup_context *setup )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ return begin_binning( setup );
+}
+
+const char *states[] = {
+ "FLUSHED",
+ "CLEARED",
+ "ACTIVE "
+};
+
+
+static boolean
+set_scene_state( struct lp_setup_context *setup,
+ enum setup_state new_state,
+ const char *reason)
+{
+ unsigned old_state = setup->state;
+
+ if (old_state == new_state)
+ return TRUE;
+
+ if (LP_DEBUG & DEBUG_SCENE) {
+ debug_printf("%s old %s new %s%s%s\n",
+ __FUNCTION__,
+ states[old_state],
+ states[new_state],
+ (new_state == SETUP_FLUSHED) ? ": " : "",
+ (new_state == SETUP_FLUSHED) ? reason : "");
+
+ if (new_state == SETUP_FLUSHED && setup->scene)
+ lp_debug_draw_bins_by_cmd_length(setup->scene);
+ }
+
+ /* wait for a free/empty scene
+ */
+ if (old_state == SETUP_FLUSHED)
+ lp_setup_get_empty_scene(setup);
+
+ switch (new_state) {
+ case SETUP_CLEARED:
+ break;
+
+ case SETUP_ACTIVE:
+ if (!begin_binning( setup ))
+ goto fail;
+ break;
+
+ case SETUP_FLUSHED:
+ if (old_state == SETUP_CLEARED)
+ if (!execute_clears( setup ))
+ goto fail;
+
+ lp_setup_rasterize_scene( setup );
+ assert(setup->scene == NULL);
+ break;
+
+ default:
+ assert(0 && "invalid setup state mode");
+ goto fail;
+ }
+
+ setup->state = new_state;
+ return TRUE;
+
+fail:
+ if (setup->scene) {
+ lp_scene_end_rasterization(setup->scene);
+ setup->scene = NULL;
+ }
+
+ setup->state = SETUP_FLUSHED;
+ lp_setup_reset( setup );
+ return FALSE;
+}
+
+
+void
+lp_setup_flush( struct lp_setup_context *setup,
+ struct pipe_fence_handle **fence,
+ const char *reason)
+{
+ set_scene_state( setup, SETUP_FLUSHED, reason );
+
+ if (fence) {
+ lp_fence_reference((struct lp_fence **)fence, setup->last_fence);
+ }
+}
+
+
+void
+lp_setup_bind_framebuffer( struct lp_setup_context *setup,
+ const struct pipe_framebuffer_state *fb )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ /* Flush any old scene.
+ */
+ set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );
+
+ /*
+ * Ensure the old scene is not reused.
+ */
+ assert(!setup->scene);
+
+ /* Set new state. This will be picked up later when we next need a
+ * scene.
+ */
+ util_copy_framebuffer_state(&setup->fb, fb);
+ setup->framebuffer.x0 = 0;
+ setup->framebuffer.y0 = 0;
+ setup->framebuffer.x1 = fb->width-1;
+ setup->framebuffer.y1 = fb->height-1;
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+}
+
+
+/*
+ * Try to clear one color buffer of the attached fb, either by binning a clear
+ * command or queuing up the clear for later (when binning is started).
+ */
+/*
+ * Pack the clear color for color buffer 'cbuf' and either bin the clear
+ * into the currently active scene, or record it in setup->clear for when
+ * binning starts.  Returns FALSE only when scene allocation or binning
+ * fails, so the caller can flush and retry.
+ */
+static boolean
+lp_setup_try_clear_color_buffer(struct lp_setup_context *setup,
+ const union pipe_color_union *color,
+ unsigned cbuf)
+{
+ union lp_rast_cmd_arg clearrb_arg;
+ union util_color uc;
+ enum pipe_format format = setup->fb.cbufs[cbuf]->format;
+
+ LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
+
+ if (util_format_is_pure_integer(format)) {
+ /*
+ * We expect int/uint clear values here, though some APIs
+ * might disagree (but in any case util_pack_color()
+ * couldn't handle it)...
+ */
+ if (util_format_is_pure_sint(format)) {
+ util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
+ }
+ else {
+ assert(util_format_is_pure_uint(format));
+ util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
+ }
+ }
+ else {
+ /* Normalized/float formats go through the generic packer. */
+ util_pack_color(color->f, format, &uc);
+ }
+
+ if (setup->state == SETUP_ACTIVE) {
+ struct lp_scene *scene = setup->scene;
+
+ /* Add the clear to existing scene. In the unusual case where
+ * both color and depth-stencil are being cleared when there's
+ * already been some rendering, we could discard the currently
+ * binned scene and start again, but I don't see that as being
+ * a common usage.
+ */
+ struct lp_rast_clear_rb *cc_scene =
+ (struct lp_rast_clear_rb *)
+ lp_scene_alloc_aligned(scene, sizeof(struct lp_rast_clear_rb), 8);
+
+ if (!cc_scene) {
+ /* Scene data store is full; caller flushes and retries. */
+ return FALSE;
+ }
+
+ cc_scene->cbuf = cbuf;
+ cc_scene->color_val = uc;
+ clearrb_arg.clear_rb = cc_scene;
+
+ if (!lp_scene_bin_everywhere(scene,
+ LP_RAST_OP_CLEAR_COLOR,
+ clearrb_arg))
+ return FALSE;
+ }
+ else {
+ /* Put ourselves into the 'pre-clear' state, specifically to try
+ * and accumulate multiple clears to color and depth_stencil
+ * buffers which the app or state-tracker might issue
+ * separately.
+ */
+ set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
+
+ /* Per-cbuf clear flags start at bit 2 (PIPE_CLEAR_COLOR0). */
+ assert(PIPE_CLEAR_COLOR0 == (1 << 2));
+ setup->clear.flags |= 1 << (cbuf + 2);
+ setup->clear.color_val[cbuf] = uc;
+ }
+
+ return TRUE;
+}
+
+/*
+ * Pack the depth/stencil clear value and write mask and either bin a
+ * clear command into the active scene or accumulate it in setup->clear.
+ * 'flags' selects which of depth and/or stencil are actually cleared.
+ * Returns FALSE if the scene is full, so the caller can flush and retry.
+ */
+static boolean
+lp_setup_try_clear_zs(struct lp_setup_context *setup,
+ double depth,
+ unsigned stencil,
+ unsigned flags)
+{
+ uint64_t zsmask = 0;
+ uint64_t zsvalue = 0;
+ uint32_t zmask32;
+ uint8_t smask8;
+
+ LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
+
+ /* All-ones channel mask for each of depth/stencil that is cleared. */
+ zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
+ smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+
+ zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
+ depth,
+ stencil);
+
+ zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
+ zmask32,
+ smask8);
+
+ /* Keep only the bits that will actually be written. */
+ zsvalue &= zsmask;
+
+ if (setup->state == SETUP_ACTIVE) {
+ struct lp_scene *scene = setup->scene;
+
+ /* Add the clear to existing scene. In the unusual case where
+ * both color and depth-stencil are being cleared when there's
+ * already been some rendering, we could discard the currently
+ * binned scene and start again, but I don't see that as being
+ * a common usage.
+ */
+ if (!lp_scene_bin_everywhere(scene,
+ LP_RAST_OP_CLEAR_ZSTENCIL,
+ lp_rast_arg_clearzs(zsvalue, zsmask)))
+ return FALSE;
+ }
+ else {
+ /* Put ourselves into the 'pre-clear' state, specifically to try
+ * and accumulate multiple clears to color and depth_stencil
+ * buffers which the app or state-tracker might issue
+ * separately.
+ */
+ set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
+
+ setup->clear.flags |= flags;
+
+ /* Merge with any zs clear accumulated earlier. */
+ setup->clear.zsmask |= zsmask;
+ setup->clear.zsvalue =
+ (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
+ }
+
+ return TRUE;
+}
+
+/**
+ * Clear the depth/stencil buffer and/or the enabled color buffers,
+ * as selected by 'flags'.  Each per-buffer clear may fail once when the
+ * scene is full; in that case we flush and retry, asserting the retry
+ * succeeds (it binned into a fresh, empty scene).
+ */
+void
+lp_setup_clear( struct lp_setup_context *setup,
+ const union pipe_color_union *color,
+ double depth,
+ unsigned stencil,
+ unsigned flags )
+{
+ unsigned i;
+
+ /*
+ * Note any of these (max 9) clears could fail (but at most there should
+ * be just one failure!). This avoids doing the previous succeeded
+ * clears again (we still clear tiles twice if a clear command succeeded
+ * partially for one buffer).
+ */
+ if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
+ unsigned flagszs = flags & PIPE_CLEAR_DEPTHSTENCIL;
+ if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs)) {
+ lp_setup_flush(setup, NULL, __FUNCTION__);
+
+ if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs))
+ assert(0);
+ }
+ }
+
+ if (flags & PIPE_CLEAR_COLOR) {
+ /* Per-cbuf clear flags start at bit 2 (PIPE_CLEAR_COLOR0). */
+ assert(PIPE_CLEAR_COLOR0 == (1 << 2));
+ for (i = 0; i < setup->fb.nr_cbufs; i++) {
+ if ((flags & (1 << (2 + i))) && setup->fb.cbufs[i]) {
+ if (!lp_setup_try_clear_color_buffer(setup, color, i)) {
+ lp_setup_flush(setup, NULL, __FUNCTION__);
+
+ if (!lp_setup_try_clear_color_buffer(setup, color, i))
+ assert(0);
+ }
+ }
+ }
+ }
+}
+
+
+
+/** Record rasterizer state affecting triangle setup (cull mode, winding,
+ * scissor enable, pixel center convention and fill rule).
+ */
+void
+lp_setup_set_triangle_state( struct lp_setup_context *setup,
+ unsigned cull_mode,
+ boolean ccw_is_frontface,
+ boolean scissor,
+ boolean half_pixel_center,
+ boolean bottom_edge_rule)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->ccw_is_frontface = ccw_is_frontface;
+ setup->cullmode = cull_mode;
+ /* Reset to the lazy first-triangle hook so setup state is revalidated. */
+ setup->triangle = first_triangle;
+ setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f;
+ setup->bottom_edge_rule = bottom_edge_rule;
+
+ if (setup->scissor_test != scissor) {
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+ setup->scissor_test = scissor;
+ }
+}
+
+/** Record the current line width for line rasterization. */
+void
+lp_setup_set_line_state( struct lp_setup_context *setup,
+ float line_width)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->line_width = line_width;
+}
+
+/** Record point rasterization state (size, per-vertex size flag and
+ * point-sprite coordinate generation).
+ */
+void
+lp_setup_set_point_state( struct lp_setup_context *setup,
+ float point_size,
+ boolean point_size_per_vertex,
+ uint sprite_coord_enable,
+ uint sprite_coord_origin)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->point_size = point_size;
+ setup->sprite_coord_enable = sprite_coord_enable;
+ setup->sprite_coord_origin = sprite_coord_origin;
+ setup->point_size_per_vertex = point_size_per_vertex;
+}
+
+/** Bind the vertex-attribute setup variant to use for new primitives. */
+void
+lp_setup_set_setup_variant( struct lp_setup_context *setup,
+ const struct lp_setup_variant *variant)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->setup.variant = variant;
+}
+
+/** Bind the current fragment shader variant; the new state is stored into
+ * the scene lazily on the next try_update_scene_state().
+ */
+void
+lp_setup_set_fs_variant( struct lp_setup_context *setup,
+ struct lp_fragment_shader_variant *variant)
+{
+ LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__,
+ variant);
+ /* FIXME: reference count */
+
+ setup->fs.current.variant = variant;
+ setup->dirty |= LP_SETUP_NEW_FS;
+}
+
+/** Record the fragment shader constant buffers.  Slots beyond 'num' are
+ * cleared so stale buffers are not kept referenced.
+ */
+void
+lp_setup_set_fs_constants(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_constant_buffer *buffers)
+{
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers);
+
+ assert(num <= Elements(setup->constants));
+
+ for (i = 0; i < num; ++i) {
+ util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]);
+ }
+ /* Unbind the remaining slots (copying NULL releases references). */
+ for (; i < Elements(setup->constants); i++) {
+ util_copy_constant_buffer(&setup->constants[i].current, NULL);
+ }
+ setup->dirty |= LP_SETUP_NEW_CONSTANTS;
+}
+
+
+/** Record the alpha-test reference value in the JIT context; only marks
+ * the FS state dirty when the value actually changes.
+ */
+void
+lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
+ float alpha_ref_value )
+{
+ LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value);
+
+ if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) {
+ setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value;
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+}
+
+/** Record front/back stencil reference values (refs[0] = front,
+ * refs[1] = back); dirties FS state only on change.
+ */
+void
+lp_setup_set_stencil_ref_values( struct lp_setup_context *setup,
+ const ubyte refs[2] )
+{
+ LP_DBG(DEBUG_SETUP, "%s %d %d\n", __FUNCTION__, refs[0], refs[1]);
+
+ if (setup->fs.current.jit_context.stencil_ref_front != refs[0] ||
+ setup->fs.current.jit_context.stencil_ref_back != refs[1]) {
+ setup->fs.current.jit_context.stencil_ref_front = refs[0];
+ setup->fs.current.jit_context.stencil_ref_back = refs[1];
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+}
+
+/** Record the blend color; dirties the blend-color state only when the
+ * value actually changed (compared by memcmp).
+ */
+void
+lp_setup_set_blend_color( struct lp_setup_context *setup,
+ const struct pipe_blend_color *blend_color )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(blend_color);
+
+ if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) {
+ memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color);
+ setup->dirty |= LP_SETUP_NEW_BLEND_COLOR;
+ }
+}
+
+
+/** Record all per-viewport scissor rectangles.  pipe_scissor_state uses an
+ * exclusive max bound, while u_rect is inclusive — hence the max-1 below.
+ */
+void
+lp_setup_set_scissors( struct lp_setup_context *setup,
+ const struct pipe_scissor_state *scissors )
+{
+ unsigned i;
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(scissors);
+
+ for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) {
+ setup->scissors[i].x0 = scissors[i].minx;
+ setup->scissors[i].x1 = scissors[i].maxx-1;
+ setup->scissors[i].y0 = scissors[i].miny;
+ setup->scissors[i].y1 = scissors[i].maxy-1;
+ }
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+}
+
+
+/** Record which vertex provides the flat-shaded attribute values. */
+void
+lp_setup_set_flatshade_first( struct lp_setup_context *setup,
+ boolean flatshade_first )
+{
+ setup->flatshade_first = flatshade_first;
+}
+
+/** Toggle rasterizer discard.  A change flushes the current scene, since
+ * already-binned commands were generated under the old setting.
+ */
+void
+lp_setup_set_rasterizer_discard( struct lp_setup_context *setup,
+ boolean rasterizer_discard )
+{
+ if (setup->rasterizer_discard != rasterizer_discard) {
+ setup->rasterizer_discard = rasterizer_discard;
+ set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );
+ }
+}
+
+/** Record the vertex layout description used to interpret vertex data. */
+void
+lp_setup_set_vertex_info( struct lp_setup_context *setup,
+ struct vertex_info *vertex_info )
+{
+ /* XXX: just silently holding onto the pointer:
+ */
+ setup->vertex_info = vertex_info;
+}
+
+
+/**
+ * Called during state validation when LP_NEW_VIEWPORT is set.
+ */
+/**
+ * Called during state validation when LP_NEW_VIEWPORT is set.
+ * Derives the per-viewport depth range (min/max) from the viewport
+ * transform, honoring the rasterizer's clip_halfz convention, and
+ * dirties the viewport state only for viewports whose range changed.
+ */
+void
+lp_setup_set_viewports(struct lp_setup_context *setup,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *viewports)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(num_viewports <= PIPE_MAX_VIEWPORTS);
+ assert(viewports);
+
+ /*
+ * For use in lp_state_fs.c, propagate the viewport values for all viewports.
+ */
+ for (i = 0; i < num_viewports; i++) {
+ float min_depth;
+ float max_depth;
+
+ if (lp->rasterizer->clip_halfz == 0) {
+ /* [-1,1] clip space: translate is the center, scale the half-range. */
+ float half_depth = viewports[i].scale[2];
+ min_depth = viewports[i].translate[2] - half_depth;
+ max_depth = min_depth + half_depth * 2.0f;
+ } else {
+ /* [0,1] clip space: translate is the near value directly. */
+ min_depth = viewports[i].translate[2];
+ max_depth = min_depth + viewports[i].scale[2];
+ }
+
+ if (setup->viewports[i].min_depth != min_depth ||
+ setup->viewports[i].max_depth != max_depth) {
+ setup->viewports[i].min_depth = min_depth;
+ setup->viewports[i].max_depth = max_depth;
+ setup->dirty |= LP_SETUP_NEW_VIEWPORTS;
+ }
+ }
+}
+
+
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ */
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ * Translates the bound sampler views into the lp_jit_texture records the
+ * generated fragment shader code reads (base pointer, sizes, strides and
+ * mip offsets), taking a resource reference on each texture so its data
+ * outlives the scene.  Views beyond 'num' are treated as NULL and their
+ * previous JIT state is simply left untouched.
+ */
+void
+lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+ for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ if (view) {
+ struct pipe_resource *res = view->texture;
+ struct llvmpipe_resource *lp_tex = llvmpipe_resource(res);
+ struct lp_jit_texture *jit_tex;
+ jit_tex = &setup->fs.current.jit_context.textures[i];
+
+ /* We're referencing the texture's internal data, so save a
+ * reference to it.
+ */
+ pipe_resource_reference(&setup->fs.current_tex[i], res);
+
+ if (!lp_tex->dt) {
+ /* regular texture - setup array of mipmap level offsets */
+ int j;
+ unsigned first_level = 0;
+ unsigned last_level = 0;
+
+ if (llvmpipe_resource_is_texture(res)) {
+ first_level = view->u.tex.first_level;
+ last_level = view->u.tex.last_level;
+ assert(first_level <= last_level);
+ assert(last_level <= res->last_level);
+ jit_tex->base = lp_tex->tex_data;
+ }
+ else {
+ jit_tex->base = lp_tex->data;
+ }
+
+ if (LP_PERF & PERF_TEX_MEM) {
+ /* use dummy tile memory */
+ jit_tex->base = lp_dummy_tile;
+ jit_tex->width = TILE_SIZE/8;
+ jit_tex->height = TILE_SIZE/8;
+ jit_tex->depth = 1;
+ jit_tex->first_level = 0;
+ jit_tex->last_level = 0;
+ jit_tex->mip_offsets[0] = 0;
+ jit_tex->row_stride[0] = 0;
+ jit_tex->img_stride[0] = 0;
+ }
+ else {
+ jit_tex->width = res->width0;
+ jit_tex->height = res->height0;
+ jit_tex->depth = res->depth0;
+ jit_tex->first_level = first_level;
+ jit_tex->last_level = last_level;
+
+ if (llvmpipe_resource_is_texture(res)) {
+ for (j = first_level; j <= last_level; j++) {
+ jit_tex->mip_offsets[j] = lp_tex->mip_offsets[j];
+ jit_tex->row_stride[j] = lp_tex->row_stride[j];
+ jit_tex->img_stride[j] = lp_tex->img_stride[j];
+ }
+
+ if (view->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->target == PIPE_TEXTURE_2D_ARRAY ||
+ view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ /*
+ * For array textures, we don't have first_layer, instead
+ * adjust last_layer (stored as depth) plus the mip level offsets
+ * (as we have mip-first layout can't just adjust base ptr).
+ * XXX For mip levels, could do something similar.
+ */
+ jit_tex->depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
+ for (j = first_level; j <= last_level; j++) {
+ jit_tex->mip_offsets[j] += view->u.tex.first_layer *
+ lp_tex->img_stride[j];
+ }
+ if (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ /* cube (array) depth is a whole number of 6-face cubes */
+ assert(jit_tex->depth % 6 == 0);
+ }
+ assert(view->u.tex.first_layer <= view->u.tex.last_layer);
+ assert(view->u.tex.last_layer < res->array_size);
+ }
+ }
+ else {
+ /*
+ * For buffers, we don't have first_element, instead adjust
+ * last_element (stored as width) plus the base pointer.
+ */
+ unsigned view_blocksize = util_format_get_blocksize(view->format);
+ /* probably don't really need to fill that out */
+ jit_tex->mip_offsets[0] = 0;
+ jit_tex->row_stride[0] = 0;
+ jit_tex->img_stride[0] = 0;
+
+ /* everything specified in number of elements here. */
+ jit_tex->width = view->u.buf.last_element - view->u.buf.first_element + 1;
+ jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.first_element *
+ view_blocksize;
+ /* XXX Unsure if we need to sanitize parameters? */
+ assert(view->u.buf.first_element <= view->u.buf.last_element);
+ assert(view->u.buf.last_element * view_blocksize < res->width0);
+ }
+ }
+ }
+ else {
+ /* display target texture/surface */
+ /*
+ * XXX: Where should this be unmapped?
+ */
+ struct llvmpipe_screen *screen = llvmpipe_screen(res->screen);
+ struct sw_winsys *winsys = screen->winsys;
+ jit_tex->base = winsys->displaytarget_map(winsys, lp_tex->dt,
+ PIPE_TRANSFER_READ);
+ jit_tex->row_stride[0] = lp_tex->row_stride[0];
+ jit_tex->img_stride[0] = lp_tex->img_stride[0];
+ jit_tex->mip_offsets[0] = 0;
+ jit_tex->width = res->width0;
+ jit_tex->height = res->height0;
+ jit_tex->depth = res->depth0;
+ jit_tex->first_level = jit_tex->last_level = 0;
+ assert(jit_tex->base);
+ }
+ }
+ }
+
+ setup->dirty |= LP_SETUP_NEW_FS;
+}
+
+
+/**
+ * Called during state validation when LP_NEW_SAMPLER is set.
+ */
+/**
+ * Called during state validation when LP_NEW_SAMPLER is set.
+ * Copies the dynamic sampler parameters (LOD clamp/bias, border color)
+ * into the JIT sampler records; samplers beyond 'num' are left untouched.
+ */
+void
+lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_sampler_state **samplers)
+{
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL;
+
+ if (sampler) {
+ struct lp_jit_sampler *jit_sam;
+ jit_sam = &setup->fs.current.jit_context.samplers[i];
+
+ jit_sam->min_lod = sampler->min_lod;
+ jit_sam->max_lod = sampler->max_lod;
+ jit_sam->lod_bias = sampler->lod_bias;
+ COPY_4V(jit_sam->border_color, sampler->border_color.f);
+ }
+ }
+
+ setup->dirty |= LP_SETUP_NEW_FS;
+}
+
+
+/**
+ * Is the given texture referenced by any scene?
+ * Note: we have to check all scenes including any scenes currently
+ * being rendered and the current scene being built.
+ */
+unsigned
+lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
+ const struct pipe_resource *texture )
+{
+ unsigned i;
+
+ /* check the render targets */
+ for (i = 0; i < setup->fb.nr_cbufs; i++) {
+ if (setup->fb.cbufs[i] && setup->fb.cbufs[i]->texture == texture)
+ return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
+ }
+ if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) {
+ return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE;
+ }
+
+ /* check textures referenced by the scene (covers all scenes, including
+ * ones currently being rendered and the scene being built)
+ */
+ for (i = 0; i < Elements(setup->scenes); i++) {
+ if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) {
+ return LP_REFERENCED_FOR_READ;
+ }
+ }
+
+ return LP_UNREFERENCED;
+}
+
+
+/**
+ * Called by vbuf code when we're about to draw something.
+ *
+ * This function stores all dirty state in the current scene's display list
+ * memory, via lp_scene_alloc(). We can not pass pointers of mutable state to
+ * the JIT functions, as the JIT functions will be called later on, most likely
+ * on a different thread.
+ *
+ * When processing dirty state it is imperative that we don't refer to any
+ * pointers previously allocated with lp_scene_alloc() in this function (or any
+ * function) as they may belong to a scene freed since then.
+ */
+static boolean
+try_update_scene_state( struct lp_setup_context *setup )
+{
+ static const float fake_const_buf[4];
+ /* fs.stored is NULL right after a scene begins, so it doubles as a
+ * "nothing stored in this scene yet" marker; allocation failures are
+ * only expected on non-new (i.e. already partly full) scenes.
+ */
+ boolean new_scene = (setup->fs.stored == NULL);
+ struct lp_scene *scene = setup->scene;
+ unsigned i;
+
+ assert(scene);
+
+ if (setup->dirty & LP_SETUP_NEW_VIEWPORTS) {
+ /*
+ * Record new depth range state for changes due to viewport updates.
+ *
+ * TODO: Collapse the existing viewport and depth range information
+ * into one structure, for access by JIT.
+ */
+ struct lp_jit_viewport *stored;
+
+ stored = (struct lp_jit_viewport *)
+ lp_scene_alloc(scene, sizeof setup->viewports);
+
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
+ }
+
+ memcpy(stored, setup->viewports, sizeof setup->viewports);
+
+ setup->fs.current.jit_context.viewports = stored;
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+
+ if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
+ uint8_t *stored;
+ float* fstored;
+ unsigned i, j;
+ unsigned size;
+
+ /* Alloc u8_blend_color (16 x i8) and f_blend_color (4 or 8 x f32) */
+ size = 4 * 16 * sizeof(uint8_t);
+ size += (LP_MAX_VECTOR_LENGTH / 4) * sizeof(float);
+ stored = lp_scene_alloc_aligned(scene, size, LP_MIN_VECTOR_ALIGN);
+
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
+ }
+
+ /* Store floating point colour (RGBA repeated to fill the vector) */
+ fstored = (float*)(stored + 4*16);
+ for (i = 0; i < (LP_MAX_VECTOR_LENGTH / 4); ++i) {
+ fstored[i] = setup->blend_color.current.color[i % 4];
+ }
+
+ /* smear each blend color component across 16 ubyte elements */
+ for (i = 0; i < 4; ++i) {
+ uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
+ for (j = 0; j < 16; ++j)
+ stored[i*16 + j] = c;
+ }
+
+ setup->blend_color.stored = stored;
+ setup->fs.current.jit_context.u8_blend_color = stored;
+ setup->fs.current.jit_context.f_blend_color = fstored;
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+
+ if (setup->dirty & LP_SETUP_NEW_CONSTANTS) {
+ for (i = 0; i < Elements(setup->constants); ++i) {
+ struct pipe_resource *buffer = setup->constants[i].current.buffer;
+ const unsigned current_size = MIN2(setup->constants[i].current.buffer_size,
+ LP_MAX_TGSI_CONST_BUFFER_SIZE);
+ const ubyte *current_data = NULL;
+ int num_constants;
+
+ STATIC_ASSERT(DATA_BLOCK_SIZE >= LP_MAX_TGSI_CONST_BUFFER_SIZE);
+
+ if (buffer) {
+ /* resource buffer */
+ current_data = (ubyte *) llvmpipe_resource_data(buffer);
+ }
+ else if (setup->constants[i].current.user_buffer) {
+ /* user-space buffer */
+ current_data = (ubyte *) setup->constants[i].current.user_buffer;
+ }
+
+ if (current_data) {
+ current_data += setup->constants[i].current.buffer_offset;
+
+ /* TODO: copy only the actually used constants? */
+
+ /* Only copy into the scene if the contents actually changed. */
+ if (setup->constants[i].stored_size != current_size ||
+ !setup->constants[i].stored_data ||
+ memcmp(setup->constants[i].stored_data,
+ current_data,
+ current_size) != 0) {
+ void *stored;
+
+ stored = lp_scene_alloc(scene, current_size);
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
+ }
+
+ memcpy(stored,
+ current_data,
+ current_size);
+ setup->constants[i].stored_size = current_size;
+ setup->constants[i].stored_data = stored;
+ }
+ setup->fs.current.jit_context.constants[i] =
+ setup->constants[i].stored_data;
+ }
+ else {
+ /* No buffer bound: point the JIT at a small dummy buffer. */
+ setup->constants[i].stored_size = 0;
+ setup->constants[i].stored_data = NULL;
+ setup->fs.current.jit_context.constants[i] = fake_const_buf;
+ }
+
+ num_constants =
+ setup->constants[i].stored_size / (sizeof(float) * 4);
+ setup->fs.current.jit_context.num_constants[i] = num_constants;
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+ }
+
+
+ if (setup->dirty & LP_SETUP_NEW_FS) {
+ if (!setup->fs.stored ||
+ memcmp(setup->fs.stored,
+ &setup->fs.current,
+ sizeof setup->fs.current) != 0)
+ {
+ struct lp_rast_state *stored;
+
+ /* The fs state that's been stored in the scene is different from
+ * the new, current state. So allocate a new lp_rast_state object
+ * and append it to the bin's setup data buffer.
+ */
+ stored = (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored);
+ if (!stored) {
+ assert(!new_scene);
+ return FALSE;
+ }
+
+ memcpy(stored,
+ &setup->fs.current,
+ sizeof setup->fs.current);
+ setup->fs.stored = stored;
+
+ /* The scene now references the textures in the rasterization
+ * state record. Note that now.
+ */
+ for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+ if (setup->fs.current_tex[i]) {
+ if (!lp_scene_add_resource_reference(scene,
+ setup->fs.current_tex[i],
+ new_scene)) {
+ assert(!new_scene);
+ return FALSE;
+ }
+ }
+ }
+ }
+ }
+
+ if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
+ unsigned i;
+ /* Per-viewport draw region = framebuffer rect, optionally
+ * intersected with that viewport's scissor.
+ */
+ for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) {
+ setup->draw_regions[i] = setup->framebuffer;
+ if (setup->scissor_test) {
+ u_rect_possible_intersection(&setup->scissors[i],
+ &setup->draw_regions[i]);
+ }
+ }
+ }
+
+ setup->dirty = 0;
+
+ assert(setup->fs.stored);
+ return TRUE;
+}
+
+/**
+ * Validate derived state and, when 'update_scene' is set, store all dirty
+ * state into the current scene.  If storing fails (scene full), the scene
+ * is flushed and restarted and the store retried once.  Returns FALSE on
+ * unrecoverable failure.
+ */
+boolean
+lp_setup_update_state( struct lp_setup_context *setup,
+ boolean update_scene )
+{
+ /* Some of the 'draw' pipeline stages may have changed some driver state.
+ * Make sure we've processed those state changes before anything else.
+ *
+ * XXX this is the only place where llvmpipe_context is used in the
+ * setup code. This may get refactored/changed...
+ */
+ {
+ struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
+ if (lp->dirty) {
+ llvmpipe_update_derived(lp);
+ }
+
+ if (lp->setup->dirty) {
+ llvmpipe_update_setup(lp);
+ }
+
+ assert(setup->setup.variant);
+
+ /* Will probably need to move this somewhere else, just need
+ * to know about vertex shader point size attribute.
+ */
+ setup->psize = lp->psize_slot;
+ setup->viewport_index_slot = lp->viewport_index_slot;
+ setup->layer_slot = lp->layer_slot;
+ setup->face_slot = lp->face_slot;
+
+ assert(lp->dirty == 0);
+
+ assert(lp->setup_variant.key.size ==
+ setup->setup.variant->key.size);
+
+ assert(memcmp(&lp->setup_variant.key,
+ &setup->setup.variant->key,
+ setup->setup.variant->key.size) == 0);
+ }
+
+ if (update_scene && setup->state != SETUP_ACTIVE) {
+ if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ ))
+ return FALSE;
+ }
+
+ /* Only call into update_scene_state() if we already have a
+ * scene:
+ */
+ if (update_scene && setup->scene) {
+ assert(setup->state == SETUP_ACTIVE);
+
+ if (try_update_scene_state(setup))
+ return TRUE;
+
+ /* Update failed, try to restart the scene.
+ *
+ * Cannot call lp_setup_flush_and_restart() directly here
+ * because of potential recursion.
+ */
+ if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
+ return FALSE;
+
+ if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__))
+ return FALSE;
+
+ if (!setup->scene)
+ return FALSE;
+
+ return try_update_scene_state(setup);
+ }
+
+ return TRUE;
+}
+
+
+
+/* Only caller is lp_setup_vbuf_destroy()
+ */
+/* Only caller is lp_setup_vbuf_destroy().  Drops every reference the
+ * setup context holds (framebuffer, textures, constant buffers, scenes,
+ * last fence) and frees the context itself.
+ */
+void
+lp_setup_destroy( struct lp_setup_context *setup )
+{
+ uint i;
+
+ lp_setup_reset( setup );
+
+ util_unreference_framebuffer_state(&setup->fb);
+
+ for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+ pipe_resource_reference(&setup->fs.current_tex[i], NULL);
+ }
+
+ for (i = 0; i < Elements(setup->constants); i++) {
+ pipe_resource_reference(&setup->constants[i].current.buffer, NULL);
+ }
+
+ /* free the scenes in the 'empty' queue */
+ for (i = 0; i < Elements(setup->scenes); i++) {
+ struct lp_scene *scene = setup->scenes[i];
+
+ /* wait for any in-flight rendering using this scene to finish */
+ if (scene->fence)
+ lp_fence_wait(scene->fence);
+
+ lp_scene_destroy(scene);
+ }
+
+ lp_fence_reference(&setup->last_fence, NULL);
+
+ FREE( setup );
+}
+
+
+/**
+ * Create a new primitive tiling engine. Plug it into the backend of
+ * the draw module. Currently also creates a rasterizer to use with
+ * it.
+ */
+/**
+ * Create a new primitive tiling engine. Plug it into the backend of
+ * the draw module. Currently also creates a rasterizer to use with
+ * it.  Returns NULL on allocation failure (goto-based cleanup below
+ * unwinds partial construction).
+ */
+struct lp_setup_context *
+lp_setup_create( struct pipe_context *pipe,
+ struct draw_context *draw )
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+ struct lp_setup_context *setup;
+ unsigned i;
+
+ setup = CALLOC_STRUCT(lp_setup_context);
+ if (!setup) {
+ goto no_setup;
+ }
+
+ lp_setup_init_vbuf(setup);
+
+ /* Used only in update_state():
+ */
+ setup->pipe = pipe;
+
+
+ setup->num_threads = screen->num_threads;
+ setup->vbuf = draw_vbuf_stage(draw, &setup->base);
+ if (!setup->vbuf) {
+ goto no_vbuf;
+ }
+
+ draw_set_rasterize_stage(draw, setup->vbuf);
+ draw_set_render(draw, &setup->base);
+
+ /* create some empty scenes */
+ for (i = 0; i < MAX_SCENES; i++) {
+ setup->scenes[i] = lp_scene_create( pipe );
+ if (!setup->scenes[i]) {
+ goto no_scenes;
+ }
+ }
+
+ /* lazy first-primitive hooks; real setup happens on first use */
+ setup->triangle = first_triangle;
+ setup->line = first_line;
+ setup->point = first_point;
+
+ setup->dirty = ~0;
+
+ return setup;
+
+no_scenes:
+ for (i = 0; i < MAX_SCENES; i++) {
+ if (setup->scenes[i]) {
+ lp_scene_destroy(setup->scenes[i]);
+ }
+ }
+
+ setup->vbuf->destroy(setup->vbuf);
+no_vbuf:
+ FREE(setup);
+no_setup:
+ return NULL;
+}
+
+
+/**
+ * Put a BeginQuery command into all bins.
+ */
+/**
+ * Put a BeginQuery command into all bins.
+ * Only binnable query types (occlusion counter/predicate, pipeline
+ * statistics) are tracked; others return immediately.
+ */
+void
+lp_setup_begin_query(struct lp_setup_context *setup,
+ struct llvmpipe_query *pq)
+{
+
+ set_scene_state(setup, SETUP_ACTIVE, "begin_query");
+
+ if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
+ pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ pq->type == PIPE_QUERY_PIPELINE_STATISTICS))
+ return;
+
+ /* init the query to its beginning state */
+ assert(setup->active_binned_queries < LP_MAX_ACTIVE_BINNED_QUERIES);
+ /* exceeding list size so just ignore the query */
+ if (setup->active_binned_queries >= LP_MAX_ACTIVE_BINNED_QUERIES) {
+ return;
+ }
+ assert(setup->active_queries[setup->active_binned_queries] == NULL);
+ setup->active_queries[setup->active_binned_queries] = pq;
+ setup->active_binned_queries++;
+
+ assert(setup->scene);
+ if (setup->scene) {
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_BEGIN_QUERY,
+ lp_rast_arg_query(pq))) {
+
+ /* NOTE(review): on flush failure the query stays in the
+ * active list even though no BEGIN was binned — presumably
+ * harmless since end_query removes it, but worth confirming.
+ */
+ if (!lp_setup_flush_and_restart(setup))
+ return;
+
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_BEGIN_QUERY,
+ lp_rast_arg_query(pq))) {
+ return;
+ }
+ }
+ setup->scene->had_queries |= TRUE;
+ }
+}
+
+
+/**
+ * Put an EndQuery command into all bins.
+ */
+/**
+ * Put an EndQuery command into all bins.
+ * Also points pq->fence at the last scene contributing to the result,
+ * and removes binnable queries from the active-query list.
+ */
+void
+lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
+{
+ set_scene_state(setup, SETUP_ACTIVE, "end_query");
+
+ assert(setup->scene);
+ if (setup->scene) {
+ /* pq->fence should be the fence of the *last* scene which
+ * contributed to the query result.
+ */
+ lp_fence_reference(&pq->fence, setup->scene->fence);
+
+ if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
+ pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ pq->type == PIPE_QUERY_PIPELINE_STATISTICS ||
+ pq->type == PIPE_QUERY_TIMESTAMP) {
+ if (pq->type == PIPE_QUERY_TIMESTAMP &&
+ !(setup->scene->tiles_x | setup->scene->tiles_y)) {
+ /*
+ * If there's a zero width/height framebuffer, there's no bins and
+ * hence no rast task is ever run. So fill in something here instead.
+ */
+ pq->end[0] = os_time_get_nano();
+ }
+
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_END_QUERY,
+ lp_rast_arg_query(pq))) {
+ if (!lp_setup_flush_and_restart(setup))
+ goto fail;
+
+ if (!lp_scene_bin_everywhere(setup->scene,
+ LP_RAST_OP_END_QUERY,
+ lp_rast_arg_query(pq))) {
+ goto fail;
+ }
+ }
+ setup->scene->had_queries |= TRUE;
+ }
+ }
+ else {
+ lp_fence_reference(&pq->fence, setup->last_fence);
+ }
+
+/* fall-through into 'fail' is intentional: the list cleanup below runs
+ * on both success and failure paths.
+ */
+fail:
+ /* Need to do this now not earlier since it still needs to be marked as
+ * active when binning it would cause a flush.
+ */
+ if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER ||
+ pq->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
+ pq->type == PIPE_QUERY_PIPELINE_STATISTICS) {
+ unsigned i;
+
+ /* remove from active binned query list */
+ for (i = 0; i < setup->active_binned_queries; i++) {
+ if (setup->active_queries[i] == pq)
+ break;
+ }
+ assert(i < setup->active_binned_queries);
+ if (i == setup->active_binned_queries)
+ return;
+ /* swap-remove: move the last active query into the freed slot */
+ setup->active_binned_queries--;
+ setup->active_queries[i] = setup->active_queries[setup->active_binned_queries];
+ setup->active_queries[setup->active_binned_queries] = NULL;
+ }
+}
+
+
+/**
+ * Flush the current scene and immediately begin a new one, re-storing all
+ * current state into it.  Used when binning fails because the scene data
+ * store is full.  Returns FALSE if the restart itself fails.
+ */
+boolean
+lp_setup_flush_and_restart(struct lp_setup_context *setup)
+{
+ if (0) debug_printf("%s\n", __FUNCTION__);
+
+ assert(setup->state == SETUP_ACTIVE);
+
+ if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__))
+ return FALSE;
+
+ if (!lp_setup_update_state(setup, TRUE))
+ return FALSE;
+
+ return TRUE;
+}
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h
new file mode 100644
index 000000000..a42df2dc9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -0,0 +1,168 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef LP_SETUP_H
+#define LP_SETUP_H
+
+#include "pipe/p_compiler.h"
+#include "lp_jit.h"
+
+struct draw_context;
+struct vertex_info;
+
+
+struct pipe_resource;
+struct pipe_query;
+struct pipe_surface;
+struct pipe_blend_color;
+struct pipe_screen;
+struct pipe_framebuffer_state;
+struct lp_fragment_shader_variant;
+struct lp_jit_context;
+struct llvmpipe_query;
+struct pipe_fence_handle;
+struct lp_setup_variant;
+struct lp_setup_context;
+
+void lp_setup_reset( struct lp_setup_context *setup );
+
+struct lp_setup_context *
+lp_setup_create( struct pipe_context *pipe,
+ struct draw_context *draw );
+
+void
+lp_setup_clear(struct lp_setup_context *setup,
+ const union pipe_color_union *clear_color,
+ double clear_depth,
+ unsigned clear_stencil,
+ unsigned flags);
+
+
+
+void
+lp_setup_flush( struct lp_setup_context *setup,
+ struct pipe_fence_handle **fence,
+ const char *reason);
+
+
+void
+lp_setup_bind_framebuffer( struct lp_setup_context *setup,
+ const struct pipe_framebuffer_state *fb );
+
+void
+lp_setup_set_triangle_state( struct lp_setup_context *setup,
+ unsigned cullmode,
+ boolean front_is_ccw,
+ boolean scissor,
+ boolean half_pixel_center,
+ boolean bottom_edge_rule);
+
+void
+lp_setup_set_line_state( struct lp_setup_context *setup,
+ float line_width);
+
+void
+lp_setup_set_point_state( struct lp_setup_context *setup,
+ float point_size,
+ boolean point_size_per_vertex,
+ uint sprite_coord_enable,
+ uint sprite_coord_origin);
+
+void
+lp_setup_set_setup_variant( struct lp_setup_context *setup,
+ const struct lp_setup_variant *variant );
+
+void
+lp_setup_set_fs_variant( struct lp_setup_context *setup,
+ struct lp_fragment_shader_variant *variant );
+
+void
+lp_setup_set_fs_constants(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_constant_buffer *buffers);
+
+void
+lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
+ float alpha_ref_value );
+
+void
+lp_setup_set_stencil_ref_values( struct lp_setup_context *setup,
+ const ubyte refs[2] );
+
+void
+lp_setup_set_blend_color( struct lp_setup_context *setup,
+ const struct pipe_blend_color *blend_color );
+
+void
+lp_setup_set_scissors( struct lp_setup_context *setup,
+ const struct pipe_scissor_state *scissors );
+
+void
+lp_setup_set_viewports(struct lp_setup_context *setup,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *viewports);
+
+void
+lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_sampler_view **views);
+
+void
+lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_sampler_state **samplers);
+
+unsigned
+lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
+ const struct pipe_resource *texture );
+
+void
+lp_setup_set_flatshade_first( struct lp_setup_context *setup,
+ boolean flatshade_first );
+
+void
+lp_setup_set_rasterizer_discard( struct lp_setup_context *setup,
+ boolean rasterizer_discard );
+
+void
+lp_setup_set_vertex_info( struct lp_setup_context *setup,
+ struct vertex_info *info );
+
+void
+lp_setup_begin_query(struct lp_setup_context *setup,
+ struct llvmpipe_query *pq);
+
+void
+lp_setup_end_query(struct lp_setup_context *setup,
+ struct llvmpipe_query *pq);
+
+/* Clamp a viewport index into [0, PIPE_MAX_VIEWPORTS); any out-of-range
+ * value (including negative) maps to viewport 0.
+ */
+static inline unsigned
+lp_clamp_viewport_idx(int idx)
+{
+ return (PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0;
+}
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
new file mode 100644
index 000000000..2410e2384
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -0,0 +1,208 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * The setup code is concerned with point/line/triangle setup and
+ * putting commands/data into the bins.
+ */
+
+
+#ifndef LP_SETUP_CONTEXT_H
+#define LP_SETUP_CONTEXT_H
+
+#include "lp_setup.h"
+#include "lp_rast.h"
+#include "lp_scene.h"
+#include "lp_bld_interp.h" /* for struct lp_shader_input */
+
+#include "draw/draw_vbuf.h"
+#include "util/u_rect.h"
+#include "util/u_pack_color.h"
+
+#define LP_SETUP_NEW_FS 0x01
+#define LP_SETUP_NEW_CONSTANTS 0x02
+#define LP_SETUP_NEW_BLEND_COLOR 0x04
+#define LP_SETUP_NEW_SCISSOR 0x08
+#define LP_SETUP_NEW_VIEWPORTS 0x10
+
+
+struct lp_setup_variant;
+
+
+/** Max number of scenes */
+/* XXX: make multiple scenes per context work, see lp_setup_rasterize_scene */
+#define MAX_SCENES 1
+
+
+
+/**
+ * Point/line/triangle setup context.
+ * Note: "stored" below indicates data which is stored in the bins,
+ * not arbitrary malloc'd memory.
+ *
+ *
+ * Subclass of vbuf_render, plugged directly into the draw module as
+ * the rendering backend.
+ */
+struct lp_setup_context
+{
+ struct vbuf_render base;
+
+ struct pipe_context *pipe;
+ struct vertex_info *vertex_info;
+ uint prim;
+ uint vertex_size;
+ uint nr_vertices;
+ uint sprite_coord_enable, sprite_coord_origin;
+ uint vertex_buffer_size;
+ void *vertex_buffer;
+
+ /* Final pipeline stage for draw module. Draw module should
+ * create/install this itself now.
+ */
+ struct draw_stage *vbuf;
+ unsigned num_threads;
+ unsigned scene_idx;
+ struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */
+ struct lp_scene *scene; /**< current scene being built */
+
+ struct lp_fence *last_fence;
+ struct llvmpipe_query *active_queries[LP_MAX_ACTIVE_BINNED_QUERIES];
+ unsigned active_binned_queries;
+
+ boolean flatshade_first;
+ boolean ccw_is_frontface;
+ boolean scissor_test;
+ boolean point_size_per_vertex;
+ boolean rasterizer_discard;
+ unsigned cullmode;
+ unsigned bottom_edge_rule;
+ float pixel_offset;
+ float line_width;
+ float point_size;
+ float psize;
+ unsigned viewport_index_slot;
+ unsigned layer_slot;
+ int face_slot;
+
+ struct pipe_framebuffer_state fb;
+ struct u_rect framebuffer;
+ struct u_rect scissors[PIPE_MAX_VIEWPORTS];
+ struct u_rect draw_regions[PIPE_MAX_VIEWPORTS]; /* intersection of fb & scissor */
+ struct lp_jit_viewport viewports[PIPE_MAX_VIEWPORTS];
+
+ struct {
+ unsigned flags;
+ union util_color color_val[PIPE_MAX_COLOR_BUFS];
+ uint64_t zsmask;
+ uint64_t zsvalue; /**< lp_rast_clear_zstencil() cmd */
+ } clear;
+
+ enum setup_state {
+ SETUP_FLUSHED, /**< scene is null */
+ SETUP_CLEARED, /**< scene exists but has only clears */
+ SETUP_ACTIVE /**< scene exists and has at least one draw/query */
+ } state;
+
+ struct {
+ const struct lp_rast_state *stored; /**< what's in the scene */
+ struct lp_rast_state current; /**< currently set state */
+ struct pipe_resource *current_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ } fs;
+
+ /** fragment shader constants */
+ struct {
+ struct pipe_constant_buffer current;
+ unsigned stored_size;
+ const void *stored_data;
+ } constants[LP_MAX_TGSI_CONST_BUFFERS];
+
+ struct {
+ struct pipe_blend_color current;
+ uint8_t *stored;
+ } blend_color;
+
+
+ struct {
+ const struct lp_setup_variant *variant;
+ } setup;
+
+ unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
+
+ void (*point)( struct lp_setup_context *,
+ const float (*v0)[4]);
+
+ void (*line)( struct lp_setup_context *,
+ const float (*v0)[4],
+ const float (*v1)[4]);
+
+ void (*triangle)( struct lp_setup_context *,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4]);
+};
+
+void lp_setup_choose_triangle( struct lp_setup_context *setup );
+void lp_setup_choose_line( struct lp_setup_context *setup );
+void lp_setup_choose_point( struct lp_setup_context *setup );
+
+void lp_setup_init_vbuf(struct lp_setup_context *setup);
+
+boolean lp_setup_update_state( struct lp_setup_context *setup,
+ boolean update_scene);
+
+void lp_setup_destroy( struct lp_setup_context *setup );
+
+boolean lp_setup_flush_and_restart(struct lp_setup_context *setup);
+
+void
+lp_setup_print_triangle(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4]);
+
+void
+lp_setup_print_vertex(struct lp_setup_context *setup,
+ const char *name,
+ const float (*v)[4]);
+
+
+struct lp_rast_triangle *
+lp_setup_alloc_triangle(struct lp_scene *scene,
+ unsigned num_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size);
+
+boolean
+lp_setup_bin_triangle( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bbox,
+ int nr_planes,
+ unsigned scissor_index );
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
new file mode 100644
index 000000000..a190254d9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -0,0 +1,748 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for lines
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+#include "lp_context.h"
+#include "draw/draw_context.h"
+
+#define NUM_CHANNELS 4
+
+struct lp_line_info {
+
+ float dx;
+ float dy;
+ float oneoverarea;
+ boolean frontfacing;
+
+ const float (*v1)[4];
+ const float (*v2)[4];
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct lp_setup_context *setup,
+ struct lp_line_info *info,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ info->a0[slot][i] = value;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void linear_coef( struct lp_setup_context *setup,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a1 = info->v1[vert_attr][i];
+ float a2 = info->v2[vert_attr][i];
+
+ float da21 = a1 - a2;
+ float dadx = da21 * info->dx * info->oneoverarea;
+ float dady = da21 * info->dy * info->oneoverarea;
+
+ info->dadx[slot][i] = dadx;
+ info->dady[slot][i] = dady;
+
+ info->a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+ float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+
+ float da21 = a1 - a2;
+ float dadx = da21 * info->dx * info->oneoverarea;
+ float dady = da21 * info->dy * info->oneoverarea;
+
+ info->dadx[slot][i] = dadx;
+ info->dady[slot][i] = dady;
+
+ info->a0[slot][i] = (a1 -
+ (dadx * (info->v1[0][0] - setup->pixel_offset) +
+ dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+static void
+setup_fragcoord_coef( struct lp_setup_context *setup,
+ struct lp_line_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ info->a0[slot][0] = 0.0;
+ info->dadx[slot][0] = 1.0;
+ info->dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ info->a0[slot][1] = 0.0;
+ info->dadx[slot][1] = 0.0;
+ info->dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ linear_coef(setup, info, slot, 0, 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ linear_coef(setup, info, slot, 0, 3);
+ }
+}
+
+/**
+ * Compute the tri->coef[] array dadx, dady, a0 values.
+ */
+static void setup_line_coefficients( struct lp_setup_context *setup,
+ struct lp_line_info *info)
+{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+ unsigned usage_mask = key->inputs[slot].usage_mask;
+ unsigned i;
+
+ switch (key->inputs[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (key->flatshade_first) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i);
+ }
+ else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ linear_coef(setup, info, slot+1, vert_attr, i);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(setup, info, slot+1, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+
+ case LP_INTERP_POSITION:
+      /*
+       * The generated pixel interpolators will pick up the coeffs from
+       * slot 0, so we need to ensure that the usage mask covers all
+       * usages.
+       */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_FACING:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, info, slot+1,
+ info->frontfacing ? 1.0f : -1.0f, i);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_fragcoord_coef(setup, info, 0,
+ fragcoord_usage_mask);
+}
+
+
+
+static inline int subpixel_snap( float a )
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+
+/**
+ * Print line vertex attribs (for debug).
+ */
+static void
+print_line(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ uint i;
+
+ debug_printf("llvmpipe line\n");
+ for (i = 0; i < 1 + key->num_inputs; i++) {
+ debug_printf(" v1[%d]: %f %f %f %f\n", i,
+ v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
+ }
+ for (i = 0; i < 1 + key->num_inputs; i++) {
+ debug_printf(" v2[%d]: %f %f %f %f\n", i,
+ v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
+ }
+}
+
+
+static inline boolean sign(float x){
+ return x >= 0;
+}
+
+
+/* Used on positive floats only:
+ */
+static inline float fracf(float f)
+{
+ return f - floorf(f);
+}
+
+
+
+static boolean
+try_setup_line( struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+ struct lp_scene *scene = setup->scene;
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ struct lp_rast_triangle *line;
+ struct lp_rast_plane *plane;
+ struct lp_line_info info;
+ float width = MAX2(1.0, setup->line_width);
+ struct u_rect bbox;
+ unsigned tri_bytes;
+ int x[4];
+ int y[4];
+ int i;
+ int nr_planes = 4;
+ unsigned viewport_index = 0;
+ unsigned layer = 0;
+
+ /* linewidth should be interpreted as integer */
+ int fixed_width = util_iround(width) * FIXED_ONE;
+
+ float x_offset=0;
+ float y_offset=0;
+ float x_offset_end=0;
+ float y_offset_end=0;
+
+ float x1diff;
+ float y1diff;
+ float x2diff;
+ float y2diff;
+ float dx, dy;
+ float area;
+
+ boolean draw_start;
+ boolean draw_end;
+ boolean will_draw_start;
+ boolean will_draw_end;
+
+ if (0)
+ print_line(setup, v1, v2);
+
+ if (setup->scissor_test) {
+ nr_planes = 8;
+ if (setup->viewport_index_slot > 0) {
+ unsigned *udata = (unsigned*)v1[setup->viewport_index_slot];
+ viewport_index = lp_clamp_viewport_idx(*udata);
+ }
+ }
+ else {
+ nr_planes = 4;
+ }
+
+ if (setup->layer_slot > 0) {
+ layer = *(unsigned*)v1[setup->layer_slot];
+ layer = MIN2(layer, scene->fb_max_layer);
+ }
+
+ dx = v1[0][0] - v2[0][0];
+ dy = v1[0][1] - v2[0][1];
+ area = (dx * dx + dy * dy);
+ if (area == 0) {
+ LP_COUNT(nr_culled_tris);
+ return TRUE;
+ }
+
+ info.oneoverarea = 1.0f / area;
+ info.dx = dx;
+ info.dy = dy;
+ info.v1 = v1;
+ info.v2 = v2;
+
+
+ /* X-MAJOR LINE */
+ if (fabsf(dx) >= fabsf(dy)) {
+ float dydx = dy / dx;
+
+ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+ y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+ x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+ y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+ if (y2diff==-0.5 && dy<0){
+ y2diff = 0.5;
+ }
+
+ /*
+ * Diamond exit rule test for starting point
+ */
+ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+ draw_start = TRUE;
+ }
+ else if (sign(x1diff) == sign(-dx)) {
+ draw_start = FALSE;
+ }
+ else if (sign(-y1diff) != sign(dy)) {
+ draw_start = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float yintersect = fracf(v1[0][1]) + x1diff * dydx;
+ draw_start = (yintersect < 1.0 && yintersect > 0.0);
+ }
+
+
+ /*
+ * Diamond exit rule test for ending point
+ */
+ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+ draw_end = FALSE;
+ }
+ else if (sign(x2diff) != sign(-dx)) {
+ draw_end = FALSE;
+ }
+ else if (sign(-y2diff) == sign(dy)) {
+ draw_end = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float yintersect = fracf(v2[0][1]) + x2diff * dydx;
+ draw_end = (yintersect < 1.0 && yintersect > 0.0);
+ }
+
+ /* Are we already drawing start/end?
+ */
+ will_draw_start = sign(-x1diff) != sign(dx);
+ will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0;
+
+ if (dx < 0) {
+ /* if v2 is to the right of v1, swap pointers */
+ const float (*temp)[4] = v1;
+ v1 = v2;
+ v2 = temp;
+ dx = -dx;
+ dy = -dy;
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ x_offset_end = - x1diff - 0.5;
+ y_offset_end = x_offset_end * dydx;
+
+ }
+ if (will_draw_end != draw_end) {
+ x_offset = - x2diff - 0.5;
+ y_offset = x_offset * dydx;
+ }
+
+ }
+ else{
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ x_offset = - x1diff + 0.5;
+ y_offset = x_offset * dydx;
+ }
+ if (will_draw_end != draw_end) {
+ x_offset_end = - x2diff + 0.5;
+ y_offset_end = x_offset_end * dydx;
+ }
+ }
+
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset);
+ x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+ x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+ x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset);
+
+ y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2;
+ y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2;
+ y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2;
+ y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2;
+
+ }
+ else {
+ const float dxdy = dx / dy;
+
+ /* Y-MAJOR LINE */
+ x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+ y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+ x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+ y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+ if (x2diff==-0.5 && dx<0) {
+ x2diff = 0.5;
+ }
+
+ /*
+ * Diamond exit rule test for starting point
+ */
+ if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+ draw_start = TRUE;
+ }
+ else if (sign(-y1diff) == sign(dy)) {
+ draw_start = FALSE;
+ }
+ else if (sign(x1diff) != sign(-dx)) {
+ draw_start = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float xintersect = fracf(v1[0][0]) + y1diff * dxdy;
+ draw_start = (xintersect < 1.0 && xintersect > 0.0);
+ }
+
+ /*
+ * Diamond exit rule test for ending point
+ */
+ if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+ draw_end = FALSE;
+ }
+ else if (sign(-y2diff) != sign(dy) ) {
+ draw_end = FALSE;
+ }
+ else if (sign(x2diff) == sign(-dx) ) {
+ draw_end = TRUE;
+ }
+ else {
+ /* do intersection test */
+ float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
+ draw_end = (xintersect < 1.0 && xintersect >= 0.0);
+ }
+
+ /* Are we already drawing start/end?
+ */
+ will_draw_start = sign(y1diff) == sign(dy);
+ will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0;
+
+ if (dy > 0) {
+ /* if v2 is on top of v1, swap pointers */
+ const float (*temp)[4] = v1;
+ v1 = v2;
+ v2 = temp;
+ dx = -dx;
+ dy = -dy;
+
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ y_offset_end = - y1diff + 0.5;
+ x_offset_end = y_offset_end * dxdy;
+ }
+ if (will_draw_end != draw_end) {
+ y_offset = - y2diff + 0.5;
+ x_offset = y_offset * dxdy;
+ }
+ }
+ else {
+ /* Otherwise shift planes appropriately */
+ if (will_draw_start != draw_start) {
+ y_offset = - y1diff - 0.5;
+ x_offset = y_offset * dxdy;
+
+ }
+ if (will_draw_end != draw_end) {
+ y_offset_end = - y2diff - 0.5;
+ x_offset_end = y_offset_end * dxdy;
+ }
+ }
+
+ /* x/y positions in fixed point */
+ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2;
+ x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
+ x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2;
+ x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2;
+
+ y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset);
+ y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+ y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+ y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset);
+ }
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
+
+ bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ }
+
+ if (bbox.x1 < bbox.x0 ||
+ bbox.y1 < bbox.y0) {
+ if (0) debug_printf("empty bounding box\n");
+ LP_COUNT(nr_culled_tris);
+ return TRUE;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return TRUE;
+ }
+
+ /* Can safely discard negative regions:
+ */
+ bbox.x0 = MAX2(bbox.x0, 0);
+ bbox.y0 = MAX2(bbox.y0, 0);
+
+ line = lp_setup_alloc_triangle(scene,
+ key->num_inputs,
+ nr_planes,
+ &tri_bytes);
+ if (!line)
+ return FALSE;
+
+#ifdef DEBUG
+ line->v[0][0] = v1[0][0];
+ line->v[1][0] = v2[0][0];
+ line->v[0][1] = v1[0][1];
+ line->v[1][1] = v2[0][1];
+#endif
+
+ LP_COUNT(nr_tris);
+
+ if (lp_context->active_statistics_queries &&
+ !llvmpipe_rasterization_disabled(lp_context)) {
+ lp_context->pipeline_statistics.c_primitives++;
+ }
+
+ /* calculate the deltas */
+ plane = GET_PLANES(line);
+ plane[0].dcdy = x[0] - x[1];
+ plane[1].dcdy = x[1] - x[2];
+ plane[2].dcdy = x[2] - x[3];
+ plane[3].dcdy = x[3] - x[0];
+
+ plane[0].dcdx = y[0] - y[1];
+ plane[1].dcdx = y[1] - y[2];
+ plane[2].dcdx = y[2] - y[3];
+ plane[3].dcdx = y[3] - y[0];
+
+ if (draw_will_inject_frontface(lp_context->draw) &&
+ setup->face_slot > 0) {
+ line->inputs.frontfacing = v1[setup->face_slot][0];
+ } else {
+ line->inputs.frontfacing = TRUE;
+ }
+
+ /* Setup parameter interpolants:
+ */
+ info.a0 = GET_A0(&line->inputs);
+ info.dadx = GET_DADX(&line->inputs);
+ info.dady = GET_DADY(&line->inputs);
+ info.frontfacing = line->inputs.frontfacing;
+ setup_line_coefficients(setup, &info);
+
+ line->inputs.disable = FALSE;
+ line->inputs.opaque = FALSE;
+ line->inputs.layer = layer;
+ line->inputs.viewport_index = viewport_index;
+
+ for (i = 0; i < 4; i++) {
+
+ /* half-edge constants, will be interated over the whole render
+ * target.
+ */
+ plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]);
+
+
+ /* correct for top-left vs. bottom-left fill convention.
+ */
+ if (plane[i].dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane[i].c++;
+ }
+ else if (plane[i].dcdx == 0) {
+ if (setup->pixel_offset == 0) {
+ /* correct for top-left fill convention:
+ */
+ if (plane[i].dcdy > 0) plane[i].c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane[i].dcdy < 0) plane[i].c++;
+ }
+ }
+
+ plane[i].dcdx *= FIXED_ONE;
+ plane[i].dcdy *= FIXED_ONE;
+
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane[i].eo = 0;
+ if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+ if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
+ }
+
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangles edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 8) {
+ const struct u_rect *scissor =
+ &setup->scissors[viewport_index];
+
+ plane[4].dcdx = -1;
+ plane[4].dcdy = 0;
+ plane[4].c = 1-scissor->x0;
+ plane[4].eo = 1;
+
+ plane[5].dcdx = 1;
+ plane[5].dcdy = 0;
+ plane[5].c = scissor->x1+1;
+ plane[5].eo = 0;
+
+ plane[6].dcdx = 0;
+ plane[6].dcdy = 1;
+ plane[6].c = 1-scissor->y0;
+ plane[6].eo = 1;
+
+ plane[7].dcdx = 0;
+ plane[7].dcdy = -1;
+ plane[7].c = scissor->y1+1;
+ plane[7].eo = 0;
+ }
+
+ return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
+}
+
+
+static void lp_setup_line( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4] )
+{
+ if (!try_setup_line( setup, v0, v1 ))
+ {
+ if (!lp_setup_flush_and_restart(setup))
+ return;
+
+ if (!try_setup_line( setup, v0, v1 ))
+ return;
+ }
+}
+
+
+void lp_setup_choose_line( struct lp_setup_context *setup )
+{
+ setup->line = lp_setup_line;
+}
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
new file mode 100644
index 000000000..75544b524
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -0,0 +1,541 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for points
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_setup_context.h"
+#include "lp_perf.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+#include "lp_context.h"
+#include "tgsi/tgsi_scan.h"
+#include "draw/draw_context.h"
+
+#define NUM_CHANNELS 4
+
+struct point_info {
+ /* x,y deltas */
+ int dy01, dy12;
+ int dx01, dx12;
+
+ const float (*v0)[4];
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
+
+ boolean frontfacing;
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void
+constant_coef(struct lp_setup_context *setup,
+ struct point_info *info,
+ unsigned slot,
+ const float value,
+ unsigned i)
+{
+ info->a0[slot][i] = value;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
+}
+
+
+static void
+point_persp_coeff(struct lp_setup_context *setup,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned i)
+{
+   /*
+    * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A
+    * better strategy would be to take the primitive into consideration when
+    * generating the fragment shader key, and therefore avoid the per-fragment
+    * perspective divide.
+    */
+
+ float w0 = info->v0[0][3];
+
+ assert(i < 4);
+
+ info->a0[slot][i] = info->v0[slot][i]*w0;
+ info->dadx[slot][i] = 0.0f;
+ info->dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Setup automatic texcoord coefficients (for sprite rendering).
+ * \param slot the vertex attribute slot to setup
+ * \param i the attribute channel in [0,3]
+ * \param sprite_coord_origin one of PIPE_SPRITE_COORD_x
+ * \param perspective whether the shader expects pre-multiplied w, i.e.,
+ *                    LP_INTERP_PERSPECTIVE is specified in the shader key
+ */
+static void
+texcoord_coef(struct lp_setup_context *setup,
+ const struct point_info *info,
+ unsigned slot,
+ unsigned i,
+ unsigned sprite_coord_origin,
+ boolean perspective)
+{
+ float w0 = info->v0[0][3];
+
+ assert(i < 4);
+
+ if (i == 0) {
+ float dadx = FIXED_ONE / (float)info->dx12;
+ float dady = 0.0f;
+ float x0 = info->v0[0][0] - setup->pixel_offset;
+ float y0 = info->v0[0][1] - setup->pixel_offset;
+
+ info->dadx[slot][0] = dadx;
+ info->dady[slot][0] = dady;
+ info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
+
+ if (perspective) {
+ info->dadx[slot][0] *= w0;
+ info->dady[slot][0] *= w0;
+ info->a0[slot][0] *= w0;
+ }
+ }
+ else if (i == 1) {
+ float dadx = 0.0f;
+ float dady = FIXED_ONE / (float)info->dx12;
+ float x0 = info->v0[0][0] - setup->pixel_offset;
+ float y0 = info->v0[0][1] - setup->pixel_offset;
+
+ if (sprite_coord_origin == PIPE_SPRITE_COORD_LOWER_LEFT) {
+ dady = -dady;
+ }
+
+ info->dadx[slot][1] = dadx;
+ info->dady[slot][1] = dady;
+ info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
+
+ if (perspective) {
+ info->dadx[slot][1] *= w0;
+ info->dady[slot][1] *= w0;
+ info->a0[slot][1] *= w0;
+ }
+ }
+ else if (i == 2) {
+ info->a0[slot][2] = 0.0f;
+ info->dadx[slot][2] = 0.0f;
+ info->dady[slot][2] = 0.0f;
+ }
+ else {
+ info->a0[slot][3] = perspective ? w0 : 1.0f;
+ info->dadx[slot][3] = 0.0f;
+ info->dady[slot][3] = 0.0f;
+ }
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial
+ * Z and W are copied from position_coef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_point_fragcoord_coef(struct lp_setup_context *setup,
+ struct point_info *info,
+ unsigned slot,
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ info->a0[slot][0] = 0.0;
+ info->dadx[slot][0] = 1.0;
+ info->dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ info->a0[slot][1] = 0.0;
+ info->dadx[slot][1] = 0.0;
+ info->dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ constant_coef(setup, info, slot, info->v0[0][2], 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ constant_coef(setup, info, slot, info->v0[0][3], 3);
+ }
+}
+
+
+/**
+ * Compute the point->coef[] array dadx, dady, a0 values.
+ * Fills one coefficient slot per fragment-shader input (offset by one,
+ * since slot 0 is reserved for the position/fragcoord input) and then
+ * sets up slot 0 itself via setup_point_fragcoord_coef().
+ */
+static void
+setup_point_coefficients( struct lp_setup_context *setup,
+ struct point_info *info)
+{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ const struct lp_fragment_shader *shader = setup->fs.current.variant->shader;
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned vert_attr = key->inputs[slot].src_index;
+ unsigned usage_mask = key->inputs[slot].usage_mask;
+ enum lp_interp interp = key->inputs[slot].interp;
+ boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE);
+ unsigned i;
+
+ /* NOTE(review): bitwise & with a 0/1 boolean only tests the X bit of
+ * usage_mask; presumably `perspective && usage_mask` was intended --
+ * confirm against upstream before changing.
+ */
+ if (perspective & usage_mask) {
+ /* perspective interpolation needs 1/w in the fragcoord slot */
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ }
+
+ switch (interp) {
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so all need to ensure that the usage mask is covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_LINEAR:
+ /* Sprite tex coords may use linear interpolation someday */
+ /* fall-through */
+ case LP_INTERP_PERSPECTIVE:
+ /* check if the sprite coord flag is set for this attribute.
+ * If so, set it up so it up so x and y vary from 0 to 1.
+ */
+ if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) {
+ unsigned semantic_index = shader->info.base.input_semantic_index[slot];
+ /* Note that sprite_coord enable is a bitfield of
+ * PIPE_MAX_SHADER_OUTPUTS bits.
+ */
+ if (semantic_index < PIPE_MAX_SHADER_OUTPUTS &&
+ (setup->sprite_coord_enable & (1 << semantic_index))) {
+ /* slot + 1: coefficient slot 0 holds the position input */
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ texcoord_coef(setup, info, slot + 1, i,
+ setup->sprite_coord_origin,
+ perspective);
+ }
+ }
+ break;
+ }
+ }
+ /* fall-through */
+ case LP_INTERP_CONSTANT:
+ /* non-sprite point attributes are constant across the point */
+ for (i = 0; i < NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ if (perspective) {
+ /* pre-multiply by 1/w so the shader's w-divide is correct */
+ point_persp_coeff(setup, info, slot+1, i);
+ }
+ else {
+ constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i);
+ }
+ }
+ }
+ break;
+
+ case LP_INTERP_FACING:
+ /* +1.0 for front-facing, -1.0 for back-facing */
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, info, slot+1,
+ info->frontfacing ? 1.0f : -1.0f, i);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_point_fragcoord_coef(setup, info, 0,
+ fragcoord_usage_mask);
+}
+
+
+/** Convert a float coordinate to fixed point (FIXED_ONE units),
+ * rounding to the nearest subpixel.
+ */
+static inline int
+subpixel_snap(float a)
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+/**
+ * Print point vertex attribs (for debug).
+ * Dumps the position (index 0) plus all num_inputs attribute vectors.
+ */
+static void
+print_point(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float size)
+{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ uint i;
+
+ debug_printf("llvmpipe point, width %f\n", size);
+ for (i = 0; i < 1 + key->num_inputs; i++) {
+ debug_printf(" v0[%d]: %f %f %f %f\n", i,
+ v0[i][0], v0[i][1], v0[i][2], v0[i][3]);
+ }
+}
+
+
+/**
+ * Compute the bounding box for a point, allocate a rasterizer triangle
+ * record for it, set up its interpolation coefficients and four
+ * axis-aligned edge planes, and bin it into the scene.
+ *
+ * \return FALSE if scene allocation/binning failed (caller should flush
+ *         and retry), TRUE otherwise (including culled/offscreen points).
+ */
+static boolean
+try_setup_point( struct lp_setup_context *setup,
+ const float (*v0)[4] )
+{
+ struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+ /* x/y positions in fixed point */
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ const int sizeAttr = setup->psize;
+ const float size
+ = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
+ : setup->point_size;
+
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
+
+ struct lp_scene *scene = setup->scene;
+ struct lp_rast_triangle *point;
+ unsigned bytes;
+ struct u_rect bbox;
+ unsigned nr_planes = 4;
+ struct point_info info;
+ unsigned viewport_index = 0;
+ unsigned layer = 0;
+ int fixed_width;
+
+ /* pick up per-vertex viewport/layer indices when the slots are assigned */
+ if (setup->viewport_index_slot > 0) {
+ unsigned *udata = (unsigned*)v0[setup->viewport_index_slot];
+ viewport_index = lp_clamp_viewport_idx(*udata);
+ }
+ if (setup->layer_slot > 0) {
+ layer = *(unsigned*)v0[setup->layer_slot];
+ layer = MIN2(layer, scene->fb_max_layer);
+ }
+
+ if (0)
+ print_point(setup, v0, size);
+
+ /* Bounding rectangle (in pixels) */
+ if (!lp_context->rasterizer ||
+ lp_context->rasterizer->point_quad_rasterization) {
+ /*
+ * Rasterize points as quads.
+ */
+ int x0, y0;
+ /* Point size as fixed point integer, remove rounding errors
+ * and gives minimum width for very small points.
+ */
+ fixed_width = MAX2(FIXED_ONE, subpixel_snap(size));
+
+ /* top-left corner of the quad, centered on the vertex position */
+ x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2;
+ y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2;
+
+ bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER;
+ bbox.y0 = (y0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+ bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+ /* Inclusive coordinates:
+ */
+ bbox.x1--;
+ bbox.y1--;
+ } else {
+ /*
+ * OpenGL legacy rasterization rules for non-sprite points.
+ *
+ * Per OpenGL 2.1 spec, section 3.3.1, "Basic Point Rasterization".
+ *
+ * This type of point rasterization is only available in pre 3.0 contexts
+ * (or compatibilility contexts which we don't support) anyway.
+ */
+
+ const int x0 = subpixel_snap(v0[0][0]);
+ const int y0 = subpixel_snap(v0[0][1]) - adj;
+
+ int int_width;
+ /* Point size as fixed point integer. For GL legacy points
+ * the point size is always a whole integer.
+ */
+ fixed_width = MAX2(FIXED_ONE,
+ (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1));
+ int_width = fixed_width >> FIXED_ORDER;
+
+ /* legacy GL point rules assume the half-pixel center offset is active */
+ assert(setup->pixel_offset != 0);
+
+ if (int_width == 1) {
+ /* a 1-pixel point covers exactly the pixel containing its center */
+ bbox.x0 = x0 >> FIXED_ORDER;
+ bbox.y0 = y0 >> FIXED_ORDER;
+ bbox.x1 = bbox.x0;
+ bbox.y1 = bbox.y0;
+ } else {
+ if (int_width & 1) {
+ /* Odd width */
+ bbox.x0 = (x0 >> FIXED_ORDER) - (int_width - 1)/2;
+ bbox.y0 = (y0 >> FIXED_ORDER) - (int_width - 1)/2;
+ } else {
+ /* Even width */
+ bbox.x0 = ((x0 + FIXED_ONE/2) >> FIXED_ORDER) - int_width/2;
+ bbox.y0 = ((y0 + FIXED_ONE/2) >> FIXED_ORDER) - int_width/2;
+ }
+
+ bbox.x1 = bbox.x0 + int_width - 1;
+ bbox.y1 = bbox.y0 + int_width - 1;
+ }
+ }
+
+ if (0) {
+ debug_printf(" bbox: (%i, %i) - (%i, %i)\n",
+ bbox.x0, bbox.y0,
+ bbox.x1, bbox.y1);
+ }
+
+ /* trivially cull points entirely outside the current draw region */
+ if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return TRUE;
+ }
+
+ u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox);
+
+ /* points reuse the triangle record/allocator with 4 box planes */
+ point = lp_setup_alloc_triangle(scene,
+ key->num_inputs,
+ nr_planes,
+ &bytes);
+ if (!point)
+ return FALSE;
+
+#ifdef DEBUG
+ point->v[0][0] = v0[0][0];
+ point->v[0][1] = v0[0][1];
+#endif
+
+ LP_COUNT(nr_tris);
+
+ if (lp_context->active_statistics_queries &&
+ !llvmpipe_rasterization_disabled(lp_context)) {
+ lp_context->pipeline_statistics.c_primitives++;
+ }
+
+ if (draw_will_inject_frontface(lp_context->draw) &&
+ setup->face_slot > 0) {
+ point->inputs.frontfacing = v0[setup->face_slot][0];
+ } else {
+ point->inputs.frontfacing = TRUE;
+ }
+
+ /* edge deltas encode the point's extent for coefficient setup */
+ info.v0 = v0;
+ info.dx01 = 0;
+ info.dx12 = fixed_width;
+ info.dy01 = fixed_width;
+ info.dy12 = 0;
+ info.a0 = GET_A0(&point->inputs);
+ info.dadx = GET_DADX(&point->inputs);
+ info.dady = GET_DADY(&point->inputs);
+ info.frontfacing = point->inputs.frontfacing;
+
+ /* Setup parameter interpolants:
+ */
+ setup_point_coefficients(setup, &info);
+
+ point->inputs.disable = FALSE;
+ point->inputs.opaque = FALSE;
+ point->inputs.layer = layer;
+ point->inputs.viewport_index = viewport_index;
+
+ {
+ /* Four axis-aligned planes forming the point's bounding box;
+ * eo=1 on the left/top planes accounts for inclusive edges.
+ */
+ struct lp_rast_plane *plane = GET_PLANES(point);
+
+ plane[0].dcdx = -1;
+ plane[0].dcdy = 0;
+ plane[0].c = 1-bbox.x0;
+ plane[0].eo = 1;
+
+ plane[1].dcdx = 1;
+ plane[1].dcdy = 0;
+ plane[1].c = bbox.x1+1;
+ plane[1].eo = 0;
+
+ plane[2].dcdx = 0;
+ plane[2].dcdy = 1;
+ plane[2].c = 1-bbox.y0;
+ plane[2].eo = 1;
+
+ plane[3].dcdx = 0;
+ plane[3].dcdy = -1;
+ plane[3].c = bbox.y1+1;
+ plane[3].eo = 0;
+ }
+
+ return lp_setup_bin_triangle(setup, point, &bbox, nr_planes, viewport_index);
+}
+
+
+/**
+ * Point binning entry. On scene out-of-memory, flush the scene and
+ * retry the point once; give up silently if the retry also fails.
+ */
+static void
+lp_setup_point(struct lp_setup_context *setup,
+ const float (*v0)[4])
+{
+ if (!try_setup_point( setup, v0 ))
+ {
+ if (!lp_setup_flush_and_restart(setup))
+ return;
+
+ if (!try_setup_point( setup, v0 ))
+ return;
+ }
+}
+
+
+/** Install the point setup function into the setup context. */
+void
+lp_setup_choose_point( struct lp_setup_context *setup )
+{
+ setup->point = lp_setup_point;
+}
+
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
new file mode 100644
index 000000000..98a9d4bc2
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -0,0 +1,1027 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for triangles
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_rect.h"
+#include "util/u_sse.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+#include "lp_context.h"
+
+#include <inttypes.h>
+
+#define NUM_CHANNELS 4
+
+#if defined(PIPE_ARCH_SSE)
+#include <emmintrin.h>
+#endif
+
+/** Convert a float coordinate to fixed point (FIXED_ONE units),
+ * rounding to the nearest subpixel.
+ */
+static inline int
+subpixel_snap(float a)
+{
+ return util_iround(FIXED_ONE * a);
+}
+
+/** Convert a fixed-point value back to float. */
+static inline float
+fixed_to_float(int a)
+{
+ return a * (1.0f / FIXED_ONE);
+}
+
+
+/* Position and area in fixed point coordinates */
+struct fixed_position {
+ int32_t x[4]; /* vertex x coords; 4th entry is padding for SSE loads */
+ int32_t y[4]; /* vertex y coords; 4th entry is padding for SSE loads */
+ int64_t area; /* signed area term dx01*dy20 - dx20*dy01 (fixed^2 units) */
+ int32_t dx01; /* x[0] - x[1] */
+ int32_t dy01; /* y[0] - y[1] */
+ int32_t dx20; /* x[2] - x[0] */
+ int32_t dy20; /* y[2] - y[0] */
+};
+
+
+/**
+ * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
+ * immediately after it.
+ * The memory is allocated from the per-scene pool, not per-tile.
+ * \param tri_size returns number of bytes allocated
+ * \param num_inputs number of fragment shader inputs
+ * \return pointer to triangle space
+ */
+struct lp_rast_triangle *
+lp_setup_alloc_triangle(struct lp_scene *scene,
+ unsigned nr_inputs,
+ unsigned nr_planes,
+ unsigned *tri_size)
+{
+ /* +1 for the position input in slot 0 */
+ unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
+ unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
+ struct lp_rast_triangle *tri;
+
+ /* 3 arrays: a0, dadx, dady; then the edge planes */
+ *tri_size = (sizeof(struct lp_rast_triangle) +
+ 3 * input_array_sz +
+ plane_sz);
+
+ tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
+ if (tri == NULL)
+ return NULL;
+
+ tri->inputs.stride = input_array_sz;
+
+ {
+ /* sanity-check that the GET_PLANES layout matches the size we computed */
+ char *a = (char *)tri;
+ char *b = (char *)&GET_PLANES(tri)[nr_planes];
+ assert(b - a == *tri_size);
+ }
+
+ return tri;
+}
+
+/**
+ * Print one vertex's position and used input attributes (for debug).
+ * Channels not in an input's usage_mask are shown as blanks.
+ */
+void
+lp_setup_print_vertex(struct lp_setup_context *setup,
+ const char *name,
+ const float (*v)[4])
+{
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ int i, j;
+
+ debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
+ name,
+ v[0][0], v[0][1], v[0][2], v[0][3]);
+
+ for (i = 0; i < key->num_inputs; i++) {
+ const float *in = v[key->inputs[i].src_index];
+
+ debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
+ i,
+ name, key->inputs[i].src_index,
+ (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
+ (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
+ (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
+ (key->inputs[i].usage_mask & 0x8) ? "w" : " ");
+
+ for (j = 0; j < 4; j++)
+ if (key->inputs[i].usage_mask & (1<<j))
+ debug_printf("%.5f ", in[j]);
+
+ debug_printf("\n");
+ }
+}
+
+
+/**
+ * Print triangle vertex attribs (for debug).
+ * Also reports winding (cw/ccw/zero area) from the 2D cross product.
+ */
+void
+lp_setup_print_triangle(struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ debug_printf("triangle\n");
+
+ {
+ /* edge vectors from v2 towards v0 and v1 */
+ const float ex = v0[0][0] - v2[0][0];
+ const float ey = v0[0][1] - v2[0][1];
+ const float fx = v1[0][0] - v2[0][0];
+ const float fy = v1[0][1] - v2[0][1];
+
+ /* det = cross(e,f).z */
+ const float det = ex * fy - ey * fx;
+ if (det < 0.0f)
+ debug_printf(" - ccw\n");
+ else if (det > 0.0f)
+ debug_printf(" - cw\n");
+ else
+ debug_printf(" - zero area\n");
+ }
+
+ lp_setup_print_vertex(setup, "v0", v0);
+ lp_setup_print_vertex(setup, "v1", v1);
+ lp_setup_print_vertex(setup, "v2", v2);
+}
+
+
+#define MAX_PLANES 8
+/* Rasterizer opcode lookup tables, indexed by plane count (1..MAX_PLANES). */
+static unsigned
+lp_rast_tri_tab[MAX_PLANES+1] = {
+ 0, /* should be impossible */
+ LP_RAST_OP_TRIANGLE_1,
+ LP_RAST_OP_TRIANGLE_2,
+ LP_RAST_OP_TRIANGLE_3,
+ LP_RAST_OP_TRIANGLE_4,
+ LP_RAST_OP_TRIANGLE_5,
+ LP_RAST_OP_TRIANGLE_6,
+ LP_RAST_OP_TRIANGLE_7,
+ LP_RAST_OP_TRIANGLE_8
+};
+
+/* 32-bit variants, used when plane values fit in 32 bits. */
+static unsigned
+lp_rast_32_tri_tab[MAX_PLANES+1] = {
+ 0, /* should be impossible */
+ LP_RAST_OP_TRIANGLE_32_1,
+ LP_RAST_OP_TRIANGLE_32_2,
+ LP_RAST_OP_TRIANGLE_32_3,
+ LP_RAST_OP_TRIANGLE_32_4,
+ LP_RAST_OP_TRIANGLE_32_5,
+ LP_RAST_OP_TRIANGLE_32_6,
+ LP_RAST_OP_TRIANGLE_32_7,
+ LP_RAST_OP_TRIANGLE_32_8
+};
+
+
+
+/**
+ * The primitive covers the whole tile- shade whole tile.
+ *
+ * \param tx, ty the tile position in tiles, not pixels
+ * \return FALSE on scene allocation failure, TRUE otherwise
+ */
+static boolean
+lp_setup_whole_tile(struct lp_setup_context *setup,
+ const struct lp_rast_shader_inputs *inputs,
+ int tx, int ty)
+{
+ struct lp_scene *scene = setup->scene;
+
+ LP_COUNT(nr_fully_covered_64);
+
+ /* if variant is opaque and scissor doesn't effect the tile */
+ if (inputs->opaque) {
+ /* Several things prevent this optimization from working:
+ * - For layered rendering we can't determine if this covers the same layer
+ * as previous rendering (or in case of clears those actually always cover
+ * all layers so optimization is impossible). Need to use fb_max_layer and
+ * not setup->layer_slot to determine this since even if there's currently
+ * no slot assigned previous rendering could have used one.
+ * - If there were any Begin/End query commands in the scene then those
+ * would get removed which would be very wrong. Furthermore, if queries
+ * were just active we also can't do the optimization since to get
+ * accurate query results we unfortunately need to execute the rendering
+ * commands.
+ */
+ if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) {
+ /*
+ * All previous rendering will be overwritten so reset the bin.
+ */
+ lp_scene_bin_reset( scene, tx, ty );
+ }
+
+ LP_COUNT(nr_shade_opaque_64);
+ return lp_scene_bin_cmd_with_state( scene, tx, ty,
+ setup->fs.stored,
+ LP_RAST_OP_SHADE_TILE_OPAQUE,
+ lp_rast_arg_inputs(inputs) );
+ } else {
+ LP_COUNT(nr_shade_64);
+ return lp_scene_bin_cmd_with_state( scene, tx, ty,
+ setup->fs.stored,
+ LP_RAST_OP_SHADE_TILE,
+ lp_rast_arg_inputs(inputs) );
+ }
+}
+
+
+/**
+ * Do basic setup for triangle rasterization and determine which
+ * framebuffer tiles are touched. Put the triangle in the scene's
+ * bins for the tiles which we overlap.
+ *
+ * Expects a counter-clockwise triangle (positive area); callers rotate
+ * vertices beforehand. Returns FALSE only on scene allocation failure.
+ */
+static boolean
+do_triangle_ccw(struct lp_setup_context *setup,
+ struct fixed_position* position,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean frontfacing )
+{
+ struct lp_scene *scene = setup->scene;
+ const struct lp_setup_variant_key *key = &setup->setup.variant->key;
+ struct lp_rast_triangle *tri;
+ struct lp_rast_plane *plane;
+ struct u_rect bbox;
+ unsigned tri_bytes;
+ int nr_planes = 3;
+ unsigned viewport_index = 0;
+ unsigned layer = 0;
+
+ /* Area should always be positive here */
+ assert(position->area > 0);
+
+ if (0)
+ lp_setup_print_triangle(setup, v0, v1, v2);
+
+ /* 3 edge planes, plus 4 scissor planes when scissoring is enabled */
+ if (setup->scissor_test) {
+ nr_planes = 7;
+ if (setup->viewport_index_slot > 0) {
+ unsigned *udata = (unsigned*)v0[setup->viewport_index_slot];
+ viewport_index = lp_clamp_viewport_idx(*udata);
+ }
+ }
+ else {
+ nr_planes = 3;
+ }
+ if (setup->layer_slot > 0) {
+ /* layer is read from v1 here -- NOTE(review): presumably the
+ * provoking vertex; confirm against callers before changing.
+ */
+ layer = *(unsigned*)v1[setup->layer_slot];
+ layer = MIN2(layer, scene->fb_max_layer);
+ }
+
+ /* Bounding rectangle (in pixels) */
+ {
+ /* Yes this is necessary to accurately calculate bounding boxes
+ * with the two fill-conventions we support. GL (normally) ends
+ * up needing a bottom-left fill convention, which requires
+ * slightly different rounding.
+ */
+ int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
+
+ /* Inclusive x0, exclusive x1 */
+ bbox.x0 = MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
+ bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;
+
+ /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
+ bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
+ bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
+ }
+
+ if (bbox.x1 < bbox.x0 ||
+ bbox.y1 < bbox.y0) {
+ if (0) debug_printf("empty bounding box\n");
+ LP_COUNT(nr_culled_tris);
+ return TRUE;
+ }
+
+ if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
+ if (0) debug_printf("offscreen\n");
+ LP_COUNT(nr_culled_tris);
+ return TRUE;
+ }
+
+ /* Can safely discard negative regions, but need to keep hold of
+ * information about when the triangle extends past screen
+ * boundaries. See trimmed_box in lp_setup_bin_triangle().
+ */
+ bbox.x0 = MAX2(bbox.x0, 0);
+ bbox.y0 = MAX2(bbox.y0, 0);
+
+ tri = lp_setup_alloc_triangle(scene,
+ key->num_inputs,
+ nr_planes,
+ &tri_bytes);
+ if (!tri)
+ return FALSE;
+
+#if 0
+ tri->v[0][0] = v0[0][0];
+ tri->v[1][0] = v1[0][0];
+ tri->v[2][0] = v2[0][0];
+ tri->v[0][1] = v0[0][1];
+ tri->v[1][1] = v1[0][1];
+ tri->v[2][1] = v2[0][1];
+#endif
+
+ LP_COUNT(nr_tris);
+
+ /* Setup parameter interpolants:
+ */
+ setup->setup.variant->jit_function( v0,
+ v1,
+ v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs) );
+
+ tri->inputs.frontfacing = frontfacing;
+ tri->inputs.disable = FALSE;
+ tri->inputs.opaque = setup->fs.current.variant->opaque;
+ tri->inputs.layer = layer;
+ tri->inputs.viewport_index = viewport_index;
+
+ if (0)
+ lp_dump_setup_coef(&setup->setup.variant->key,
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
+
+ plane = GET_PLANES(tri);
+
+#if defined(PIPE_ARCH_SSE)
+ /* SSE fast path: compute c/dcdx/dcdy/eo for all three edges in
+ * parallel, only valid when the values fit in 32 bits.
+ */
+ if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
+ setup->fb.height <= MAX_FIXED_LENGTH32 &&
+ (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
+ (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
+ __m128i vertx, verty;
+ __m128i shufx, shufy;
+ __m128i dcdx, dcdy, c;
+ __m128i unused;
+ __m128i dcdx_neg_mask;
+ __m128i dcdy_neg_mask;
+ __m128i dcdx_zero_mask;
+ __m128i top_left_flag;
+ __m128i c_inc_mask, c_inc;
+ __m128i eo, p0, p1, p2;
+ __m128i zero = _mm_setzero_si128();
+ PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
+
+ vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
+ verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
+
+ /* rotate lanes so each lane pairs a vertex with the next one (edge) */
+ shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
+ shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
+
+ dcdx = _mm_sub_epi32(verty, shufy);
+ dcdy = _mm_sub_epi32(vertx, shufx);
+
+ /* arithmetic shift by 31 yields all-ones mask for negative lanes */
+ dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
+ dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
+ dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
+
+ top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);
+
+ /* fill-convention adjustment: +1 for left edges and for the
+ * horizontal edge selected by the fill rule (cf. scalar path below)
+ */
+ c_inc_mask = _mm_or_si128(dcdx_neg_mask,
+ _mm_and_si128(dcdx_zero_mask,
+ _mm_xor_si128(dcdy_neg_mask,
+ top_left_flag)));
+
+ c_inc = _mm_srli_epi32(c_inc_mask, 31);
+
+ c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
+ mm_mullo_epi32(dcdy, verty));
+
+ c = _mm_add_epi32(c, c_inc);
+
+ /* Scale up to match c:
+ */
+ dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
+ dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
+
+ /* Calculate trivial reject values:
+ */
+ eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
+ _mm_and_si128(dcdx_neg_mask, dcdx));
+
+ /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
+
+ /* Pointless transpose which gets undone immediately in
+ * rasterization:
+ */
+ transpose4_epi32(&c, &dcdx, &dcdy, &eo,
+ &p0, &p1, &p2, &unused);
+
+#define STORE_PLANE(plane, vec) do { \
+ _mm_store_si128((__m128i *)&temp_vec, vec); \
+ plane.c = (int64_t)temp_vec[0]; \
+ plane.dcdx = temp_vec[1]; \
+ plane.dcdy = temp_vec[2]; \
+ plane.eo = temp_vec[3]; \
+ } while(0)
+
+ STORE_PLANE(plane[0], p0);
+ STORE_PLANE(plane[1], p1);
+ STORE_PLANE(plane[2], p2);
+#undef STORE_PLANE
+ } else
+#endif
+ {
+ /* scalar fallback: 64-bit math handles large framebuffers */
+ int i;
+ plane[0].dcdy = position->dx01;
+ plane[1].dcdy = position->x[1] - position->x[2];
+ plane[2].dcdy = position->dx20;
+ plane[0].dcdx = position->dy01;
+ plane[1].dcdx = position->y[1] - position->y[2];
+ plane[2].dcdx = position->dy20;
+
+ for (i = 0; i < 3; i++) {
+ /* half-edge constants, will be interated over the whole render
+ * target.
+ */
+ plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
+ IMUL64(plane[i].dcdy, position->y[i]);
+
+ /* correct for top-left vs. bottom-left fill convention.
+ */
+ if (plane[i].dcdx < 0) {
+ /* both fill conventions want this - adjust for left edges */
+ plane[i].c++;
+ }
+ else if (plane[i].dcdx == 0) {
+ if (setup->bottom_edge_rule == 0){
+ /* correct for top-left fill convention:
+ */
+ if (plane[i].dcdy > 0) plane[i].c++;
+ }
+ else {
+ /* correct for bottom-left fill convention:
+ */
+ if (plane[i].dcdy < 0) plane[i].c++;
+ }
+ }
+
+ /* Scale up to match c:
+ */
+ assert((plane[i].dcdx << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdx);
+ assert((plane[i].dcdy << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdy);
+ plane[i].dcdx <<= FIXED_ORDER;
+ plane[i].dcdy <<= FIXED_ORDER;
+
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ plane[i].eo = 0;
+ if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+ if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
+ }
+ }
+
+ if (0) {
+ debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+ plane[0].c,
+ plane[0].dcdx,
+ plane[0].dcdy,
+ plane[0].eo);
+
+ debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+ plane[1].c,
+ plane[1].dcdx,
+ plane[1].dcdy,
+ plane[1].eo);
+
+ debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
+ plane[2].c,
+ plane[2].dcdx,
+ plane[2].dcdy,
+ plane[2].eo);
+ }
+
+
+ /*
+ * When rasterizing scissored tris, use the intersection of the
+ * triangle bounding box and the scissor rect to generate the
+ * scissor planes.
+ *
+ * This permits us to cut off the triangle "tails" that are present
+ * in the intermediate recursive levels caused when two of the
+ * triangles edges don't diverge quickly enough to trivially reject
+ * exterior blocks from the triangle.
+ *
+ * It's not really clear if it's worth worrying about these tails,
+ * but since we generate the planes for each scissored tri, it's
+ * free to trim them in this case.
+ *
+ * Note that otherwise, the scissor planes only vary in 'C' value,
+ * and even then only on state-changes. Could alternatively store
+ * these planes elsewhere.
+ */
+ if (nr_planes == 7) {
+ const struct u_rect *scissor = &setup->scissors[viewport_index];
+
+ plane[3].dcdx = -1;
+ plane[3].dcdy = 0;
+ plane[3].c = 1-scissor->x0;
+ plane[3].eo = 1;
+
+ plane[4].dcdx = 1;
+ plane[4].dcdy = 0;
+ plane[4].c = scissor->x1+1;
+ plane[4].eo = 0;
+
+ plane[5].dcdx = 0;
+ plane[5].dcdy = 1;
+ plane[5].c = 1-scissor->y0;
+ plane[5].eo = 1;
+
+ plane[6].dcdx = 0;
+ plane[6].dcdy = -1;
+ plane[6].c = scissor->y1+1;
+ plane[6].eo = 0;
+ }
+
+ return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
+}
+
+/*
+ * Round to nearest less or equal power of two of the input.
+ *
+ * Undefined if no bit set exists, so code should check against 0 first.
+ */
+static inline uint32_t
+floor_pot(uint32_t n)
+{
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+ if (n == 0)
+ return 0;
+
+ /* bsr finds the index of the highest set bit */
+ __asm__("bsr %1,%0"
+ : "=r" (n)
+ : "rm" (n));
+ return 1 << n;
+#else
+ /* smear the top bit down, then keep only the highest one */
+ n |= (n >> 1);
+ n |= (n >> 2);
+ n |= (n >> 4);
+ n |= (n >> 8);
+ n |= (n >> 16);
+ return n - (n >> 1);
+#endif
+}
+
+
+/**
+ * Bin a prepared triangle into the scene's tile bins.
+ *
+ * Small triangles that fit one tile are emitted with a specialized
+ * opcode; larger ones are tested tile-by-tile, binning shade-tile
+ * commands for fully covered tiles and partial-triangle commands
+ * otherwise.
+ *
+ * \return FALSE on scene allocation failure (triangle is disabled)
+ */
+boolean
+lp_setup_bin_triangle( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ const struct u_rect *bbox,
+ int nr_planes,
+ unsigned viewport_index )
+{
+ struct lp_scene *scene = setup->scene;
+ struct u_rect trimmed_box = *bbox;
+ int i;
+ /* What is the largest power-of-two boundary this triangle crosses:
+ */
+ int dx = floor_pot((bbox->x0 ^ bbox->x1) |
+ (bbox->y0 ^ bbox->y1));
+
+ /* The largest dimension of the rasterized area of the triangle
+ * (aligned to a 4x4 grid), rounded down to the nearest power of two:
+ */
+ int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) |
+ (bbox->y1 - (bbox->y0 & ~3)));
+ int sz = floor_pot(max_sz);
+ boolean use_32bits = max_sz <= MAX_FIXED_LENGTH32;
+
+ /* Now apply scissor, etc to the bounding box. Could do this
+ * earlier, but it confuses the logic for tri-16 and would force
+ * the rasterizer to also respect scissor, etc, just for the rare
+ * cases where a small triangle extends beyond the scissor.
+ */
+ u_rect_find_intersection(&setup->draw_regions[viewport_index],
+ &trimmed_box);
+
+ /* Determine which tile(s) intersect the triangle's bounding box
+ */
+ if (dx < TILE_SIZE)
+ {
+ /* crosses no tile boundary -- fully inside one tile */
+ int ix0 = bbox->x0 / TILE_SIZE;
+ int iy0 = bbox->y0 / TILE_SIZE;
+ unsigned px = bbox->x0 & 63 & ~3;
+ unsigned py = bbox->y0 & 63 & ~3;
+
+ assert(iy0 == bbox->y1 / TILE_SIZE &&
+ ix0 == bbox->x1 / TILE_SIZE);
+
+ if (nr_planes == 3) {
+ if (sz < 4)
+ {
+ /* Triangle is contained in a single 4x4 stamp:
+ */
+ assert(px + 4 <= TILE_SIZE);
+ assert(py + 4 <= TILE_SIZE);
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
+ setup->fs.stored,
+ use_32bits ?
+ LP_RAST_OP_TRIANGLE_32_3_4 :
+ LP_RAST_OP_TRIANGLE_3_4,
+ lp_rast_arg_triangle_contained(tri, px, py) );
+ }
+
+ if (sz < 16)
+ {
+ /* Triangle is contained in a single 16x16 block:
+ */
+
+ /*
+ * The 16x16 block is only 4x4 aligned, and can exceed the tile
+ * dimensions if the triangle is 16 pixels in one dimension but 4
+ * in the other. So budge the 16x16 back inside the tile.
+ */
+ px = MIN2(px, TILE_SIZE - 16);
+ py = MIN2(py, TILE_SIZE - 16);
+
+ assert(px + 16 <= TILE_SIZE);
+ assert(py + 16 <= TILE_SIZE);
+
+ return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
+ setup->fs.stored,
+ use_32bits ?
+ LP_RAST_OP_TRIANGLE_32_3_16 :
+ LP_RAST_OP_TRIANGLE_3_16,
+ lp_rast_arg_triangle_contained(tri, px, py) );
+ }
+ }
+ else if (nr_planes == 4 && sz < 16)
+ {
+ /* same 16x16 clamping as above, for the 4-plane (point) case */
+ px = MIN2(px, TILE_SIZE - 16);
+ py = MIN2(py, TILE_SIZE - 16);
+
+ assert(px + 16 <= TILE_SIZE);
+ assert(py + 16 <= TILE_SIZE);
+
+ return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
+ setup->fs.stored,
+ use_32bits ?
+ LP_RAST_OP_TRIANGLE_32_4_16 :
+ LP_RAST_OP_TRIANGLE_4_16,
+ lp_rast_arg_triangle_contained(tri, px, py));
+ }
+
+
+ /* Triangle is contained in a single tile:
+ */
+ return lp_scene_bin_cmd_with_state(
+ scene, ix0, iy0, setup->fs.stored,
+ use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes],
+ lp_rast_arg_triangle(tri, (1<<nr_planes)-1));
+ }
+ else
+ {
+ /* spans multiple tiles: walk them with incremental plane values */
+ struct lp_rast_plane *plane = GET_PLANES(tri);
+ int64_t c[MAX_PLANES];
+ int64_t ei[MAX_PLANES];
+
+ int64_t eo[MAX_PLANES];
+ int64_t xstep[MAX_PLANES];
+ int64_t ystep[MAX_PLANES];
+ int x, y;
+
+ int ix0 = trimmed_box.x0 / TILE_SIZE;
+ int iy0 = trimmed_box.y0 / TILE_SIZE;
+ int ix1 = trimmed_box.x1 / TILE_SIZE;
+ int iy1 = trimmed_box.y1 / TILE_SIZE;
+
+ for (i = 0; i < nr_planes; i++) {
+ /* plane value at the first tile's origin */
+ c[i] = (plane[i].c +
+ IMUL64(plane[i].dcdy, iy0) * TILE_SIZE -
+ IMUL64(plane[i].dcdx, ix0) * TILE_SIZE);
+
+ /* trivial-accept (ei) and trivial-reject (eo) offsets scaled
+ * up to tile size
+ */
+ ei[i] = (plane[i].dcdy -
+ plane[i].dcdx -
+ plane[i].eo) << TILE_ORDER;
+
+ eo[i] = plane[i].eo << TILE_ORDER;
+ xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
+ ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
+ }
+
+
+
+ /* Test tile-sized blocks against the triangle.
+ * Discard blocks fully outside the tri. If the block is fully
+ * contained inside the tri, bin an lp_rast_shade_tile command.
+ * Else, bin a lp_rast_triangle command.
+ */
+ for (y = iy0; y <= iy1; y++)
+ {
+ boolean in = FALSE; /* are we inside the triangle? */
+ int64_t cx[MAX_PLANES];
+
+ for (i = 0; i < nr_planes; i++)
+ cx[i] = c[i];
+
+ for (x = ix0; x <= ix1; x++)
+ {
+ int out = 0;
+ int partial = 0;
+
+ for (i = 0; i < nr_planes; i++) {
+ /* sign bit (>>63) flags reject / partial per plane */
+ int64_t planeout = cx[i] + eo[i];
+ int64_t planepartial = cx[i] + ei[i] - 1;
+ out |= (int) (planeout >> 63);
+ partial |= ((int) (planepartial >> 63)) & (1<<i);
+ }
+
+ if (out) {
+ /* do nothing */
+ if (in)
+ break; /* exiting triangle, all done with this row */
+ LP_COUNT(nr_empty_64);
+ }
+ else if (partial) {
+ /* Not trivially accepted by at least one plane -
+ * rasterize/shade partial tile
+ */
+ int count = util_bitcount(partial);
+ in = TRUE;
+
+ if (!lp_scene_bin_cmd_with_state( scene, x, y,
+ setup->fs.stored,
+ use_32bits ?
+ lp_rast_32_tri_tab[count] :
+ lp_rast_tri_tab[count],
+ lp_rast_arg_triangle(tri, partial) ))
+ goto fail;
+
+ LP_COUNT(nr_partially_covered_64);
+ }
+ else {
+ /* triangle covers the whole tile- shade whole tile */
+ LP_COUNT(nr_fully_covered_64);
+ in = TRUE;
+ if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
+ goto fail;
+ }
+
+ /* Iterate cx values across the region: */
+ for (i = 0; i < nr_planes; i++)
+ cx[i] += xstep[i];
+ }
+
+ /* Iterate c values down the region: */
+ for (i = 0; i < nr_planes; i++)
+ c[i] += ystep[i];
+ }
+ }
+
+ return TRUE;
+
+fail:
+ /* Need to disable any partially binned triangle. This is easier
+ * than trying to locate all the triangle, shade-tile, etc,
+ * commands which may have been binned.
+ */
+ tri->inputs.disable = TRUE;
+ return FALSE;
+}
+
+
+/**
+ * Try to draw the triangle, restart the scene on failure.
+ * Gives up silently if the post-flush retry also fails.
+ */
+static void retry_triangle_ccw( struct lp_setup_context *setup,
+ struct fixed_position* position,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front)
+{
+ if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
+ {
+ if (!lp_setup_flush_and_restart(setup))
+ return;
+
+ if (!do_triangle_ccw( setup, position, v0, v1, v2, front ))
+ return;
+ }
+}
+
+/**
+ * Calculate fixed position data for a triangle
+ * Snaps the three vertex positions to fixed point (applying the
+ * half-pixel offset), and precomputes the edge deltas and the
+ * signed area term used for winding/culling decisions.
+ */
+static inline void
+calc_fixed_position( struct lp_setup_context *setup,
+ struct fixed_position* position,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
+ position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+ position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
+ /* 4th lane is padding for the SSE loads in do_triangle_ccw() */
+ position->x[3] = 0;
+
+ position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
+ position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+ position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
+ position->y[3] = 0;
+
+ position->dx01 = position->x[0] - position->x[1];
+ position->dy01 = position->y[0] - position->y[1];
+
+ position->dx20 = position->x[2] - position->x[0];
+ position->dy20 = position->y[2] - position->y[0];
+
+ /* 64-bit multiply avoids overflow for large coordinates */
+ position->area = IMUL64(position->dx01, position->dy20) -
+ IMUL64(position->dx20, position->dy01);
+}
+
+
+/**
+ * Rotate a triangle, flipping its clockwise direction,
+ * Swaps values for xy[0] and xy[1]
+ */
+static inline void
+rotate_fixed_position_01( struct fixed_position* position )
+{
+ int x, y;
+
+ x = position->x[1];
+ y = position->y[1];
+ position->x[1] = position->x[0];
+ position->y[1] = position->y[0];
+ position->x[0] = x;
+ position->y[0] = y;
+
+ /* update the cached deltas/area for the swapped vertex order */
+ position->dx01 = -position->dx01;
+ position->dy01 = -position->dy01;
+ position->dx20 = position->x[2] - position->x[0];
+ position->dy20 = position->y[2] - position->y[0];
+
+ position->area = -position->area;
+}
+
+
+/**
+ * Rotate a triangle, flipping its clockwise direction,
+ * Swaps values for xy[1] and xy[2]
+ */
+static inline void
+rotate_fixed_position_12( struct fixed_position* position )
+{
+ int x, y;
+
+ x = position->x[2];
+ y = position->y[2];
+ position->x[2] = position->x[1];
+ position->y[2] = position->y[1];
+ position->x[1] = x;
+ position->y[1] = y;
+
+ /* update the cached deltas/area for the swapped vertex order */
+ x = position->dx01;
+ y = position->dy01;
+ position->dx01 = -position->dx20;
+ position->dy01 = -position->dy20;
+ position->dx20 = -x;
+ position->dy20 = -y;
+
+ position->area = -position->area;
+}
+
+
+/**
+ * Draw triangle if it's CW, cull otherwise.
+ * CW triangles (negative area) are rotated into CCW order before setup;
+ * the rotation preserves the provoking vertex per flatshade_first.
+ */
+static void triangle_cw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ struct fixed_position position;
+
+ calc_fixed_position(setup, &position, v0, v1, v2);
+
+ if (position.area < 0) {
+ if (setup->flatshade_first) {
+ rotate_fixed_position_12(&position);
+ retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface);
+ } else {
+ rotate_fixed_position_01(&position);
+ retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface);
+ }
+ }
+}
+
+
+/** Draw triangle if it's CCW (positive area), cull otherwise. */
+static void triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ struct fixed_position position;
+
+ calc_fixed_position(setup, &position, v0, v1, v2);
+
+ if (position.area > 0)
+ retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface);
+}
+
+/**
+ * Draw triangle whether it's CW or CCW.
+ * Zero-area triangles are dropped; CW triangles are rotated to CCW
+ * order (keeping the provoking vertex) before setup.
+ */
+static void triangle_both( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ struct fixed_position position;
+ struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+
+ if (lp_context->active_statistics_queries &&
+ !llvmpipe_rasterization_disabled(lp_context)) {
+ lp_context->pipeline_statistics.c_primitives++;
+ }
+
+ calc_fixed_position(setup, &position, v0, v1, v2);
+
+ if (0) {
+ assert(!util_is_inf_or_nan(v0[0][0]));
+ assert(!util_is_inf_or_nan(v0[0][1]));
+ assert(!util_is_inf_or_nan(v1[0][0]));
+ assert(!util_is_inf_or_nan(v1[0][1]));
+ assert(!util_is_inf_or_nan(v2[0][0]));
+ assert(!util_is_inf_or_nan(v2[0][1]));
+ }
+
+ if (position.area > 0)
+ retry_triangle_ccw( setup, &position, v0, v1, v2, setup->ccw_is_frontface );
+ else if (position.area < 0) {
+ if (setup->flatshade_first) {
+ rotate_fixed_position_12( &position );
+ retry_triangle_ccw( setup, &position, v0, v2, v1, !setup->ccw_is_frontface );
+ } else {
+ rotate_fixed_position_01( &position );
+ retry_triangle_ccw( setup, &position, v1, v0, v2, !setup->ccw_is_frontface );
+ }
+ }
+}
+
+
+/** No-op triangle function, used when both faces are culled. */
+static void triangle_nop( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+}
+
+
+/**
+ * Install the triangle setup function matching the current cull mode
+ * and front-face winding. PIPE_FACE_FRONT_AND_BACK (default case)
+ * culls everything, so the no-op is installed.
+ */
+void
+lp_setup_choose_triangle( struct lp_setup_context *setup )
+{
+ switch (setup->cullmode) {
+ case PIPE_FACE_NONE:
+ setup->triangle = triangle_both;
+ break;
+ case PIPE_FACE_BACK:
+ /* cull back faces: only the winding that is front-facing survives */
+ setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
+ break;
+ case PIPE_FACE_FRONT:
+ setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
+ break;
+ default:
+ setup->triangle = triangle_nop;
+ break;
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
new file mode 100644
index 000000000..534c5f48a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -0,0 +1,602 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Interface between 'draw' module's output and the llvmpipe rasterizer/setup
+ * code. When the 'draw' module has finished filling a vertex buffer, the
+ * draw_arrays() functions below will be called. Loop over the vertices and
+ * call the point/line/tri setup functions.
+ *
+ * Authors
+ * Brian Paul
+ */
+
+
+#include "lp_setup_context.h"
+#include "lp_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "util/u_memory.h"
+
+
+#define LP_MAX_VBUF_INDEXES 1024
+#define LP_MAX_VBUF_SIZE 4096
+
+
+
+/** Cast wrapper: the setup context embeds a vbuf_render as its first
+ * member (setup->base), so a simple pointer cast recovers it.
+ */
+static struct lp_setup_context *
+lp_setup_context(struct vbuf_render *vbr)
+{
+ return (struct lp_setup_context *) vbr;
+}
+
+
+
+/** vbuf_render::get_vertex_info callback: return the current vertex layout. */
+static const struct vertex_info *
+lp_setup_get_vertex_info(struct vbuf_render *vbr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+
+ /* Vertex size/info depends on the latest state.
+ * The draw module may have issued additional state-change commands.
+ */
+ lp_setup_update_state(setup, FALSE);
+
+ return setup->vertex_info;
+}
+
+
+/**
+ * vbuf_render::allocate_vertices callback: (re)allocate the vertex buffer
+ * to hold nr_vertices vertices of vertex_size bytes each.  The previous
+ * allocation is reused when it is already large enough.
+ * Returns FALSE on allocation failure.
+ */
+static boolean
+lp_setup_allocate_vertices(struct vbuf_render *vbr,
+ ushort vertex_size, ushort nr_vertices)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ unsigned size = vertex_size * nr_vertices;
+
+ if (setup->vertex_buffer_size < size) {
+ /* free(NULL)-safe; grow to the exact requested size, 16-byte aligned. */
+ align_free(setup->vertex_buffer);
+ setup->vertex_buffer = align_malloc(size, 16);
+ /* NOTE(review): vertex_buffer_size is updated even if align_malloc
+ * failed, leaving a stale size alongside a NULL buffer — callers are
+ * protected only by the FALSE return below; verify this is intended.
+ */
+ setup->vertex_buffer_size = size;
+ }
+
+ setup->vertex_size = vertex_size;
+ setup->nr_vertices = nr_vertices;
+
+ return setup->vertex_buffer != NULL;
+}
+
+/** vbuf_render::release_vertices callback. */
+static void
+lp_setup_release_vertices(struct vbuf_render *vbr)
+{
+ /* keep the old allocation for next time */
+}
+
+/** vbuf_render::map_vertices callback: the buffer is plain malloc'd
+ * memory, so "mapping" is just returning the pointer.
+ */
+static void *
+lp_setup_map_vertices(struct vbuf_render *vbr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ return setup->vertex_buffer;
+}
+
+/** vbuf_render::unmap_vertices callback: nothing to unmap, only verify
+ * the draw module stayed within the allocation.
+ */
+static void
+lp_setup_unmap_vertices(struct vbuf_render *vbr,
+ ushort min_index,
+ ushort max_index )
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size );
+ /* do nothing */
+}
+
+
+/** vbuf_render::set_primitive callback: remember the PIPE_PRIM_x type
+ * for the upcoming draw_arrays/draw_elements calls.
+ */
+static void
+lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim)
+{
+ lp_setup_context(vbr)->prim = prim;
+}
+
+/* Pointer to an array of float[4] vertex attributes. */
+typedef const float (*const_float4_ptr)[4];
+
+/** Return a pointer to the index-th vertex in a tightly packed buffer of
+ * stride-byte vertices.
+ */
+static inline const_float4_ptr get_vert( const void *vertex_buffer,
+ int index,
+ int stride )
+{
+ return (const_float4_ptr)((char *)vertex_buffer + index * stride);
+}
+
+/**
+ * vbuf_render::draw_elements callback: decompose indexed primitives into
+ * points, lines and triangles and feed them to the setup callbacks
+ * (setup->point / setup->line / setup->triangle).
+ *
+ * The per-primitive vertex ordering implements the provoking-vertex
+ * convention: when flatshade_first is set, the flat-shading color comes
+ * from the first vertex of each decomposed triangle, otherwise from the
+ * last.
+ */
+static void
+lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ const unsigned stride = setup->vertex_info->size * sizeof(float);
+ const void *vertex_buffer = setup->vertex_buffer;
+ const boolean flatshade_first = setup->flatshade_first;
+ unsigned i;
+
+ assert(setup->setup.variant);
+
+ /* Revalidate derived state; bail out if setup cannot proceed. */
+ if (!lp_setup_update_state(setup, TRUE))
+ return;
+
+ switch (setup->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup->point( setup,
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 1; i < nr; i += 2) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ /* Close the loop with an edge from the last vertex back to the first. */
+ if (nr) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[nr-1], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* The (i&1) terms swap two vertices on every odd triangle to keep a
+ * consistent winding along the strip.
+ */
+ if (flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
+ /* emit first triangle vertex as first triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i+(i&1)-1], stride),
+ get_vert(vertex_buffer, indices[i-(i&1)], stride) );
+
+ }
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
+ /* emit last triangle vertex as last triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
+ get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
+ /* emit first non-spoke vertex as first vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
+ /* emit last non-spoke vertex as last vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ /* GL quads don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride) );
+
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ /* GL quad strips don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-3], stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ /* Almost same as tri fan but the _first_ vertex specifies the flat
+ * shading color.
+ */
+ if (flatshade_first) {
+ /* emit first polygon vertex as first triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ else {
+ /* emit first polygon vertex as last triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * vbuf_render::draw_arrays callback, hit when the draw module is working
+ * in pass-through mode.  It's up to us to convert the linear vertex array
+ * into point/line/tri prims.  Mirrors lp_setup_draw_elements but indexes
+ * vertices sequentially instead of through an index buffer.
+ */
+static void
+lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ const unsigned stride = setup->vertex_info->size * sizeof(float);
+ /* Pre-offset the buffer by 'start' so the loops below can index from 0. */
+ const void *vertex_buffer =
+ (void *) get_vert(setup->vertex_buffer, start, stride);
+ const boolean flatshade_first = setup->flatshade_first;
+ unsigned i;
+
+ if (!lp_setup_update_state(setup, TRUE))
+ return;
+
+ switch (setup->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup->point( setup,
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 1; i < nr; i += 2) {
+ setup->line( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ /* Close the loop with an edge from the last vertex back to the first. */
+ if (nr) {
+ setup->line( setup,
+ get_vert(vertex_buffer, nr-1, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ /* The (i&1) terms swap two vertices on every odd triangle to keep a
+ * consistent winding along the strip.
+ */
+ if (flatshade_first) {
+ for (i = 2; i < nr; i++) {
+ /* emit first triangle vertex as first triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i+(i&1)-1, stride),
+ get_vert(vertex_buffer, i-(i&1), stride) );
+ }
+ }
+ else {
+ for (i = 2; i < nr; i++) {
+ /* emit last triangle vertex as last triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i+(i&1)-2, stride),
+ get_vert(vertex_buffer, i-(i&1)-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
+ /* emit first non-spoke vertex as first vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
+ /* emit last non-spoke vertex as last vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, 0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ /* GL quads don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ /* GL quad strips don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-3, stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ /* Almost same as tri fan but the _first_ vertex specifies the flat
+ * shading color.
+ */
+ if (flatshade_first) {
+ /* emit first polygon vertex as first triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, 0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ else {
+ /* emit first polygon vertex as last triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ }
+}
+
+
+
+/** vbuf_render::destroy callback: free the vertex buffer and tear down
+ * the setup context itself.
+ */
+static void
+lp_setup_vbuf_destroy(struct vbuf_render *vbr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ if (setup->vertex_buffer) {
+ align_free(setup->vertex_buffer);
+ setup->vertex_buffer = NULL;
+ }
+ lp_setup_destroy(setup);
+}
+
+/*
+ * FIXME: it is unclear if primitives_storage_needed (which is generally
+ * the same as pipe query num_primitives_generated) should increase
+ * if SO is disabled for d3d10, but for GL we definitely need to
+ * increase num_primitives_generated and this is only called for active
+ * SO. If it must not increase for d3d10 need to disambiguate the counters
+ * in the driver and do some work for getting correct values, if it should
+ * increase too should call this from outside streamout code.
+ */
+/** vbuf_render::set_stream_output_info callback: accumulate stream-output
+ * statistics into the context-wide counters.
+ */
+static void
+lp_setup_so_info(struct vbuf_render *vbr, uint primitives, uint prim_generated)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ struct llvmpipe_context *lp = llvmpipe_context(setup->pipe);
+
+ lp->so_stats.num_primitives_written += primitives;
+ lp->so_stats.primitives_storage_needed += prim_generated;
+}
+
+/** vbuf_render::pipeline_statistics callback: accumulate the draw module's
+ * per-draw statistics into the context-wide pipeline-statistics counters.
+ */
+static void
+lp_setup_pipeline_statistics(
+ struct vbuf_render *vbr,
+ const struct pipe_query_data_pipeline_statistics *stats)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(setup->pipe);
+
+ llvmpipe->pipeline_statistics.ia_vertices +=
+ stats->ia_vertices;
+ llvmpipe->pipeline_statistics.ia_primitives +=
+ stats->ia_primitives;
+ llvmpipe->pipeline_statistics.vs_invocations +=
+ stats->vs_invocations;
+ llvmpipe->pipeline_statistics.gs_invocations +=
+ stats->gs_invocations;
+ llvmpipe->pipeline_statistics.gs_primitives +=
+ stats->gs_primitives;
+ /* Clipper invocations only count while rasterization is enabled; note
+ * the counter is reset (not merely left alone) otherwise.
+ */
+ if (!llvmpipe_rasterization_disabled(llvmpipe)) {
+ llvmpipe->pipeline_statistics.c_invocations +=
+ stats->c_invocations;
+ } else {
+ llvmpipe->pipeline_statistics.c_invocations = 0;
+ }
+}
+
+/**
+ * Create the post-transform vertex handler for the given context:
+ * wire all vbuf_render callbacks of setup->base to the lp_setup_*
+ * implementations above.
+ */
+void
+lp_setup_init_vbuf(struct lp_setup_context *setup)
+{
+ setup->base.max_indices = LP_MAX_VBUF_INDEXES;
+ setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
+
+ setup->base.get_vertex_info = lp_setup_get_vertex_info;
+ setup->base.allocate_vertices = lp_setup_allocate_vertices;
+ setup->base.map_vertices = lp_setup_map_vertices;
+ setup->base.unmap_vertices = lp_setup_unmap_vertices;
+ setup->base.set_primitive = lp_setup_set_primitive;
+ setup->base.draw_elements = lp_setup_draw_elements;
+ setup->base.draw_arrays = lp_setup_draw_arrays;
+ setup->base.release_vertices = lp_setup_release_vertices;
+ setup->base.destroy = lp_setup_vbuf_destroy;
+ setup->base.set_stream_output_info = lp_setup_so_info;
+ setup->base.pipeline_statistics = lp_setup_pipeline_statistics;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h
new file mode 100644
index 000000000..2da6caaef
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h
@@ -0,0 +1,145 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+
+#ifndef LP_STATE_H
+#define LP_STATE_H
+
+#include "pipe/p_state.h"
+#include "lp_jit.h"
+#include "lp_state_fs.h"
+#include "gallivm/lp_bld.h"
+
+
+#define LP_NEW_VIEWPORT 0x1
+#define LP_NEW_RASTERIZER 0x2
+#define LP_NEW_FS 0x4
+#define LP_NEW_BLEND 0x8
+#define LP_NEW_CLIP 0x10
+#define LP_NEW_SCISSOR 0x20
+#define LP_NEW_STIPPLE 0x40
+#define LP_NEW_FRAMEBUFFER 0x80
+#define LP_NEW_DEPTH_STENCIL_ALPHA 0x100
+#define LP_NEW_CONSTANTS 0x200
+#define LP_NEW_SAMPLER 0x400
+#define LP_NEW_SAMPLER_VIEW 0x800
+#define LP_NEW_VERTEX 0x1000
+#define LP_NEW_VS 0x2000
+#define LP_NEW_OCCLUSION_QUERY 0x4000
+#define LP_NEW_BLEND_COLOR 0x8000
+#define LP_NEW_GS 0x10000
+#define LP_NEW_SO 0x20000
+#define LP_NEW_SO_BUFFERS 0x40000
+
+
+
+struct vertex_info;
+struct pipe_context;
+struct llvmpipe_context;
+
+
+
+/** Geometry shader state object. */
+struct lp_geometry_shader {
+ boolean no_tokens;
+ struct pipe_stream_output_info stream_output;
+ struct draw_geometry_shader *dgs;
+};
+
+/** Vertex element state */
+struct lp_velems_state
+{
+ unsigned count;
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
+/** Stream-output (transform feedback) state object. */
+struct lp_so_state {
+ struct pipe_stream_output_info base;
+};
+
+
+void
+llvmpipe_set_framebuffer_state(struct pipe_context *,
+ const struct pipe_framebuffer_state *);
+
+void
+llvmpipe_update_fs(struct llvmpipe_context *lp);
+
+void
+llvmpipe_update_setup(struct llvmpipe_context *lp);
+
+void
+llvmpipe_update_derived(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *ctx,
+ unsigned num,
+ struct pipe_sampler_view **views);
+void
+llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx);
+
+
+void
+llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *ctx,
+ unsigned num,
+ struct pipe_sampler_view **views);
+void
+llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx);
+
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c
new file mode 100644
index 000000000..e38de9aca
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -0,0 +1,206 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Keith Whitwell <keithw@vmware.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_dump.h"
+#include "draw/draw_context.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_debug.h"
+
+
+/** pipe_context::create_blend_state: duplicate the CSO, optionally
+ * disabling blending entirely when the PERF_NO_BLEND debug flag is set.
+ */
+static void *
+llvmpipe_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ /* NOTE(review): mem_dup may return NULL; the LP_PERF branch would then
+ * dereference it — verify callers tolerate NULL or guard here.
+ */
+ struct pipe_blend_state *state = mem_dup(blend, sizeof *blend);
+ int i;
+
+ if (LP_PERF & PERF_NO_BLEND) {
+ state->independent_blend_enable = 0;
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ state->rt[i].blend_enable = 0;
+ }
+
+ return state;
+}
+
+
+/** pipe_context::bind_blend_state: install a blend CSO and flag the
+ * derived-state update.
+ */
+static void
+llvmpipe_bind_blend_state(struct pipe_context *pipe, void *blend)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->blend == blend)
+ return;
+
+ /* Flush queued geometry rendered under the previous blend state. */
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->blend = blend;
+
+ llvmpipe->dirty |= LP_NEW_BLEND;
+}
+
+
+/** pipe_context::delete_blend_state: free a blend CSO. */
+static void
+llvmpipe_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ FREE( blend );
+}
+
+
+/** pipe_context::set_blend_color: copy the blend color, skipping the
+ * flush when the value is unchanged.
+ */
+static void
+llvmpipe_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if(!blend_color)
+ return;
+
+ /* No-op if the color did not actually change. */
+ if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color);
+
+ llvmpipe->dirty |= LP_NEW_BLEND_COLOR;
+}
+
+
+/** XXX move someday? Or consolidate all these simple state setters
+ * into one file.
+ */
+
+
+/** pipe_context::create_depth_stencil_alpha_state: duplicate the CSO,
+ * optionally disabling depth/stencil/alpha tests via LP_PERF debug flags.
+ */
+static void *
+llvmpipe_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+ struct pipe_depth_stencil_alpha_state *state;
+
+ /* NOTE(review): mem_dup may return NULL; the LP_PERF branches below
+ * would then dereference it — verify callers tolerate NULL or guard.
+ */
+ state = mem_dup(depth_stencil, sizeof *depth_stencil);
+
+ if (LP_PERF & PERF_NO_DEPTH) {
+ state->depth.enabled = 0;
+ state->depth.writemask = 0;
+ state->stencil[0].enabled = 0;
+ state->stencil[1].enabled = 0;
+ }
+
+ if (LP_PERF & PERF_NO_ALPHATEST) {
+ state->alpha.enabled = 0;
+ }
+
+ return state;
+}
+
+
+/** pipe_context::bind_depth_stencil_alpha_state: install the CSO and flag
+ * the derived-state update.
+ */
+static void
+llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->depth_stencil == depth_stencil)
+ return;
+
+ /* Flush queued geometry rendered under the previous DSA state. */
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->depth_stencil = depth_stencil;
+
+ llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+
+/** pipe_context::delete_depth_stencil_alpha_state: free the CSO. */
+static void
+llvmpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
+{
+ FREE( depth );
+}
+
+
+/** pipe_context::set_stencil_ref: copy the stencil reference values,
+ * skipping the flush when unchanged.
+ */
+static void
+llvmpipe_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *stencil_ref)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if(!stencil_ref)
+ return;
+
+ /* No-op if the reference values did not change. */
+ if(memcmp(&llvmpipe->stencil_ref, stencil_ref, sizeof *stencil_ref) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ memcpy(&llvmpipe->stencil_ref, stencil_ref, sizeof *stencil_ref);
+
+ /* not sure. want new flag? */
+ llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+/** pipe_context::set_sample_mask: store the MSAA sample mask; reuses the
+ * rasterizer dirty flag rather than introducing a dedicated one.
+ */
+static void
+llvmpipe_set_sample_mask(struct pipe_context *pipe,
+ unsigned sample_mask)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (sample_mask != llvmpipe->sample_mask) {
+ llvmpipe->sample_mask = sample_mask;
+
+ llvmpipe->dirty |= LP_NEW_RASTERIZER;
+ }
+}
+
+/**
+ * Install the blend / depth-stencil-alpha / sample-mask entry points into
+ * the pipe_context vtable and initialize the sample mask to all-enabled.
+ */
+void
+llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state;
+ llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state;
+ llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state;
+
+ llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
+ llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state;
+ llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state;
+
+ llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color;
+
+ llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref;
+ llvmpipe->pipe.set_sample_mask = llvmpipe_set_sample_mask;
+
+ /* Default: all samples enabled. */
+ llvmpipe->sample_mask = ~0;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c
new file mode 100644
index 000000000..1b9b84c08
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c
@@ -0,0 +1,105 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+#include "lp_context.h"
+#include "lp_state.h"
+#include "draw/draw_context.h"
+
+
+/** pipe_context::set_clip_state: clipping is handled entirely by the draw
+ * module, so just forward the state.
+ */
+static void
+llvmpipe_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ /* pass the clip state to the draw module */
+ draw_set_clip_state(llvmpipe->draw, clip);
+}
+
+
+/** pipe_context::set_viewport_states: forward viewports to the draw module
+ * and keep a local copy for rasterizer setup.
+ */
+static void
+llvmpipe_set_viewport_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *viewports)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ /* pass the viewport info to the draw module */
+ draw_set_viewport_states(llvmpipe->draw, start_slot, num_viewports,
+ viewports);
+
+ memcpy(llvmpipe->viewports + start_slot, viewports,
+ sizeof(struct pipe_viewport_state) * num_viewports);
+ llvmpipe->dirty |= LP_NEW_VIEWPORT;
+}
+
+
+/** pipe_context::set_scissor_states: copy the scissor rectangles after
+ * flushing geometry rendered under the previous scissors.
+ */
+static void
+llvmpipe_set_scissor_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissors)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ draw_flush(llvmpipe->draw);
+
+ debug_assert(start_slot < PIPE_MAX_VIEWPORTS);
+ debug_assert((start_slot + num_scissors) <= PIPE_MAX_VIEWPORTS);
+
+ memcpy(llvmpipe->scissors + start_slot, scissors,
+ sizeof(struct pipe_scissor_state) * num_scissors);
+
+ llvmpipe->dirty |= LP_NEW_SCISSOR;
+}
+
+
+/** pipe_context::set_polygon_stipple: copy the stipple pattern after
+ * flushing pending geometry.
+ */
+static void
+llvmpipe_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->poly_stipple = *stipple; /* struct copy */
+ llvmpipe->dirty |= LP_NEW_STIPPLE;
+}
+
+
+
+/**
+ * Install the clip / stipple / scissor / viewport entry points into the
+ * pipe_context vtable.
+ */
+void
+llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state;
+ llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
+ llvmpipe->pipe.set_scissor_states = llvmpipe_set_scissor_states;
+ llvmpipe->pipe.set_viewport_states = llvmpipe_set_viewport_states;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
new file mode 100644
index 000000000..a25e83261
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -0,0 +1,244 @@
+/**************************************************************************
+ *
+ * Copyright 2003 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_private.h"
+#include "lp_context.h"
+#include "lp_screen.h"
+#include "lp_setup.h"
+#include "lp_state.h"
+
+
+
+/**
+ * The vertex info describes how to convert the post-transformed vertices
+ * (simple float[][4]) used by the 'draw' module into vertices for
+ * rasterization.
+ *
+ * This function validates the vertex layout.
+ */
+static void
+compute_vertex_info(struct llvmpipe_context *llvmpipe)
+{
+ const struct lp_fragment_shader *lpfs = llvmpipe->fs;
+ struct vertex_info *vinfo = &llvmpipe->vertex_info;
+ int vs_index;
+ uint i;
+
+ draw_prepare_shader_outputs(llvmpipe->draw);
+
+ llvmpipe->color_slot[0] = -1;
+ llvmpipe->color_slot[1] = -1;
+ llvmpipe->bcolor_slot[0] = -1;
+ llvmpipe->bcolor_slot[1] = -1;
+
+ /*
+ * Match FS inputs against VS outputs, emitting the necessary
+ * attributes. Could cache these structs and look them up with a
+ * combination of fragment shader, vertex shader ids.
+ */
+
+ vinfo->num_attribs = 0;
+
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+
+ for (i = 0; i < lpfs->info.base.num_inputs; i++) {
+ /*
+ * Search for each input in current vs output:
+ */
+
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ lpfs->info.base.input_semantic_name[i],
+ lpfs->info.base.input_semantic_index[i]);
+
+ if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
+ lpfs->info.base.input_semantic_index[i] < 2) {
+ int idx = lpfs->info.base.input_semantic_index[i];
+ llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
+ }
+
+ if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
+ llvmpipe->face_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ } else {
+ /*
+ * Emit the requested fs attribute for all but position.
+ */
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+ }
+ }
+ /* Figure out if we need bcolor as well.
+ */
+ for (i = 0; i < 2; i++) {
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_BCOLOR, i);
+
+ if (vs_index >= 0) {
+ llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+ }
+ }
+
+
+ /* Figure out if we need pointsize as well.
+ */
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_PSIZE, 0);
+
+ if (vs_index >= 0) {
+ llvmpipe->psize_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ }
+
+ /* Figure out if we need viewport index */
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_VIEWPORT_INDEX,
+ 0);
+ if (vs_index >= 0) {
+ llvmpipe->viewport_index_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ } else {
+ llvmpipe->viewport_index_slot = 0;
+ }
+
+ /* Figure out if we need layer */
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_LAYER,
+ 0);
+ if (vs_index >= 0) {
+ llvmpipe->layer_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ } else {
+ llvmpipe->layer_slot = 0;
+ }
+
+ draw_compute_vertex_size(vinfo);
+ lp_setup_set_vertex_info(llvmpipe->setup, vinfo);
+}
+
+
+/**
+ * Handle state changes.
+ * Called just prior to drawing anything (pipe::draw_arrays(), etc).
+ *
+ * Hopefully this will remain quite simple, otherwise need to pull in
+ * something like the state tracker mechanism.
+ */
+void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
+{
+ struct llvmpipe_screen *lp_screen = llvmpipe_screen(llvmpipe->pipe.screen);
+
+ /* Check for updated textures.
+ */
+ if (llvmpipe->tex_timestamp != lp_screen->timestamp) {
+ llvmpipe->tex_timestamp = lp_screen->timestamp;
+ llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+ }
+
+ if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
+ LP_NEW_FS |
+ LP_NEW_VS))
+ compute_vertex_info( llvmpipe );
+
+ if (llvmpipe->dirty & (LP_NEW_FS |
+ LP_NEW_FRAMEBUFFER |
+ LP_NEW_BLEND |
+ LP_NEW_SCISSOR |
+ LP_NEW_DEPTH_STENCIL_ALPHA |
+ LP_NEW_RASTERIZER |
+ LP_NEW_SAMPLER |
+ LP_NEW_SAMPLER_VIEW |
+ LP_NEW_OCCLUSION_QUERY))
+ llvmpipe_update_fs( llvmpipe );
+
+ if (llvmpipe->dirty & (LP_NEW_RASTERIZER)) {
+ boolean discard =
+ (llvmpipe->sample_mask & 1) == 0 ||
+ (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE);
+
+ lp_setup_set_rasterizer_discard(llvmpipe->setup, discard);
+ }
+
+ if (llvmpipe->dirty & (LP_NEW_FS |
+ LP_NEW_FRAMEBUFFER |
+ LP_NEW_RASTERIZER))
+ llvmpipe_update_setup( llvmpipe );
+
+ if (llvmpipe->dirty & LP_NEW_BLEND_COLOR)
+ lp_setup_set_blend_color(llvmpipe->setup,
+ &llvmpipe->blend_color);
+
+ if (llvmpipe->dirty & LP_NEW_SCISSOR)
+ lp_setup_set_scissors(llvmpipe->setup, llvmpipe->scissors);
+
+ if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) {
+ lp_setup_set_alpha_ref_value(llvmpipe->setup,
+ llvmpipe->depth_stencil->alpha.ref_value);
+ lp_setup_set_stencil_ref_values(llvmpipe->setup,
+ llvmpipe->stencil_ref.ref_value);
+ }
+
+ if (llvmpipe->dirty & LP_NEW_CONSTANTS)
+ lp_setup_set_fs_constants(llvmpipe->setup,
+ Elements(llvmpipe->constants[PIPE_SHADER_FRAGMENT]),
+ llvmpipe->constants[PIPE_SHADER_FRAGMENT]);
+
+ if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW))
+ lp_setup_set_fragment_sampler_views(llvmpipe->setup,
+ llvmpipe->num_sampler_views[PIPE_SHADER_FRAGMENT],
+ llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT]);
+
+ if (llvmpipe->dirty & (LP_NEW_SAMPLER))
+ lp_setup_set_fragment_sampler_state(llvmpipe->setup,
+ llvmpipe->num_samplers[PIPE_SHADER_FRAGMENT],
+ llvmpipe->samplers[PIPE_SHADER_FRAGMENT]);
+
+ if (llvmpipe->dirty & LP_NEW_VIEWPORT) {
+ /*
+ * Update setup and fragment's view of the active viewport state.
+ *
+ * XXX TODO: It is possible to only loop over the active viewports
+ * instead of all viewports (PIPE_MAX_VIEWPORTS).
+ */
+ lp_setup_set_viewports(llvmpipe->setup,
+ PIPE_MAX_VIEWPORTS,
+ llvmpipe->viewports);
+ }
+
+ llvmpipe->dirty = 0;
+}
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
new file mode 100644
index 000000000..fd6c49aac
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -0,0 +1,3217 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Code generate the whole fragment pipeline.
+ *
+ * The fragment pipeline consists of the following stages:
+ * - early depth test
+ * - fragment shader
+ * - alpha test
+ * - depth/stencil test
+ * - blending
+ *
+ * This file has only the glue to assemble the fragment pipeline. The actual
+ * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the
+ * lp_bld_*.[ch] files, and in a complete generic and reusable way. Here we
+ * muster the LLVM JIT execution engine to create a function that follows an
+ * established binary interface and that can be called from C directly.
+ *
+ * A big source of complexity here is that we often want to run different
+ * stages with different data types and precisions. For example,
+ * the fragment shader needs typically to be done in floats, but the
+ * depth/stencil test and blending is better done in the type that most closely
+ * matches the depth/stencil and color buffer respectively.
+ *
+ * Since the width of a SIMD vector register stays the same regardless of the
+ * element type, different types imply different number of elements, so we must
+ * code generate more instances of the stages with larger types to be able to
+ * feed/consume the stages with smaller types.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include <limits.h>
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_format.h"
+#include "util/u_dump.h"
+#include "util/u_string.h"
+#include "util/simple_list.h"
+#include "util/u_dual_blend.h"
+#include "os/os_time.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_pack.h"
+#include "gallivm/lp_bld_format.h"
+#include "gallivm/lp_bld_quad.h"
+
+#include "lp_bld_alpha.h"
+#include "lp_bld_blend.h"
+#include "lp_bld_depth.h"
+#include "lp_bld_interp.h"
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_perf.h"
+#include "lp_setup.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+#include "lp_flush.h"
+#include "lp_state_fs.h"
+#include "lp_rast.h"
+
+
+/** Fragment shader number (for debugging) */
+static unsigned fs_no = 0;
+
+
+/**
+ * Expand the relevant bits of mask_input to an n*4-dword mask for the
+ * n*four pixels in n 2x2 quads. This will set the n*four elements of the
+ * quad mask vector to 0 or ~0.
+ * Grouping is 01, 23 for 2 quad mode hence only 0 and 2 are valid
+ * quad arguments with fs length 8.
+ *
+ * \param first_quad which quad(s) of the quad group to test, in [0,3]
+ * \param mask_input bitwise mask for the whole 4x4 stamp
+ */
+static LLVMValueRef
+generate_quad_mask(struct gallivm_state *gallivm,
+ struct lp_type fs_type,
+ unsigned first_quad,
+ LLVMValueRef mask_input) /* int32 */
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_type mask_type;
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef bits[16];
+ LLVMValueRef mask;
+ int shift, i;
+
+ /*
+ * XXX: We'll need a different path for 16 x u8
+ */
+ assert(fs_type.width == 32);
+ assert(fs_type.length <= Elements(bits));
+ mask_type = lp_int_type(fs_type);
+
+ /*
+ * mask_input >>= (quad * 4)
+ */
+ switch (first_quad) {
+ case 0:
+ shift = 0;
+ break;
+ case 1:
+ assert(fs_type.length == 4);
+ shift = 2;
+ break;
+ case 2:
+ shift = 8;
+ break;
+ case 3:
+ assert(fs_type.length == 4);
+ shift = 10;
+ break;
+ default:
+ assert(0);
+ shift = 0;
+ }
+
+ mask_input = LLVMBuildLShr(builder,
+ mask_input,
+ LLVMConstInt(i32t, shift, 0),
+ "");
+
+ /*
+ * mask = { mask_input & (1 << i), for i in [0,3] }
+ */
+ mask = lp_build_broadcast(gallivm,
+ lp_build_vec_type(gallivm, mask_type),
+ mask_input);
+
+ for (i = 0; i < fs_type.length / 4; i++) {
+ unsigned j = 2 * (i % 2) + (i / 2) * 8;
+ bits[4*i + 0] = LLVMConstInt(i32t, 1ULL << (j + 0), 0);
+ bits[4*i + 1] = LLVMConstInt(i32t, 1ULL << (j + 1), 0);
+ bits[4*i + 2] = LLVMConstInt(i32t, 1ULL << (j + 4), 0);
+ bits[4*i + 3] = LLVMConstInt(i32t, 1ULL << (j + 5), 0);
+ }
+ mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
+
+ /*
+ * mask = mask != 0 ? ~0 : 0
+ */
+ mask = lp_build_compare(gallivm,
+ mask_type, PIPE_FUNC_NOTEQUAL,
+ mask,
+ lp_build_const_int_vec(gallivm, mask_type, 0));
+
+ return mask;
+}
+
+
+#define EARLY_DEPTH_TEST 0x1
+#define LATE_DEPTH_TEST 0x2
+#define EARLY_DEPTH_WRITE 0x4
+#define LATE_DEPTH_WRITE 0x8
+
+static int
+find_output_by_semantic( const struct tgsi_shader_info *info,
+ unsigned semantic,
+ unsigned index )
+{
+ int i;
+
+ for (i = 0; i < info->num_outputs; i++)
+ if (info->output_semantic_name[i] == semantic &&
+ info->output_semantic_index[i] == index)
+ return i;
+
+ return -1;
+}
+
+
+/**
+ * Fetch the specified lp_jit_viewport structure for a given viewport_index.
+ */
+static LLVMValueRef
+lp_llvm_viewport(LLVMValueRef context_ptr,
+ struct gallivm_state *gallivm,
+ LLVMValueRef viewport_index)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+ struct lp_type viewport_type =
+ lp_type_float_vec(32, 32 * LP_JIT_VIEWPORT_NUM_FIELDS);
+
+ ptr = lp_jit_context_viewports(gallivm, context_ptr);
+ ptr = LLVMBuildPointerCast(builder, ptr,
+ LLVMPointerType(lp_build_vec_type(gallivm, viewport_type), 0), "");
+
+ res = lp_build_pointer_get(builder, ptr, viewport_index);
+
+ return res;
+}
+
+
+/**
+ * Generate the fragment shader, depth/stencil test, and alpha tests.
+ */
+static void
+generate_fs_loop(struct gallivm_state *gallivm,
+ struct lp_fragment_shader *shader,
+ const struct lp_fragment_shader_variant_key *key,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef context_ptr,
+ LLVMValueRef num_loop,
+ struct lp_build_interp_soa_context *interp,
+ struct lp_build_sampler_soa *sampler,
+ LLVMValueRef mask_store,
+ LLVMValueRef (*out_color)[4],
+ LLVMValueRef depth_ptr,
+ LLVMValueRef depth_stride,
+ LLVMValueRef facing,
+ LLVMValueRef thread_data_ptr)
+{
+ const struct util_format_description *zs_format_desc = NULL;
+ const struct tgsi_token *tokens = shader->base.tokens;
+ struct lp_type int_type = lp_int_type(type);
+ LLVMTypeRef vec_type, int_vec_type;
+ LLVMValueRef mask_ptr, mask_val;
+ LLVMValueRef consts_ptr, num_consts_ptr;
+ LLVMValueRef z;
+ LLVMValueRef z_value, s_value;
+ LLVMValueRef z_fb, s_fb;
+ LLVMValueRef stencil_refs[2];
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+ struct lp_build_for_loop_state loop_state;
+ struct lp_build_mask_context mask;
+ /*
+ * TODO: figure out if simple_shader optimization is really worthwhile to
+ * keep. Disabled because it may hide some real bugs in the (depth/stencil)
+ * code since tests tend to take another codepath than real shaders.
+ */
+ boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
+ shader->info.base.num_inputs < 3 &&
+ shader->info.base.num_instructions < 8) && 0;
+ const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&key->blend, 0);
+ unsigned attrib;
+ unsigned chan;
+ unsigned cbuf;
+ unsigned depth_mode;
+
+ struct lp_bld_tgsi_system_values system_values;
+
+ memset(&system_values, 0, sizeof(system_values));
+
+ if (key->depth.enabled ||
+ key->stencil[0].enabled) {
+
+ zs_format_desc = util_format_description(key->zsbuf_format);
+ assert(zs_format_desc);
+
+ if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) {
+ if (key->alpha.enabled ||
+ key->blend.alpha_to_coverage ||
+ shader->info.base.uses_kill) {
+ /* With alpha test and kill, can do the depth test early
+ * and hopefully eliminate some quads. But need to do a
+ * special deferred depth write once the final mask value
+ * is known. This only works though if there's either no
+ * stencil test or the stencil value isn't written.
+ */
+ if (key->stencil[0].enabled && (key->stencil[0].writemask ||
+ (key->stencil[1].enabled &&
+ key->stencil[1].writemask)))
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ else
+ depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
+ else
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ }
+ else {
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
+
+ if (!(key->depth.enabled && key->depth.writemask) &&
+ !(key->stencil[0].enabled && (key->stencil[0].writemask ||
+ (key->stencil[1].enabled &&
+ key->stencil[1].writemask))))
+ depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+ }
+ else {
+ depth_mode = 0;
+ }
+
+ vec_type = lp_build_vec_type(gallivm, type);
+ int_vec_type = lp_build_vec_type(gallivm, int_type);
+
+ stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
+ stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
+ /* convert scalar stencil refs into vectors */
+ stencil_refs[0] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[0]);
+ stencil_refs[1] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[1]);
+
+ consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
+ num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr);
+
+ lp_build_for_loop_begin(&loop_state, gallivm,
+ lp_build_const_int32(gallivm, 0),
+ LLVMIntULT,
+ num_loop,
+ lp_build_const_int32(gallivm, 1));
+
+ mask_ptr = LLVMBuildGEP(builder, mask_store,
+ &loop_state.counter, 1, "mask_ptr");
+ mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+
+ memset(outputs, 0, sizeof outputs);
+
+ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm,
+ type),
+ num_loop, "color");
+ }
+ }
+ if (dual_source_blend) {
+ assert(key->nr_cbufs <= 1);
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ out_color[1][chan] = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm,
+ type),
+ num_loop, "color1");
+ }
+ }
+
+
+ /* 'mask' will control execution based on quad's pixel alive/killed state */
+ lp_build_mask_begin(&mask, gallivm, type, mask_val);
+
+ if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
+ lp_build_mask_check(&mask);
+
+ lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
+ z = interp->pos[2];
+
+ if (depth_mode & EARLY_DEPTH_TEST) {
+ lp_build_depth_stencil_load_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
+ depth_ptr, depth_stride,
+ &z_fb, &s_fb, loop_state.counter);
+ lp_build_depth_stencil_test(gallivm,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z, z_fb, s_fb,
+ facing,
+ &z_value, &s_value,
+ !simple_shader);
+
+ if (depth_mode & EARLY_DEPTH_WRITE) {
+ lp_build_depth_stencil_write_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
+ NULL, NULL, NULL, loop_state.counter,
+ depth_ptr, depth_stride,
+ z_value, s_value);
+ }
+ /*
+ * Note mask check if stencil is enabled must be after ds write not after
+ * stencil test otherwise new stencil values may not get written if all
+ * fragments got killed by depth/stencil test.
+ */
+ if (!simple_shader && key->stencil[0].enabled)
+ lp_build_mask_check(&mask);
+ }
+
+ lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+
+ /* Build the actual shader */
+ lp_build_tgsi_soa(gallivm, tokens, type, &mask,
+ consts_ptr, num_consts_ptr, &system_values,
+ interp->inputs,
+ outputs, context_ptr,
+ sampler, &shader->info.base, NULL);
+
+ /* Alpha test */
+ if (key->alpha.enabled) {
+ int color0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_COLOR,
+ 0);
+
+ if (color0 != -1 && outputs[color0][3]) {
+ const struct util_format_description *cbuf_format_desc;
+ LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+ LLVMValueRef alpha_ref_value;
+
+ alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
+ alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
+
+ cbuf_format_desc = util_format_description(key->cbuf_format[0]);
+
+ lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
+ &mask, alpha, alpha_ref_value,
+ (depth_mode & LATE_DEPTH_TEST) != 0);
+ }
+ }
+
+ /* Emulate Alpha to Coverage with Alpha test */
+ if (key->blend.alpha_to_coverage) {
+ int color0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_COLOR,
+ 0);
+
+ if (color0 != -1 && outputs[color0][3]) {
+ LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+
+ lp_build_alpha_to_coverage(gallivm, type,
+ &mask, alpha,
+ (depth_mode & LATE_DEPTH_TEST) != 0);
+ }
+ }
+
+ /* Late Z test */
+ if (depth_mode & LATE_DEPTH_TEST) {
+ int pos0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+ int s_out = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_STENCIL,
+ 0);
+ if (pos0 != -1 && outputs[pos0][2]) {
+ z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+
+ /*
+ * Clamp according to ARB_depth_clamp semantics.
+ */
+ if (key->depth_clamp) {
+ LLVMValueRef viewport, min_depth, max_depth;
+ LLVMValueRef viewport_index;
+ struct lp_build_context f32_bld;
+
+ assert(type.floating);
+ lp_build_context_init(&f32_bld, gallivm, type);
+
+ /*
+ * Assumes clamping of the viewport index will occur in setup/gs. Value
+ * is passed through the rasterization stage via lp_rast_shader_inputs.
+ *
+ * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping
+ * semantics.
+ */
+ viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm,
+ thread_data_ptr);
+
+ /*
+ * Load the min and max depth from the lp_jit_context.viewports
+ * array of lp_jit_viewport structures.
+ */
+ viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index);
+
+ /* viewports[viewport_index].min_depth */
+ min_depth = LLVMBuildExtractElement(builder, viewport,
+ lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH),
+ "");
+ min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth);
+
+ /* viewports[viewport_index].max_depth */
+ max_depth = LLVMBuildExtractElement(builder, viewport,
+ lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH),
+ "");
+ max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth);
+
+ /*
+ * Clamp to the min and max depth values for the given viewport.
+ */
+ z = lp_build_clamp(&f32_bld, z, min_depth, max_depth);
+ }
+ }
+
+ if (s_out != -1 && outputs[s_out][1]) {
+ /* there's only one value, and spec says to discard additional bits */
+ LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255);
+ stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s");
+ stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, "");
+ stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, "");
+ stencil_refs[1] = stencil_refs[0];
+ }
+
+ lp_build_depth_stencil_load_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
+ depth_ptr, depth_stride,
+ &z_fb, &s_fb, loop_state.counter);
+
+ lp_build_depth_stencil_test(gallivm,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z, z_fb, s_fb,
+ facing,
+ &z_value, &s_value,
+ !simple_shader);
+ /* Late Z write */
+ if (depth_mode & LATE_DEPTH_WRITE) {
+ lp_build_depth_stencil_write_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
+ NULL, NULL, NULL, loop_state.counter,
+ depth_ptr, depth_stride,
+ z_value, s_value);
+ }
+ }
+ else if ((depth_mode & EARLY_DEPTH_TEST) &&
+ (depth_mode & LATE_DEPTH_WRITE))
+ {
+ /* Need to apply a reduced mask to the depth write. Reload the
+ * depth value, update from zs_value with the new mask value and
+ * write that out.
+ */
+ lp_build_depth_stencil_write_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
+ &mask, z_fb, s_fb, loop_state.counter,
+ depth_ptr, depth_stride,
+ z_value, s_value);
+ }
+
+
+ /* Color write */
+ for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+ {
+ unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+ if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) &&
+ ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend)))
+ {
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ if(outputs[attrib][chan]) {
+ /* XXX: just initialize outputs to point at colors[] and
+ * skip this.
+ */
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+ LLVMValueRef color_ptr;
+ color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
+ &loop_state.counter, 1, "");
+ lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
+ LLVMBuildStore(builder, out, color_ptr);
+ }
+ }
+ }
+ }
+
+ if (key->occlusion_count) {
+ LLVMValueRef counter = lp_jit_thread_data_counter(gallivm, thread_data_ptr);
+ lp_build_name(counter, "counter");
+ lp_build_occlusion_count(gallivm, type,
+ lp_build_mask_value(&mask), counter);
+ }
+
+ mask_val = lp_build_mask_end(&mask);
+ LLVMBuildStore(builder, mask_val, mask_ptr);
+ lp_build_for_loop_end(&loop_state);
+}
+
+
+/**
+ * This function will reorder pixels from the fragment shader SoA to memory layout AoS
+ *
+ * Fragment Shader outputs pixels in small 2x2 blocks
+ * e.g. (0, 0), (1, 0), (0, 1), (1, 1) ; (2, 0) ...
+ *
+ * However in memory pixels are stored in rows
+ * e.g. (0, 0), (1, 0), (2, 0), (3, 0) ; (0, 1) ...
+ *
+ * @param type fragment shader type (4x or 8x float)
+ * @param num_fs number of fs_src
+ * @param is_1d whether we're outputting to a 1d resource
+ * @param dst_channels number of output channels
+ * @param fs_src output from fragment shader
+ * @param dst pointer to store result
+ * @param pad_inline is channel padding inline or at end of row
+ * @return the number of dsts
+ */
+static int
+generate_fs_twiddle(struct gallivm_state *gallivm,
+ struct lp_type type,
+ unsigned num_fs,
+ unsigned dst_channels,
+ LLVMValueRef fs_src[][4],
+ LLVMValueRef* dst,
+ bool pad_inline)
+{
+ LLVMValueRef src[16];
+
+ bool swizzle_pad;
+ bool twiddle;
+ bool split;
+
+ unsigned pixels = type.length / 4;
+ unsigned reorder_group;
+ unsigned src_channels;
+ unsigned src_count;
+ unsigned i;
+
+ src_channels = dst_channels < 3 ? dst_channels : 4;
+ src_count = num_fs * src_channels;
+
+ assert(pixels == 2 || pixels == 1);
+ assert(num_fs * src_channels <= Elements(src));
+
+ /*
+ * Transpose from SoA -> AoS
+ */
+ for (i = 0; i < num_fs; ++i) {
+ lp_build_transpose_aos_n(gallivm, type, &fs_src[i][0], src_channels, &src[i * src_channels]);
+ }
+
+ /*
+ * Pick transformation options
+ */
+ swizzle_pad = false;
+ twiddle = false;
+ split = false;
+ reorder_group = 0;
+
+ if (dst_channels == 1) {
+ twiddle = true;
+
+ if (pixels == 2) {
+ split = true;
+ }
+ } else if (dst_channels == 2) {
+ if (pixels == 1) {
+ reorder_group = 1;
+ }
+ } else if (dst_channels > 2) {
+ if (pixels == 1) {
+ reorder_group = 2;
+ } else {
+ twiddle = true;
+ }
+
+ if (!pad_inline && dst_channels == 3 && pixels > 1) {
+ swizzle_pad = true;
+ }
+ }
+
+ /*
+ * Split the src in half
+ */
+ if (split) {
+ for (i = num_fs; i > 0; --i) {
+ src[(i - 1)*2 + 1] = lp_build_extract_range(gallivm, src[i - 1], 4, 4);
+ src[(i - 1)*2 + 0] = lp_build_extract_range(gallivm, src[i - 1], 0, 4);
+ }
+
+ src_count *= 2;
+ type.length = 4;
+ }
+
+ /*
+ * Ensure pixels are in memory order
+ */
+ if (reorder_group) {
+ /* Twiddle pixels by reordering the array, e.g.:
+ *
+ * src_count = 8 -> 0 2 1 3 4 6 5 7
+ * src_count = 16 -> 0 1 4 5 2 3 6 7 8 9 12 13 10 11 14 15
+ */
+ const unsigned reorder_sw[] = { 0, 2, 1, 3 };
+
+ for (i = 0; i < src_count; ++i) {
+ unsigned group = i / reorder_group;
+ unsigned block = (group / 4) * 4 * reorder_group;
+ unsigned j = block + (reorder_sw[group % 4] * reorder_group) + (i % reorder_group);
+ dst[i] = src[j];
+ }
+ } else if (twiddle) {
+ /* Twiddle pixels across elements of array */
+ lp_bld_quad_twiddle(gallivm, type, src, src_count, dst);
+ } else {
+ /* Do nothing */
+ memcpy(dst, src, sizeof(LLVMValueRef) * src_count);
+ }
+
+ /*
+ * Moves any padding between pixels to the end
+ * e.g. RGBXRGBX -> RGBRGBXX
+ */
+ if (swizzle_pad) {
+ unsigned char swizzles[16];
+ unsigned elems = pixels * dst_channels;
+
+ for (i = 0; i < type.length; ++i) {
+ if (i < elems)
+ swizzles[i] = i % dst_channels + (i / dst_channels) * 4;
+ else
+ swizzles[i] = LP_BLD_SWIZZLE_DONTCARE;
+ }
+
+ for (i = 0; i < src_count; ++i) {
+ dst[i] = lp_build_swizzle_aos_n(gallivm, dst[i], swizzles, type.length, type.length);
+ }
+ }
+
+ return src_count;
+}
+
+
+/**
+ * Load an unswizzled block of pixels from memory
+ */
+static void
+load_unswizzled_block(struct gallivm_state *gallivm,
+ LLVMValueRef base_ptr,
+ LLVMValueRef stride,
+ unsigned block_width,
+ unsigned block_height,
+ LLVMValueRef* dst,
+ struct lp_type dst_type,
+ unsigned dst_count,
+ unsigned dst_alignment)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ unsigned row_size = dst_count / block_height;
+ unsigned i;
+
+ /* Ensure block exactly fits into dst */
+ assert((block_width * block_height) % dst_count == 0);
+
+ for (i = 0; i < dst_count; ++i) {
+ unsigned x = i % row_size;
+ unsigned y = i / row_size;
+
+ LLVMValueRef bx = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length);
+ LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, "");
+
+ LLVMValueRef gep[2];
+ LLVMValueRef dst_ptr;
+
+ gep[0] = lp_build_const_int32(gallivm, 0);
+ gep[1] = LLVMBuildAdd(builder, bx, by, "");
+
+ dst_ptr = LLVMBuildGEP(builder, base_ptr, gep, 2, "");
+ dst_ptr = LLVMBuildBitCast(builder, dst_ptr, LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0), "");
+
+ dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
+
+ lp_set_load_alignment(dst[i], dst_alignment);
+ }
+}
+
+
+/**
+ * Write src (src_count vectors of src_type) out to memory as an
+ * unswizzled block_width x block_height pixel block.
+ *
+ * stride is the byte pitch between rows of the destination surface.
+ */
+static void
+store_unswizzled_block(struct gallivm_state *gallivm,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef stride,
+                       unsigned block_width,
+                       unsigned block_height,
+                       LLVMValueRef* src,
+                       struct lp_type src_type,
+                       unsigned src_count,
+                       unsigned src_alignment)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned vectors_per_row = src_count / block_height;
+   unsigned idx;
+
+   /* The src vectors must evenly cover the block */
+   assert((block_width * block_height) % src_count == 0);
+
+   for (idx = 0; idx < src_count; ++idx) {
+      unsigned col = idx % vectors_per_row;
+      unsigned row = idx / vectors_per_row;
+      LLVMValueRef offs_x, offs_y, store, gep[2], ptr;
+
+      /* Byte offset of this vector within its row, then the row offset */
+      offs_x = lp_build_const_int32(gallivm, col * (src_type.width / 8) * src_type.length);
+      offs_y = LLVMBuildMul(builder, lp_build_const_int32(gallivm, row), stride, "");
+
+      gep[0] = lp_build_const_int32(gallivm, 0);
+      gep[1] = LLVMBuildAdd(builder, offs_x, offs_y, "");
+
+      ptr = LLVMBuildGEP(builder, base_ptr, gep, 2, "");
+      ptr = LLVMBuildBitCast(builder, ptr,
+                             LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0), "");
+
+      store = LLVMBuildStore(builder, src[idx], ptr);
+
+      lp_set_store_alignment(store, src_alignment);
+   }
+}
+
+
+/**
+ * Checks if a format description is an arithmetic format
+ *
+ * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5,
+ * i.e. any channel that differs in size from channel 0 or is not a whole
+ * number of bytes.
+ */
+static inline boolean
+is_arithmetic_format(const struct util_format_description *format_desc)
+{
+   unsigned chan;
+
+   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
+      if (format_desc->channel[chan].size != format_desc->channel[0].size ||
+          (format_desc->channel[chan].size % 8) != 0) {
+         return true;
+      }
+   }
+
+   return false;
+}
+
+
+/**
+ * Checks if this format requires special handling due to required expansion
+ * to floats for blending, and furthermore has "natural" packed AoS ->
+ * unpacked SoA conversion.
+ *
+ * True for packed R11G11B10 float and for any sRGB colorspace format.
+ */
+static inline boolean
+format_expands_to_float_soa(const struct util_format_description *format_desc)
+{
+   return format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
+          format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
+}
+
+
+/**
+ * Retrieves the type representing the memory layout for a format
+ *
+ * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
+ */
+static inline void
+lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
+                             struct lp_type* type)
+{
+   unsigned chan;
+   unsigned i;
+
+   if (format_expands_to_float_soa(format_desc)) {
+      /* just make this a uint with width of block */
+      type->floating = false;
+      type->fixed = false;
+      type->sign = false;
+      type->norm = false;
+      type->width = format_desc->block.bits;
+      type->length = 1;
+      return;
+   }
+
+   /* The first non-void channel determines the base type */
+   for (chan = 0; chan < 4; chan++) {
+      if (format_desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+         break;
+   }
+
+   memset(type, 0, sizeof(struct lp_type));
+   type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
+   type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
+   type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
+   type->norm = format_desc->channel[chan].normalized;
+
+   if (!is_arithmetic_format(format_desc)) {
+      /* Regular layout: one vector element per channel */
+      type->width = format_desc->channel[chan].size;
+      type->length = format_desc->nr_channels;
+   } else {
+      /* Irregular channel sizes: treat the whole pixel as a single integer */
+      type->width = 0;
+      type->length = 1;
+      for (i = 0; i < format_desc->nr_channels; ++i) {
+         type->width += format_desc->channel[i].size;
+      }
+   }
+}
+
+
+/**
+ * Retrieves the type for a format which is usable in the blending code.
+ *
+ * e.g. RGBA16F = 4x float, R3G3B2 = 3x byte
+ */
+static inline void
+lp_blend_type_from_format_desc(const struct util_format_description *format_desc,
+                               struct lp_type* type)
+{
+   unsigned chan;
+   unsigned i;
+
+   if (format_expands_to_float_soa(format_desc)) {
+      /* always use ordinary floats for blending */
+      type->floating = true;
+      type->fixed = false;
+      type->sign = true;
+      type->norm = false;
+      type->width = 32;
+      type->length = 4;
+      return;
+   }
+
+   /* The first non-void channel determines the base type */
+   for (chan = 0; chan < 4; chan++) {
+      if (format_desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+         break;
+   }
+
+   memset(type, 0, sizeof(struct lp_type));
+   type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
+   type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
+   type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED;
+   type->norm = format_desc->channel[chan].normalized;
+   type->length = format_desc->nr_channels;
+
+   /* Start from the widest channel... */
+   type->width = format_desc->channel[chan].size;
+   for (i = 1; i < format_desc->nr_channels; ++i) {
+      if (format_desc->channel[i].size > type->width)
+         type->width = format_desc->channel[i].size;
+   }
+
+   /* ...and round it up to a convenient element size (floats always 32) */
+   if (type->floating) {
+      type->width = 32;
+   } else if (type->width <= 8) {
+      type->width = 8;
+   } else if (type->width <= 16) {
+      type->width = 16;
+   } else {
+      type->width = 32;
+   }
+
+   /* Pad 3-channel arithmetic formats out to 4 elements */
+   if (is_arithmetic_format(format_desc) && type->length == 3) {
+      type->length = 4;
+   }
+}
+
+
+/**
+ * Scale a normalized value from src_bits to dst_bits.
+ *
+ * The exact calculation is
+ *
+ *    dst = iround(src * dst_mask / src_mask)
+ *
+ * or with integer rounding
+ *
+ *    dst = src * (2*dst_mask + sign(src)*src_mask) / (2*src_mask)
+ *
+ * where
+ *
+ *    src_mask = (1 << src_bits) - 1
+ *    dst_mask = (1 << dst_bits) - 1
+ *
+ * but we try to avoid division and multiplication through shifts.
+ *
+ * src is a vector of integers described by src_type; only src_type.sign
+ * is consulted (to decide whether the rounding term is safe to add).
+ * The returned value is still a vector of src_type.
+ */
+static inline LLVMValueRef
+scale_bits(struct gallivm_state *gallivm,
+           int src_bits,
+           int dst_bits,
+           LLVMValueRef src,
+           struct lp_type src_type)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef result = src;
+
+   if (dst_bits < src_bits) {
+      /* Scaling down: drop src_bits - dst_bits of precision */
+      int delta_bits = src_bits - dst_bits;
+
+      if (delta_bits <= dst_bits) {
+         /*
+          * Approximate the rescaling with a single shift.
+          *
+          * This gives the wrong rounding.
+          */
+
+         result = LLVMBuildLShr(builder,
+                                src,
+                                lp_build_const_int_vec(gallivm, src_type, delta_bits),
+                                "");
+
+      } else {
+         /*
+          * Try more accurate rescaling.
+          */
+
+         /*
+          * Drop the least significant bits to make space for the multiplication.
+          *
+          * XXX: A better approach would be to use a wider integer type as intermediate. But
+          * this is enough to convert alpha from 16bits -> 2 when rendering to
+          * PIPE_FORMAT_R10G10B10A2_UNORM.
+          */
+         result = LLVMBuildLShr(builder,
+                                src,
+                                lp_build_const_int_vec(gallivm, src_type, dst_bits),
+                                "");
+
+
+         result = LLVMBuildMul(builder,
+                               result,
+                               lp_build_const_int_vec(gallivm, src_type, (1LL << dst_bits) - 1),
+                               "");
+
+         /*
+          * Add a rounding term before the division.
+          *
+          * TODO: Handle signed integers too.
+          */
+         if (!src_type.sign) {
+            result = LLVMBuildAdd(builder,
+                                  result,
+                                  lp_build_const_int_vec(gallivm, src_type, (1LL << (delta_bits - 1))),
+                                  "");
+         }
+
+         /*
+          * Approximate the division by src_mask with a src_bits shift.
+          *
+          * Given the src has already been shifted by dst_bits, all we need
+          * to do is to shift by the difference.
+          */
+
+         result = LLVMBuildLShr(builder,
+                                result,
+                                lp_build_const_int_vec(gallivm, src_type, delta_bits),
+                                "");
+      }
+
+   } else if (dst_bits > src_bits) {
+      /* Scale up bits */
+      int db = dst_bits - src_bits;
+
+      /* Shift left by difference in bits */
+      result = LLVMBuildShl(builder,
+                            src,
+                            lp_build_const_int_vec(gallivm, src_type, db),
+                            "");
+
+      if (db < src_bits) {
+         /* Enough bits in src to fill the remainder */
+         LLVMValueRef lower = LLVMBuildLShr(builder,
+                                            src,
+                                            lp_build_const_int_vec(gallivm, src_type, src_bits - db),
+                                            "");
+
+         result = LLVMBuildOr(builder, result, lower, "");
+      } else if (db > src_bits) {
+         /* Need to repeatedly copy src bits to fill remainder in dst */
+         unsigned n;
+
+         /* Replicate the source bit pattern downward, doubling each pass */
+         for (n = src_bits; n < dst_bits; n *= 2) {
+            LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n);
+
+            result = LLVMBuildOr(builder,
+                                 result,
+                                 LLVMBuildLShr(builder, result, shuv, ""),
+                                 "");
+         }
+      }
+   }
+
+   return result;
+}
+
+/**
+ * If RT is a smallfloat (needing denorms) format
+ */
+static inline int
+have_smallfloat_format(struct lp_type dst_type,
+                       enum pipe_format format)
+{
+   if (dst_type.floating && dst_type.width != 32)
+      return 1;
+   /* due to format handling hacks this format doesn't have floating set
+    * here (and actually has width set to 32 too) so special case this. */
+   return format == PIPE_FORMAT_R11G11B10_FLOAT;
+}
+
+
+/**
+ * Convert from memory format to blending format
+ *
+ * e.g. GL_R3G3B2 is 1 byte in memory but 3 bytes for blending
+ *
+ * Operates in place: src holds num_srcs vectors of src_type (memory layout)
+ * on entry and the corresponding blend-layout vectors on return.
+ * block_size is the total pixel count covered by the src vectors.
+ */
+static void
+convert_to_blend_type(struct gallivm_state *gallivm,
+                      unsigned block_size,
+                      const struct util_format_description *src_fmt,
+                      struct lp_type src_type,
+                      struct lp_type dst_type,
+                      LLVMValueRef* src, // and dst
+                      unsigned num_srcs)
+{
+   LLVMValueRef *dst = src;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_type blend_type;
+   struct lp_type mem_type;
+   unsigned i, j, k;
+   unsigned pixels = block_size / num_srcs;
+   bool is_arith;
+
+   /*
+    * full custom path for packed floats and srgb formats - none of the later
+    * functions would do anything useful, and given the lp_type representation they
+    * can't be fixed. Should really have some SoA blend path for these kind of
+    * formats rather than hacking them in here.
+    */
+   if (format_expands_to_float_soa(src_fmt)) {
+      LLVMValueRef tmpsrc[4];
+      /*
+       * This is pretty suboptimal for this case blending in SoA would be much
+       * better, since conversion gets us SoA values so need to convert back.
+       */
+      assert(src_type.width == 32 || src_type.width == 16);
+      assert(dst_type.floating);
+      assert(dst_type.width == 32);
+      assert(dst_type.length % 4 == 0);
+      assert(num_srcs % 4 == 0);
+
+      if (src_type.width == 16) {
+         /* expand 4x16bit values to 4x32bit */
+         struct lp_type type32x4 = src_type;
+         LLVMTypeRef ltype32x4;
+         unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type32x4.width = 32;
+         ltype32x4 = lp_build_vec_type(gallivm, type32x4);
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
+         }
+         src_type.width = 32;
+      }
+      /* Snapshot the packed inputs; the loop below overwrites src in place */
+      for (i = 0; i < 4; i++) {
+         tmpsrc[i] = src[i];
+      }
+      for (i = 0; i < num_srcs / 4; i++) {
+         LLVMValueRef tmpsoa[4];
+         LLVMValueRef tmps = tmpsrc[i];
+         if (dst_type.length == 8) {
+            LLVMValueRef shuffles[8];
+            unsigned j;
+            /* fetch was 4 values but need 8-wide output values */
+            tmps = lp_build_concat(gallivm, &tmpsrc[i * 2], src_type, 2);
+            /*
+             * for 8-wide aos transpose would give us wrong order not matching
+             * incoming converted fs values and mask. ARGH.
+             */
+            for (j = 0; j < 4; j++) {
+               shuffles[j] = lp_build_const_int32(gallivm, j * 2);
+               shuffles[j + 4] = lp_build_const_int32(gallivm, j * 2 + 1);
+            }
+            tmps = LLVMBuildShuffleVector(builder, tmps, tmps,
+                                          LLVMConstVector(shuffles, 8), "");
+         }
+         /* Unpack to SoA floats, then transpose back to AoS blend layout */
+         if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+            lp_build_r11g11b10_to_float(gallivm, tmps, tmpsoa);
+         }
+         else {
+            lp_build_unpack_rgba_soa(gallivm, src_fmt, dst_type, tmps, tmpsoa);
+         }
+         lp_build_transpose_aos(gallivm, dst_type, tmpsoa, &src[i * 4]);
+      }
+      return;
+   }
+
+   lp_mem_type_from_format_desc(src_fmt, &mem_type);
+   lp_blend_type_from_format_desc(src_fmt, &blend_type);
+
+   /* Is the format arithmetic */
+   is_arith = blend_type.length * blend_type.width != mem_type.width * mem_type.length;
+   is_arith &= !(mem_type.width == 16 && mem_type.floating);
+
+   /* Pad if necessary */
+   if (!is_arith && src_type.length < dst_type.length) {
+      for (i = 0; i < num_srcs; ++i) {
+         dst[i] = lp_build_pad_vector(gallivm, src[i], dst_type.length);
+      }
+
+      src_type.length = dst_type.length;
+   }
+
+   /* Special case for half-floats */
+   if (mem_type.width == 16 && mem_type.floating) {
+      assert(blend_type.width == 32 && blend_type.floating);
+      lp_build_conv_auto(gallivm, src_type, &dst_type, dst, num_srcs, dst);
+      is_arith = false;
+   }
+
+   /* Non-arithmetic formats need no per-bit unpacking */
+   if (!is_arith) {
+      return;
+   }
+
+   /* View each packed pixel as one wide integer element */
+   src_type.width = blend_type.width * blend_type.length;
+   blend_type.length *= pixels;
+   src_type.length *= pixels / (src_type.length / mem_type.length);
+
+   for (i = 0; i < num_srcs; ++i) {
+      LLVMValueRef chans[4];
+      LLVMValueRef res = NULL;
+
+      dst[i] = LLVMBuildZExt(builder, src[i], lp_build_vec_type(gallivm, src_type), "");
+
+      for (j = 0; j < src_fmt->nr_channels; ++j) {
+         unsigned mask = 0;
+         unsigned sa = src_fmt->channel[j].shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         unsigned from_lsb = j;
+#else
+         unsigned from_lsb = src_fmt->nr_channels - j - 1;
+#endif
+
+         /* Build a mask of channel[j].size low bits */
+         for (k = 0; k < src_fmt->channel[j].size; ++k) {
+            mask |= 1 << k;
+         }
+
+         /* Extract bits from source */
+         chans[j] = LLVMBuildLShr(builder,
+                                  dst[i],
+                                  lp_build_const_int_vec(gallivm, src_type, sa),
+                                  "");
+
+         chans[j] = LLVMBuildAnd(builder,
+                                 chans[j],
+                                 lp_build_const_int_vec(gallivm, src_type, mask),
+                                 "");
+
+         /* Scale bits */
+         if (src_type.norm) {
+            chans[j] = scale_bits(gallivm, src_fmt->channel[j].size,
+                                  blend_type.width, chans[j], src_type);
+         }
+
+         /* Insert bits into correct position */
+         chans[j] = LLVMBuildShl(builder,
+                                 chans[j],
+                                 lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+                                 "");
+
+         if (j == 0) {
+            res = chans[j];
+         } else {
+            res = LLVMBuildOr(builder, res, chans[j], "");
+         }
+      }
+
+      dst[i] = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, blend_type), "");
+   }
+}
+
+
+/**
+ * Convert from blending format to memory format
+ *
+ * e.g. GL_R3G3B2 is 3 bytes for blending but 1 byte in memory
+ *
+ * Operates in place: src holds num_srcs vectors of blend-layout (src_type)
+ * values on entry and the packed memory-layout (dst_type) vectors on return.
+ * block_size is the total pixel count covered by the src vectors.
+ */
+static void
+convert_from_blend_type(struct gallivm_state *gallivm,
+                        unsigned block_size,
+                        const struct util_format_description *src_fmt,
+                        struct lp_type src_type,
+                        struct lp_type dst_type,
+                        LLVMValueRef* src, // and dst
+                        unsigned num_srcs)
+{
+   LLVMValueRef* dst = src;
+   unsigned i, j, k;
+   struct lp_type mem_type;
+   struct lp_type blend_type;
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned pixels = block_size / num_srcs;
+   bool is_arith;
+
+   /*
+    * full custom path for packed floats and srgb formats - none of the later
+    * functions would do anything useful, and given the lp_type representation they
+    * can't be fixed. Should really have some SoA blend path for these kind of
+    * formats rather than hacking them in here.
+    */
+   if (format_expands_to_float_soa(src_fmt)) {
+      /*
+       * This is pretty suboptimal for this case blending in SoA would be much
+       * better - we need to transpose the AoS values back to SoA values for
+       * conversion/packing.
+       */
+      assert(src_type.floating);
+      assert(src_type.width == 32);
+      assert(src_type.length % 4 == 0);
+      assert(dst_type.width == 32 || dst_type.width == 16);
+
+      for (i = 0; i < num_srcs / 4; i++) {
+         LLVMValueRef tmpsoa[4], tmpdst;
+         lp_build_transpose_aos(gallivm, src_type, &src[i * 4], tmpsoa);
+         /* really really need SoA here */
+
+         if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+            tmpdst = lp_build_float_to_r11g11b10(gallivm, tmpsoa);
+         }
+         else {
+            tmpdst = lp_build_float_to_srgb_packed(gallivm, src_fmt,
+                                                   src_type, tmpsoa);
+         }
+
+         if (src_type.length == 8) {
+            LLVMValueRef tmpaos, shuffles[8];
+            unsigned j;
+            /*
+             * for 8-wide aos transpose has given us wrong order not matching
+             * output order. HMPF. Also need to split the output values manually.
+             */
+            for (j = 0; j < 4; j++) {
+               shuffles[j * 2] = lp_build_const_int32(gallivm, j);
+               shuffles[j * 2 + 1] = lp_build_const_int32(gallivm, j + 4);
+            }
+            tmpaos = LLVMBuildShuffleVector(builder, tmpdst, tmpdst,
+                                            LLVMConstVector(shuffles, 8), "");
+            src[i * 2] = lp_build_extract_range(gallivm, tmpaos, 0, 4);
+            src[i * 2 + 1] = lp_build_extract_range(gallivm, tmpaos, 4, 4);
+         }
+         else {
+            src[i] = tmpdst;
+         }
+      }
+      if (dst_type.width == 16) {
+         /* Narrow 32-bit packed results down to 16-bit memory elements */
+         struct lp_type type16x8 = dst_type;
+         struct lp_type type32x4 = dst_type;
+         LLVMTypeRef ltype16x4, ltypei64, ltypei128;
+         unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+         type16x8.length = 8;
+         type32x4.width = 32;
+         ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
+         ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
+         ltype16x4 = lp_build_vec_type(gallivm, dst_type);
+         /* We could do vector truncation but it doesn't generate very good code */
+         for (i = 0; i < num_fetch; i++) {
+            src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
+                                    src[i], lp_build_zero(gallivm, type32x4));
+            src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
+            src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
+            src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
+         }
+      }
+      return;
+   }
+
+   lp_mem_type_from_format_desc(src_fmt, &mem_type);
+   lp_blend_type_from_format_desc(src_fmt, &blend_type);
+
+   is_arith = (blend_type.length * blend_type.width != mem_type.width * mem_type.length);
+
+   /* Special case for half-floats */
+   if (mem_type.width == 16 && mem_type.floating) {
+      int length = dst_type.length;
+      assert(blend_type.width == 32 && blend_type.floating);
+
+      dst_type.length = src_type.length;
+
+      lp_build_conv_auto(gallivm, src_type, &dst_type, dst, num_srcs, dst);
+
+      dst_type.length = length;
+      is_arith = false;
+   }
+
+   /* Remove any padding */
+   if (!is_arith && (src_type.length % mem_type.length)) {
+      src_type.length -= (src_type.length % mem_type.length);
+
+      for (i = 0; i < num_srcs; ++i) {
+         dst[i] = lp_build_extract_range(gallivm, dst[i], 0, src_type.length);
+      }
+   }
+
+   /* No bit arithmetic to do */
+   if (!is_arith) {
+      return;
+   }
+
+   /* View each pixel as one wide integer element for repacking */
+   src_type.length = pixels;
+   src_type.width = blend_type.length * blend_type.width;
+   dst_type.length = pixels;
+
+   for (i = 0; i < num_srcs; ++i) {
+      LLVMValueRef chans[4];
+      LLVMValueRef res = NULL;
+
+      dst[i] = LLVMBuildBitCast(builder, src[i], lp_build_vec_type(gallivm, src_type), "");
+
+      for (j = 0; j < src_fmt->nr_channels; ++j) {
+         unsigned mask = 0;
+         unsigned sa = src_fmt->channel[j].shift;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         unsigned from_lsb = j;
+#else
+         unsigned from_lsb = src_fmt->nr_channels - j - 1;
+#endif
+
+         assert(blend_type.width > src_fmt->channel[j].size);
+
+         /* Build a mask of blend_type.width low bits */
+         for (k = 0; k < blend_type.width; ++k) {
+            mask |= 1 << k;
+         }
+
+         /* Extract bits */
+         chans[j] = LLVMBuildLShr(builder,
+                                  dst[i],
+                                  lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+                                  "");
+
+         chans[j] = LLVMBuildAnd(builder,
+                                 chans[j],
+                                 lp_build_const_int_vec(gallivm, src_type, mask),
+                                 "");
+
+         /* Scale down bits */
+         if (src_type.norm) {
+            chans[j] = scale_bits(gallivm, blend_type.width,
+                                  src_fmt->channel[j].size, chans[j], src_type);
+         }
+
+         /* Insert bits */
+         chans[j] = LLVMBuildShl(builder,
+                                 chans[j],
+                                 lp_build_const_int_vec(gallivm, src_type, sa),
+                                 "");
+
+         /*
+          * NOTE(review): sa is re-initialized from channel[j].shift at the
+          * top of each iteration, so this increment is a dead store —
+          * confirm against upstream before removing.
+          */
+         sa += src_fmt->channel[j].size;
+
+         if (j == 0) {
+            res = chans[j];
+         } else {
+            res = LLVMBuildOr(builder, res, chans[j], "");
+         }
+      }
+
+      assert (dst_type.width != 24);
+
+      dst[i] = LLVMBuildTrunc(builder, res, lp_build_vec_type(gallivm, dst_type), "");
+   }
+}
+
+
+/**
+ * Convert alpha to same blend type as src
+ *
+ * src_alpha holds block_height vectors of alpha_type on entry; on return it
+ * holds src_count vectors of row_type with each pixel's alpha replicated
+ * across that pixel's channels, ready for use as a blend factor.
+ */
+static void
+convert_alpha(struct gallivm_state *gallivm,
+              struct lp_type row_type,
+              struct lp_type alpha_type,
+              const unsigned block_size,
+              const unsigned block_height,
+              const unsigned src_count,
+              const unsigned dst_channels,
+              const bool pad_inline,
+              LLVMValueRef* src_alpha)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned i, j;
+   unsigned length = row_type.length;
+   row_type.length = alpha_type.length;
+
+   /* Twiddle the alpha to match pixels */
+   lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, block_height, src_alpha);
+
+   /*
+    * TODO this should use single lp_build_conv call for
+    * src_count == 1 && dst_channels == 1 case (dropping the concat below)
+    */
+   for (i = 0; i < block_height; ++i) {
+      lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, &src_alpha[i], 1);
+   }
+
+   alpha_type = row_type;
+   row_type.length = length;
+
+   /* If only one channel we only need the single alpha value per pixel */
+   if (src_count == 1 && dst_channels == 1) {
+
+      lp_build_concat_n(gallivm, alpha_type, src_alpha, block_height, src_alpha, src_count);
+   } else {
+      /* If there are more srcs than rows then we need to split alpha up */
+      if (src_count > block_height) {
+         for (i = src_count; i > 0; --i) {
+            unsigned pixels = block_size / src_count;
+            unsigned idx = i - 1;
+
+            src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx * pixels) / 4],
+                                                    (idx * pixels) % 4, pixels);
+         }
+      }
+
+      /* If there is a src for each pixel broadcast the alpha across whole row */
+      if (src_count == block_size) {
+         for (i = 0; i < src_count; ++i) {
+            src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+         }
+      } else {
+         unsigned pixels = block_size / src_count;
+         unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels;
+         unsigned alpha_span = 1;
+         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+         /* Check if we need 2 src_alphas for our shuffles */
+         if (pixels > alpha_type.length) {
+            alpha_span = 2;
+         }
+
+         /* Broadcast alpha across all channels, e.g. a1a2 to a1a1a1a1a2a2a2a2 */
+         for (j = 0; j < row_type.length; ++j) {
+            if (j < pixels * channels) {
+               shuffles[j] = lp_build_const_int32(gallivm, j / channels);
+            } else {
+               /* lanes past the live pixels are never read */
+               shuffles[j] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+            }
+         }
+
+         for (i = 0; i < src_count; ++i) {
+            unsigned idx1 = i, idx2 = i;
+
+            if (alpha_span > 1){
+               idx1 *= alpha_span;
+               idx2 = idx1 + 1;
+            }
+
+            src_alpha[i] = LLVMBuildShuffleVector(builder,
+                                                  src_alpha[idx1],
+                                                  src_alpha[idx2],
+                                                  LLVMConstVector(shuffles, row_type.length),
+                                                  "");
+         }
+      }
+   }
+}
+
+
+/**
+ * Generates the blend function for unswizzled colour buffers
+ * Also generates the read & write from colour buffer
+ */
+static void
+generate_unswizzled_blend(struct gallivm_state *gallivm,
+ unsigned rt,
+ struct lp_fragment_shader_variant *variant,
+ enum pipe_format out_format,
+ unsigned int num_fs,
+ struct lp_type fs_type,
+ LLVMValueRef* fs_mask,
+ LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][4],
+ LLVMValueRef context_ptr,
+ LLVMValueRef color_ptr,
+ LLVMValueRef stride,
+ unsigned partial_mask,
+ boolean do_branch)
+{
+ const unsigned alpha_channel = 3;
+ const unsigned block_width = LP_RASTER_BLOCK_SIZE;
+ const unsigned block_height = LP_RASTER_BLOCK_SIZE;
+ const unsigned block_size = block_width * block_height;
+ const unsigned lp_integer_vector_width = 128;
+
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS];
+ LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS];
+ LLVMValueRef src_alpha[4 * 4];
+ LLVMValueRef src1_alpha[4 * 4];
+ LLVMValueRef src_mask[4 * 4];
+ LLVMValueRef src[4 * 4];
+ LLVMValueRef src1[4 * 4];
+ LLVMValueRef dst[4 * 4];
+ LLVMValueRef blend_color;
+ LLVMValueRef blend_alpha;
+ LLVMValueRef i32_zero;
+ LLVMValueRef check_mask;
+ LLVMValueRef undef_src_val;
+
+ struct lp_build_mask_context mask_ctx;
+ struct lp_type mask_type;
+ struct lp_type blend_type;
+ struct lp_type row_type;
+ struct lp_type dst_type;
+
+ unsigned char swizzle[TGSI_NUM_CHANNELS];
+ unsigned vector_width;
+ unsigned src_channels = TGSI_NUM_CHANNELS;
+ unsigned dst_channels;
+ unsigned dst_count;
+ unsigned src_count;
+ unsigned i, j;
+
+ const struct util_format_description* out_format_desc = util_format_description(out_format);
+
+ unsigned dst_alignment;
+
+ bool pad_inline = is_arithmetic_format(out_format_desc);
+ bool has_alpha = false;
+ const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&variant->key.blend, 0);
+
+ const boolean is_1d = variant->key.resource_1d;
+ unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
+ LLVMValueRef fpstate = 0;
+
+ /* Get type from output format */
+ lp_blend_type_from_format_desc(out_format_desc, &row_type);
+ lp_mem_type_from_format_desc(out_format_desc, &dst_type);
+
+ /*
+ * Technically this code should go into lp_build_smallfloat_to_float
+ * and lp_build_float_to_smallfloat but due to the
+ * http://llvm.org/bugs/show_bug.cgi?id=6393
+ * llvm reorders the mxcsr intrinsics in a way that breaks the code.
+ * So the ordering is important here and there shouldn't be any
+ * llvm ir instrunctions in this function before
+ * this, otherwise half-float format conversions won't work
+ * (again due to llvm bug #6393).
+ */
+ if (have_smallfloat_format(dst_type, out_format)) {
+ /* We need to make sure that denorms are ok for half float
+ conversions */
+ fpstate = lp_build_fpstate_get(gallivm);
+ lp_build_fpstate_set_denorms_zero(gallivm, FALSE);
+ }
+
+ mask_type = lp_int32_vec4_type();
+ mask_type.length = fs_type.length;
+
+ for (i = num_fs; i < num_fullblock_fs; i++) {
+ fs_mask[i] = lp_build_zero(gallivm, mask_type);
+ }
+
+ /* Do not bother executing code when mask is empty.. */
+ if (do_branch) {
+ check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type));
+
+ for (i = 0; i < num_fullblock_fs; ++i) {
+ check_mask = LLVMBuildOr(builder, check_mask, fs_mask[i], "");
+ }
+
+ lp_build_mask_begin(&mask_ctx, gallivm, mask_type, check_mask);
+ lp_build_mask_check(&mask_ctx);
+ }
+
+ partial_mask |= !variant->opaque;
+ i32_zero = lp_build_const_int32(gallivm, 0);
+
+ undef_src_val = lp_build_undef(gallivm, fs_type);
+
+ row_type.length = fs_type.length;
+ vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width;
+
+ /* Compute correct swizzle and count channels */
+ memset(swizzle, LP_BLD_SWIZZLE_DONTCARE, TGSI_NUM_CHANNELS);
+ dst_channels = 0;
+
+ for (i = 0; i < TGSI_NUM_CHANNELS; ++i) {
+ /* Ensure channel is used */
+ if (out_format_desc->swizzle[i] >= TGSI_NUM_CHANNELS) {
+ continue;
+ }
+
+ /* Ensure not already written to (happens in case with GL_ALPHA) */
+ if (swizzle[out_format_desc->swizzle[i]] < TGSI_NUM_CHANNELS) {
+ continue;
+ }
+
+ /* Ensure we havn't already found all channels */
+ if (dst_channels >= out_format_desc->nr_channels) {
+ continue;
+ }
+
+ swizzle[out_format_desc->swizzle[i]] = i;
+ ++dst_channels;
+
+ if (i == alpha_channel) {
+ has_alpha = true;
+ }
+ }
+
+ if (format_expands_to_float_soa(out_format_desc)) {
+ /*
+ * the code above can't work for layout_other
+ * for srgb it would sort of work but we short-circuit swizzles, etc.
+ * as that is done as part of unpack / pack.
+ */
+ dst_channels = 4; /* HACK: this is fake 4 really but need it due to transpose stuff later */
+ has_alpha = true;
+ swizzle[0] = 0;
+ swizzle[1] = 1;
+ swizzle[2] = 2;
+ swizzle[3] = 3;
+ pad_inline = true; /* HACK: prevent rgbxrgbx->rgbrgbxx conversion later */
+ }
+
+ /* If 3 channels then pad to include alpha for 4 element transpose */
+ if (dst_channels == 3 && !has_alpha) {
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ if (swizzle[i] > TGSI_NUM_CHANNELS)
+ swizzle[i] = 3;
+ }
+ if (out_format_desc->nr_channels == 4) {
+ dst_channels = 4;
+ }
+ }
+
+ /*
+ * Load shader output
+ */
+ for (i = 0; i < num_fullblock_fs; ++i) {
+ /* Always load alpha for use in blending */
+ LLVMValueRef alpha;
+ if (i < num_fs) {
+ alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], "");
+ }
+ else {
+ alpha = undef_src_val;
+ }
+
+ /* Load each channel */
+ for (j = 0; j < dst_channels; ++j) {
+ assert(swizzle[j] < 4);
+ if (i < num_fs) {
+ fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], "");
+ }
+ else {
+ fs_src[i][j] = undef_src_val;
+ }
+ }
+
+ /* If 3 channels then pad to include alpha for 4 element transpose */
+ /*
+ * XXX If we include that here maybe could actually use it instead of
+ * separate alpha for blending?
+ */
+ if (dst_channels == 3 && !has_alpha) {
+ fs_src[i][3] = alpha;
+ }
+
+ /* We split the row_mask and row_alpha as we want 128bit interleave */
+ if (fs_type.length == 8) {
+ src_mask[i*2 + 0] = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels);
+ src_mask[i*2 + 1] = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels);
+
+ src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
+ src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+ } else {
+ src_mask[i] = fs_mask[i];
+ src_alpha[i] = alpha;
+ }
+ }
+ if (dual_source_blend) {
+ /* same as above except different src/dst, skip masks and comments... */
+ for (i = 0; i < num_fullblock_fs; ++i) {
+ LLVMValueRef alpha;
+ if (i < num_fs) {
+ alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], "");
+ }
+ else {
+ alpha = undef_src_val;
+ }
+
+ for (j = 0; j < dst_channels; ++j) {
+ assert(swizzle[j] < 4);
+ if (i < num_fs) {
+ fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], "");
+ }
+ else {
+ fs_src1[i][j] = undef_src_val;
+ }
+ }
+ if (dst_channels == 3 && !has_alpha) {
+ fs_src1[i][3] = alpha;
+ }
+ if (fs_type.length == 8) {
+ src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
+ src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+ } else {
+ src1_alpha[i] = alpha;
+ }
+ }
+ }
+
+ if (util_format_is_pure_integer(out_format)) {
+ /*
+ * In this case fs_type was really ints or uints disguised as floats,
+ * fix that up now.
+ */
+ fs_type.floating = 0;
+ fs_type.sign = dst_type.sign;
+ for (i = 0; i < num_fullblock_fs; ++i) {
+ for (j = 0; j < dst_channels; ++j) {
+ fs_src[i][j] = LLVMBuildBitCast(builder, fs_src[i][j],
+ lp_build_vec_type(gallivm, fs_type), "");
+ }
+ if (dst_channels == 3 && !has_alpha) {
+ fs_src[i][3] = LLVMBuildBitCast(builder, fs_src[i][3],
+ lp_build_vec_type(gallivm, fs_type), "");
+ }
+ }
+ }
+
+ /*
+ * Pixel twiddle from fragment shader order to memory order
+ */
+ src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs,
+ dst_channels, fs_src, src, pad_inline);
+ if (dual_source_blend) {
+ generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels,
+ fs_src1, src1, pad_inline);
+ }
+
+ src_channels = dst_channels < 3 ? dst_channels : 4;
+ if (src_count != num_fullblock_fs * src_channels) {
+ unsigned ds = src_count / (num_fullblock_fs * src_channels);
+ row_type.length /= ds;
+ fs_type.length = row_type.length;
+ }
+
+ blend_type = row_type;
+ mask_type.length = 4;
+
+ /* Convert src to row_type */
+ if (dual_source_blend) {
+ struct lp_type old_row_type = row_type;
+ lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src);
+ src_count = lp_build_conv_auto(gallivm, fs_type, &old_row_type, src1, src_count, src1);
+ }
+ else {
+ src_count = lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src);
+ }
+
+ /* If the rows are not an SSE vector, combine them to become SSE size! */
+ if ((row_type.width * row_type.length) % 128) {
+ unsigned bits = row_type.width * row_type.length;
+ unsigned combined;
+
+ assert(src_count >= (vector_width / bits));
+
+ dst_count = src_count / (vector_width / bits);
+
+ combined = lp_build_concat_n(gallivm, row_type, src, src_count, src, dst_count);
+ if (dual_source_blend) {
+ lp_build_concat_n(gallivm, row_type, src1, src_count, src1, dst_count);
+ }
+
+ row_type.length *= combined;
+ src_count /= combined;
+
+ bits = row_type.width * row_type.length;
+ assert(bits == 128 || bits == 256);
+ }
+
+
+ /*
+ * Blend Colour conversion
+ */
+ blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr);
+ blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
+ blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), "");
+
+ /* Convert */
+ lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1);
+
+ if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ /*
+ * since blending is done with floats, there was no conversion.
+ * However, the rules according to fixed point renderbuffers still
+ * apply, that is we must clamp inputs to 0.0/1.0.
+ * (This would apply to separate alpha conversion too but we currently
+ * force has_alpha to be true.)
+ * TODO: should skip this with "fake" blend, since post-blend conversion
+ * will clamp anyway.
+ * TODO: could also skip this if fragment color clamping is enabled. We
+ * don't support it natively so it gets baked into the shader however, so
+ * can't really tell here.
+ */
+ struct lp_build_context f32_bld;
+ assert(row_type.floating);
+ lp_build_context_init(&f32_bld, gallivm, row_type);
+ for (i = 0; i < src_count; i++) {
+ src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]);
+ }
+ if (dual_source_blend) {
+ for (i = 0; i < src_count; i++) {
+ src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]);
+ }
+ }
+ /* probably can't be different than row_type but better safe than sorry... */
+ lp_build_context_init(&f32_bld, gallivm, blend_type);
+ blend_color = lp_build_clamp(&f32_bld, blend_color, f32_bld.zero, f32_bld.one);
+ }
+
+ /* Extract alpha */
+ blend_alpha = lp_build_extract_broadcast(gallivm, blend_type, row_type, blend_color, lp_build_const_int32(gallivm, 3));
+
+ /* Swizzle to appropriate channels, e.g. from RGBA to BGRA BGRA */
+ pad_inline &= (dst_channels * (block_size / src_count) * row_type.width) != vector_width;
+ if (pad_inline) {
+ /* Use all 4 channels e.g. from RGBA RGBA to RGxx RGxx */
+ blend_color = lp_build_swizzle_aos_n(gallivm, blend_color, swizzle, TGSI_NUM_CHANNELS, row_type.length);
+ } else {
+ /* Only use dst_channels e.g. RGBA RGBA to RG RG xxxx */
+ blend_color = lp_build_swizzle_aos_n(gallivm, blend_color, swizzle, dst_channels, row_type.length);
+ }
+
+ /*
+ * Mask conversion
+ */
+ lp_bld_quad_twiddle(gallivm, mask_type, &src_mask[0], block_height, &src_mask[0]);
+
+ if (src_count < block_height) {
+ lp_build_concat_n(gallivm, mask_type, src_mask, 4, src_mask, src_count);
+ } else if (src_count > block_height) {
+ for (i = src_count; i > 0; --i) {
+ unsigned pixels = block_size / src_count;
+ unsigned idx = i - 1;
+
+ src_mask[idx] = lp_build_extract_range(gallivm, src_mask[(idx * pixels) / 4],
+ (idx * pixels) % 4, pixels);
+ }
+ }
+
+ assert(mask_type.width == 32);
+
+ for (i = 0; i < src_count; ++i) {
+ unsigned pixels = block_size / src_count;
+ unsigned pixel_width = row_type.width * dst_channels;
+
+ if (pixel_width == 24) {
+ mask_type.width = 8;
+ mask_type.length = vector_width / mask_type.width;
+ } else {
+ mask_type.length = pixels;
+ mask_type.width = row_type.width * dst_channels;
+
+ src_mask[i] = LLVMBuildIntCast(builder, src_mask[i], lp_build_int_vec_type(gallivm, mask_type), "");
+
+ mask_type.length *= dst_channels;
+ mask_type.width /= dst_channels;
+ }
+
+ src_mask[i] = LLVMBuildBitCast(builder, src_mask[i], lp_build_int_vec_type(gallivm, mask_type), "");
+ src_mask[i] = lp_build_pad_vector(gallivm, src_mask[i], row_type.length);
+ }
+
+ /*
+ * Alpha conversion
+ */
+ if (!has_alpha) {
+ struct lp_type alpha_type = fs_type;
+ alpha_type.length = 4;
+ convert_alpha(gallivm, row_type, alpha_type,
+ block_size, block_height,
+ src_count, dst_channels,
+ pad_inline, src_alpha);
+ if (dual_source_blend) {
+ convert_alpha(gallivm, row_type, alpha_type,
+ block_size, block_height,
+ src_count, dst_channels,
+ pad_inline, src1_alpha);
+ }
+ }
+
+
+ /*
+ * Load dst from memory
+ */
+ if (src_count < block_height) {
+ dst_count = block_height;
+ } else {
+ dst_count = src_count;
+ }
+
+ dst_type.length *= block_size / dst_count;
+
+ if (format_expands_to_float_soa(out_format_desc)) {
+ /*
+ * we need multiple values at once for the conversion, so can as well
+ * load them vectorized here too instead of concatenating later.
+ * (Still need concatenation later for 8-wide vectors).
+ */
+ dst_count = block_height;
+ dst_type.length = block_width;
+ }
+
+ /*
+ * Compute the alignment of the destination pointer in bytes
+ * We fetch 1-4 pixels, if the format has pot alignment then those fetches
+ * are always aligned by MIN2(16, fetch_width) except for buffers (not
+ * 1d tex but can't distinguish here) so need to stick with per-pixel
+ * alignment in this case.
+ */
+ if (is_1d) {
+ dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
+ }
+ else {
+ dst_alignment = dst_type.length * dst_type.width / 8;
+ }
+ /* Force power-of-two alignment by extracting only the least-significant-bit */
+ dst_alignment = 1 << (ffs(dst_alignment) - 1);
+ /*
+ * Resource base and stride pointers are aligned to 16 bytes, so that's
+ * the maximum alignment we can guarantee
+ */
+ dst_alignment = MIN2(16, dst_alignment);
+
+ if (is_1d) {
+ load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
+ dst, dst_type, dst_count / 4, dst_alignment);
+ for (i = dst_count / 4; i < dst_count; i++) {
+ dst[i] = lp_build_undef(gallivm, dst_type);
+ }
+
+ }
+ else {
+ load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+ dst, dst_type, dst_count, dst_alignment);
+ }
+
+
+ /*
+ * Convert from dst/output format to src/blending format.
+ *
+ * This is necessary as we can only read 1 row from memory at a time,
+ * so the minimum dst_count will ever be at this point is 4.
+ *
+ * With, for example, R8 format you can have all 16 pixels in a 128 bit vector,
+ * this will take the 4 dsts and combine them into 1 src so we can perform blending
+ * on all 16 pixels in that single vector at once.
+ */
+ if (dst_count > src_count) {
+ lp_build_concat_n(gallivm, dst_type, dst, 4, dst, src_count);
+ }
+
+ /*
+ * Blending
+ */
+ /* XXX this is broken for RGB8 formats -
+ * they get expanded from 12 to 16 elements (to include alpha)
+ * by convert_to_blend_type then reduced to 15 instead of 12
+ * by convert_from_blend_type (a simple fix though breaks A8...).
+ * R16G16B16 also crashes differently however something going wrong
+ * inside llvm handling npot vector sizes seemingly.
+ * It seems some cleanup could be done here (like skipping conversion/blend
+ * when not needed).
+ */
+ convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
+
+ /*
+ * FIXME: Really should get logic ops / masks out of generic blend / row
+ * format. Logic ops will definitely not work on the blend float format
+ * used for SRGB here and I think OpenGL expects this to work as expected
+ * (that is incoming values converted to srgb then logic op applied).
+ */
+ for (i = 0; i < src_count; ++i) {
+ dst[i] = lp_build_blend_aos(gallivm,
+ &variant->key.blend,
+ out_format,
+ row_type,
+ rt,
+ src[i],
+ has_alpha ? NULL : src_alpha[i],
+ src1[i],
+ has_alpha ? NULL : src1_alpha[i],
+ dst[i],
+ partial_mask ? src_mask[i] : NULL,
+ blend_color,
+ has_alpha ? NULL : blend_alpha,
+ swizzle,
+ pad_inline ? 4 : dst_channels);
+ }
+
+ convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
+
+ /* Split the blend rows back to memory rows */
+ if (dst_count > src_count) {
+ row_type.length = dst_type.length * (dst_count / src_count);
+
+ if (src_count == 1) {
+ dst[1] = lp_build_extract_range(gallivm, dst[0], row_type.length / 2, row_type.length / 2);
+ dst[0] = lp_build_extract_range(gallivm, dst[0], 0, row_type.length / 2);
+
+ row_type.length /= 2;
+ src_count *= 2;
+ }
+
+ dst[3] = lp_build_extract_range(gallivm, dst[1], row_type.length / 2, row_type.length / 2);
+ dst[2] = lp_build_extract_range(gallivm, dst[1], 0, row_type.length / 2);
+ dst[1] = lp_build_extract_range(gallivm, dst[0], row_type.length / 2, row_type.length / 2);
+ dst[0] = lp_build_extract_range(gallivm, dst[0], 0, row_type.length / 2);
+
+ row_type.length /= 2;
+ src_count *= 2;
+ }
+
+ /*
+ * Store blend result to memory
+ */
+ if (is_1d) {
+ store_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
+ dst, dst_type, dst_count / 4, dst_alignment);
+ }
+ else {
+ store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+ dst, dst_type, dst_count, dst_alignment);
+ }
+
+ if (have_smallfloat_format(dst_type, out_format)) {
+ lp_build_fpstate_set(gallivm, fpstate);
+ }
+
+ if (do_branch) {
+ lp_build_mask_end(&mask_ctx);
+ }
+}
+
+
+/**
+ * Generate the runtime callable function for the whole fragment pipeline.
+ * Note that the function which we generate operates on a block of 16
+ * pixels at at time. The block contains 2x2 quads. Each quad contains
+ * 2x2 pixels.
+ */
+static void
+generate_fragment(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ struct lp_fragment_shader_variant *variant,
+ unsigned partial_mask)
+{
+ struct gallivm_state *gallivm = variant->gallivm;
+ const struct lp_fragment_shader_variant_key *key = &variant->key;
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
+ char func_name[64];
+ struct lp_type fs_type;
+ struct lp_type blend_type;
+ LLVMTypeRef fs_elem_type;
+ LLVMTypeRef blend_vec_type;
+ LLVMTypeRef arg_types[13];
+ LLVMTypeRef func_type;
+ LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
+ LLVMValueRef context_ptr;
+ LLVMValueRef x;
+ LLVMValueRef y;
+ LLVMValueRef a0_ptr;
+ LLVMValueRef dadx_ptr;
+ LLVMValueRef dady_ptr;
+ LLVMValueRef color_ptr_ptr;
+ LLVMValueRef stride_ptr;
+ LLVMValueRef depth_ptr;
+ LLVMValueRef depth_stride;
+ LLVMValueRef mask_input;
+ LLVMValueRef thread_data_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ struct lp_build_sampler_soa *sampler;
+ struct lp_build_interp_soa_context interp;
+ LLVMValueRef fs_mask[16 / 4];
+ LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
+ LLVMValueRef function;
+ LLVMValueRef facing;
+ unsigned num_fs;
+ unsigned i;
+ unsigned chan;
+ unsigned cbuf;
+ boolean cbuf0_write_all;
+ const boolean dual_source_blend = key->blend.rt[0].blend_enable &&
+ util_blend_state_is_dual(&key->blend, 0);
+
+ assert(lp_native_vector_width / 32 >= 4);
+
+ /* Adjust color input interpolation according to flatshade state:
+ */
+ memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]);
+ for (i = 0; i < shader->info.base.num_inputs; i++) {
+ if (inputs[i].interp == LP_INTERP_COLOR) {
+ if (key->flatshade)
+ inputs[i].interp = LP_INTERP_CONSTANT;
+ else
+ inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ }
+ }
+
+ /* check if writes to cbuf[0] are to be copied to all cbufs */
+ cbuf0_write_all =
+ shader->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
+
+ /* TODO: actually pick these based on the fs and color buffer
+ * characteristics. */
+
+ memset(&fs_type, 0, sizeof fs_type);
+ fs_type.floating = TRUE; /* floating point values */
+ fs_type.sign = TRUE; /* values are signed */
+ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
+ fs_type.width = 32; /* 32-bit float */
+ fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
+
+ memset(&blend_type, 0, sizeof blend_type);
+ blend_type.floating = FALSE; /* values are integers */
+ blend_type.sign = FALSE; /* values are unsigned */
+ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */
+ blend_type.width = 8; /* 8-bit ubyte values */
+ blend_type.length = 16; /* 16 elements per vector */
+
+ /*
+ * Generate the function prototype. Any change here must be reflected in
+ * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
+ */
+
+ fs_elem_type = lp_build_elem_type(gallivm, fs_type);
+
+ blend_vec_type = lp_build_vec_type(gallivm, blend_type);
+
+ util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
+ shader->no, variant->no, partial_mask ? "partial" : "whole");
+
+ arg_types[0] = variant->jit_context_ptr_type; /* context */
+ arg_types[1] = int32_type; /* x */
+ arg_types[2] = int32_type; /* y */
+ arg_types[3] = int32_type; /* facing */
+ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */
+ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */
+ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
+ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
+ arg_types[8] = LLVMPointerType(int8_type, 0); /* depth */
+ arg_types[9] = int32_type; /* mask_input */
+ arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */
+ arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */
+ arg_types[12] = int32_type; /* depth_stride */
+
+ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
+ arg_types, Elements(arg_types), 0);
+
+ function = LLVMAddFunction(gallivm->module, func_name, func_type);
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+
+ variant->function[partial_mask] = function;
+
+ /* XXX: need to propagate noalias down into color param now we are
+ * passing a pointer-to-pointer?
+ */
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(function, 0);
+ x = LLVMGetParam(function, 1);
+ y = LLVMGetParam(function, 2);
+ facing = LLVMGetParam(function, 3);
+ a0_ptr = LLVMGetParam(function, 4);
+ dadx_ptr = LLVMGetParam(function, 5);
+ dady_ptr = LLVMGetParam(function, 6);
+ color_ptr_ptr = LLVMGetParam(function, 7);
+ depth_ptr = LLVMGetParam(function, 8);
+ mask_input = LLVMGetParam(function, 9);
+ thread_data_ptr = LLVMGetParam(function, 10);
+ stride_ptr = LLVMGetParam(function, 11);
+ depth_stride = LLVMGetParam(function, 12);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(x, "x");
+ lp_build_name(y, "y");
+ lp_build_name(a0_ptr, "a0");
+ lp_build_name(dadx_ptr, "dadx");
+ lp_build_name(dady_ptr, "dady");
+ lp_build_name(color_ptr_ptr, "color_ptr_ptr");
+ lp_build_name(depth_ptr, "depth");
+ lp_build_name(thread_data_ptr, "thread_data");
+ lp_build_name(mask_input, "mask_input");
+ lp_build_name(stride_ptr, "stride_ptr");
+ lp_build_name(depth_stride, "depth_stride");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
+ builder = gallivm->builder;
+ assert(builder);
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ /* code generated texture sampling */
+ sampler = lp_llvm_sampler_soa_create(key->state);
+
+ num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
+ /* for 1d resources only run "upper half" of stamp */
+ if (key->resource_1d)
+ num_fs /= 2;
+
+ {
+ LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
+ LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
+ LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
+ num_loop, "mask_store");
+ LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+ boolean pixel_center_integer =
+ shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
+
+ /*
+ * The shader input interpolation info is not explicitely baked in the
+ * shader key, but everything it derives from (TGSI, and flatshade) is
+ * already included in the shader key.
+ */
+ lp_build_interp_soa_init(&interp,
+ gallivm,
+ shader->info.base.num_inputs,
+ inputs,
+ pixel_center_integer,
+ builder, fs_type,
+ a0_ptr, dadx_ptr, dady_ptr,
+ x, y);
+
+ for (i = 0; i < num_fs; i++) {
+ LLVMValueRef mask;
+ LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+ LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
+ &indexi, 1, "mask_ptr");
+
+ if (partial_mask) {
+ mask = generate_quad_mask(gallivm, fs_type,
+ i*fs_type.length/4, mask_input);
+ }
+ else {
+ mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+ }
+ LLVMBuildStore(builder, mask, mask_ptr);
+ }
+
+ generate_fs_loop(gallivm,
+ shader, key,
+ builder,
+ fs_type,
+ context_ptr,
+ num_loop,
+ &interp,
+ sampler,
+ mask_store, /* output */
+ color_store,
+ depth_ptr,
+ depth_stride,
+ facing,
+ thread_data_ptr);
+
+ for (i = 0; i < num_fs; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+ LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
+ &indexi, 1, "");
+ fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
+ /* This is fucked up need to reorganize things */
+ for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ ptr = LLVMBuildGEP(builder,
+ color_store[cbuf * !cbuf0_write_all][chan],
+ &indexi, 1, "");
+ fs_out_color[cbuf][chan][i] = ptr;
+ }
+ }
+ if (dual_source_blend) {
+ /* only support one dual source blend target hence always use output 1 */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ ptr = LLVMBuildGEP(builder,
+ color_store[1][chan],
+ &indexi, 1, "");
+ fs_out_color[1][chan][i] = ptr;
+ }
+ }
+ }
+ }
+
+ sampler->destroy(sampler);
+
+ /* Loop over color outputs / color buffers to do blending.
+ */
+ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ if (key->cbuf_format[cbuf] != PIPE_FORMAT_NONE) {
+ LLVMValueRef color_ptr;
+ LLVMValueRef stride;
+ LLVMValueRef index = lp_build_const_int32(gallivm, cbuf);
+
+ boolean do_branch = ((key->depth.enabled
+ || key->stencil[0].enabled
+ || key->alpha.enabled)
+ && !shader->info.base.uses_kill);
+
+ color_ptr = LLVMBuildLoad(builder,
+ LLVMBuildGEP(builder, color_ptr_ptr,
+ &index, 1, ""),
+ "");
+
+ lp_build_name(color_ptr, "color_ptr%d", cbuf);
+
+ stride = LLVMBuildLoad(builder,
+ LLVMBuildGEP(builder, stride_ptr, &index, 1, ""),
+ "");
+
+ generate_unswizzled_blend(gallivm, cbuf, variant,
+ key->cbuf_format[cbuf],
+ num_fs, fs_type, fs_mask, fs_out_color,
+ context_ptr, color_ptr, stride,
+ partial_mask, do_branch);
+ }
+ }
+
+ LLVMBuildRetVoid(builder);
+
+ gallivm_verify_function(gallivm, function);
+}
+
+
+static void
+dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
+{
+ unsigned i;
+
+ debug_printf("fs variant %p:\n", (void *) key);
+
+ if (key->flatshade) {
+ debug_printf("flatshade = 1\n");
+ }
+ for (i = 0; i < key->nr_cbufs; ++i) {
+ debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i]));
+ }
+ if (key->depth.enabled) {
+ debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+ debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
+ debug_printf("depth.writemask = %u\n", key->depth.writemask);
+ }
+
+ for (i = 0; i < 2; ++i) {
+ if (key->stencil[i].enabled) {
+ debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE));
+ debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE));
+ debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE));
+ debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE));
+ debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask);
+ debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask);
+ }
+ }
+
+ if (key->alpha.enabled) {
+ debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
+ }
+
+ if (key->occlusion_count) {
+ debug_printf("occlusion_count = 1\n");
+ }
+
+ if (key->blend.logicop_enable) {
+ debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
+ }
+ else if (key->blend.rt[0].blend_enable) {
+ debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE));
+ debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
+ debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
+ debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE));
+ debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
+ debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
+ }
+ debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
+ if (key->blend.alpha_to_coverage) {
+ debug_printf("blend.alpha_to_coverage is enabled\n");
+ }
+ for (i = 0; i < key->nr_samplers; ++i) {
+ const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state;
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .wrap = %s %s %s\n",
+ util_dump_tex_wrap(sampler->wrap_s, TRUE),
+ util_dump_tex_wrap(sampler->wrap_t, TRUE),
+ util_dump_tex_wrap(sampler->wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ util_dump_tex_filter(sampler->min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ util_dump_tex_mipfilter(sampler->min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ util_dump_tex_filter(sampler->mag_img_filter, TRUE));
+ if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", util_dump_func(sampler->compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", sampler->normalized_coords);
+ debug_printf(" .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
+ debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
+ debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod);
+ debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod);
+ }
+ for (i = 0; i < key->nr_sampler_views; ++i) {
+ const struct lp_static_texture_state *texture = &key->state[i].texture_state;
+ debug_printf("texture[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ util_format_name(texture->format));
+ debug_printf(" .target = %s\n",
+ util_dump_tex_target(texture->target, TRUE));
+ debug_printf(" .level_zero_only = %u\n",
+ texture->level_zero_only);
+ debug_printf(" .pot = %u %u %u\n",
+ texture->pot_width,
+ texture->pot_height,
+ texture->pot_depth);
+ }
+}
+
+
+void
+lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant)
+{
+ debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n",
+ variant->shader->no, variant->no);
+ tgsi_dump(variant->shader->base.tokens, 0);
+ dump_fs_variant_key(&variant->key);
+ debug_printf("variant->opaque = %u\n", variant->opaque);
+ debug_printf("\n");
+}
+
+
+/**
+ * Generate a new fragment shader variant from the shader code and
+ * other state indicated by the key.
+ */
/**
 * Generate a new fragment shader variant from the shader code and
 * other state indicated by the key.
 *
 * Lifecycle: allocate the variant, create its gallivm/LLVM module,
 * emit IR for the needed rasterization modes, compile the module,
 * fetch the JITed function pointers, then discard the IR.
 * Returns NULL on allocation or gallivm-creation failure.
 */
static struct lp_fragment_shader_variant *
generate_variant(struct llvmpipe_context *lp,
                 struct lp_fragment_shader *shader,
                 const struct lp_fragment_shader_variant_key *key)
{
   struct lp_fragment_shader_variant *variant;
   const struct util_format_description *cbuf0_format_desc;
   boolean fullcolormask;
   char module_name[64];

   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
   if(!variant)
      return NULL;

   util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
                 shader->no, shader->variants_created);

   variant->gallivm = gallivm_create(module_name, lp->context);
   if (!variant->gallivm) {
      FREE(variant);
      return NULL;
   }

   variant->shader = shader;
   variant->list_item_global.base = variant;
   variant->list_item_local.base = variant;
   variant->no = shader->variants_created++;

   /* key size varies with the number of samplers/views, hence memcpy of
    * shader->variant_key_size rather than sizeof */
   memcpy(&variant->key, key, shader->variant_key_size);

   /*
    * Determine whether we are touching all channels in the color buffer.
    */
   fullcolormask = FALSE;
   if (key->nr_cbufs == 1) {
      cbuf0_format_desc = util_format_description(key->cbuf_format[0]);
      fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask);
   }

   /* "opaque" = the variant fully overwrites the color buffer with no
    * reads and can never discard fragments; this enables the specialized
    * RAST_WHOLE shader below which skips reading the color buffer. */
   variant->opaque =
         !key->blend.logicop_enable &&
         !key->blend.rt[0].blend_enable &&
         fullcolormask &&
         !key->stencil[0].enabled &&
         !key->alpha.enabled &&
         !key->blend.alpha_to_coverage &&
         !key->depth.enabled &&
         !shader->info.base.uses_kill
      ? TRUE : FALSE;

   /* NOTE(review): zero multiplier for trivial (<=1 token) shaders with no
    * depth/stencil -- presumably so such no-op invocations don't count
    * toward pipeline statistics; confirm against the query code. */
   if ((shader->info.base.num_tokens <= 1) &&
       !key->depth.enabled && !key->stencil[0].enabled) {
      variant->ps_inv_multiplier = 0;
   } else {
      variant->ps_inv_multiplier = 1;
   }

   if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      lp_debug_fs_variant(variant);
   }

   lp_jit_init_types(variant);

   if (variant->jit_function[RAST_EDGE_TEST] == NULL)
      generate_fragment(lp, shader, variant, RAST_EDGE_TEST);

   if (variant->jit_function[RAST_WHOLE] == NULL) {
      if (variant->opaque) {
         /* Specialized shader, which doesn't need to read the color buffer. */
         generate_fragment(lp, shader, variant, RAST_WHOLE);
      }
   }

   /*
    * Compile everything
    */

   gallivm_compile_module(variant->gallivm);

   variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);

   if (variant->function[RAST_EDGE_TEST]) {
      variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func)
            gallivm_jit_function(variant->gallivm,
                                 variant->function[RAST_EDGE_TEST]);
   }

   if (variant->function[RAST_WHOLE]) {
      variant->jit_function[RAST_WHOLE] = (lp_jit_frag_func)
             gallivm_jit_function(variant->gallivm,
                                  variant->function[RAST_WHOLE]);
   } else if (!variant->jit_function[RAST_WHOLE]) {
      /* no specialized whole-block shader: fall back to the edge-test one */
      variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
   }

   /* IR is no longer needed once the machine code has been emitted */
   gallivm_free_ir(variant->gallivm);

   return variant;
}
+
+
/**
 * Create fragment shader CSO: copy the TGSI tokens, gather shader info,
 * register the shader with the draw module and precompute per-input
 * interpolation modes.  Variants are compiled lazily later.
 * Returns NULL on allocation failure.
 */
static void *
llvmpipe_create_fs_state(struct pipe_context *pipe,
                         const struct pipe_shader_state *templ)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_fragment_shader *shader;
   int nr_samplers;
   int nr_sampler_views;
   int i;

   shader = CALLOC_STRUCT(lp_fragment_shader);
   if (!shader)
      return NULL;

   shader->no = fs_no++;
   make_empty_list(&shader->variants);

   /* get/save the summary info for this shader */
   lp_build_tgsi_info(templ->tokens, &shader->info);

   /* we need to keep a local copy of the tokens */
   shader->base.tokens = tgsi_dup_tokens(templ->tokens);

   shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ);
   if (shader->draw_data == NULL) {
      FREE((void *) shader->base.tokens);
      FREE(shader);
      return NULL;
   }

   nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
   nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;

   /* variant key is variable-sized: it ends with one entry per
    * sampler/sampler-view, whichever count is larger */
   shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key,
                                     state[MAX2(nr_samplers, nr_sampler_views)]);

   for (i = 0; i < shader->info.base.num_inputs; i++) {
      shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i];
      shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i];

      /* map TGSI interpolation qualifiers to llvmpipe interp modes */
      switch (shader->info.base.input_interpolate[i]) {
      case TGSI_INTERPOLATE_CONSTANT:
         shader->inputs[i].interp = LP_INTERP_CONSTANT;
         break;
      case TGSI_INTERPOLATE_LINEAR:
         shader->inputs[i].interp = LP_INTERP_LINEAR;
         break;
      case TGSI_INTERPOLATE_PERSPECTIVE:
         shader->inputs[i].interp = LP_INTERP_PERSPECTIVE;
         break;
      case TGSI_INTERPOLATE_COLOR:
         shader->inputs[i].interp = LP_INTERP_COLOR;
         break;
      default:
         assert(0);
         break;
      }

      /* special semantics override the interpolation mode chosen above */
      switch (shader->info.base.input_semantic_name[i]) {
      case TGSI_SEMANTIC_FACE:
         shader->inputs[i].interp = LP_INTERP_FACING;
         break;
      case TGSI_SEMANTIC_POSITION:
         /* Position was already emitted above
          */
         shader->inputs[i].interp = LP_INTERP_POSITION;
         shader->inputs[i].src_index = 0;
         continue;
      }

      /* NOTE(review): +1 because src slot 0 appears reserved for position
       * (see the POSITION case above) -- confirm against the interp setup */
      shader->inputs[i].src_index = i+1;
   }

   if (LP_DEBUG & DEBUG_TGSI) {
      unsigned attrib;
      debug_printf("llvmpipe: Create fragment shader #%u %p:\n",
                   shader->no, (void *) shader);
      tgsi_dump(templ->tokens, 0);
      debug_printf("usage masks:\n");
      for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) {
         unsigned usage_mask = shader->info.base.input_usage_mask[attrib];
         debug_printf("  IN[%u].%s%s%s%s\n",
                      attrib,
                      usage_mask & TGSI_WRITEMASK_X ? "x" : "",
                      usage_mask & TGSI_WRITEMASK_Y ? "y" : "",
                      usage_mask & TGSI_WRITEMASK_Z ? "z" : "",
                      usage_mask & TGSI_WRITEMASK_W ? "w" : "");
      }
      debug_printf("\n");
   }

   return shader;
}
+
+
+static void
+llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->fs == fs)
+ return;
+
+ llvmpipe->fs = (struct lp_fragment_shader *) fs;
+
+ draw_bind_fragment_shader(llvmpipe->draw,
+ (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL));
+
+ llvmpipe->dirty |= LP_NEW_FS;
+}
+
+
+/**
+ * Remove shader variant from two lists: the shader's variant list
+ * and the context's variant list.
+ */
+void
+llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant)
+{
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
+ " #%u v total cached #%u\n",
+ variant->shader->no,
+ variant->no,
+ variant->shader->variants_created,
+ variant->shader->variants_cached,
+ lp->nr_fs_variants);
+ }
+
+ gallivm_destroy(variant->gallivm);
+
+ /* remove from shader's list */
+ remove_from_list(&variant->list_item_local);
+ variant->shader->variants_cached--;
+
+ /* remove from context's list */
+ remove_from_list(&variant->list_item_global);
+ lp->nr_fs_variants--;
+ lp->nr_fs_instrs -= variant->nr_instrs;
+
+ FREE(variant);
+}
+
+
/**
 * Delete fragment shader CSO: waits for outstanding rendering, destroys
 * every cached variant, the draw-module data and the token copy.
 * Must not be called on the currently bound shader.
 */
static void
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_fragment_shader *shader = fs;
   struct lp_fs_variant_list_item *li;

   assert(fs != llvmpipe->fs);

   /*
    * XXX: we need to flush the context until we have some sort of reference
    * counting in fragment shaders as they may still be binned
    * Flushing alone might not sufficient we need to wait on it too.
    */
   llvmpipe_finish(pipe, __FUNCTION__);

   /* Delete all the variants */
   li = first_elem(&shader->variants);
   while(!at_end(&shader->variants, li)) {
      /* grab the successor first: removing the variant frees the node */
      struct lp_fs_variant_list_item *next = next_elem(li);
      llvmpipe_remove_shader_variant(llvmpipe, li->base);
      li = next;
   }

   /* Delete draw module's data */
   draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data);

   assert(shader->variants_cached == 0);
   FREE((void *) shader->base.tokens);
   FREE(shader);
}
+
+
+
/**
 * Set a constant buffer for the given shader stage/slot.  For vertex and
 * geometry stages the (mapped) buffer contents are also handed to the
 * 'draw' module; fragment constants are picked up via the jit context.
 */
static void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
                             uint shader, uint index,
                             struct pipe_constant_buffer *cb)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct pipe_resource *constants = cb ? cb->buffer : NULL;

   assert(shader < PIPE_SHADER_TYPES);
   assert(index < Elements(llvmpipe->constants[shader]));

   /* note: reference counting */
   util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb);

   if (shader == PIPE_SHADER_VERTEX ||
       shader == PIPE_SHADER_GEOMETRY) {
      /* Pass the constants to the 'draw' module */
      const unsigned size = cb ? cb->buffer_size : 0;
      const ubyte *data;

      /* prefer the resource's storage; fall back to the raw user pointer */
      if (constants) {
         data = (ubyte *) llvmpipe_resource_data(constants);
      }
      else if (cb && cb->user_buffer) {
         data = (ubyte *) cb->user_buffer;
      }
      else {
         data = NULL;
      }

      if (data)
         data += cb->buffer_offset;

      draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
                                      index, data, size);
   }

   llvmpipe->dirty |= LP_NEW_CONSTANTS;

   /* NOTE(review): in the user_buffer case this drops a reference on
    * cb->buffer via the local alias -- presumably balancing a reference
    * created on behalf of the caller elsewhere; confirm against the
    * state-tracker's constant-buffer upload path. */
   if (cb && cb->user_buffer) {
      pipe_resource_reference(&constants, NULL);
   }
}
+
+
+/**
+ * Return the blend factor equivalent to a destination alpha of one.
+ */
+static inline unsigned
+force_dst_alpha_one(unsigned factor, boolean clamped_zero)
+{
+ switch(factor) {
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return PIPE_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return PIPE_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ if (clamped_zero)
+ return PIPE_BLENDFACTOR_ZERO;
+ else
+ return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ }
+
+ return factor;
+}
+
+
+/**
+ * We need to generate several variants of the fragment pipeline to match
+ * all the combinations of the contributing state atoms.
+ *
+ * TODO: there is actually no reason to tie this to context state -- the
+ * generated code could be cached globally in the screen.
+ */
+static void
+make_variant_key(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ struct lp_fragment_shader_variant_key *key)
+{
+ unsigned i;
+
+ memset(key, 0, shader->variant_key_size);
+
+ if (lp->framebuffer.zsbuf) {
+ enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format;
+ const struct util_format_description *zsbuf_desc =
+ util_format_description(zsbuf_format);
+
+ if (lp->depth_stencil->depth.enabled &&
+ util_format_has_depth(zsbuf_desc)) {
+ key->zsbuf_format = zsbuf_format;
+ memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ }
+ if (lp->depth_stencil->stencil[0].enabled &&
+ util_format_has_stencil(zsbuf_desc)) {
+ key->zsbuf_format = zsbuf_format;
+ memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil);
+ }
+ if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
+ key->resource_1d = TRUE;
+ }
+ }
+
+ /*
+ * Propagate the depth clamp setting from the rasterizer state.
+ * depth_clip == 0 implies depth clamping is enabled.
+ *
+ * When clip_halfz is enabled, then always clamp the depth values.
+ */
+ if (lp->rasterizer->clip_halfz) {
+ key->depth_clamp = 1;
+ } else {
+ key->depth_clamp = (lp->rasterizer->depth_clip == 0) ? 1 : 0;
+ }
+
+ /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
+ if (!lp->framebuffer.nr_cbufs ||
+ !lp->framebuffer.cbufs[0] ||
+ !util_format_is_pure_integer(lp->framebuffer.cbufs[0]->format)) {
+ key->alpha.enabled = lp->depth_stencil->alpha.enabled;
+ }
+ if(key->alpha.enabled)
+ key->alpha.func = lp->depth_stencil->alpha.func;
+ /* alpha.ref_value is passed in jit_context */
+
+ key->flatshade = lp->rasterizer->flatshade;
+ if (lp->active_occlusion_queries) {
+ key->occlusion_count = TRUE;
+ }
+
+ if (lp->framebuffer.nr_cbufs) {
+ memcpy(&key->blend, lp->blend, sizeof key->blend);
+ }
+
+ key->nr_cbufs = lp->framebuffer.nr_cbufs;
+
+ if (!key->blend.independent_blend_enable) {
+ /* we always need independent blend otherwise the fixups below won't work */
+ for (i = 1; i < key->nr_cbufs; i++) {
+ memcpy(&key->blend.rt[i], &key->blend.rt[0], sizeof(key->blend.rt[0]));
+ }
+ key->blend.independent_blend_enable = 1;
+ }
+
+ for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
+ struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i];
+
+ if (lp->framebuffer.cbufs[i]) {
+ enum pipe_format format = lp->framebuffer.cbufs[i]->format;
+ const struct util_format_description *format_desc;
+
+ key->cbuf_format[i] = format;
+
+ /*
+ * Figure out if this is a 1d resource. Note that OpenGL allows crazy
+ * mixing of 2d textures with height 1 and 1d textures, so make sure
+ * we pick 1d if any cbuf or zsbuf is 1d.
+ */
+ if (llvmpipe_resource_is_1d(lp->framebuffer.cbufs[i]->texture)) {
+ key->resource_1d = TRUE;
+ }
+
+ format_desc = util_format_description(format);
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
+
+ /*
+ * Mask out color channels not present in the color buffer.
+ */
+ blend_rt->colormask &= util_format_colormask(format_desc);
+
+ /*
+ * Disable blend for integer formats.
+ */
+ if (util_format_is_pure_integer(format)) {
+ blend_rt->blend_enable = 0;
+ }
+
+ /*
+ * Our swizzled render tiles always have an alpha channel, but the
+ * linear render target format often does not, so force here the dst
+ * alpha to be one.
+ *
+ * This is not a mere optimization. Wrong results will be produced if
+ * the dst alpha is used, the dst format does not have alpha, and the
+ * previous rendering was not flushed from the swizzled to linear
+ * buffer. For example, NonPowTwo DCT.
+ *
+ * TODO: This should be generalized to all channels for better
+ * performance, but only alpha causes correctness issues.
+ *
+ * Also, force rgb/alpha func/factors match, to make AoS blending
+ * easier.
+ */
+ if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W ||
+ format_desc->swizzle[3] == format_desc->swizzle[0]) {
+ /* Doesn't cover mixed snorm/unorm but can't render to them anyway */
+ boolean clamped_zero = !util_format_is_float(format) &&
+ !util_format_is_snorm(format);
+ blend_rt->rgb_src_factor =
+ force_dst_alpha_one(blend_rt->rgb_src_factor, clamped_zero);
+ blend_rt->rgb_dst_factor =
+ force_dst_alpha_one(blend_rt->rgb_dst_factor, clamped_zero);
+ blend_rt->alpha_func = blend_rt->rgb_func;
+ blend_rt->alpha_src_factor = blend_rt->rgb_src_factor;
+ blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor;
+ }
+ }
+ else {
+ /* no color buffer for this fragment output */
+ key->cbuf_format[i] = PIPE_FORMAT_NONE;
+ blend_rt->colormask = 0x0;
+ blend_rt->blend_enable = 0;
+ }
+ }
+
+ /* This value will be the same for all the variants of a given shader:
+ */
+ key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ for(i = 0; i < key->nr_samplers; ++i) {
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ lp_sampler_static_sampler_state(&key->state[i].sampler_state,
+ lp->samplers[PIPE_SHADER_FRAGMENT][i]);
+ }
+ }
+
+ /*
+ * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
+ * are dx10-style? Can't really have mixed opcodes, at least not
+ * if we want to skip the holes here (without rescanning tgsi).
+ */
+ if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+ key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+ for(i = 0; i < key->nr_sampler_views; ++i) {
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
+ lp_sampler_static_texture_state(&key->state[i].texture_state,
+ lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+ }
+ }
+ }
+ else {
+ key->nr_sampler_views = key->nr_samplers;
+ for(i = 0; i < key->nr_sampler_views; ++i) {
+ if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
+ lp_sampler_static_texture_state(&key->state[i].texture_state,
+ lp->sampler_views[PIPE_SHADER_FRAGMENT][i]);
+ }
+ }
+ }
+}
+
+
+
+/**
+ * Update fragment shader state. This is called just prior to drawing
+ * something when some fragment-related state has changed.
+ */
+void
+llvmpipe_update_fs(struct llvmpipe_context *lp)
+{
+ struct lp_fragment_shader *shader = lp->fs;
+ struct lp_fragment_shader_variant_key key;
+ struct lp_fragment_shader_variant *variant = NULL;
+ struct lp_fs_variant_list_item *li;
+
+ make_variant_key(lp, shader, &key);
+
+ /* Search the variants for one which matches the key */
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) {
+ variant = li->base;
+ break;
+ }
+ li = next_elem(li);
+ }
+
+ if (variant) {
+ /* Move this variant to the head of the list to implement LRU
+ * deletion of shader's when we have too many.
+ */
+ move_to_head(&lp->fs_variants_list, &variant->list_item_global);
+ }
+ else {
+ /* variant not found, create it now */
+ int64_t t0, t1, dt;
+ unsigned i;
+ unsigned variants_to_cull;
+
+ if (0) {
+ debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
+ lp->nr_fs_variants,
+ lp->nr_fs_instrs,
+ lp->nr_fs_variants ? lp->nr_fs_instrs / lp->nr_fs_variants : 0);
+ }
+
+ /* First, check if we've exceeded the max number of shader variants.
+ * If so, free 25% of them (the least recently used ones).
+ */
+ variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0;
+
+ if (variants_to_cull ||
+ lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
+ struct pipe_context *pipe = &lp->pipe;
+
+ /*
+ * XXX: we need to flush the context until we have some sort of
+ * reference counting in fragment shaders as they may still be binned
+ * Flushing alone might not be sufficient; we need to wait on it too.
+ */
+ llvmpipe_finish(pipe, __FUNCTION__);
+
+ /*
+ * We need to re-check lp->nr_fs_variants because an arbitrarily large
+ * number of shader variants (potentially all of them) could be
+ * pending for destruction on flush.
+ */
+
+ for (i = 0; i < variants_to_cull || lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
+ struct lp_fs_variant_list_item *item;
+ if (is_empty_list(&lp->fs_variants_list)) {
+ break;
+ }
+ item = last_elem(&lp->fs_variants_list);
+ assert(item);
+ assert(item->base);
+ llvmpipe_remove_shader_variant(lp, item->base);
+ }
+ }
+
+ /*
+ * Generate the new variant.
+ */
+ t0 = os_time_get();
+ variant = generate_variant(lp, shader, &key);
+ t1 = os_time_get();
+ dt = t1 - t0;
+ LP_COUNT_ADD(llvm_compile_time, dt);
+ LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */
+
+ /* Put the new variant into the list */
+ if (variant) {
+ insert_at_head(&shader->variants, &variant->list_item_local);
+ insert_at_head(&lp->fs_variants_list, &variant->list_item_global);
+ lp->nr_fs_variants++;
+ lp->nr_fs_instrs += variant->nr_instrs;
+ shader->variants_cached++;
+ }
+ }
+
+ /* Bind this variant */
+ lp_setup_set_fs_variant(lp->setup, variant);
+}
+
+
+
+
+
+void
+llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state;
+ llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state;
+ llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state;
+
+ llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
+}
+
+/*
+ * Rasterization is disabled if there is no pixel shader and
+ * both depth and stencil testing are disabled:
+ * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
+ */
+boolean
+llvmpipe_rasterization_disabled(struct llvmpipe_context *lp)
+{
+ boolean null_fs = !lp->fs || lp->fs->info.base.num_tokens <= 1;
+
+ return (null_fs &&
+ !lp->depth_stencil->depth.enabled &&
+ !lp->depth_stencil->stencil[0].enabled);
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h
new file mode 100644
index 000000000..2ddd85188
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -0,0 +1,157 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_STATE_FS_H_
+#define LP_STATE_FS_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */
+#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */
+#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */
+#include "lp_bld_interp.h" /* for struct lp_shader_input */
+
+
+struct tgsi_token;
+struct lp_fragment_shader;
+
+
+/** Indexes into jit_function[] array */
+#define RAST_WHOLE 0
+#define RAST_EDGE_TEST 1
+
+
+struct lp_sampler_static_state
+{
+ /*
+ * These attributes are effectively interleaved for more sane key handling.
+ * However, there might be lots of null space if the amount of samplers and
+ * textures isn't the same.
+ */
+ struct lp_static_sampler_state sampler_state;
+ struct lp_static_texture_state texture_state;
+};
+
+
+struct lp_fragment_shader_variant_key
+{
+ struct pipe_depth_state depth;
+ struct pipe_stencil_state stencil[2];
+ struct pipe_blend_state blend;
+
+ struct {
+ unsigned enabled:1;
+ unsigned func:3;
+ } alpha;
+
+ unsigned nr_cbufs:8;
+ unsigned nr_samplers:8; /* actually derivable from just the shader */
+ unsigned nr_sampler_views:8; /* actually derivable from just the shader */
+ unsigned flatshade:1;
+ unsigned occlusion_count:1;
+ unsigned resource_1d:1;
+ unsigned depth_clamp:1;
+
+ enum pipe_format zsbuf_format;
+ enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS];
+
+ struct lp_sampler_static_state state[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+};
+
+
+/** doubly-linked list item */
+struct lp_fs_variant_list_item
+{
+ struct lp_fragment_shader_variant *base;
+ struct lp_fs_variant_list_item *next, *prev;
+};
+
+
+struct lp_fragment_shader_variant
+{
+ struct lp_fragment_shader_variant_key key;
+
+ boolean opaque;
+ uint8_t ps_inv_multiplier;
+
+ struct gallivm_state *gallivm;
+
+ LLVMTypeRef jit_context_ptr_type;
+ LLVMTypeRef jit_thread_data_ptr_type;
+ LLVMTypeRef jit_linear_context_ptr_type;
+
+ LLVMValueRef function[2];
+
+ lp_jit_frag_func jit_function[2];
+
+ /* Total number of LLVM instructions generated */
+ unsigned nr_instrs;
+
+ struct lp_fs_variant_list_item list_item_global, list_item_local;
+ struct lp_fragment_shader *shader;
+
+ /* For debugging/profiling purposes */
+ unsigned no;
+};
+
+
+/** Subclass of pipe_shader_state */
+struct lp_fragment_shader
+{
+ struct pipe_shader_state base;
+
+ struct lp_tgsi_info info;
+
+ struct lp_fs_variant_list_item variants;
+
+ struct draw_fragment_shader *draw_data;
+
+ /* For debugging/profiling purposes */
+ unsigned variant_key_size;
+ unsigned no;
+ unsigned variants_created;
+ unsigned variants_cached;
+
+ /** Fragment shader input interpolation info */
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
+};
+
+
+void
+lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant);
+
+void
+llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant);
+
+boolean
+llvmpipe_rasterization_disabled(struct llvmpipe_context *lp);
+
+
+#endif /* LP_STATE_FS_H_ */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c
new file mode 100644
index 000000000..7ea7a3906
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+#include "lp_debug.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+
+
+static void *
+llvmpipe_create_gs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct lp_geometry_shader *state;
+
+ state = CALLOC_STRUCT(lp_geometry_shader);
+ if (state == NULL )
+ goto no_state;
+
+ /* debug */
+ if (LP_DEBUG & DEBUG_TGSI) {
+ debug_printf("llvmpipe: Create geometry shader %p:\n", (void *)state);
+ tgsi_dump(templ->tokens, 0);
+ }
+
+ /* copy stream output info */
+ state->no_tokens = !templ->tokens;
+ memcpy(&state->stream_output, &templ->stream_output, sizeof state->stream_output);
+
+ if (templ->tokens) {
+ state->dgs = draw_create_geometry_shader(llvmpipe->draw, templ);
+ if (state->dgs == NULL) {
+ goto no_dgs;
+ }
+ }
+
+ return state;
+
+no_dgs:
+ FREE( state );
+no_state:
+ return NULL;
+}
+
+
+static void
+llvmpipe_bind_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ llvmpipe->gs = (struct lp_geometry_shader *)gs;
+
+ draw_bind_geometry_shader(llvmpipe->draw,
+ (llvmpipe->gs ? llvmpipe->gs->dgs : NULL));
+
+ llvmpipe->dirty |= LP_NEW_GS;
+}
+
+
+static void
+llvmpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ struct lp_geometry_shader *state =
+ (struct lp_geometry_shader *)gs;
+
+ if (!state) {
+ return;
+ }
+
+ draw_delete_geometry_shader(llvmpipe->draw, state->dgs);
+ FREE(state);
+}
+
+
+void
+llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_gs_state = llvmpipe_create_gs_state;
+ llvmpipe->pipe.bind_gs_state = llvmpipe_bind_gs_state;
+ llvmpipe->pipe.delete_gs_state = llvmpipe_delete_gs_state;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
new file mode 100644
index 000000000..94ebf8fff
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -0,0 +1,154 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_setup.h"
+#include "draw/draw_context.h"
+
+struct lp_rast_state {
+ struct pipe_rasterizer_state lp_state;
+ struct pipe_rasterizer_state draw_state;
+};
+
+/* State which might be handled in either the draw module or locally.
+ * This function is used to turn that state off in one of the two
+ * places.
+ */
+static void
+clear_flags(struct pipe_rasterizer_state *rast)
+{
+ rast->light_twoside = 0;
+ rast->offset_tri = 0;
+ rast->offset_line = 0;
+ rast->offset_point = 0;
+ rast->offset_units = 0.0f;
+ rast->offset_scale = 0.0f;
+}
+
+
+
+static void *
+llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rast)
+{
+ boolean need_pipeline;
+
+ /* Partition rasterizer state into what we want the draw module to
+ * handle, and what we'll look after ourselves.
+ */
+ struct lp_rast_state *state = MALLOC_STRUCT(lp_rast_state);
+ if (state == NULL)
+ return NULL;
+
+ memcpy(&state->draw_state, rast, sizeof *rast);
+ memcpy(&state->lp_state, rast, sizeof *rast);
+
+ /* We rely on draw module to do unfilled polygons, AA lines and
+ * points and stipple.
+ *
+ * Over time, reduce this list of conditions, and expand the list
+ * of flags which get cleared in clear_flags().
+ */
+ need_pipeline = (rast->fill_front != PIPE_POLYGON_MODE_FILL ||
+ rast->fill_back != PIPE_POLYGON_MODE_FILL ||
+ rast->point_smooth ||
+ rast->line_smooth ||
+ rast->line_stipple_enable ||
+ rast->poly_stipple_enable);
+
+ /* If not using the pipeline, clear out the flags which we can
+ * handle ourselves. If we *are* using the pipeline, do everything
+ * on the pipeline and clear those flags on our internal copy of
+ * the state.
+ */
+ if (need_pipeline)
+ clear_flags(&state->lp_state);
+ else
+ clear_flags(&state->draw_state);
+
+ return state;
+}
+
+
+
+static void
+llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ const struct lp_rast_state *state =
+ (const struct lp_rast_state *) handle;
+
+ if (state) {
+ llvmpipe->rasterizer = &state->lp_state;
+ draw_set_rasterizer_state(llvmpipe->draw, &state->draw_state, handle);
+
+ /* XXX: just pass lp_state directly to setup.
+ */
+ lp_setup_set_triangle_state( llvmpipe->setup,
+ state->lp_state.cull_face,
+ state->lp_state.front_ccw,
+ state->lp_state.scissor,
+ state->lp_state.half_pixel_center,
+ state->lp_state.bottom_edge_rule);
+ lp_setup_set_flatshade_first( llvmpipe->setup,
+ state->lp_state.flatshade_first);
+ lp_setup_set_line_state( llvmpipe->setup,
+ state->lp_state.line_width);
+ lp_setup_set_point_state( llvmpipe->setup,
+ state->lp_state.point_size,
+ state->lp_state.point_size_per_vertex,
+ state->lp_state.sprite_coord_enable,
+ state->lp_state.sprite_coord_mode);
+ }
+ else {
+ llvmpipe->rasterizer = NULL;
+ draw_set_rasterizer_state(llvmpipe->draw, NULL, handle);
+ }
+
+ llvmpipe->dirty |= LP_NEW_RASTERIZER;
+}
+
+
+static void
+llvmpipe_delete_rasterizer_state(struct pipe_context *pipe,
+ void *rasterizer)
+{
+ FREE( rasterizer );
+}
+
+
+
+void
+llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state;
+ llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state;
+ llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
new file mode 100644
index 000000000..b205f02fd
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -0,0 +1,390 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Brian Paul
+ */
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+
+#include "lp_context.h"
+#include "lp_screen.h"
+#include "lp_state.h"
+#include "lp_debug.h"
+#include "state_tracker/sw_winsys.h"
+
+
+static void *
+llvmpipe_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ struct pipe_sampler_state *state = mem_dup(sampler, sizeof *sampler);
+
+ if (LP_PERF & PERF_NO_MIP_LINEAR) {
+ if (state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ state->min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+ }
+
+ if (LP_PERF & PERF_NO_MIPMAPS)
+ state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+
+ if (LP_PERF & PERF_NO_LINEAR) {
+ state->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ state->min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ }
+
+ return state;
+}
+
+
+static void
+llvmpipe_bind_sampler_states(struct pipe_context *pipe,
+ unsigned shader,
+ unsigned start,
+ unsigned num,
+ void **samplers)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(start + num <= Elements(llvmpipe->samplers[shader]));
+
+ draw_flush(llvmpipe->draw);
+
+ /* set the new samplers */
+ for (i = 0; i < num; i++) {
+ llvmpipe->samplers[shader][start + i] = samplers[i];
+ }
+
+ /* find highest non-null samplers[] entry */
+ {
+ unsigned j = MAX2(llvmpipe->num_samplers[shader], start + num);
+ while (j > 0 && llvmpipe->samplers[shader][j - 1] == NULL)
+ j--;
+ llvmpipe->num_samplers[shader] = j;
+ }
+
+ if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
+ draw_set_samplers(llvmpipe->draw,
+ shader,
+ llvmpipe->samplers[shader],
+ llvmpipe->num_samplers[shader]);
+ }
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER;
+}
+
+
+static void
+llvmpipe_set_sampler_views(struct pipe_context *pipe,
+ unsigned shader,
+ unsigned start,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(start + num <= Elements(llvmpipe->sampler_views[shader]));
+
+ draw_flush(llvmpipe->draw);
+
+ /* set the new sampler views */
+ for (i = 0; i < num; i++) {
+ /* Note: we're using pipe_sampler_view_release() here to work around
+ * a possible crash when the old view belongs to another context that
+ * was already destroyed.
+ */
+ pipe_sampler_view_release(pipe,
+ &llvmpipe->sampler_views[shader][start + i]);
+ pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i],
+ views[i]);
+ }
+
+ /* find highest non-null sampler_views[] entry */
+ {
+ unsigned j = MAX2(llvmpipe->num_sampler_views[shader], start + num);
+ while (j > 0 && llvmpipe->sampler_views[shader][j - 1] == NULL)
+ j--;
+ llvmpipe->num_sampler_views[shader] = j;
+ }
+
+ if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
+ draw_set_sampler_views(llvmpipe->draw,
+ shader,
+ llvmpipe->sampler_views[shader],
+ llvmpipe->num_sampler_views[shader]);
+ }
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+}
+
+
+static struct pipe_sampler_view *
+llvmpipe_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
+{
+ struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
+ /*
+ * XXX we REALLY want to see the correct bind flag here but the OpenGL
+ * state tracker can't guarantee that at least for texture buffer objects.
+ */
+ if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW))
+ debug_printf("Illegal sampler view creation without bind flag\n");
+
+ if (view) {
+ *view = *templ;
+ view->reference.count = 1;
+ view->texture = NULL;
+ pipe_resource_reference(&view->texture, texture);
+ view->context = pipe;
+
+#ifdef DEBUG
+ /*
+ * This is possibly too lenient, but the primary reason is just
+ * to catch state trackers which forget to initialize this, so
+ * it only catches clearly impossible view targets.
+ */
+ if (view->target != texture->target) {
+ if (view->target == PIPE_TEXTURE_1D)
+ assert(texture->target == PIPE_TEXTURE_1D_ARRAY);
+ else if (view->target == PIPE_TEXTURE_1D_ARRAY)
+ assert(texture->target == PIPE_TEXTURE_1D);
+ else if (view->target == PIPE_TEXTURE_2D)
+ assert(texture->target == PIPE_TEXTURE_2D_ARRAY ||
+ texture->target == PIPE_TEXTURE_CUBE ||
+ texture->target == PIPE_TEXTURE_CUBE_ARRAY);
+ else if (view->target == PIPE_TEXTURE_2D_ARRAY)
+ assert(texture->target == PIPE_TEXTURE_2D ||
+ texture->target == PIPE_TEXTURE_CUBE ||
+ texture->target == PIPE_TEXTURE_CUBE_ARRAY);
+ else if (view->target == PIPE_TEXTURE_CUBE)
+ assert(texture->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ texture->target == PIPE_TEXTURE_2D_ARRAY);
+ else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
+ assert(texture->target == PIPE_TEXTURE_CUBE ||
+ texture->target == PIPE_TEXTURE_2D_ARRAY);
+ else
+ assert(0);
+ }
+#endif
+ }
+
+ return view;
+}
+
+
+static void
+llvmpipe_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
+}
+
+
+static void
+llvmpipe_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ FREE( sampler );
+}
+
+
+static void
+prepare_shader_sampling(
+ struct llvmpipe_context *lp,
+ unsigned num,
+ struct pipe_sampler_view **views,
+ unsigned shader_type,
+ struct pipe_resource *mapped_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS])
+{
+
+ unsigned i;
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+ uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
+ const void *addr;
+
+ assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+ if (!num)
+ return;
+
+ for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ if (view) {
+ struct pipe_resource *tex = view->texture;
+ struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex);
+ unsigned width0 = tex->width0;
+ unsigned num_layers = tex->depth0;
+ unsigned first_level = 0;
+ unsigned last_level = 0;
+
+ /* We're referencing the texture's internal data, so save a
+ * reference to it.
+ */
+ pipe_resource_reference(&mapped_tex[i], tex);
+
+ if (!lp_tex->dt) {
+ /* regular texture - setup array of mipmap level offsets */
+ struct pipe_resource *res = view->texture;
+ int j;
+
+ if (llvmpipe_resource_is_texture(res)) {
+ first_level = view->u.tex.first_level;
+ last_level = view->u.tex.last_level;
+ assert(first_level <= last_level);
+ assert(last_level <= res->last_level);
+ addr = lp_tex->tex_data;
+
+ for (j = first_level; j <= last_level; j++) {
+ mip_offsets[j] = lp_tex->mip_offsets[j];
+ row_stride[j] = lp_tex->row_stride[j];
+ img_stride[j] = lp_tex->img_stride[j];
+ }
+ if (view->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->target == PIPE_TEXTURE_2D_ARRAY ||
+ view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1;
+ for (j = first_level; j <= last_level; j++) {
+ mip_offsets[j] += view->u.tex.first_layer *
+ lp_tex->img_stride[j];
+ }
+ if (view->target == PIPE_TEXTURE_CUBE ||
+ view->target == PIPE_TEXTURE_CUBE_ARRAY) {
+ assert(num_layers % 6 == 0);
+ }
+ assert(view->u.tex.first_layer <= view->u.tex.last_layer);
+ assert(view->u.tex.last_layer < res->array_size);
+ }
+ }
+ else {
+ unsigned view_blocksize = util_format_get_blocksize(view->format);
+ addr = lp_tex->data;
+ /* probably don't really need to fill that out */
+ mip_offsets[0] = 0;
+ row_stride[0] = 0;
+ img_stride[0] = 0;
+
+ /* everything specified in number of elements here. */
+ width0 = view->u.buf.last_element - view->u.buf.first_element + 1;
+ addr = (uint8_t *)addr + view->u.buf.first_element *
+ view_blocksize;
+ assert(view->u.buf.first_element <= view->u.buf.last_element);
+ assert(view->u.buf.last_element * view_blocksize < res->width0);
+ }
+ }
+ else {
+ /* display target texture/surface */
+ /*
+ * XXX: Where should this be unmapped?
+ */
+ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
+ struct sw_winsys *winsys = screen->winsys;
+ addr = winsys->displaytarget_map(winsys, lp_tex->dt,
+ PIPE_TRANSFER_READ);
+ row_stride[0] = lp_tex->row_stride[0];
+ img_stride[0] = lp_tex->img_stride[0];
+ mip_offsets[0] = 0;
+ assert(addr);
+ }
+ draw_set_mapped_texture(lp->draw,
+ shader_type,
+ i,
+ width0, tex->height0, num_layers,
+ first_level, last_level,
+ addr,
+ row_stride, img_stride, mip_offsets);
+ }
+ }
+}
+
+
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ */
+void
+llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX,
+ lp->mapped_vs_tex);
+}
+
+void
+llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx)
+{
+ unsigned i;
+ for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) {
+ pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL);
+ }
+}
+
+
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ */
+void
+llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY,
+ lp->mapped_gs_tex);
+}
+
+void
+llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx)
+{
+ unsigned i;
+ for (i = 0; i < Elements(ctx->mapped_gs_tex); i++) {
+ pipe_resource_reference(&ctx->mapped_gs_tex[i], NULL);
+ }
+}
+
+void
+llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
+
+ llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states;
+ llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view;
+ llvmpipe->pipe.set_sampler_views = llvmpipe_set_sampler_views;
+ llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy;
+ llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
new file mode 100644
index 000000000..6397b5196
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -0,0 +1,1021 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/simple_list.h"
+#include "os/os_time.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_type.h"
+
+#include "lp_perf.h"
+#include "lp_debug.h"
+#include "lp_flush.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_state_fs.h"
+#include "lp_state_setup.h"
+
+
+/** Setup shader number (for debugging) */
+static unsigned setup_no = 0;
+
+
+/* currently organized to interpolate full float[4] attributes even
+ * when some elements are unused. Later, can pack vertex data more
+ * closely.
+ */
+
+
+struct lp_setup_args
+{
+ /* Function arguments:
+ */
+ LLVMValueRef v0;
+ LLVMValueRef v1;
+ LLVMValueRef v2;
+ LLVMValueRef facing; /* boolean */
+ LLVMValueRef a0;
+ LLVMValueRef dadx;
+ LLVMValueRef dady;
+
+ /* Derived:
+ */
+ LLVMValueRef x0_center;
+ LLVMValueRef y0_center;
+ LLVMValueRef dy20_ooa;
+ LLVMValueRef dy01_ooa;
+ LLVMValueRef dx20_ooa;
+ LLVMValueRef dx01_ooa;
+ struct lp_build_context bld;
+};
+
+
+static void
+store_coef(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef a0,
+ LLVMValueRef dadx,
+ LLVMValueRef dady)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
+
+ LLVMBuildStore(builder,
+ a0,
+ LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
+
+ LLVMBuildStore(builder,
+ dadx,
+ LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
+
+ LLVMBuildStore(builder,
+ dady,
+ LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
+}
+
+
+
+static void
+emit_constant_coef4(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef vert)
+{
+ store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero);
+}
+
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ * The facing value is read from args->facing (an i32 holding 0 or 1).
+ */
+static void
+emit_facing_coef(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot )
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
+ LLVMValueRef a0_0 = args->facing;
+ LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
+ LLVMValueRef a0, face_val;
+ const unsigned char swizzles[4] = { PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ZERO,
+ PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ZERO };
+ /* Our face val is either 1 or 0 so we do
+ * face = (val * 2) - 1
+ * to make it 1 or -1
+ */
+ face_val =
+ LLVMBuildFAdd(builder,
+ LLVMBuildFMul(builder, a0_0f,
+ lp_build_const_float(gallivm, 2.0),
+ ""),
+ lp_build_const_float(gallivm, -1.0),
+ "facing");
+ face_val = lp_build_broadcast_scalar(&args->bld, face_val);
+ a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles);
+
+ store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero);
+}
+
+
+static LLVMValueRef
+vert_attrib(struct gallivm_state *gallivm,
+ LLVMValueRef vert,
+ int attr,
+ int elem,
+ const char *name)
+{
+ LLVMBuilderRef b = gallivm->builder;
+ LLVMValueRef idx[2];
+ idx[0] = lp_build_const_int32(gallivm, attr);
+ idx[1] = lp_build_const_int32(gallivm, elem);
+ return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name);
+}
+
+
+static void
+lp_twoside(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ const struct lp_setup_variant_key *key,
+ int bcolor_slot,
+ LLVMValueRef attribv[3])
+{
+ LLVMBuilderRef b = gallivm->builder;
+ LLVMValueRef a0_back, a1_back, a2_back;
+ LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot);
+
+ LLVMValueRef facing = args->facing;
+ LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing,
+ lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */
+
+ a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back");
+ a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back");
+ a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back");
+
+ /* Possibly swap the front and back attrib values.
+ *
+ * Prefer select to if so we don't have to worry about phis or
+ * allocas.
+ */
+ attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], "");
+ attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], "");
+ attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], "");
+
+}
+
+static void
+lp_do_offset_tri(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ const struct lp_setup_variant_key *key,
+ LLVMValueRef inv_det,
+ LLVMValueRef dxyz01,
+ LLVMValueRef dxyz20,
+ LLVMValueRef attribv[3])
+{
+ LLVMBuilderRef b = gallivm->builder;
+ struct lp_build_context flt_scalar_bld;
+ struct lp_build_context int_scalar_bld;
+ struct lp_build_context *bld = &args->bld;
+ LLVMValueRef zoffset, mult;
+ LLVMValueRef z0_new, z1_new, z2_new;
+ LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20;
+ LLVMValueRef z0z1, z0z1z2;
+ LLVMValueRef max, max_value, res12;
+ LLVMValueRef shuffles[4];
+ LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
+ LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
+ LLVMValueRef twoi = lp_build_const_int32(gallivm, 2);
+ LLVMValueRef threei = lp_build_const_int32(gallivm, 3);
+
+ /* (res12) = cross(e,f).xy */
+ shuffles[0] = twoi;
+ shuffles[1] = zeroi;
+ shuffles[2] = onei;
+ shuffles[3] = twoi;
+ dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), "");
+
+ shuffles[0] = onei;
+ shuffles[1] = twoi;
+ shuffles[2] = twoi;
+ shuffles[3] = zeroi;
+ dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), "");
+
+ dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20");
+
+ shuffles[0] = twoi;
+ shuffles[1] = threei;
+ shuffles[2] = LLVMGetUndef(shuf_type);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20,
+ LLVMConstVector(shuffles, 4), "");
+
+ res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12");
+
+ /* dzdx = fabsf(res1 * inv_det), dzdy = fabsf(res2 * inv_det) */
+ dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy");
+ dzdxdzdy = lp_build_abs(bld, dzdxdzdy);
+
+ dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, "");
+ dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, "");
+
+ /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */
+ max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, "");
+ max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max");
+
+ mult = LLVMBuildFMul(b, max_value,
+ lp_build_const_float(gallivm, key->pgon_offset_scale), "");
+
+ lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32));
+
+ if (key->floating_point_depth) {
+ /*
+ * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) +
+ * MAX2(dzdx, dzdy) * pgon_offset_scale
+ *
+ * NOTE: Assumes IEEE float32.
+ */
+ LLVMValueRef c23_shifted, exp_mask, bias, exp;
+ LLVMValueRef maxz_value, maxz0z1_value;
+
+ lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32));
+
+ c23_shifted = lp_build_const_int32(gallivm, 23 << 23);
+ exp_mask = lp_build_const_int32(gallivm, 0xff << 23);
+
+ maxz0z1_value = lp_build_max(&flt_scalar_bld,
+ LLVMBuildExtractElement(b, attribv[0], twoi, ""),
+ LLVMBuildExtractElement(b, attribv[1], twoi, ""));
+
+ maxz_value = lp_build_max(&flt_scalar_bld,
+ LLVMBuildExtractElement(b, attribv[2], twoi, ""),
+ maxz0z1_value);
+
+ exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, "");
+ exp = lp_build_and(&int_scalar_bld, exp, exp_mask);
+ exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted);
+ /* Clamping to zero means mrd will be zero for very small numbers,
+ * but specs do not indicate this should be prevented by clamping
+ * mrd to smallest normal number instead. */
+ exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero);
+ exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, "");
+
+ bias = LLVMBuildFMul(b, exp,
+ lp_build_const_float(gallivm, key->pgon_offset_units),
+ "bias");
+
+ zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset");
+ } else {
+ /*
+ * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale
+ */
+ zoffset = LLVMBuildFAdd(b,
+ lp_build_const_float(gallivm, key->pgon_offset_units),
+ mult, "zoffset");
+ }
+
+ if (key->pgon_offset_clamp > 0) {
+ zoffset = lp_build_min(&flt_scalar_bld,
+ lp_build_const_float(gallivm, key->pgon_offset_clamp),
+ zoffset);
+ }
+ else if (key->pgon_offset_clamp < 0) {
+ zoffset = lp_build_max(&flt_scalar_bld,
+ lp_build_const_float(gallivm, key->pgon_offset_clamp),
+ zoffset);
+ }
+
+ /* yuck */
+ shuffles[0] = twoi;
+ shuffles[1] = lp_build_const_int32(gallivm, 6);
+ shuffles[2] = LLVMGetUndef(shuf_type);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), "");
+ shuffles[0] = zeroi;
+ shuffles[1] = onei;
+ shuffles[2] = lp_build_const_int32(gallivm, 6);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), "");
+ zoffset = lp_build_broadcast_scalar(bld, zoffset);
+
+ /* clamp and do offset */
+ /*
+ * FIXME I suspect the clamp (is that even right to always clamp to fixed
+ * 0.0/1.0?) should really be per fragment?
+ */
+ z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one);
+
+ /* insert into args->a0.z, a1.z, a2.z:
+ */
+ z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, "");
+ z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, "");
+ z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, "");
+ attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, "");
+ attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, "");
+ attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, "");
+}
+
+static void
+load_attribute(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ const struct lp_setup_variant_key *key,
+ unsigned vert_attr,
+ LLVMValueRef attribv[3])
+{
+ LLVMBuilderRef b = gallivm->builder;
+ LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr);
+
+ /* Load the vertex data
+ */
+ attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a");
+ attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a");
+ attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a");
+
+
+ /* Potentially modify it according to twoside, etc:
+ */
+ if (key->twoside) {
+ if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
+ lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
+ else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
+ lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
+ }
+}
+
+/*
+ * FIXME: interpolation is always done wrt fb origin (0/0).
+ * However, if some (small) tri is far away from the origin and gradients
+ * are large, this can lead to HUGE errors, since the a0 value calculated
+ * here can get very large (with the actual values inside the triangle way
+ * smaller), leading to complete loss of accuracy. This could be prevented
+ * by using some point inside (or at corner) of the tri as interpolation
+ * origin, or just use barycentric interpolation (which GL suggests and is
+ * what real hw does - you can get the barycentric coordinates from the
+ * edge functions in rasterization in principle (though we skip these
+ * sometimes completely in case of tris covering a block fully,
+ * which obviously wouldn't work)).
+ */
+static void
+emit_coef4( struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef a0,
+ LLVMValueRef a1,
+ LLVMValueRef a2)
+{
+ LLVMBuilderRef b = gallivm->builder;
+ LLVMValueRef attr_0;
+ LLVMValueRef dy20_ooa = args->dy20_ooa;
+ LLVMValueRef dy01_ooa = args->dy01_ooa;
+ LLVMValueRef dx20_ooa = args->dx20_ooa;
+ LLVMValueRef dx01_ooa = args->dx01_ooa;
+ LLVMValueRef x0_center = args->x0_center;
+ LLVMValueRef y0_center = args->y0_center;
+ LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01");
+ LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20");
+
+ /* Calculate dadx (vec4f)
+ */
+ LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa");
+ LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa");
+ LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx");
+
+ /* Calculate dady (vec4f)
+ */
+ LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa");
+ LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa");
+ LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady");
+
+ /* Calculate a0 - the attribute value at the origin
+ */
+ LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
+ LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
+ LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
+ attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
+
+ store_coef(gallivm, args, slot, attr_0, dadx, dady);
+}
+
+
+static void
+emit_linear_coef( struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef attribv[3])
+{
+ /* nothing to do anymore */
+ emit_coef4(gallivm,
+ args, slot,
+ attribv[0],
+ attribv[1],
+ attribv[2]);
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void
+apply_perspective_corr( struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef attribv[3])
+{
+ LLVMBuilderRef b = gallivm->builder;
+
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld,
+ vert_attrib(gallivm, args->v0, 0, 3, "v0_oow"));
+ LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld,
+ vert_attrib(gallivm, args->v1, 0, 3, "v1_oow"));
+ LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld,
+ vert_attrib(gallivm, args->v2, 0, 3, "v2_oow"));
+
+ attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a");
+ attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a");
+ attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a");
+}
+
+
+/**
+ * Applies cylindrical wrapping to vertex attributes if enabled.
+ * Input coordinates must be in [0, 1] range, otherwise results are undefined.
+ *
+ * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags
+ */
+static void
+emit_apply_cyl_wrap(struct gallivm_state *gallivm,
+ struct lp_setup_args *args,
+ uint cyl_wrap,
+ LLVMValueRef attribv[3])
+
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_type type = args->bld.type;
+ LLVMTypeRef float_vec_type = args->bld.vec_type;
+ LLVMValueRef pos_half;
+ LLVMValueRef neg_half;
+ LLVMValueRef cyl_mask;
+ LLVMValueRef offset;
+ LLVMValueRef delta;
+ LLVMValueRef one;
+
+ if (!cyl_wrap)
+ return;
+
+ /* Constants */
+ pos_half = lp_build_const_vec(gallivm, type, +0.5f);
+ neg_half = lp_build_const_vec(gallivm, type, -0.5f);
+ cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4);
+
+ one = lp_build_const_vec(gallivm, type, 1.0f);
+ one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), "");
+ one = LLVMBuildAnd(builder, one, cyl_mask, "");
+
+ /* Edge v0 -> v1 */
+ delta = LLVMBuildFSub(builder, attribv[1], attribv[0], "");
+
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
+
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
+
+ /* Edge v1 -> v2 */
+ delta = LLVMBuildFSub(builder, attribv[2], attribv[1], "");
+
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, "");
+
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
+
+ /* Edge v2 -> v0 */
+ delta = LLVMBuildFSub(builder, attribv[0], attribv[2], "");
+
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, "");
+
+ offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half);
+ offset = LLVMBuildAnd(builder, offset, one, "");
+ offset = LLVMBuildBitCast(builder, offset, float_vec_type, "");
+ attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, "");
+}
+
+
+/**
+ * Compute the a0, dadx and dady interpolation coefficients for each input.
+ */
+static void
+emit_tri_coef( struct gallivm_state *gallivm,
+ const struct lp_setup_variant_key *key,
+ struct lp_setup_args *args)
+{
+ unsigned slot;
+
+ LLVMValueRef attribs[3];
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ switch (key->inputs[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
+ if (key->flatshade_first) {
+ emit_constant_coef4(gallivm, args, slot+1, attribs[0]);
+ }
+ else {
+ emit_constant_coef4(gallivm, args, slot+1, attribs[2]);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
+ emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
+ emit_linear_coef(gallivm, args, slot+1, attribs);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs);
+ emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs);
+ apply_perspective_corr(gallivm, args, slot+1, attribs);
+ emit_linear_coef(gallivm, args, slot+1, attribs);
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0.
+ */
+ break;
+
+ case LP_INTERP_FACING:
+ emit_facing_coef(gallivm, args, slot+1);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+}
+
+
+/* XXX: generic code:
+ */
+static void
+set_noalias(LLVMBuilderRef builder,
+ LLVMValueRef function,
+ const LLVMTypeRef *arg_types,
+ int nr_args)
+{
+ int i;
+ for(i = 0; i < nr_args; ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(function, i),
+ LLVMNoAliasAttribute);
+}
+
+static void
+init_args(struct gallivm_state *gallivm,
+ const struct lp_setup_variant_key *key,
+ struct lp_setup_args *args)
+{
+ LLVMBuilderRef b = gallivm->builder;
+ LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef onef = lp_build_const_float(gallivm, 1.0);
+ LLVMValueRef onei = lp_build_const_int32(gallivm, 1);
+ LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0);
+ LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20;
+ LLVMValueRef e, f, ef, ooa;
+ LLVMValueRef shuffles[4], shuf10;
+ LLVMValueRef attr_pos[3];
+ struct lp_type typef4 = lp_type_float_vec(32, 128);
+ struct lp_build_context bld;
+
+ lp_build_context_init(&bld, gallivm, typef4);
+ args->bld = bld;
+
+ /* The internal position input is in slot zero:
+ */
+ load_attribute(gallivm, args, key, 0, attr_pos);
+
+ pixel_center = lp_build_const_vec(gallivm, typef4,
+ key->pixel_center_half ? 0.5 : 0.0);
+
+ /*
+ * xy are first two elems in v0a/v1a/v2a but just use vec4 arit
+ * also offset_tri uses actually xyz in them
+ */
+ xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" );
+
+ dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01");
+ dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20");
+
+ shuffles[0] = onei;
+ shuffles[1] = zeroi;
+ shuffles[2] = LLVMGetUndef(shuf_type);
+ shuffles[3] = LLVMGetUndef(shuf_type);
+ shuf10 = LLVMConstVector(shuffles, 4);
+
+ dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, "");
+
+ ef = LLVMBuildFMul(b, dxy01, dyx20, "ef");
+ e = LLVMBuildExtractElement(b, ef, zeroi, "");
+ f = LLVMBuildExtractElement(b, ef, onei, "");
+
+ ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa");
+
+ ooa = lp_build_broadcast_scalar(&bld, ooa);
+
+ /* tri offset calc shares a lot of arithmetic, do it here */
+ if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) {
+ lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos);
+ }
+
+ dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
+ dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
+
+ args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
+ args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
+
+ args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi);
+ args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi);
+
+ args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi);
+ args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei);
+
+ emit_linear_coef(gallivm, args, 0, attr_pos);
+}
+
+/**
+ * Generate the runtime callable function for the coefficient calculation.
+ *
+ */
+static struct lp_setup_variant *
+generate_setup_variant(struct lp_setup_variant_key *key,
+ struct llvmpipe_context *lp)
+{
+ struct lp_setup_variant *variant = NULL;
+ struct gallivm_state *gallivm;
+ struct lp_setup_args args;
+ char func_name[64];
+ LLVMTypeRef vec4f_type;
+ LLVMTypeRef func_type;
+ LLVMTypeRef arg_types[7];
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ int64_t t0 = 0, t1;
+
+ if (0)
+ goto fail;
+
+ variant = CALLOC_STRUCT(lp_setup_variant);
+ if (variant == NULL)
+ goto fail;
+
+ variant->no = setup_no++;
+
+ util_snprintf(func_name, sizeof(func_name), "setup_variant_%u",
+ variant->no);
+
+ variant->gallivm = gallivm = gallivm_create(func_name, lp->context);
+ if (!variant->gallivm) {
+ goto fail;
+ }
+
+ builder = gallivm->builder;
+
+ if (LP_DEBUG & DEBUG_COUNTERS) {
+ t0 = os_time_get();
+ }
+
+ memcpy(&variant->key, key, key->size);
+ variant->list_item_global.base = variant;
+
+ /* Currently always deal with full 4-wide vertex attributes from
+ * the vertices.
+ */
+
+ vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4);
+
+ arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */
+ arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */
+ arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */
+ arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */
+ arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */
+ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */
+ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
+
+ func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
+ arg_types, Elements(arg_types), 0);
+
+ variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
+ if (!variant->function)
+ goto fail;
+
+ LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
+
+ args.v0 = LLVMGetParam(variant->function, 0);
+ args.v1 = LLVMGetParam(variant->function, 1);
+ args.v2 = LLVMGetParam(variant->function, 2);
+ args.facing = LLVMGetParam(variant->function, 3);
+ args.a0 = LLVMGetParam(variant->function, 4);
+ args.dadx = LLVMGetParam(variant->function, 5);
+ args.dady = LLVMGetParam(variant->function, 6);
+
+ lp_build_name(args.v0, "in_v0");
+ lp_build_name(args.v1, "in_v1");
+ lp_build_name(args.v2, "in_v2");
+ lp_build_name(args.facing, "in_facing");
+ lp_build_name(args.a0, "out_a0");
+ lp_build_name(args.dadx, "out_dadx");
+ lp_build_name(args.dady, "out_dady");
+
+ /*
+ * Function body
+ */
+ block = LLVMAppendBasicBlockInContext(gallivm->context,
+ variant->function, "entry");
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ set_noalias(builder, variant->function, arg_types, Elements(arg_types));
+ init_args(gallivm, &variant->key, &args);
+ emit_tri_coef(gallivm, &variant->key, &args);
+
+ LLVMBuildRetVoid(builder);
+
+ gallivm_verify_function(gallivm, variant->function);
+
+ gallivm_compile_module(gallivm);
+
+ variant->jit_function = (lp_jit_setup_triangle)
+ gallivm_jit_function(gallivm, variant->function);
+ if (!variant->jit_function)
+ goto fail;
+
+ gallivm_free_ir(variant->gallivm);
+
+ /*
+ * Update timing information:
+ */
+ if (LP_DEBUG & DEBUG_COUNTERS) {
+ t1 = os_time_get();
+ LP_COUNT_ADD(llvm_compile_time, t1 - t0);
+ LP_COUNT_ADD(nr_llvm_compiles, 1);
+ }
+
+ return variant;
+
+fail:
+ if (variant) {
+ if (variant->gallivm) {
+ gallivm_destroy(variant->gallivm);
+ }
+ FREE(variant);
+ }
+
+ return NULL;
+}
+
+
+
+static void
+lp_make_setup_variant_key(struct llvmpipe_context *lp,
+ struct lp_setup_variant_key *key)
+{
+ struct lp_fragment_shader *fs = lp->fs;
+ unsigned i;
+
+ assert(sizeof key->inputs[0] == sizeof(uint));
+
+ key->num_inputs = fs->info.base.num_inputs;
+ key->flatshade_first = lp->rasterizer->flatshade_first;
+ key->pixel_center_half = lp->rasterizer->half_pixel_center;
+ key->twoside = lp->rasterizer->light_twoside;
+ key->size = Offset(struct lp_setup_variant_key,
+ inputs[key->num_inputs]);
+
+ key->color_slot = lp->color_slot [0];
+ key->bcolor_slot = lp->bcolor_slot[0];
+ key->spec_slot = lp->color_slot [1];
+ key->bspec_slot = lp->bcolor_slot[1];
+ assert(key->color_slot == lp->color_slot [0]);
+ assert(key->bcolor_slot == lp->bcolor_slot[0]);
+ assert(key->spec_slot == lp->color_slot [1]);
+ assert(key->bspec_slot == lp->bcolor_slot[1]);
+
+ /*
+ * If depth is floating point, depth bias is calculated with respect
+ * to the primitive's maximum Z value. Retain the original depth bias
+ * value until that stage.
+ */
+ key->floating_point_depth = lp->floating_point_depth;
+
+ if (key->floating_point_depth) {
+ key->pgon_offset_units = (float) lp->rasterizer->offset_units;
+ } else {
+ key->pgon_offset_units =
+ (float) (lp->rasterizer->offset_units * lp->mrd);
+ }
+
+ key->pgon_offset_scale = lp->rasterizer->offset_scale;
+ key->pgon_offset_clamp = lp->rasterizer->offset_clamp;
+ key->pad = 0;
+ memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
+ for (i = 0; i < key->num_inputs; i++) {
+ if (key->inputs[i].interp == LP_INTERP_COLOR) {
+ if (lp->rasterizer->flatshade)
+ key->inputs[i].interp = LP_INTERP_CONSTANT;
+ else
+ key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ }
+ }
+
+}
+
+
+static void
+remove_setup_variant(struct llvmpipe_context *lp,
+ struct lp_setup_variant *variant)
+{
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ debug_printf("llvmpipe: del setup_variant #%u total %u\n",
+ variant->no, lp->nr_setup_variants);
+ }
+
+ if (variant->gallivm) {
+ gallivm_destroy(variant->gallivm);
+ }
+
+ remove_from_list(&variant->list_item_global);
+ lp->nr_setup_variants--;
+ FREE(variant);
+}
+
+
+
+/* When the number of setup variants exceeds a threshold, cull a
+ * fraction (currently a quarter) of them.
+ */
+static void
+cull_setup_variants(struct llvmpipe_context *lp)
+{
+ struct pipe_context *pipe = &lp->pipe;
+ int i;
+
+ /*
+ * XXX: we need to flush the context until we have some sort of reference
+ * counting in fragment shaders as they may still be binned
+ * Flushing alone might not be sufficient we need to wait on it too.
+ */
+ llvmpipe_finish(pipe, __FUNCTION__);
+
+ for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) {
+ struct lp_setup_variant_list_item *item;
+ if (is_empty_list(&lp->setup_variants_list)) {
+ break;
+ }
+ item = last_elem(&lp->setup_variants_list);
+ assert(item);
+ assert(item->base);
+ remove_setup_variant(lp, item->base);
+ }
+}
+
+
+/**
+ * Update fragment/vertex shader linkage state. This is called just
+ * prior to drawing something when some fragment-related state has
+ * changed.
+ */
+void
+llvmpipe_update_setup(struct llvmpipe_context *lp)
+{
+ struct lp_setup_variant_key *key = &lp->setup_variant.key;
+ struct lp_setup_variant *variant = NULL;
+ struct lp_setup_variant_list_item *li;
+
+ lp_make_setup_variant_key(lp, key);
+
+ foreach(li, &lp->setup_variants_list) {
+ if(li->base->key.size == key->size &&
+ memcmp(&li->base->key, key, key->size) == 0) {
+ variant = li->base;
+ break;
+ }
+ }
+
+ if (variant) {
+ move_to_head(&lp->setup_variants_list, &variant->list_item_global);
+ }
+ else {
+ if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
+ cull_setup_variants(lp);
+ }
+
+ variant = generate_setup_variant(key, lp);
+ if (variant) {
+ insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
+ lp->nr_setup_variants++;
+ }
+ }
+
+ lp_setup_set_setup_variant(lp->setup, variant);
+}
+
+void
+lp_delete_setup_variants(struct llvmpipe_context *lp)
+{
+ struct lp_setup_variant_list_item *li;
+ li = first_elem(&lp->setup_variants_list);
+ while(!at_end(&lp->setup_variants_list, li)) {
+ struct lp_setup_variant_list_item *next = next_elem(li);
+ remove_setup_variant(lp, li->base);
+ li = next;
+ }
+}
+
+void
+lp_dump_setup_coef(const struct lp_setup_variant_key *key,
+ const float (*sa0)[4],
+ const float (*sdadx)[4],
+ const float (*sdady)[4])
+{
+ int i, slot;
+
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ float a0 = sa0 [0][i];
+ float dadx = sdadx[0][i];
+ float dady = sdady[0][i];
+
+ debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
+ "xyzw"[i], a0, dadx, dady);
+ }
+
+ for (slot = 0; slot < key->num_inputs; slot++) {
+ unsigned usage_mask = key->inputs[slot].usage_mask;
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ if (usage_mask & (1 << i)) {
+ float a0 = sa0 [1 + slot][i];
+ float dadx = sdadx[1 + slot][i];
+ float dady = sdady[1 + slot][i];
+
+ debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
+ slot, "xyzw"[i], a0, dadx, dady);
+ }
+ }
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h
new file mode 100644
index 000000000..82af8350f
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h
@@ -0,0 +1,85 @@
+#ifndef LP_STATE_SETUP_H
+#define LP_STATE_SETUP_H
+
+#include "lp_bld_interp.h"
+
+
+struct llvmpipe_context;
+struct lp_setup_variant;
+
+/* Intrusive doubly-linked list node; `base` points back at the owning
+ * variant so list walkers can reach it.
+ */
+struct lp_setup_variant_list_item
+{
+ struct lp_setup_variant *base;
+ struct lp_setup_variant_list_item *next, *prev;
+};
+
+
+/* Key identifying a setup variant.  Variants are looked up by memcmp()'ing
+ * the first `size` bytes of this struct, so the packed bitfields below
+ * (including the explicit `pad` bits) should be fully initialized when the
+ * key is built.
+ */
+struct lp_setup_variant_key {
+ unsigned size:16;
+ unsigned num_inputs:8;
+ int color_slot:8;
+
+ int bcolor_slot:8;
+ int spec_slot:8;
+ int bspec_slot:8;
+ unsigned flatshade_first:1;
+ unsigned pixel_center_half:1;
+ unsigned twoside:1;
+ unsigned floating_point_depth:1;
+ unsigned pad:4;
+
+ /* TODO: get those floats out of the key and use a jit_context for setup */
+ float pgon_offset_units;
+ float pgon_offset_scale;
+ float pgon_offset_clamp;
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
+};
+
+
+/* Signature of the JIT-compiled setup routine: computes interpolation
+ * coefficients (a0, dadx, dady) from three vertex positions/attributes.
+ */
+typedef void (*lp_jit_setup_triangle)( const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4],
+ boolean front_facing,
+ float (*a0)[4],
+ float (*dadx)[4],
+ float (*dady)[4] );
+
+
+
+
+/* At this stage, for a given variant key, we create a
+ * draw_vertex_info struct telling the draw module how to format the
+ * vertices, and an llvm-generated function which calculates the
+ * attribute interpolants (a0, dadx, dady) from three of those
+ * vertices.
+ */
+struct lp_setup_variant {
+ struct lp_setup_variant_key key;
+
+ struct lp_setup_variant_list_item list_item_global;
+
+ struct gallivm_state *gallivm;
+
+ /* XXX: this is a pointer to the LLVM IR. Once jit_function is
+ * generated, we never need to use the IR again - need to find a
+ * way to release this data without destroying the generated
+ * assembly.
+ */
+ LLVMValueRef function;
+
+ /* The actual generated setup function:
+ */
+ lp_jit_setup_triangle jit_function;
+
+ /* Sequence number of this variant, for debug identification. */
+ unsigned no;
+};
+
+void lp_delete_setup_variants(struct llvmpipe_context *lp);
+
+void
+lp_dump_setup_coef( const struct lp_setup_variant_key *key,
+ const float (*sa0)[4],
+ const float (*sdadx)[4],
+ const float (*sdady)[4]);
+
+#endif
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c
new file mode 100644
index 000000000..2af04cdf1
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c
@@ -0,0 +1,93 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+/**
+ * Create a stream-output (transform feedback) target wrapping `buffer`.
+ * Allocates a draw_so_target, takes a reference on the buffer, and returns
+ * the embedded pipe_stream_output_target (refcount starts at 1).
+ * Returns NULL on allocation failure.
+ */
+static struct pipe_stream_output_target *
+llvmpipe_create_so_target(struct pipe_context *pipe,
+ struct pipe_resource *buffer,
+ unsigned buffer_offset,
+ unsigned buffer_size)
+{
+ struct draw_so_target *t;
+
+ t = CALLOC_STRUCT(draw_so_target);
+ if (!t)
+ return NULL;
+
+ t->target.context = pipe;
+ t->target.reference.count = 1;
+ pipe_resource_reference(&t->target.buffer, buffer);
+ t->target.buffer_offset = buffer_offset;
+ t->target.buffer_size = buffer_size;
+ return &t->target;
+}
+
+/**
+ * Destroy a stream-output target: drop the buffer reference, free the
+ * wrapper struct.
+ */
+static void
+llvmpipe_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *target)
+{
+ pipe_resource_reference(&target->buffer, NULL);
+ FREE(target);
+}
+
+/**
+ * Bind the set of stream-output targets.  An offset of (unsigned)-1 means
+ * "append": keep the target's current internal_offset; any other value
+ * resets the internal offset to the requested position.  Targets beyond
+ * num_targets that were previously bound are unreferenced.
+ */
+static void
+llvmpipe_set_so_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ const unsigned *offsets)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ /* unsigned, matching num_targets/num_so_targets: avoids signed/unsigned
+ comparison issues in the loop conditions below. */
+ unsigned i;
+ for (i = 0; i < num_targets; i++) {
+ const boolean append = (offsets[i] == (unsigned)-1);
+ pipe_so_target_reference((struct pipe_stream_output_target **)&llvmpipe->so_targets[i], targets[i]);
+ /* If we're not appending then lets set the internal
+ offset to what was requested */
+ if (!append && llvmpipe->so_targets[i]) {
+ llvmpipe->so_targets[i]->internal_offset = offsets[i];
+ }
+ }
+
+ /* Release any previously-bound targets past the new count. */
+ for (; i < llvmpipe->num_so_targets; i++) {
+ pipe_so_target_reference((struct pipe_stream_output_target **)&llvmpipe->so_targets[i], NULL);
+ }
+ llvmpipe->num_so_targets = num_targets;
+}
+
+/**
+ * Plug the stream-output entry points into the pipe_context vtable.
+ */
+void
+llvmpipe_init_so_funcs(struct llvmpipe_context *pipe)
+{
+ pipe->pipe.create_stream_output_target = llvmpipe_create_so_target;
+ pipe->pipe.stream_output_target_destroy = llvmpipe_so_target_destroy;
+ pipe->pipe.set_stream_output_targets = llvmpipe_set_so_targets;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c
new file mode 100644
index 000000000..c879ba975
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -0,0 +1,91 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
+#include "util/u_surface.h"
+#include "lp_context.h"
+#include "lp_scene.h"
+#include "lp_state.h"
+#include "lp_setup.h"
+
+#include "draw/draw_context.h"
+
+#include "util/u_format.h"
+
+
+/**
+ * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer.
+ *
+ * Only acts when the new state differs from the current one.  Note the
+ * depth format is captured from the incoming `fb` BEFORE the copy, since
+ * PERF_NO_DEPTH may null out the copied zsbuf; the draw module is still
+ * told the real depth format.
+ */
+void
+llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+
+ boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb);
+
+ assert(fb->width <= LP_MAX_WIDTH);
+ assert(fb->height <= LP_MAX_HEIGHT);
+
+ if (changed) {
+ /*
+ * If no depth buffer is bound, send the utility function the default
+ * format for no bound depth (PIPE_FORMAT_NONE).
+ */
+ enum pipe_format depth_format = fb->zsbuf ?
+ fb->zsbuf->format : PIPE_FORMAT_NONE;
+ const struct util_format_description *depth_desc =
+ util_format_description(depth_format);
+
+ util_copy_framebuffer_state(&lp->framebuffer, fb);
+
+ /* Perf debug mode: pretend there is no depth buffer at all. */
+ if (LP_PERF & PERF_NO_DEPTH) {
+ pipe_surface_reference(&lp->framebuffer.zsbuf, NULL);
+ }
+
+ /*
+ * Calculate the floating point depth sense and Minimum Resolvable Depth
+ * value for the llvmpipe module. This is separate from the draw module.
+ */
+ lp->floating_point_depth =
+ (util_get_depth_format_type(depth_desc) == UTIL_FORMAT_TYPE_FLOAT);
+
+ lp->mrd = util_get_depth_format_mrd(depth_desc);
+
+ /* Tell the draw module how deep the Z/depth buffer is. */
+ draw_set_zs_format(lp->draw, depth_format);
+
+ lp_setup_bind_framebuffer( lp->setup, &lp->framebuffer );
+
+ lp->dirty |= LP_NEW_FRAMEBUFFER;
+ }
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c
new file mode 100644
index 000000000..1e93fd867
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -0,0 +1,117 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+
+
+#include "lp_context.h"
+#include "lp_state.h"
+
+#include "draw/draw_context.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+
+/**
+ * Create a vertex-elements CSO: a heap copy of `count` pipe_vertex_element
+ * descriptors.  Returns NULL if allocation fails; caller owns the result
+ * (freed by llvmpipe_delete_vertex_elements_state).
+ */
+static void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
+{
+ struct lp_velems_state *velems;
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
+
+/**
+ * Bind a vertex-elements CSO (may be NULL to unbind).  Marks vertex state
+ * dirty and forwards the layout to the draw module when non-NULL.
+ */
+static void
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
+
+ llvmpipe->velems = lp_velems;
+
+ llvmpipe->dirty |= LP_NEW_VERTEX;
+
+ if (velems)
+ draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
+}
+
+/** Release a vertex-elements CSO created above. */
+static void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
+}
+
+/**
+ * Bind `count` vertex buffers starting at `start_slot`.  Updates the
+ * context's cached buffer array/count via the util helper, flags vertex
+ * state dirty, and mirrors the binding into the draw module.
+ */
+static void
+llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned start_slot, unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ util_set_vertex_buffers_count(llvmpipe->vertex_buffer,
+ &llvmpipe->num_vertex_buffers,
+ buffers, start_slot, count);
+
+ llvmpipe->dirty |= LP_NEW_VERTEX;
+
+ draw_set_vertex_buffers(llvmpipe->draw, start_slot, count, buffers);
+}
+
+
+/**
+ * Set (or clear, when ib is NULL) the index buffer by value-copying it
+ * into the context.  No reference counting is done here — NOTE(review):
+ * presumably the caller keeps the resource alive; confirm against callers.
+ */
+static void
+llvmpipe_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (ib)
+ memcpy(&llvmpipe->index_buffer, ib, sizeof(llvmpipe->index_buffer));
+ else
+ memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer));
+}
+
+/**
+ * Plug the vertex-state entry points into the pipe_context vtable.
+ */
+void
+llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state;
+ llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state;
+ llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state;
+
+ llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
+ llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c
new file mode 100644
index 000000000..826ee5b72
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_state.h"
+
+
+/**
+ * Create a vertex shader CSO by delegating to the draw module.
+ * Optionally dumps the TGSI when DEBUG_TGSI is enabled.
+ * Returns NULL if the draw module fails to create the shader.
+ */
+static void *
+llvmpipe_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct draw_vertex_shader *vs;
+
+ vs = draw_create_vertex_shader(llvmpipe->draw, templ);
+ if (vs == NULL) {
+ return NULL;
+ }
+
+ if (LP_DEBUG & DEBUG_TGSI) {
+ debug_printf("llvmpipe: Create vertex shader %p:\n", (void *) vs);
+ tgsi_dump(templ->tokens, 0);
+ }
+
+ return vs;
+}
+
+
+/**
+ * Bind a vertex shader.  Early-outs when re-binding the current shader;
+ * otherwise forwards to the draw module and flags VS state dirty.
+ */
+static void
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct draw_vertex_shader *vs = (struct draw_vertex_shader *)_vs;
+
+ if (llvmpipe->vs == vs)
+ return;
+
+ draw_bind_vertex_shader(llvmpipe->draw, vs);
+
+ llvmpipe->vs = vs;
+
+ llvmpipe->dirty |= LP_NEW_VS;
+}
+
+
+/** Destroy a vertex shader CSO via the draw module. */
+static void
+llvmpipe_delete_vs_state(struct pipe_context *pipe, void *_vs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct draw_vertex_shader *vs = (struct draw_vertex_shader *)_vs;
+
+ draw_delete_vertex_shader(llvmpipe->draw, vs);
+}
+
+
+
+/**
+ * Plug the vertex-shader entry points into the pipe_context vtable.
+ */
+void
+llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state;
+ llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state;
+ llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
new file mode 100644
index 000000000..96f8ed82c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -0,0 +1,229 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_rect.h"
+#include "util/u_surface.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_limits.h"
+#include "lp_surface.h"
+#include "lp_texture.h"
+#include "lp_query.h"
+
+
+/**
+ * Copy a region between two resources.  Both resources are first flushed
+ * out of any pending rendering (dest for write access, source read-only)
+ * so the CPU-side util copy sees up-to-date contents.
+ */
+static void
+lp_resource_copy(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ llvmpipe_flush_resource(pipe,
+ dst, dst_level,
+ FALSE, /* read_only */
+ TRUE, /* cpu_access */
+ FALSE, /* do_not_block */
+ "blit dest");
+
+ llvmpipe_flush_resource(pipe,
+ src, src_level,
+ TRUE, /* read_only */
+ TRUE, /* cpu_access */
+ FALSE, /* do_not_block */
+ "blit src");
+
+ util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+ src, src_level, src_box);
+}
+
+
+/**
+ * Generic blit entry point.  Honors the render condition, rejects MSAA
+ * color resolves (unimplemented), tries a plain copy_region first, and
+ * otherwise runs the util_blitter path: save all state the blitter will
+ * clobber, blit, and let the blitter restore it.
+ */
+static void lp_blit(struct pipe_context *pipe,
+ const struct pipe_blit_info *blit_info)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+ struct pipe_blit_info info = *blit_info;
+
+ if (blit_info->render_condition_enable && !llvmpipe_check_render_cond(lp))
+ return;
+
+ /* Multisample color resolve is not implemented in llvmpipe. */
+ if (info.src.resource->nr_samples > 1 &&
+ info.dst.resource->nr_samples <= 1 &&
+ !util_format_is_depth_or_stencil(info.src.resource->format) &&
+ !util_format_is_pure_integer(info.src.resource->format)) {
+ debug_printf("llvmpipe: color resolve unimplemented\n");
+ return;
+ }
+
+ /* Cheap path: a straight region copy when formats/boxes allow. */
+ if (util_try_blit_via_copy_region(pipe, &info)) {
+ return; /* done */
+ }
+
+ if (!util_blitter_is_blit_supported(lp->blitter, &info)) {
+ debug_printf("llvmpipe: blit unsupported %s -> %s\n",
+ util_format_short_name(info.src.resource->format),
+ util_format_short_name(info.dst.resource->format));
+ return;
+ }
+
+ /* XXX turn off occlusion and streamout queries */
+
+ /* Save every piece of context state the blitter will overwrite. */
+ util_blitter_save_vertex_buffer_slot(lp->blitter, lp->vertex_buffer);
+ util_blitter_save_vertex_elements(lp->blitter, (void*)lp->velems);
+ util_blitter_save_vertex_shader(lp->blitter, (void*)lp->vs);
+ util_blitter_save_geometry_shader(lp->blitter, (void*)lp->gs);
+ util_blitter_save_so_targets(lp->blitter, lp->num_so_targets,
+ (struct pipe_stream_output_target**)lp->so_targets);
+ util_blitter_save_rasterizer(lp->blitter, (void*)lp->rasterizer);
+ util_blitter_save_viewport(lp->blitter, &lp->viewports[0]);
+ util_blitter_save_scissor(lp->blitter, &lp->scissors[0]);
+ util_blitter_save_fragment_shader(lp->blitter, lp->fs);
+ util_blitter_save_blend(lp->blitter, (void*)lp->blend);
+ util_blitter_save_depth_stencil_alpha(lp->blitter, (void*)lp->depth_stencil);
+ util_blitter_save_stencil_ref(lp->blitter, &lp->stencil_ref);
+ /*util_blitter_save_sample_mask(sp->blitter, lp->sample_mask);*/
+ util_blitter_save_framebuffer(lp->blitter, &lp->framebuffer);
+ util_blitter_save_fragment_sampler_states(lp->blitter,
+ lp->num_samplers[PIPE_SHADER_FRAGMENT],
+ (void**)lp->samplers[PIPE_SHADER_FRAGMENT]);
+ util_blitter_save_fragment_sampler_views(lp->blitter,
+ lp->num_sampler_views[PIPE_SHADER_FRAGMENT],
+ lp->sampler_views[PIPE_SHADER_FRAGMENT]);
+ util_blitter_save_render_condition(lp->blitter, lp->render_cond_query,
+ lp->render_cond_cond, lp->render_cond_mode);
+ util_blitter_blit(lp->blitter, &info);
+}
+
+
+/* Intentional no-op: llvmpipe has nothing to do for flush_resource. */
+static void
+lp_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+{
+}
+
+
+/**
+ * Create a pipe_surface view of a resource.  Textures take level/layer
+ * info from the template (dimensions minified to the chosen level);
+ * buffers instead encode an element range, with width set to the element
+ * count.  Warns if the resource lacks a renderable bind flag.
+ * Returns NULL on allocation failure.
+ */
+static struct pipe_surface *
+llvmpipe_create_surface(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *surf_tmpl)
+{
+ struct pipe_surface *ps;
+
+ if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET)))
+ debug_printf("Illegal surface creation without bind flag\n");
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (ps) {
+ pipe_reference_init(&ps->reference, 1);
+ pipe_resource_reference(&ps->texture, pt);
+ ps->context = pipe;
+ ps->format = surf_tmpl->format;
+ if (llvmpipe_resource_is_texture(pt)) {
+ assert(surf_tmpl->u.tex.level <= pt->last_level);
+ assert(surf_tmpl->u.tex.first_layer <= surf_tmpl->u.tex.last_layer);
+ ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+ ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+ ps->u.tex.level = surf_tmpl->u.tex.level;
+ ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+ ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+ }
+ else {
+ /* setting width as number of elements should get us correct renderbuffer width */
+ ps->width = surf_tmpl->u.buf.last_element - surf_tmpl->u.buf.first_element + 1;
+ ps->height = pt->height0;
+ ps->u.buf.first_element = surf_tmpl->u.buf.first_element;
+ ps->u.buf.last_element = surf_tmpl->u.buf.last_element;
+ assert(ps->u.buf.first_element <= ps->u.buf.last_element);
+ assert(util_format_get_blocksize(surf_tmpl->format) *
+ (ps->u.buf.last_element + 1) <= pt->width0);
+ }
+ }
+ return ps;
+}
+
+
+/**
+ * Destroy a surface view: release the texture reference and free the
+ * surface struct.
+ */
+static void
+llvmpipe_surface_destroy(struct pipe_context *pipe,
+ struct pipe_surface *surf)
+{
+ /* Effectively do the texture_update work here - if texture images
+ * needed post-processing to put them into hardware layout, this is
+ * where it would happen. For llvmpipe, nothing to do.
+ */
+ assert(surf->texture);
+ pipe_resource_reference(&surf->texture, NULL);
+ FREE(surf);
+}
+
+
+/**
+ * Clear a color render target region, honoring the render condition,
+ * via the generic util_clear_render_target helper.
+ */
+static void
+llvmpipe_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const union pipe_color_union *color,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (!llvmpipe_check_render_cond(llvmpipe))
+ return;
+
+ util_clear_render_target(pipe, dst, color,
+ dstx, dsty, width, height);
+}
+
+
+/**
+ * Clear a depth/stencil region (clear_flags selects depth, stencil, or
+ * both), honoring the render condition, via util_clear_depth_stencil.
+ */
+static void
+llvmpipe_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (!llvmpipe_check_render_cond(llvmpipe))
+ return;
+
+ util_clear_depth_stencil(pipe, dst, clear_flags,
+ depth, stencil,
+ dstx, dsty, width, height);
+}
+
+
+/**
+ * Plug the surface/clear/blit entry points into the pipe_context vtable.
+ */
+void
+llvmpipe_init_surface_functions(struct llvmpipe_context *lp)
+{
+ lp->pipe.clear_render_target = llvmpipe_clear_render_target;
+ lp->pipe.clear_depth_stencil = llvmpipe_clear_depth_stencil;
+ lp->pipe.create_surface = llvmpipe_create_surface;
+ lp->pipe.surface_destroy = llvmpipe_surface_destroy;
+ /* These two are not actually functions dealing with surfaces */
+ lp->pipe.resource_copy_region = lp_resource_copy;
+ lp->pipe.blit = lp_blit;
+ lp->pipe.flush_resource = lp_flush_resource;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h
new file mode 100644
index 000000000..b50dc21f4
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+
+#ifndef LP_SURFACE_H
+#define LP_SURFACE_H
+
+
+struct llvmpipe_context;
+
+
+extern void
+llvmpipe_init_surface_functions(struct llvmpipe_context *lp);
+
+
+#endif /* LP_SURFACE_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h
new file mode 100644
index 000000000..e1b51c9c9
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h
@@ -0,0 +1,140 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Shared testing code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_TEST_H
+#define LP_TEST_H
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include "gallivm/lp_bld.h"
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_dump.h"
+
+#include "gallivm/lp_bld_type.h"
+
+
+#define LP_TEST_NUM_SAMPLES 32
+
+
+void
+write_tsv_header(FILE *fp);
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+ unsigned long n);
+
+boolean
+test_single(unsigned verbose, FILE *fp);
+
+boolean
+test_all(unsigned verbose, FILE *fp);
+
+
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
+/* Read the x86 time-stamp counter: RDTSC returns the low 32 bits in EAX
+ * and the high 32 bits in EDX, combined here into one 64-bit value. */
+static inline uint64_t
+rdtsc(void)
+{
+ uint32_t hi, lo;
+ __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+#else
+
+#define rdtsc() 0
+
+#endif
+
+
+
+float
+random_float(void);
+
+
+void
+dump_type(FILE *fp, struct lp_type type);
+
+
+double
+read_elem(struct lp_type type, const void *src, unsigned index);
+
+
+void
+write_elem(struct lp_type type, void *dst, unsigned index, double src);
+
+
+void
+random_elem(struct lp_type type, void *dst, unsigned index);
+
+
+void
+read_vec(struct lp_type type, const void *src, double *dst);
+
+
+void
+write_vec(struct lp_type type, void *dst, const double *src);
+
+
+void
+random_vec(struct lp_type type, void *dst);
+
+
+boolean
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);
+
+
+boolean
+compare_vec(struct lp_type type, const void *res, const void *ref);
+
+
+void
+dump_vec(FILE *fp, struct lp_type type, const void *src);
+
+
+#endif /* !LP_TEST_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c
new file mode 100644
index 000000000..290c523f0
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c
@@ -0,0 +1,484 @@
+/**************************************************************************
+ *
+ * Copyright 2011 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util/u_pointer.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_arit.h"
+
+#include "lp_test.h"
+
+
/*
 * Emit the column header line of the TSV results file for the
 * arithmetic tests.
 */
void
write_tsv_header(FILE *fp)
{
   fputs("result\t"
         "format\n", fp);
   fflush(fp);
}
+
+
+typedef void (*unary_func_t)(float *out, const float *in);
+
+
+/**
+ * Describe a test case of one unary function.
+ */
+struct unary_test_t
+{
+ /*
+ * Test name -- name of the mathematical function under test.
+ */
+
+ const char *name;
+
+ LLVMValueRef
+ (*builder)(struct lp_build_context *bld, LLVMValueRef a);
+
+ /*
+ * Reference (pure-C) function.
+ */
+ float
+ (*ref)(float a);
+
+ /*
+ * Test values.
+ */
+ const float *values;
+ unsigned num_values;
+
+ /*
+ * Required precision in bits.
+ */
+ double precision;
+};
+
+
/* Pure-C reference for lp_build_negate: arithmetic negation. */
static float
negf(float value)
{
   const float negated = -value;
   return negated;
}
+
+
/*
 * Pure-C reference for lp_build_sgn: -1, 0 or +1 matching the sign of
 * the argument.  NaN compares false on both tests and yields 0.
 */
static float
sgnf(float v)
{
   return (v > 0.0f) ? 1.0f
        : (v < 0.0f) ? -1.0f
        : 0.0f;
}
+
+
/*
 * Inputs for the exp2/exp (and neg/sgn) tests: the infinities and NaN,
 * zero, tiny offsets around zero, values straddling 1.0, and large
 * +-60 exponents.
 */
const float exp2_values[] = {
   -INFINITY,
   -60,
   -4,
   -2,
   -1,
   -1e-007,
   0,
   1e-007,
   0.01,
   0.1,
   0.9,
   0.99,
   1,
   2,
   4,
   60,
   INFINITY,
   NAN
};


/*
 * Inputs for the log2/log tests: out-of-domain values (-inf, 0),
 * values straddling 1.0, and large magnitudes up to +infinity and NaN.
 */
const float log2_values[] = {
#if 0
   /*
    * Smallest denormalized number; meant just for experimentation, but not
    * validation.
    */
   1.4012984643248171e-45,
#endif
   -INFINITY,
   0,
   1e-007,
   0.1,
   0.5,
   0.99,
   1,
   1.01,
   1.1,
   1.9,
   1.99,
   2,
   4,
   100000,
   1e+018,
   INFINITY,
   NAN
};
+
+
/*
 * Pure-C reference for lp_build_rcp: reciprocal, computed in double
 * precision and truncated to float on return.
 */
static float
rcpf(float x)
{
   const double inverse = 1.0 / x;
   return inverse;
}
+
+
/*
 * Inputs for the rcp test: signed zeros, unit values, tiny and huge
 * magnitudes, a denormal, and (under C99) the infinities.
 */
const float rcp_values[] = {
   -0.0, 0.0,
   -1.0, 1.0,
   -1e-007, 1e-007,
   -4.0, 4.0,
   -1e+035, -100000,
   100000, 1e+035,
   5.88e-39f, // denormal
#if (__STDC_VERSION__ >= 199901L)
   INFINITY, -INFINITY,
#endif
};
+
+
+static float rsqrtf(float x)
+{
+ return 1.0/(float)sqrt(x);
+}
+
+
/*
 * Inputs for the rsqrt test (non-negative domain only).
 */
const float rsqrt_values[] = {
   // http://msdn.microsoft.com/en-us/library/windows/desktop/bb147346.aspx
   0.0, // must yield infinity
   1.0, // must yield 1.0
   1e-007, 4.0,
   100000, 1e+035,
   5.88e-39f, // denormal
#if (__STDC_VERSION__ >= 199901L)
   INFINITY,
#endif
};
+
+
/*
 * Inputs for the sin/cos tests: multiples of pi/4 covering slightly
 * more than one period, plus the infinities and NaN.
 * NOTE(review): -4*M_PI/4 appears twice -- looks like a copy/paste
 * duplicate; harmless, but confirm one entry wasn't meant to differ.
 */
const float sincos_values[] = {
   -INFINITY,
   -5*M_PI/4,
   -4*M_PI/4,
   -4*M_PI/4,
   -3*M_PI/4,
   -2*M_PI/4,
   -1*M_PI/4,
   1*M_PI/4,
   2*M_PI/4,
   3*M_PI/4,
   4*M_PI/4,
   5*M_PI/4,
   INFINITY,
   NAN
};

/*
 * Inputs for round/trunc/floor/ceil: half-way and near-half-way cases,
 * denormals, values too large for a 32-bit int, and FLT_MAX/EPSILON
 * boundaries.
 */
const float round_values[] = {
   -10.0, -1, 0.0, 12.0,
   -1.49, -0.25, 1.25, 2.51,
   -0.99, -0.01, 0.01, 0.99,
   1.401298464324817e-45f, // smallest denormal
   -1.401298464324817e-45f,
   1.62981451e-08f,
   -1.62981451e-08f,
   1.62981451e15f, // large number not representable as 32bit int
   -1.62981451e15f,
   FLT_EPSILON,
   -FLT_EPSILON,
   1.0f - 0.5f*FLT_EPSILON,
   -1.0f + FLT_EPSILON,
   FLT_MAX,
   -FLT_MAX
};
+
+static float fractf(float x)
+{
+ x -= floorf(x);
+ if (x >= 1.0f) {
+ // clamp to the largest number smaller than one
+ x = 1.0f - 0.5f*FLT_EPSILON;
+ }
+ return x;
+}
+
+
/*
 * Inputs for the fract test: signed zeros and units, denormals across
 * the denormal range, and FLT_EPSILON/FLT_MAX boundaries.
 */
const float fract_values[] = {
   // http://en.wikipedia.org/wiki/IEEE_754-1985#Examples
   0.0f,
   -0.0f,
   1.0f,
   -1.0f,
   0.5f,
   -0.5f,
   1.401298464324817e-45f, // smallest denormal
   -1.401298464324817e-45f,
   5.88e-39f, // middle denormal
   1.18e-38f, // largest denormal
   -1.18e-38f,
   -1.62981451e-08f,
   FLT_EPSILON,
   -FLT_EPSILON,
   1.0f - 0.5f*FLT_EPSILON,
   -1.0f + FLT_EPSILON,
   FLT_MAX,
   -FLT_MAX
};
+
+
+/*
+ * Unary test cases.
+ */
+
/*
 * Unary test cases: the gallivm builder under test, its pure-C
 * reference, the input table, and the required precision in mantissa
 * bits (24 = exact for float for the rounding ops).
 */
static const struct unary_test_t
unary_tests[] = {
   {"neg", &lp_build_negate, &negf, exp2_values, Elements(exp2_values), 20.0 },
   {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values), 20.0 },
   {"log2", &lp_build_log2_safe, &log2f, log2_values, Elements(log2_values), 20.0 },
   {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values), 18.0 },
   {"log", &lp_build_log_safe, &logf, log2_values, Elements(log2_values), 20.0 },
   {"rcp", &lp_build_rcp, &rcpf, rcp_values, Elements(rcp_values), 20.0 },
   {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values), 20.0 },
   {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values), 20.0 },
   {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values), 20.0 },
   {"sgn", &lp_build_sgn, &sgnf, exp2_values, Elements(exp2_values), 20.0 },
   {"round", &lp_build_round, &roundf, round_values, Elements(round_values), 24.0 },
   {"trunc", &lp_build_trunc, &truncf, round_values, Elements(round_values), 24.0 },
   {"floor", &lp_build_floor, &floorf, round_values, Elements(round_values), 24.0 },
   {"ceil", &lp_build_ceil, &ceilf, round_values, Elements(round_values), 24.0 },
   {"fract", &lp_build_fract_safe, &fractf, fract_values, Elements(fract_values), 24.0 },
};
+
+
+/*
+ * Build LLVM function that exercises the unary operator builder.
+ */
/*
 * Build an LLVM function that applies the unary operator under test to
 * one native-width vector of floats:
 *
 *    void <test-name>(float4/8 *out, const float4/8 *in)
 */
static LLVMValueRef
build_unary_test_func(struct gallivm_state *gallivm,
                      const struct unary_test_t *test)
{
   struct lp_type type = lp_type_float_vec(32, lp_native_vector_width);
   LLVMContextRef context = gallivm->context;
   LLVMModuleRef module = gallivm->module;
   LLVMTypeRef vf32t = lp_build_vec_type(gallivm, type);
   LLVMTypeRef args[2] = { LLVMPointerType(vf32t, 0), LLVMPointerType(vf32t, 0) };
   LLVMValueRef func = LLVMAddFunction(module, test->name,
                                       LLVMFunctionType(LLVMVoidTypeInContext(context),
                                                        args, Elements(args), 0));
   LLVMValueRef arg0 = LLVMGetParam(func, 0);
   LLVMValueRef arg1 = LLVMGetParam(func, 1);
   LLVMBuilderRef builder = gallivm->builder;
   LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry");
   LLVMValueRef ret;

   struct lp_build_context bld;

   lp_build_context_init(&bld, gallivm, type);

   LLVMSetFunctionCallConv(func, LLVMCCallConv);

   LLVMPositionBuilderAtEnd(builder, block);

   /* Load the input vector through the second argument... */
   arg1 = LLVMBuildLoad(builder, arg1, "");

   /* ...emit the operator under test... */
   ret = test->builder(&bld, arg1);

   /* ...and store the result through the first argument. */
   LLVMBuildStore(builder, ret, arg0);

   LLVMBuildRetVoid(builder);

   gallivm_verify_function(gallivm, func);

   return func;
}
+
+
+/*
+ * Flush denorms to zero.
+ */
/*
 * Flush denorms to zero, mirroring the FTZ/DAZ mode the JITed code
 * runs under on x86/SSE, so that reference results compare equal.
 */
static float
flush_denorm_to_zero(float val)
{
   /*
    * If we have a denorm manually set it to (+-)0.
    * This is because the reference may or may not do the right thing
    * otherwise because we want the result according to treating all
    * denormals as zero (FTZ/DAZ). Not using fpclassify because
    * a) some compilers are stuck at c89 (msvc)
    * b) not sure it reliably works with non-standard ftz/daz mode
    * And, right now we only disable denorms with jited code on x86/sse
    * (albeit this should be classified as a bug) so to get results which
    * match we must only flush them to zero here in that case too.
    */
   union fi fi_val;

   fi_val.f = val;

#if defined(PIPE_ARCH_SSE)
   if (util_cpu_caps.has_sse) {
      /* Biased exponent of zero ==> denormal or zero: keep only the
       * sign bit (and the zeroed exponent field). */
      if ((fi_val.ui & 0x7f800000) == 0) {
         fi_val.ui &= 0xff800000;
      }
   }
#endif

   return fi_val.f;
}
+
+/*
+ * Test one LLVM unary arithmetic builder function.
+ */
/*
 * Test one LLVM unary arithmetic builder against its pure-C reference
 * over the case's whole value table, one native vector at a time,
 * requiring at least test->precision matching mantissa bits.
 */
static boolean
test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test)
{
   struct gallivm_state *gallivm;
   LLVMValueRef test_func;
   unary_func_t test_func_jit;
   boolean success = TRUE;
   int i, j;
   int length = lp_native_vector_width / 32;
   float *in, *out;

   /* NOTE(review): align_malloc results are unchecked -- the test would
    * crash rather than fail cleanly on OOM; confirm that's acceptable. */
   in = align_malloc(length * 4, length * 4);
   out = align_malloc(length * 4, length * 4);

   /* random NaNs or 0s could wreak havoc */
   for (i = 0; i < length; i++) {
      in[i] = 1.0;
   }

   gallivm = gallivm_create("test_module", LLVMGetGlobalContext());

   test_func = build_unary_test_func(gallivm, test);

   gallivm_compile_module(gallivm);

   test_func_jit = (unary_func_t) gallivm_jit_function(gallivm, test_func);

   /* The IR is no longer needed once JIT-compiled. */
   gallivm_free_ir(gallivm);

   /* Walk the value table in vector-sized batches (last may be short). */
   for (j = 0; j < (test->num_values + length - 1) / length; j++) {
      int num_vals = ((j + 1) * length <= test->num_values) ? length :
                     test->num_values % length;

      for (i = 0; i < num_vals; ++i) {
         in[i] = test->values[i+j*length];
      }

      test_func_jit(out, in);
      for (i = 0; i < num_vals; ++i) {
         float testval, ref;
         double error, precision;
         bool pass;

         /* Match the JIT's flush-to-zero behavior in the reference. */
         testval = flush_denorm_to_zero(in[i]);
         ref = flush_denorm_to_zero(test->ref(testval));

         /* Same-signed infinities count as exact matches. */
         if (util_inf_sign(ref) && util_inf_sign(out[i]) == util_inf_sign(ref)) {
            error = 0;
         } else {
            error = fabs(out[i] - ref);
         }
         /* Relative error expressed as bits of matching mantissa. */
         precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG;

         pass = precision >= test->precision;

         /* NaN references are not validated. */
         if (isnan(ref)) {
            continue;
         }

         if (!pass || verbose) {
            printf("%s(%.9g): ref = %.9g, out = %.9g, precision = %f bits, %s\n",
                   test->name, in[i], ref, out[i], precision,
                   pass ? "PASS" : "FAIL");
            fflush(stdout);
         }

         if (!pass) {
            success = FALSE;
         }
      }
   }

   gallivm_destroy(gallivm);

   align_free(in);
   align_free(out);

   return success;
}
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ boolean success = TRUE;
+ int i;
+
+ for (i = 0; i < Elements(unary_tests); ++i) {
+ if (!test_unary(verbose, fp, &unary_tests[i])) {
+ success = FALSE;
+ }
+ }
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+ unsigned long n)
+{
+ /*
+ * Not randomly generated test cases, so test all.
+ */
+
+ return test_all(verbose, fp);
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ return TRUE;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
new file mode 100644
index 000000000..37420b024
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -0,0 +1,737 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for blend LLVM IR generation
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Blend computation code derived from code written by
+ * @author Brian Paul <brian@vmware.com>
+ */
+
+#include "util/u_memory.h"
+
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_bld_blend.h"
+#include "lp_test.h"
+
+
+typedef void (*blend_test_ptr_t)(const void *src, const void *src1,
+ const void *dst, const void *con, void *res);
+
+
/*
 * Emit the TSV column header: one column per field recorded by
 * write_tsv_row() for each blend-state combination.
 */
void
write_tsv_header(FILE *fp)
{
   fputs("result\t"
         "cycles_per_channel\t"
         "type\t"
         "sep_func\t"
         "sep_src_factor\t"
         "sep_dst_factor\t"
         "rgb_func\t"
         "rgb_src_factor\t"
         "rgb_dst_factor\t"
         "alpha_func\t"
         "alpha_src_factor\t"
         "alpha_dst_factor\n",
         fp);
   fflush(fp);
}
+
+
/*
 * Append one TSV result row: pass/fail, cycles per channel, the vector
 * type, whether the alpha path differs from the rgb path, and the six
 * blend funcs/factors.
 */
static void
write_tsv_row(FILE *fp,
              const struct pipe_blend_state *blend,
              struct lp_type type,
              double cycles,
              boolean success)
{
   fprintf(fp, "%s\t", success ? "pass" : "fail");

   fprintf(fp, "%.1f\t", cycles / type.length);

   /* Type encoded as e.g. "f32x4" or "u8nx16". */
   fprintf(fp, "%s%u%sx%u\t",
           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
           type.width,
           type.norm ? "n" : "",
           type.length);

   /* "sep_*" columns: true when alpha settings differ from rgb. */
   fprintf(fp,
           "%s\t%s\t%s\t",
           blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
           blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
           blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");

   fprintf(fp,
           "%s\t%s\t%s\t%s\t%s\t%s\n",
           util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
           util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
           util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
           util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
           util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
           util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));

   fflush(fp);
}
+
+
/*
 * Print a one-line human-readable description of the blend state and
 * vector type (used for verbose output and failure reports).
 */
static void
dump_blend_type(FILE *fp,
                const struct pipe_blend_state *blend,
                struct lp_type type)
{
   fprintf(fp, " type=%s%u%sx%u",
           type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
           type.width,
           type.norm ? "n" : "",
           type.length);

   fprintf(fp,
           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
           "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
           "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
           "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
           "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
           "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
           "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));

   fprintf(fp, " ...\n");
   fflush(fp);
}
+
+
+static LLVMValueRef
+add_blend_test(struct gallivm_state *gallivm,
+ const struct pipe_blend_state *blend,
+ struct lp_type type)
+{
+ LLVMModuleRef module = gallivm->module;
+ LLVMContextRef context = gallivm->context;
+ LLVMTypeRef vec_type;
+ LLVMTypeRef args[5];
+ LLVMValueRef func;
+ LLVMValueRef src_ptr;
+ LLVMValueRef src1_ptr;
+ LLVMValueRef dst_ptr;
+ LLVMValueRef const_ptr;
+ LLVMValueRef res_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ const unsigned rt = 0;
+ const unsigned char swizzle[4] = { 0, 1, 2, 3 };
+ LLVMValueRef src;
+ LLVMValueRef src1;
+ LLVMValueRef dst;
+ LLVMValueRef con;
+ LLVMValueRef res;
+
+ vec_type = lp_build_vec_type(gallivm, type);
+
+ args[4] = args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
+ func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 5, 0));
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+ src_ptr = LLVMGetParam(func, 0);
+ src1_ptr = LLVMGetParam(func, 1);
+ dst_ptr = LLVMGetParam(func, 2);
+ const_ptr = LLVMGetParam(func, 3);
+ res_ptr = LLVMGetParam(func, 4);
+
+ block = LLVMAppendBasicBlockInContext(context, func, "entry");
+ builder = gallivm->builder;
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ src = LLVMBuildLoad(builder, src_ptr, "src");
+ src1 = LLVMBuildLoad(builder, src1_ptr, "src1");
+ dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+ con = LLVMBuildLoad(builder, const_ptr, "const");
+
+ res = lp_build_blend_aos(gallivm, blend, format, type, rt, src, NULL,
+ src1, NULL, dst, NULL, con, NULL, swizzle, 4);
+
+ lp_build_name(res, "res");
+
+ LLVMBuildStore(builder, res, res_ptr);
+
+ LLVMBuildRetVoid(builder);;
+
+ gallivm_verify_function(gallivm, func);
+
+ return func;
+}
+
+
/*
 * Compute one term (source or destination, selected by the factors the
 * caller passes) of the blend equation for a single RGBA quadruple.
 *
 * @param rgb_factor    PIPE_BLENDFACTOR_* applied to R, G, B
 * @param alpha_factor  PIPE_BLENDFACTOR_* applied to A
 * @param factor  the color this term scales (src color for the source
 *                term, dst color for the destination term)
 * @param src     fragment color
 * @param src1    second fragment color (dual-source blending)
 * @param dst     framebuffer color
 * @param con     constant blend color
 * @param term    output RGBA term
 */
static void
compute_blend_ref_term(unsigned rgb_factor,
                       unsigned alpha_factor,
                       const double *factor,
                       const double *src,
                       const double *src1,
                       const double *dst,
                       const double *con,
                       double *term)
{
   double temp;

   /*
    * Compute RGB term
    */
   switch (rgb_factor) {
   case PIPE_BLENDFACTOR_ONE:
      term[0] = factor[0]; /* R */
      term[1] = factor[1]; /* G */
      term[2] = factor[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
      term[0] = factor[0] * src[0]; /* R */
      term[1] = factor[1] * src[1]; /* G */
      term[2] = factor[2] * src[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      term[0] = factor[0] * src[3]; /* R */
      term[1] = factor[1] * src[3]; /* G */
      term[2] = factor[2] * src[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
      term[0] = factor[0] * dst[0]; /* R */
      term[1] = factor[1] * dst[1]; /* G */
      term[2] = factor[2] * dst[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_DST_ALPHA:
      term[0] = factor[0] * dst[3]; /* R */
      term[1] = factor[1] * dst[3]; /* G */
      term[2] = factor[2] * dst[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      /* min(As, 1 - Ad), applied to RGB only. */
      temp = MIN2(src[3], 1.0f - dst[3]);
      term[0] = factor[0] * temp; /* R */
      term[1] = factor[1] * temp; /* G */
      term[2] = factor[2] * temp; /* B */
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
      term[0] = factor[0] * con[0]; /* R */
      term[1] = factor[1] * con[1]; /* G */
      term[2] = factor[2] * con[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      term[0] = factor[0] * con[3]; /* R */
      term[1] = factor[1] * con[3]; /* G */
      term[2] = factor[2] * con[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
      term[0] = factor[0] * src1[0]; /* R */
      term[1] = factor[1] * src1[1]; /* G */
      term[2] = factor[2] * src1[2]; /* B */
      break;
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      term[0] = factor[0] * src1[3]; /* R */
      term[1] = factor[1] * src1[3]; /* G */
      term[2] = factor[2] * src1[3]; /* B */
      break;
   case PIPE_BLENDFACTOR_ZERO:
      term[0] = 0.0f; /* R */
      term[1] = 0.0f; /* G */
      term[2] = 0.0f; /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
      term[0] = factor[0] * (1.0f - src[0]); /* R */
      term[1] = factor[1] * (1.0f - src[1]); /* G */
      term[2] = factor[2] * (1.0f - src[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      term[0] = factor[0] * (1.0f - src[3]); /* R */
      term[1] = factor[1] * (1.0f - src[3]); /* G */
      term[2] = factor[2] * (1.0f - src[3]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      term[0] = factor[0] * (1.0f - dst[3]); /* R */
      term[1] = factor[1] * (1.0f - dst[3]); /* G */
      term[2] = factor[2] * (1.0f - dst[3]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
      term[0] = factor[0] * (1.0f - dst[0]); /* R */
      term[1] = factor[1] * (1.0f - dst[1]); /* G */
      term[2] = factor[2] * (1.0f - dst[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
      term[0] = factor[0] * (1.0f - con[0]); /* R */
      term[1] = factor[1] * (1.0f - con[1]); /* G */
      term[2] = factor[2] * (1.0f - con[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      term[0] = factor[0] * (1.0f - con[3]); /* R */
      term[1] = factor[1] * (1.0f - con[3]); /* G */
      term[2] = factor[2] * (1.0f - con[3]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      term[0] = factor[0] * (1.0f - src1[0]); /* R */
      term[1] = factor[1] * (1.0f - src1[1]); /* G */
      term[2] = factor[2] * (1.0f - src1[2]); /* B */
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      term[0] = factor[0] * (1.0f - src1[3]); /* R */
      term[1] = factor[1] * (1.0f - src1[3]); /* G */
      term[2] = factor[2] * (1.0f - src1[3]); /* B */
      break;
   default:
      assert(0);
   }

   /*
    * Compute A term.  COLOR and ALPHA variants of a factor collapse to
    * the same alpha computation.
    */
   switch (alpha_factor) {
   case PIPE_BLENDFACTOR_ONE:
      term[3] = factor[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_COLOR:
   case PIPE_BLENDFACTOR_SRC_ALPHA:
      term[3] = factor[3] * src[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_DST_COLOR:
   case PIPE_BLENDFACTOR_DST_ALPHA:
      term[3] = factor[3] * dst[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
      /* Saturate factor is defined as 1.0 for alpha. */
      term[3] = src[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_CONST_COLOR:
   case PIPE_BLENDFACTOR_CONST_ALPHA:
      term[3] = factor[3] * con[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_SRC1_COLOR:
   case PIPE_BLENDFACTOR_SRC1_ALPHA:
      term[3] = factor[3] * src1[3]; /* A */
      break;
   case PIPE_BLENDFACTOR_ZERO:
      term[3] = 0.0f; /* A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
      term[3] = factor[3] * (1.0f - src[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_DST_COLOR:
   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
      term[3] = factor[3] * (1.0f - dst[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
      term[3] = factor[3] * (1.0f - con[3]); /* A */
      break;
   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
      term[3] = factor[3] * (1.0f - src1[3]); /* A */
      break;
   default:
      assert(0);
   }
}
+
+
/*
 * Pure-C reference blend: compute src and dst terms and combine them
 * with the state's rgb and alpha equations.  All colors are RGBA
 * quadruples of doubles.
 */
static void
compute_blend_ref(const struct pipe_blend_state *blend,
                  const double *src,
                  const double *src1,
                  const double *dst,
                  const double *con,
                  double *res)
{
   double src_term[4];
   double dst_term[4];

   /* Source term scales src; destination term scales dst. */
   compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
                          src, src, src1, dst, con, src_term);
   compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
                          dst, src, src1, dst, con, dst_term);

   /*
    * Combine RGB terms
    */
   switch (blend->rt[0].rgb_func) {
   case PIPE_BLEND_ADD:
      res[0] = src_term[0] + dst_term[0]; /* R */
      res[1] = src_term[1] + dst_term[1]; /* G */
      res[2] = src_term[2] + dst_term[2]; /* B */
      break;
   case PIPE_BLEND_SUBTRACT:
      res[0] = src_term[0] - dst_term[0]; /* R */
      res[1] = src_term[1] - dst_term[1]; /* G */
      res[2] = src_term[2] - dst_term[2]; /* B */
      break;
   case PIPE_BLEND_REVERSE_SUBTRACT:
      res[0] = dst_term[0] - src_term[0]; /* R */
      res[1] = dst_term[1] - src_term[1]; /* G */
      res[2] = dst_term[2] - src_term[2]; /* B */
      break;
   case PIPE_BLEND_MIN:
      res[0] = MIN2(src_term[0], dst_term[0]); /* R */
      res[1] = MIN2(src_term[1], dst_term[1]); /* G */
      res[2] = MIN2(src_term[2], dst_term[2]); /* B */
      break;
   case PIPE_BLEND_MAX:
      res[0] = MAX2(src_term[0], dst_term[0]); /* R */
      res[1] = MAX2(src_term[1], dst_term[1]); /* G */
      res[2] = MAX2(src_term[2], dst_term[2]); /* B */
      break;
   default:
      assert(0);
   }

   /*
    * Combine A terms
    */
   switch (blend->rt[0].alpha_func) {
   case PIPE_BLEND_ADD:
      res[3] = src_term[3] + dst_term[3]; /* A */
      break;
   case PIPE_BLEND_SUBTRACT:
      res[3] = src_term[3] - dst_term[3]; /* A */
      break;
   case PIPE_BLEND_REVERSE_SUBTRACT:
      res[3] = dst_term[3] - src_term[3]; /* A */
      break;
   case PIPE_BLEND_MIN:
      res[3] = MIN2(src_term[3], dst_term[3]); /* A */
      break;
   case PIPE_BLEND_MAX:
      res[3] = MAX2(src_term[3], dst_term[3]); /* A */
      break;
   default:
      assert(0);
   }
}
+
+
/*
 * JIT-compile one blend-state/type combination, run it over
 * LP_TEST_NUM_SAMPLES random vectors against compute_blend_ref(), and
 * record an outlier-filtered cycle average to the TSV file.
 */
PIPE_ALIGN_STACK
static boolean
test_one(unsigned verbose,
         FILE *fp,
         const struct pipe_blend_state *blend,
         struct lp_type type)
{
   struct gallivm_state *gallivm;
   LLVMValueRef func = NULL;
   blend_test_ptr_t blend_test_ptr;
   boolean success;
   const unsigned n = LP_TEST_NUM_SAMPLES;
   int64_t cycles[LP_TEST_NUM_SAMPLES];
   double cycles_avg = 0.0;
   unsigned i, j;
   const unsigned stride = lp_type_width(type)/8;  /* vector size in bytes */

   if(verbose >= 1)
      dump_blend_type(stdout, blend, type);

   gallivm = gallivm_create("test_module", LLVMGetGlobalContext());

   func = add_blend_test(gallivm, blend, type);

   gallivm_compile_module(gallivm);

   blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func);

   /* IR no longer needed once JIT-compiled. */
   gallivm_free_ir(gallivm);

   success = TRUE;

   {
      uint8_t *src, *src1, *dst, *con, *res, *ref;
      /* NOTE(review): align_malloc results unchecked -- confirm OOM
       * aborting here is acceptable for a test binary. */
      src = align_malloc(stride, stride);
      src1 = align_malloc(stride, stride);
      dst = align_malloc(stride, stride);
      con = align_malloc(stride, stride);
      res = align_malloc(stride, stride);
      ref = align_malloc(stride, stride);

      for(i = 0; i < n && success; ++i) {
         int64_t start_counter = 0;
         int64_t end_counter = 0;

         random_vec(type, src);
         random_vec(type, src1);
         random_vec(type, dst);
         random_vec(type, con);

         {
            /* Compute the reference in double precision, four channels
             * (one RGBA pixel) at a time. */
            double fsrc[LP_MAX_VECTOR_LENGTH];
            double fsrc1[LP_MAX_VECTOR_LENGTH];
            double fdst[LP_MAX_VECTOR_LENGTH];
            double fcon[LP_MAX_VECTOR_LENGTH];
            double fref[LP_MAX_VECTOR_LENGTH];

            read_vec(type, src, fsrc);
            read_vec(type, src1, fsrc1);
            read_vec(type, dst, fdst);
            read_vec(type, con, fcon);

            for(j = 0; j < type.length; j += 4)
               compute_blend_ref(blend, fsrc + j, fsrc1 + j, fdst + j, fcon + j, fref + j);

            write_vec(type, ref, fref);
         }

         /* Time only the JITed blend call. */
         start_counter = rdtsc();
         blend_test_ptr(src, src1, dst, con, res);
         end_counter = rdtsc();

         cycles[i] = end_counter - start_counter;

         if(!compare_vec(type, res, ref)) {
            success = FALSE;

            if(verbose < 1)
               dump_blend_type(stderr, blend, type);
            fprintf(stderr, "MISMATCH\n");

            fprintf(stderr, "  Src: ");
            dump_vec(stderr, type, src);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Src1: ");
            dump_vec(stderr, type, src1);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Dst: ");
            dump_vec(stderr, type, dst);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Con: ");
            dump_vec(stderr, type, con);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Res: ");
            dump_vec(stderr, type, res);
            fprintf(stderr, "\n");

            fprintf(stderr, "  Ref: ");
            dump_vec(stderr, type, ref);
            fprintf(stderr, "\n");
         }
      }
      align_free(src);
      align_free(src1);
      align_free(dst);
      align_free(con);
      align_free(res);
      align_free(ref);
   }

   /*
    * Unfortunately the output of cycle counter is not very reliable as it comes
    * -- sometimes we get outliers (due IRQs perhaps?) which are
    * better removed to avoid random or biased data.
    */
   {
      double sum = 0.0, sum2 = 0.0;
      double avg, std;
      unsigned m;

      for(i = 0; i < n; ++i) {
         sum += cycles[i];
         sum2 += cycles[i]*cycles[i];
      }

      avg = sum/n;
      /* NOTE(review): sqrtf on a double expression -- presumably float
       * precision is deemed sufficient for the std deviation here. */
      std = sqrtf((sum2 - n*avg*avg)/n);

      /* Re-average, dropping samples more than 4 sigma from the mean. */
      m = 0;
      sum = 0.0;
      for(i = 0; i < n; ++i) {
         if(fabs(cycles[i] - avg) <= 4.0*std) {
            sum += cycles[i];
            ++m;
         }
      }

      cycles_avg = sum/m;

   }

   if(fp)
      write_tsv_row(fp, blend, type, cycles_avg, success);

   gallivm_destroy(gallivm);

   return success;
}
+
+
/* All blend factors exercised by test_all()/test_some(). */
const unsigned
blend_factors[] = {
   PIPE_BLENDFACTOR_ZERO,
   PIPE_BLENDFACTOR_ONE,
   PIPE_BLENDFACTOR_SRC_COLOR,
   PIPE_BLENDFACTOR_SRC_ALPHA,
   PIPE_BLENDFACTOR_DST_COLOR,
   PIPE_BLENDFACTOR_DST_ALPHA,
   PIPE_BLENDFACTOR_CONST_COLOR,
   PIPE_BLENDFACTOR_CONST_ALPHA,
   PIPE_BLENDFACTOR_SRC1_COLOR,
   PIPE_BLENDFACTOR_SRC1_ALPHA,
   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
   PIPE_BLENDFACTOR_INV_SRC_COLOR,
   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
   PIPE_BLENDFACTOR_INV_DST_COLOR,
   PIPE_BLENDFACTOR_INV_DST_ALPHA,
   PIPE_BLENDFACTOR_INV_CONST_COLOR,
   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
};


/* All blend equations. */
const unsigned
blend_funcs[] = {
   PIPE_BLEND_ADD,
   PIPE_BLEND_SUBTRACT,
   PIPE_BLEND_REVERSE_SUBTRACT,
   PIPE_BLEND_MIN,
   PIPE_BLEND_MAX
};


/* Vector layouts the blend code is generated and checked for. */
const struct lp_type blend_types[] = {
   /* float, fixed,  sign,  norm, width, len */
   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 }, /* f32 x 4 */
   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
};


const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
+
+
/*
 * Exhaustively test blend func/factor/type combinations.  The dst
 * factor loops only run up to the current src factor, roughly halving
 * the number of combinations.
 */
boolean
test_all(unsigned verbose, FILE *fp)
{
   const unsigned *rgb_func;
   const unsigned *rgb_src_factor;
   const unsigned *rgb_dst_factor;
   const unsigned *alpha_func;
   const unsigned *alpha_src_factor;
   const unsigned *alpha_dst_factor;
   struct pipe_blend_state blend;
   const struct lp_type *type;
   boolean success = TRUE;

   for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
      for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
         for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
            for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
               for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
                  for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
                     for(type = blend_types; type < &blend_types[num_types]; ++type) {

                        /* SRC_ALPHA_SATURATE is not a valid dst factor. */
                        if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
                           *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
                           continue;

                        memset(&blend, 0, sizeof blend);
                        blend.rt[0].blend_enable = 1;
                        blend.rt[0].rgb_func = *rgb_func;
                        blend.rt[0].rgb_src_factor = *rgb_src_factor;
                        blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
                        blend.rt[0].alpha_func = *alpha_func;
                        blend.rt[0].alpha_src_factor = *alpha_src_factor;
                        blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
                        blend.rt[0].colormask = PIPE_MASK_RGBA;

                        if(!test_one(verbose, fp, &blend, *type))
                          success = FALSE;

                     }
                  }
               }
            }
         }
      }
   }

   return success;
}
+
+
/*
 * Test n randomly chosen blend func/factor/type combinations.  Dst
 * factors are re-rolled until they are not SRC_ALPHA_SATURATE, which
 * is invalid as a destination factor.
 */
boolean
test_some(unsigned verbose, FILE *fp,
          unsigned long n)
{
   const unsigned *rgb_func;
   const unsigned *rgb_src_factor;
   const unsigned *rgb_dst_factor;
   const unsigned *alpha_func;
   const unsigned *alpha_src_factor;
   const unsigned *alpha_dst_factor;
   struct pipe_blend_state blend;
   const struct lp_type *type;
   unsigned long i;
   boolean success = TRUE;

   for(i = 0; i < n; ++i) {
      rgb_func = &blend_funcs[rand() % num_funcs];
      alpha_func = &blend_funcs[rand() % num_funcs];
      rgb_src_factor = &blend_factors[rand() % num_factors];
      alpha_src_factor = &blend_factors[rand() % num_factors];

      do {
         rgb_dst_factor = &blend_factors[rand() % num_factors];
      } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);

      do {
         alpha_dst_factor = &blend_factors[rand() % num_factors];
      } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);

      type = &blend_types[rand() % num_types];

      memset(&blend, 0, sizeof blend);
      blend.rt[0].blend_enable = 1;
      blend.rt[0].rgb_func = *rgb_func;
      blend.rt[0].rgb_src_factor = *rgb_src_factor;
      blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
      blend.rt[0].alpha_func = *alpha_func;
      blend.rt[0].alpha_src_factor = *alpha_src_factor;
      blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
      blend.rt[0].colormask = PIPE_MASK_RGBA;

      if(!test_one(verbose, fp, &blend, *type))
        success = FALSE;
   }

   return success;
}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c
new file mode 100644
index 000000000..8290da400
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -0,0 +1,453 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for type conversion.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_pointer.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_test.h"
+
+
+typedef void (*conv_test_ptr_t)(const void *src, const void *dst);
+
+
+void
+write_tsv_header(FILE *fp)
+{
+   /* One header line naming the TSV report columns. */
+   fprintf(fp, "result\tcycles_per_channel\tsrc_type\tdst_type\n");
+   fflush(fp);
+}
+
+
+/* Append one TSV report line: result, cycles per channel, source type,
+ * destination type.
+ */
+static void
+write_tsv_row(FILE *fp,
+              struct lp_type src_type,
+              struct lp_type dst_type,
+              double cycles,
+              boolean success)
+{
+   fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+   /* Normalise the raw cycle count by the wider of the two vectors. */
+   fprintf(fp, "%.1f\t", cycles / MAX2(src_type.length, dst_type.length));
+
+   dump_type(fp, src_type);
+   fprintf(fp, "\t");
+
+   dump_type(fp, dst_type);
+   fprintf(fp, "\n");
+
+   fflush(fp);
+}
+
+
+/* Print the (src, dst) type pair under test on a single stderr-style line. */
+static void
+dump_conv_types(FILE *fp,
+                struct lp_type src_type,
+                struct lp_type dst_type)
+{
+   fprintf(fp, "src_type=");
+   dump_type(fp, src_type);
+   fprintf(fp, " dst_type=");
+   dump_type(fp, dst_type);
+   fprintf(fp, " ...\n");
+   fflush(fp);
+}
+
+
+/**
+ * Build a JIT-able function "test(src*, dst*)" that loads num_srcs source
+ * vectors, converts them with lp_build_conv(), and stores the resulting
+ * num_dsts destination vectors.
+ */
+static LLVMValueRef
+add_conv_test(struct gallivm_state *gallivm,
+              struct lp_type src_type, unsigned num_srcs,
+              struct lp_type dst_type, unsigned num_dsts)
+{
+   LLVMModuleRef module = gallivm->module;
+   LLVMContextRef context = gallivm->context;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef args[2];
+   LLVMValueRef func;
+   LLVMValueRef src_ptr;
+   LLVMValueRef dst_ptr;
+   LLVMBasicBlockRef block;
+   LLVMValueRef src[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef dst[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   args[0] = LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0);
+   args[1] = LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0);
+
+   func = LLVMAddFunction(module, "test",
+                          LLVMFunctionType(LLVMVoidTypeInContext(context),
+                                           args, 2, 0));
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   src_ptr = LLVMGetParam(func, 0);
+   dst_ptr = LLVMGetParam(func, 1);
+
+   block = LLVMAppendBasicBlockInContext(context, func, "entry");
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   /* Load the source vectors from src_ptr[0..num_srcs). */
+   for(i = 0; i < num_srcs; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
+      LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, "");
+      src[i] = LLVMBuildLoad(builder, ptr, "");
+   }
+
+   lp_build_conv(gallivm, src_type, dst_type, src, num_srcs, dst, num_dsts);
+
+   /* Store the converted vectors to dst_ptr[0..num_dsts). */
+   for(i = 0; i < num_dsts; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
+      LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
+      LLVMBuildStore(builder, dst[i], ptr);
+   }
+
+   /* Fixed: stray double semicolon after this call in the original. */
+   LLVMBuildRetVoid(builder);
+
+   gallivm_verify_function(gallivm, func);
+
+   return func;
+}
+
+
+/**
+ * JIT-compile a conversion between src_type and dst_type, run it on random
+ * vectors, and compare the result against a reference computed on the host.
+ * Cycle counts are gathered per run for the TSV report.
+ *
+ * Fixes vs. the original: the "Known failures" guard was duplicated
+ * verbatim (dead second copy removed), and sqrtf() was applied to double
+ * operands where sqrt() is the correct-precision call.
+ */
+PIPE_ALIGN_STACK
+static boolean
+test_one(unsigned verbose,
+         FILE *fp,
+         struct lp_type src_type,
+         struct lp_type dst_type)
+{
+   struct gallivm_state *gallivm;
+   LLVMValueRef func = NULL;
+   conv_test_ptr_t conv_test_ptr;
+   boolean success;
+   const unsigned n = LP_TEST_NUM_SAMPLES;
+   int64_t cycles[LP_TEST_NUM_SAMPLES];
+   double cycles_avg = 0.0;
+   unsigned num_srcs;
+   unsigned num_dsts;
+   double eps;
+   unsigned i, j;
+
+   /* Only test conversions that can preserve the total channel count. */
+   if ((src_type.width >= dst_type.width && src_type.length > dst_type.length) ||
+       (src_type.width <= dst_type.width && src_type.length < dst_type.length)) {
+      return TRUE;
+   }
+
+   /* Known failures
+    * - fixed point 32 -> float 32
+    * - float 32 -> signed normalised integer 32
+    */
+   if ((src_type.floating && !dst_type.floating && dst_type.sign && dst_type.norm && src_type.width == dst_type.width) ||
+       (!src_type.floating && dst_type.floating && src_type.fixed && src_type.width == dst_type.width)) {
+      return TRUE;
+   }
+
+   if(verbose >= 1)
+      dump_conv_types(stderr, src_type, dst_type);
+
+   /* Use as many vectors on each side as needed to carry the same total
+    * number of channels.
+    */
+   if (src_type.length > dst_type.length) {
+      num_srcs = 1;
+      num_dsts = src_type.length/dst_type.length;
+   }
+   else if (src_type.length < dst_type.length) {
+      num_dsts = 1;
+      num_srcs = dst_type.length/src_type.length;
+   }
+   else {
+      num_dsts = 1;
+      num_srcs = 1;
+   }
+
+   /* We must not loose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+   eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));
+
+   gallivm = gallivm_create("test_module", LLVMGetGlobalContext());
+
+   func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts);
+
+   gallivm_compile_module(gallivm);
+
+   conv_test_ptr = (conv_test_ptr_t)gallivm_jit_function(gallivm, func);
+
+   /* The machine code remains callable after the IR is released. */
+   gallivm_free_ir(gallivm);
+
+   success = TRUE;
+   for(i = 0; i < n && success; ++i) {
+      unsigned src_stride = src_type.length*src_type.width/8;
+      unsigned dst_stride = dst_type.length*dst_type.width/8;
+      PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      int64_t start_counter = 0;
+      int64_t end_counter = 0;
+
+      /* Random source vectors, expanded to doubles as the reference. */
+      for(j = 0; j < num_srcs; ++j) {
+         random_vec(src_type, src + j*src_stride);
+         read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
+      }
+
+      /* Pack the reference doubles into the destination type. */
+      for(j = 0; j < num_dsts; ++j) {
+         write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
+      }
+
+      start_counter = rdtsc();
+      conv_test_ptr(src, dst);
+      end_counter = rdtsc();
+
+      cycles[i] = end_counter - start_counter;
+
+      for(j = 0; j < num_dsts; ++j) {
+         if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps))
+            success = FALSE;
+      }
+
+      if (!success || verbose >= 3) {
+         if(verbose < 1)
+            dump_conv_types(stderr, src_type, dst_type);
+         if (success) {
+            fprintf(stderr, "PASS\n");
+         }
+         else {
+            fprintf(stderr, "MISMATCH\n");
+         }
+
+         for(j = 0; j < num_srcs; ++j) {
+            fprintf(stderr, " Src%u: ", j);
+            dump_vec(stderr, src_type, src + j*src_stride);
+            fprintf(stderr, "\n");
+         }
+
+#if 1
+         fprintf(stderr, " Ref: ");
+         for(j = 0; j < src_type.length*num_srcs; ++j)
+            fprintf(stderr, " %f", fref[j]);
+         fprintf(stderr, "\n");
+#endif
+
+         for(j = 0; j < num_dsts; ++j) {
+            fprintf(stderr, " Dst%u: ", j);
+            dump_vec(stderr, dst_type, dst + j*dst_stride);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, " Ref%u: ", j);
+            dump_vec(stderr, dst_type, ref + j*dst_stride);
+            fprintf(stderr, "\n");
+         }
+      }
+   }
+
+   /*
+    * Unfortunately the output of cycle counter is not very reliable as it comes
+    * -- sometimes we get outliers (due IRQs perhaps?) which are
+    * better removed to avoid random or biased data.
+    */
+   {
+      double sum = 0.0, sum2 = 0.0;
+      double avg, std;
+      unsigned m;
+
+      for(i = 0; i < n; ++i) {
+         sum += cycles[i];
+         sum2 += cycles[i]*cycles[i];
+      }
+
+      avg = sum/n;
+      /* Double-precision sqrt: the operands are doubles. */
+      std = sqrt((sum2 - n*avg*avg)/n);
+
+      /* Re-average, discarding samples beyond 4 standard deviations. */
+      m = 0;
+      sum = 0.0;
+      for(i = 0; i < n; ++i) {
+         if(fabs(cycles[i] - avg) <= 4.0*std) {
+            sum += cycles[i];
+            ++m;
+         }
+      }
+
+      cycles_avg = sum/m;
+   }
+
+   if(fp)
+      write_tsv_row(fp, src_type, dst_type, cycles_avg, success);
+
+   gallivm_destroy(gallivm);
+
+   return success;
+}
+
+
+/* Table of vector types exercised by test_all()/test_some().
+ * Columns mirror struct lp_type: floating, fixed, sign, norm, width, length.
+ */
+const struct lp_type conv_types[] = {
+   /* float, fixed, sign, norm, width, len */
+
+   /* Float */
+   {   TRUE, FALSE,  TRUE,  TRUE,    32,   4 },
+   {   TRUE, FALSE,  TRUE, FALSE,    32,   4 },
+   {   TRUE, FALSE, FALSE,  TRUE,    32,   4 },
+   {   TRUE, FALSE, FALSE, FALSE,    32,   4 },
+
+   {   TRUE, FALSE,  TRUE,  TRUE,    32,   8 },
+   {   TRUE, FALSE,  TRUE, FALSE,    32,   8 },
+   {   TRUE, FALSE, FALSE,  TRUE,    32,   8 },
+   {   TRUE, FALSE, FALSE, FALSE,    32,   8 },
+
+   /* Fixed */
+   {  FALSE,  TRUE,  TRUE,  TRUE,    32,   4 },
+   {  FALSE,  TRUE,  TRUE, FALSE,    32,   4 },
+   {  FALSE,  TRUE, FALSE,  TRUE,    32,   4 },
+   {  FALSE,  TRUE, FALSE, FALSE,    32,   4 },
+
+   {  FALSE,  TRUE,  TRUE,  TRUE,    32,   8 },
+   {  FALSE,  TRUE,  TRUE, FALSE,    32,   8 },
+   {  FALSE,  TRUE, FALSE,  TRUE,    32,   8 },
+   {  FALSE,  TRUE, FALSE, FALSE,    32,   8 },
+
+   /* Integer */
+   {  FALSE, FALSE,  TRUE,  TRUE,    32,   4 },
+   {  FALSE, FALSE,  TRUE, FALSE,    32,   4 },
+   {  FALSE, FALSE, FALSE,  TRUE,    32,   4 },
+   {  FALSE, FALSE, FALSE, FALSE,    32,   4 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,    32,   8 },
+   {  FALSE, FALSE,  TRUE, FALSE,    32,   8 },
+   {  FALSE, FALSE, FALSE,  TRUE,    32,   8 },
+   {  FALSE, FALSE, FALSE, FALSE,    32,   8 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,    16,   8 },
+   {  FALSE, FALSE,  TRUE, FALSE,    16,   8 },
+   {  FALSE, FALSE, FALSE,  TRUE,    16,   8 },
+   {  FALSE, FALSE, FALSE, FALSE,    16,   8 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,     8,  16 },
+   {  FALSE, FALSE,  TRUE, FALSE,     8,  16 },
+   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 },
+   {  FALSE, FALSE, FALSE, FALSE,     8,  16 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,     8,   4 },
+   {  FALSE, FALSE,  TRUE, FALSE,     8,   4 },
+   {  FALSE, FALSE, FALSE,  TRUE,     8,   4 },
+   {  FALSE, FALSE, FALSE, FALSE,     8,   4 },
+
+   {  FALSE, FALSE, FALSE,  TRUE,     8,   8 },
+};
+
+
+/* Number of entries in conv_types[]. */
+const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   unsigned i, j;
+   boolean success = TRUE;
+   int error_count = 0;
+
+   /* Try every ordered pair of distinct types from the conv_types table. */
+   for (i = 0; i < num_types; ++i) {
+      for (j = 0; j < num_types; ++j) {
+         if (i == j)
+            continue;
+
+         if (!test_one(verbose, fp, conv_types[i], conv_types[j])) {
+            success = FALSE;
+            ++error_count;
+         }
+      }
+   }
+
+   fprintf(stderr, "%d failures\n", error_count);
+
+   return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+          unsigned long n)
+{
+   unsigned long i;
+   boolean success = TRUE;
+
+   /* Test n random (src, dst) pairs of distinct types with matching
+    * normalisation flags.
+    */
+   for (i = 0; i < n; ++i) {
+      const struct lp_type *src = &conv_types[rand() % num_types];
+      const struct lp_type *dst;
+
+      do {
+         dst = &conv_types[rand() % num_types];
+      } while (src == dst || src->norm != dst->norm);
+
+      if (!test_one(verbose, fp, *src, *dst))
+         success = FALSE;
+   }
+
+   return success;
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+   /* Smoke test: 4 x float32 -> 16 x unorm8.
+    * Fields are: float, fixed, sign, norm, width, len.
+    */
+   struct lp_type src_type = {  TRUE, FALSE,  TRUE, TRUE, 32,  4 };
+   struct lp_type dst_type = { FALSE, FALSE, FALSE, TRUE,  8, 16 };
+
+   return test_one(verbose, fp, src_type, dst_type);
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c
new file mode 100644
index 000000000..d9abd1ae3
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -0,0 +1,384 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_string.h"
+#include "util/u_format.h"
+#include "util/u_format_tests.h"
+#include "util/u_format_s3tc.h"
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_format.h"
+#include "gallivm/lp_bld_init.h"
+
+#include "lp_test.h"
+
+
+void
+write_tsv_header(FILE *fp)
+{
+   /* One header line naming the TSV report columns. */
+   fprintf(fp, "result\tformat\n");
+   fflush(fp);
+}
+
+
+/* Append one TSV line per tested format: result then format name. */
+static void
+write_tsv_row(FILE *fp,
+              const struct util_format_description *desc,
+              boolean success)
+{
+   const char *result = success ? "pass" : "fail";
+
+   fprintf(fp, "%s\t", result);
+   fprintf(fp, "%s\n", desc->name);
+
+   fflush(fp);
+}
+
+
+typedef void
+(*fetch_ptr_t)(void *unpacked, const void *packed,
+ unsigned i, unsigned j);
+
+
+/**
+ * Build a JIT-able function "fetch_<format>_<float|unorm8>(rgba, packed, i, j)"
+ * that fetches texel (i, j) from a packed block via lp_build_fetch_rgba_aos()
+ * and stores the resulting rgba vector through the first argument.
+ */
+static LLVMValueRef
+add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
+                    const struct util_format_description *desc,
+                    struct lp_type type)
+{
+   char name[256];
+   LLVMContextRef context = gallivm->context;
+   LLVMModuleRef module = gallivm->module;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef args[4];
+   LLVMValueRef func;
+   LLVMValueRef packed_ptr;
+   LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context));
+   LLVMValueRef rgba_ptr;
+   LLVMValueRef i;
+   LLVMValueRef j;
+   LLVMBasicBlockRef block;
+   LLVMValueRef rgba;
+
+   util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
+                 type.floating ? "float" : "unorm8");
+
+   args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0);
+   args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
+   /* Both texel coordinates are plain 32-bit integers. */
+   args[3] = args[2] = LLVMInt32TypeInContext(context);
+
+   func = LLVMAddFunction(module, name,
+                          LLVMFunctionType(LLVMVoidTypeInContext(context),
+                                           args, Elements(args), 0));
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   rgba_ptr = LLVMGetParam(func, 0);
+   packed_ptr = LLVMGetParam(func, 1);
+   i = LLVMGetParam(func, 2);
+   j = LLVMGetParam(func, 3);
+
+   block = LLVMAppendBasicBlockInContext(context, func, "entry");
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
+                                  packed_ptr, offset, i, j);
+
+   LLVMBuildStore(builder, rgba, rgba_ptr);
+
+   LLVMBuildRetVoid(builder);
+
+   gallivm_verify_function(gallivm, func);
+
+   return func;
+}
+
+
+/**
+ * JIT-compile the float fetch path for one format and check every texel of
+ * every golden test vector for that format against the reference values.
+ * Infinity sign and NaN-ness must agree; finite values must match within
+ * FLT_EPSILON.
+ */
+PIPE_ALIGN_STACK
+static boolean
+test_format_float(unsigned verbose, FILE *fp,
+                  const struct util_format_description *desc)
+{
+   struct gallivm_state *gallivm;
+   LLVMValueRef fetch = NULL;
+   fetch_ptr_t fetch_ptr;
+   PIPE_ALIGN_VAR(16) uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
+   PIPE_ALIGN_VAR(16) float unpacked[4];
+   boolean first = TRUE;
+   boolean success = TRUE;
+   unsigned i, j, k, l;
+
+   gallivm = gallivm_create("test_module_float", LLVMGetGlobalContext());
+
+   fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type());
+
+   gallivm_compile_module(gallivm);
+
+   fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch);
+
+   /* The compiled code remains callable after the IR is released. */
+   gallivm_free_ir(gallivm);
+
+   for (l = 0; l < util_format_nr_test_cases; ++l) {
+      const struct util_format_test_case *test = &util_format_test_cases[l];
+
+      if (test->format == desc->format) {
+
+         /* Announce the format once, on the first matching test case. */
+         if (first) {
+            printf("Testing %s (float) ...\n",
+                   desc->name);
+            fflush(stdout);
+            first = FALSE;
+         }
+
+         /* To ensure it's 16-byte aligned */
+         memcpy(packed, test->packed, sizeof packed);
+
+         for (i = 0; i < desc->block.height; ++i) {
+            for (j = 0; j < desc->block.width; ++j) {
+               boolean match = TRUE;
+
+               memset(unpacked, 0, sizeof unpacked);
+
+               /* Fetch texel x=j, y=i of the block. */
+               fetch_ptr(unpacked, packed, j, i);
+
+               for(k = 0; k < 4; ++k) {
+                  /* Infinities must agree in sign... */
+                  if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
+                     match = FALSE;
+                  }
+
+                  /* ...NaN-ness must agree... */
+                  if (util_is_double_nan(test->unpacked[i][j][k]) != util_is_nan(unpacked[k])) {
+                     match = FALSE;
+                  }
+
+                  /* ...and finite values must match within FLT_EPSILON. */
+                  if (!util_is_double_inf_or_nan(test->unpacked[i][j][k]) &&
+                      fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) {
+                     match = FALSE;
+                  }
+               }
+
+               if (!match) {
+                  printf("FAILED\n");
+                  printf(" Packed: %02x %02x %02x %02x\n",
+                         test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+                  printf(" Unpacked (%u,%u): %.9g %.9g %.9g %.9g obtained\n",
+                         j, i,
+                         unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+                  printf(" %.9g %.9g %.9g %.9g expected\n",
+                         test->unpacked[i][j][0],
+                         test->unpacked[i][j][1],
+                         test->unpacked[i][j][2],
+                         test->unpacked[i][j][3]);
+                  fflush(stdout);
+                  success = FALSE;
+               }
+            }
+         }
+      }
+   }
+
+   gallivm_destroy(gallivm);
+
+   if(fp)
+      write_tsv_row(fp, desc, success);
+
+   return success;
+}
+
+
+/**
+ * JIT-compile the unorm8 fetch path for one format and check every texel
+ * of every golden test vector for that format, allowing an error of at
+ * most 1 per channel relative to float_to_ubyte() of the reference value.
+ * NaN reference channels are skipped.
+ */
+PIPE_ALIGN_STACK
+static boolean
+test_format_unorm8(unsigned verbose, FILE *fp,
+                   const struct util_format_description *desc)
+{
+   struct gallivm_state *gallivm;
+   LLVMValueRef fetch = NULL;
+   fetch_ptr_t fetch_ptr;
+   PIPE_ALIGN_VAR(16) uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
+   uint8_t unpacked[4];
+   boolean first = TRUE;
+   boolean success = TRUE;
+   unsigned i, j, k, l;
+
+   gallivm = gallivm_create("test_module_unorm8", LLVMGetGlobalContext());
+
+   fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type());
+
+   gallivm_compile_module(gallivm);
+
+   fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch);
+
+   /* The compiled code remains callable after the IR is released. */
+   gallivm_free_ir(gallivm);
+
+   for (l = 0; l < util_format_nr_test_cases; ++l) {
+      const struct util_format_test_case *test = &util_format_test_cases[l];
+
+      if (test->format == desc->format) {
+
+         /* Announce the format once, on the first matching test case. */
+         if (first) {
+            printf("Testing %s (unorm8) ...\n",
+                   desc->name);
+            first = FALSE;
+         }
+
+         /* To ensure it's 16-byte aligned */
+         /* Could skip this and use unaligned lp_build_fetch_rgba_aos */
+         memcpy(packed, test->packed, sizeof packed);
+
+         for (i = 0; i < desc->block.height; ++i) {
+            for (j = 0; j < desc->block.width; ++j) {
+               boolean match;
+
+               memset(unpacked, 0, sizeof unpacked);
+
+               /* Fetch texel x=j, y=i of the block. */
+               fetch_ptr(unpacked, packed, j, i);
+
+               match = TRUE;
+               for(k = 0; k < 4; ++k) {
+                  int error = float_to_ubyte(test->unpacked[i][j][k]) - unpacked[k];
+
+                  /* NaN reference channels are not compared. */
+                  if (util_is_double_nan(test->unpacked[i][j][k]))
+                     continue;
+
+                  if (error < 0)
+                     error = -error;
+
+                  /* Tolerate off-by-one rounding differences. */
+                  if (error > 1)
+                     match = FALSE;
+               }
+
+               if (!match) {
+                  printf("FAILED\n");
+                  printf(" Packed: %02x %02x %02x %02x\n",
+                         test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+                  printf(" Unpacked (%u,%u): %02x %02x %02x %02x obtained\n",
+                         j, i,
+                         unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+                  printf(" %02x %02x %02x %02x expected\n",
+                         float_to_ubyte(test->unpacked[i][j][0]),
+                         float_to_ubyte(test->unpacked[i][j][1]),
+                         float_to_ubyte(test->unpacked[i][j][2]),
+                         float_to_ubyte(test->unpacked[i][j][3]));
+
+                  success = FALSE;
+               }
+            }
+         }
+      }
+   }
+
+   gallivm_destroy(gallivm);
+
+   if(fp)
+      write_tsv_row(fp, desc, success);
+
+   return success;
+}
+
+
+
+
+/* A format passes only if both the float and the unorm8 fetch paths pass.
+ * Both paths are always run, even when the first one fails.
+ */
+static boolean
+test_one(unsigned verbose, FILE *fp,
+         const struct util_format_description *format_desc)
+{
+   boolean float_ok = test_format_float(verbose, fp, format_desc);
+   boolean unorm8_ok = test_format_unorm8(verbose, fp, format_desc);
+
+   return float_ok && unorm8_ok;
+}
+
+
+/* Run the fetch tests over every pipe format that this test can handle. */
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   enum pipe_format format;
+   boolean success = TRUE;
+
+   util_format_s3tc_init();
+
+   for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
+      const struct util_format_description *format_desc;
+
+      format_desc = util_format_description(format);
+      if (!format_desc) {
+         continue;
+      }
+
+
+      /*
+       * TODO: test more
+       */
+
+      /* Skip depth/stencil formats. */
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+         continue;
+      }
+
+      /* Skip pure integer formats. */
+      if (util_format_is_pure_integer(format))
+         continue;
+
+      /* Skip S3TC formats when runtime S3TC support is unavailable. */
+      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
+          !util_format_s3tc_enabled) {
+         continue;
+      }
+
+      if (!test_one(verbose, fp, format_desc)) {
+         success = FALSE;
+      }
+   }
+
+   return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+          unsigned long n)
+{
+   /* No random-subset mode is implemented for formats: the n argument is
+    * ignored and the full suite is run instead.
+    */
+   (void)n;
+   return test_all(verbose, fp);
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+   /* No single-test mode is implemented for formats.  Say so and report
+    * success; the missing newline in the original left the message glued
+    * to the next line of terminal output.
+    */
+   printf("no test_single()\n");
+   return TRUE;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c
new file mode 100644
index 000000000..d835dbbd6
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -0,0 +1,418 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Shared testing code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
+
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_test.h"
+
+
+/* Print a compact spelling of the type, e.g. "f32x4", "si16x8", "ui8nx16". */
+void
+dump_type(FILE *fp,
+          struct lp_type type)
+{
+   const char *sign_tag;
+   const char *kind_tag;
+
+   if (type.sign)
+      sign_tag = (type.floating || type.fixed) ? "" : "s";
+   else
+      sign_tag = "u";
+
+   kind_tag = type.floating ? "f" : (type.fixed ? "h" : "i");
+
+   fprintf(fp, "%s%s%u%sx%u", sign_tag, kind_tag, type.width,
+           type.norm ? "n" : "", type.length);
+}
+
+
+/**
+ * Read channel 'index' of a packed vector and return it as a double,
+ * divided by the type's constant scale factor (lp_const_scale).
+ */
+double
+read_elem(struct lp_type type, const void *src, unsigned index)
+{
+   double scale = lp_const_scale(type);
+   double value;
+   assert(index < type.length);
+   if (type.floating) {
+      switch(type.width) {
+      case 32:
+         value = *((const float *)src + index);
+         break;
+      case 64:
+         value = *((const double *)src + index);
+         break;
+      default:
+         assert(0);
+         return 0.0;
+      }
+   }
+   else {
+      /* Integer/fixed channels: pick the signedness and width. */
+      if(type.sign) {
+         switch(type.width) {
+         case 8:
+            value = *((const int8_t *)src + index);
+            break;
+         case 16:
+            value = *((const int16_t *)src + index);
+            break;
+         case 32:
+            value = *((const int32_t *)src + index);
+            break;
+         case 64:
+            value = *((const int64_t *)src + index);
+            break;
+         default:
+            assert(0);
+            return 0.0;
+         }
+      }
+      else {
+         switch(type.width) {
+         case 8:
+            value = *((const uint8_t *)src + index);
+            break;
+         case 16:
+            value = *((const uint16_t *)src + index);
+            break;
+         case 32:
+            value = *((const uint32_t *)src + index);
+            break;
+         case 64:
+            value = *((const uint64_t *)src + index);
+            break;
+         default:
+            assert(0);
+            return 0.0;
+         }
+      }
+   }
+   return value/scale;
+}
+
+
+/**
+ * Write 'value' into channel 'index' of a packed vector, clamping it to
+ * the representable range and multiplying by the type's scale factor.
+ *
+ * Fixes vs. the original: the unsigned branch converted the double via
+ * (long long), which is undefined behaviour for values >= 2^63 (e.g. the
+ * 64-bit unorm scale), and both integer branches shifted by the full type
+ * width for 64-bit types (also UB).  Values are now range-checked in the
+ * double domain before conversion.
+ */
+void
+write_elem(struct lp_type type, void *dst, unsigned index, double value)
+{
+   assert(index < type.length);
+   /* Clamp to the representable range before scaling. */
+   if(!type.sign && value < 0.0)
+      value = 0.0;
+   if(type.norm && value < -1.0)
+      value = -1.0;
+   if(type.norm && value > 1.0)
+      value = 1.0;
+   if (type.floating) {
+      switch(type.width) {
+      case 32:
+         *((float *)dst + index) = (float)(value);
+         break;
+      case 64:
+         *((double *)dst + index) = value;
+         break;
+      default:
+         assert(0);
+      }
+   }
+   else {
+      double scale = lp_const_scale(type);
+      value = round(value*scale);
+      if(type.sign) {
+         long long lvalue;
+         /* 2^63 as a double: casting a double at or above it to long long
+          * is undefined behaviour, so saturate first.
+          */
+         if (value >= 9223372036854775808.0)
+            lvalue = 0x7fffffffffffffffLL;
+         else
+            lvalue = (long long)value;
+         if (type.width < 64)
+            lvalue = MIN2(lvalue, ((long long)1 << (type.width - 1)) - 1);
+         switch(type.width) {
+         case 8:
+            *((int8_t *)dst + index) = (int8_t)lvalue;
+            break;
+         case 16:
+            *((int16_t *)dst + index) = (int16_t)lvalue;
+            break;
+         case 32:
+            *((int32_t *)dst + index) = (int32_t)lvalue;
+            break;
+         case 64:
+            *((int64_t *)dst + index) = (int64_t)lvalue;
+            break;
+         default:
+            assert(0);
+         }
+      }
+      else {
+         unsigned long long lvalue;
+         /* 2^64 as a double: convert straight to unsigned long long after
+          * range-checking; the original cast through (long long) was UB
+          * for any value above 2^63.
+          */
+         if (value >= 18446744073709551616.0)
+            lvalue = ~(unsigned long long)0;
+         else
+            lvalue = (unsigned long long)value;
+         if (type.width < 64)
+            lvalue = MIN2(lvalue, ((unsigned long long)1 << type.width) - 1);
+         switch(type.width) {
+         case 8:
+            *((uint8_t *)dst + index) = (uint8_t)lvalue;
+            break;
+         case 16:
+            *((uint16_t *)dst + index) = (uint16_t)lvalue;
+            break;
+         case 32:
+            *((uint32_t *)dst + index) = (uint32_t)lvalue;
+            break;
+         case 64:
+            *((uint64_t *)dst + index) = (uint64_t)lvalue;
+            break;
+         default:
+            assert(0);
+         }
+      }
+   }
+}
+
+
+/**
+ * Write a random in-range value into channel 'index' of a packed vector.
+ *
+ * Fixes vs. the original: the sign flip was applied when !type.sign, i.e.
+ * to *unsigned* types; write_elem clamps negative values of unsigned types
+ * to zero, so half of the generated samples were zeroed while signed types
+ * never received negative values.  Negate only for signed types.
+ */
+void
+random_elem(struct lp_type type, void *dst, unsigned index)
+{
+   double value;
+   assert(index < type.length);
+   value = (double)rand()/(double)RAND_MAX;
+   if(!type.norm) {
+      if (type.floating) {
+         value *= 2.0;
+      }
+      else {
+         /* Add a random integer part bounded by the type's range (half
+          * the width for fixed point, which stores an integer half).
+          */
+         unsigned long long mask;
+         if (type.fixed)
+            mask = ((unsigned long long)1 << (type.width / 2)) - 1;
+         else if (type.sign)
+            mask = ((unsigned long long)1 << (type.width - 1)) - 1;
+         else
+            mask = ((unsigned long long)1 << type.width) - 1;
+         value += (double)(mask & rand());
+      }
+   }
+   if(type.sign)
+      if(rand() & 1)
+         value = -value;
+   write_elem(type, dst, index, value);
+}
+
+
+/* Expand every channel of a packed vector into an array of doubles. */
+void
+read_vec(struct lp_type type, const void *src, double *dst)
+{
+   unsigned chan;
+   for (chan = 0; chan < type.length; ++chan)
+      dst[chan] = read_elem(type, src, chan);
+}
+
+
+/* Pack an array of doubles into every channel of a packed vector. */
+void
+write_vec(struct lp_type type, void *dst, const double *src)
+{
+   unsigned chan;
+   for (chan = 0; chan < type.length; ++chan)
+      write_elem(type, dst, chan, src[chan]);
+}
+
+
+/* Uniform pseudo-random float in [0, 1]. */
+float
+random_float(void)
+{
+   double r = (double)rand() / (double)RAND_MAX;
+   return (float)r;
+}
+
+
+/* Fill every channel of a packed vector with a random in-range value. */
+void
+random_vec(struct lp_type type, void *dst)
+{
+   unsigned chan;
+   for (chan = 0; chan < type.length; ++chan)
+      random_elem(type, dst, chan);
+}
+
+
+/**
+ * Compare two packed vectors channel by channel, returning TRUE when every
+ * channel agrees to within the (widened) tolerance eps.
+ */
+boolean
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
+{
+   unsigned i;
+   /* Allow a wider tolerance than the nominal eps for the type. */
+   eps *= type.floating ? 8.0 : 2.0;
+   for (i = 0; i < type.length; ++i) {
+      double res_elem = read_elem(type, res, i);
+      double ref_elem = read_elem(type, ref, i);
+      double delta = res_elem - ref_elem;
+      /* Switch to relative error once the reference leaves [-1, 1]. */
+      if (ref_elem < -1.0 || ref_elem > 1.0) {
+         delta /= ref_elem;
+      }
+      delta = fabs(delta);
+      if (delta >= eps) {
+         return FALSE;
+      }
+   }
+
+   return TRUE;
+}
+
+
+/* Compare two packed vectors using the default tolerance for the type. */
+boolean
+compare_vec(struct lp_type type, const void *res, const void *ref)
+{
+   return compare_vec_with_eps(type, res, ref, lp_const_eps(type));
+}
+
+
+/**
+ * Print every channel of a packed vector, space separated, using a field
+ * width appropriate for the channel type.
+ *
+ * Fixes vs. the original: the normalised-integer branch passed an
+ * unsigned long long to plain "%x" conversions ("%2x", "%4x", "%8x",
+ * "%16x"), which is undefined behaviour — the "ll" length modifier is
+ * required; and the integer fallback assigned 0.0 to integer locals.
+ */
+void
+dump_vec(FILE *fp, struct lp_type type, const void *src)
+{
+   unsigned i;
+   for (i = 0; i < type.length; ++i) {
+      if(i)
+         fprintf(fp, " ");
+      if (type.floating) {
+         double value;
+         switch(type.width) {
+         case 32:
+            value = *((const float *)src + i);
+            break;
+         case 64:
+            value = *((const double *)src + i);
+            break;
+         default:
+            assert(0);
+            value = 0.0;
+         }
+         fprintf(fp, "%f", value);
+      }
+      else {
+         if(type.sign && !type.norm) {
+            long long value;
+            const char *format;
+            switch(type.width) {
+            case 8:
+               value = *((const int8_t *)src + i);
+               format = "%3lli";
+               break;
+            case 16:
+               value = *((const int16_t *)src + i);
+               format = "%5lli";
+               break;
+            case 32:
+               value = *((const int32_t *)src + i);
+               format = "%10lli";
+               break;
+            case 64:
+               value = *((const int64_t *)src + i);
+               format = "%20lli";
+               break;
+            default:
+               assert(0);
+               value = 0;
+               format = "?";
+            }
+            fprintf(fp, format, value);
+         }
+         else {
+            unsigned long long value;
+            const char *format;
+            /* value is unsigned long long, so every conversion must carry
+             * the "ll" length modifier.
+             */
+            switch(type.width) {
+            case 8:
+               value = *((const uint8_t *)src + i);
+               format = type.norm ? "%2llx" : "%4llu";
+               break;
+            case 16:
+               value = *((const uint16_t *)src + i);
+               format = type.norm ? "%4llx" : "%6llx";
+               break;
+            case 32:
+               value = *((const uint32_t *)src + i);
+               format = type.norm ? "%8llx" : "%11llx";
+               break;
+            case 64:
+               value = *((const uint64_t *)src + i);
+               format = type.norm ? "%16llx" : "%21llx";
+               break;
+            default:
+               assert(0);
+               value = 0;
+               format = "?";
+            }
+            fprintf(fp, format, value);
+         }
+      }
+   }
+}
+
+
+/**
+ * Shared test driver.  Options: -v (verbose, repeatable), -s (run the
+ * single smoke test), -o FILE (write a TSV report); a bare number sets the
+ * random-test count n (default 1000; 0 selects the exhaustive test_all).
+ */
+int main(int argc, char **argv)
+{
+   unsigned verbose = 0;
+   FILE *fp = NULL;
+   unsigned long n = 1000;
+   unsigned i;
+   boolean success;
+   boolean single = FALSE;
+   unsigned fpstate;
+
+   util_cpu_detect();
+   /* Switch the FPU into denormals-are-zero mode (see util_fpstate_*). */
+   fpstate = util_fpstate_get();
+   util_fpstate_set_denorms_to_zero(fpstate);
+
+   if (!lp_build_init())
+      return 1;
+
+   for(i = 1; i < argc; ++i) {
+      if(strcmp(argv[i], "-v") == 0)
+         ++verbose;
+      else if(strcmp(argv[i], "-s") == 0)
+         single = TRUE;
+      else if(strcmp(argv[i], "-o") == 0)
+         fp = fopen(argv[++i], "wt");
+      else
+         n = atoi(argv[i]);
+   }
+
+#ifdef DEBUG
+   /* With -v -v (or more) also dump the generated IR and assembly. */
+   if (verbose >= 2) {
+      gallivm_debug |= GALLIVM_DEBUG_IR;
+      gallivm_debug |= GALLIVM_DEBUG_ASM;
+   }
+#endif
+
+   if(fp) {
+      /* Warm up the caches */
+      test_some(0, NULL, 100);
+
+      write_tsv_header(fp);
+   }
+
+   if (single)
+      success = test_single(verbose, fp);
+   else if (n)
+      success = test_some(verbose, fp, n);
+   else
+      success = test_all(verbose, fp);
+
+   if(fp)
+      fclose(fp);
+
+   return success ? 0 : 1;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c
new file mode 100644
index 000000000..fe4ce0fc5
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c
@@ -0,0 +1,139 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "util/u_pointer.h"
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_assert.h"
+#include "gallivm/lp_bld_printf.h"
+
+#include "lp_test.h"
+
+
/* Placeholder test-case descriptor: the printf test takes no per-case
 * parameters, so this struct only carries a dummy field (never read). */
struct printf_test_case {
   int foo;
};
+
/**
 * Emit the tab-separated-values column header for this test's output
 * and flush it so partial results are visible if the test crashes.
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] = "result\tformat\n";

   fputs(header, fp);
   fflush(fp);
}
+
+
+
+typedef void (*test_printf_t)(int i);
+
+
/**
 * Build the IR for the JITted function "test_printf": void test_printf(i32).
 * The body exercises lp_build_printf() with zero and two arguments and
 * lp_build_assert() with an always-true condition, then returns void.
 * The function is verified before being returned.
 */
static LLVMValueRef
add_printf_test(struct gallivm_state *gallivm)
{
   LLVMModuleRef module = gallivm->module;
   /* Single i32 parameter; the generated body never reads it. */
   LLVMTypeRef args[1] = { LLVMIntTypeInContext(gallivm->context, 32) };
   LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), args, 1, 0));
   LLVMBuilderRef builder = gallivm->builder;
   LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(gallivm->context, func, "entry");

   LLVMSetFunctionCallConv(func, LLVMCCallConv);

   LLVMPositionBuilderAtEnd(builder, block);
   /* printf with no varargs, then with two constant i32 varargs. */
   lp_build_printf(gallivm, "hello, world\n");
   lp_build_printf(gallivm, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 5, 0),
                   LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 6, 0));

   /* Also test lp_build_assert(). This should not fail. */
   lp_build_assert(gallivm, LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 1, 0), "assert(1)");

   LLVMBuildRetVoid(builder);

   gallivm_verify_function(gallivm, func);

   return func;
}
+
+
/**
 * Build, JIT-compile and execute the printf test function.
 * verbose, fp and testcase are currently unused; success stays TRUE
 * unconditionally, so the test "passes" as long as nothing crashes.
 */
PIPE_ALIGN_STACK
static boolean
test_printf(unsigned verbose, FILE *fp,
            const struct printf_test_case *testcase)
{
   struct gallivm_state *gallivm;
   LLVMValueRef test;
   test_printf_t test_printf_func;
   boolean success = TRUE;

   gallivm = gallivm_create("test_module", LLVMGetGlobalContext());

   test = add_printf_test(gallivm);

   gallivm_compile_module(gallivm);

   test_printf_func = (test_printf_t) gallivm_jit_function(gallivm, test);

   /* The IR is no longer needed once the machine code exists. */
   gallivm_free_ir(gallivm);

   test_printf_func(0);

   gallivm_destroy(gallivm);

   return success;
}
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ boolean success = TRUE;
+
+ test_printf(verbose, fp, NULL);
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+ unsigned long n)
+{
+ return test_all(verbose, fp);
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c
new file mode 100644
index 000000000..316d1c550
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -0,0 +1,321 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "lp_jit.h"
+#include "lp_tex_sample.h"
+#include "lp_state_fs.h"
+#include "lp_debug.h"
+
+
/**
 * This provides the bridge between the sampler state store in
 * lp_jit_context and lp_jit_texture and the sampler code
 * generator. It provides the texture layout information required by
 * the texture sampler code generator in terms of the state stored in
 * lp_jit_context and lp_jit_texture in runtime.
 */
struct llvmpipe_sampler_dynamic_state
{
   /* Callback table consumed by the generic sampler code generator. */
   struct lp_sampler_dynamic_state base;

   /* Per-unit static sampler/texture state; stored by reference, not
    * copied, so the caller's array must outlive this object. */
   const struct lp_sampler_static_state *static_state;
};
+
+
/**
 * This is the bridge between our sampler and the TGSI translator.
 */
struct lp_llvm_sampler_soa
{
   /* Interface the TGSI->LLVM translator calls into. */
   struct lp_build_sampler_soa base;

   /* State accessors handed to the sampler code generator. */
   struct llvmpipe_sampler_dynamic_state dynamic_state;
};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ * \param emit_load if TRUE, emit the LLVM load instruction to actually
+ * fetch the field's value. Otherwise, just emit the
+ * GEP code to address the field.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
+ struct gallivm_state *gallivm,
+ LLVMValueRef context_ptr,
+ unsigned texture_unit,
+ unsigned member_index,
+ const char *member_name,
+ boolean emit_load)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+ /* context[0] */
+ indices[0] = lp_build_const_int32(gallivm, 0);
+ /* context[0].textures */
+ indices[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_TEXTURES);
+ /* context[0].textures[unit] */
+ indices[2] = lp_build_const_int32(gallivm, texture_unit);
+ /* context[0].textures[unit].member */
+ indices[3] = lp_build_const_int32(gallivm, member_index);
+
+ ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), "");
+
+ if (emit_load)
+ res = LLVMBuildLoad(builder, ptr, "");
+ else
+ res = ptr;
+
+ lp_build_name(res, "context.texture%u.%s", texture_unit, member_name);
+
+ return res;
+}
+
+
/**
 * Helper macro to instantiate the functions that generate the code to
 * fetch the members of lp_jit_texture to fulfill the sampler code
 * generator requests.
 *
 * This complexity is the price we have to pay to keep the texture
 * sampler code generator a reusable module without dependencies to
 * llvmpipe internals.
 */
#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \
   static LLVMValueRef \
   lp_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \
                            struct gallivm_state *gallivm, \
                            LLVMValueRef context_ptr, \
                            unsigned texture_unit) \
   { \
      return lp_llvm_texture_member(base, gallivm, context_ptr, \
                                    texture_unit, _index, #_name, _emit_load ); \
   }


/* One accessor per lp_jit_texture field. The third macro argument
 * selects whether the value itself is loaded (TRUE) or only its
 * address is computed (FALSE, for array-valued members). */
LP_LLVM_TEXTURE_MEMBER(width,      LP_JIT_TEXTURE_WIDTH, TRUE)
LP_LLVM_TEXTURE_MEMBER(height,     LP_JIT_TEXTURE_HEIGHT, TRUE)
LP_LLVM_TEXTURE_MEMBER(depth,      LP_JIT_TEXTURE_DEPTH, TRUE)
LP_LLVM_TEXTURE_MEMBER(first_level, LP_JIT_TEXTURE_FIRST_LEVEL, TRUE)
LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE)
LP_LLVM_TEXTURE_MEMBER(base_ptr,   LP_JIT_TEXTURE_BASE, TRUE)
LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE)
LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE)
LP_LLVM_TEXTURE_MEMBER(mip_offsets, LP_JIT_TEXTURE_MIP_OFFSETS, FALSE)
+
+
+/**
+ * Fetch the specified member of the lp_jit_sampler structure.
+ * \param emit_load if TRUE, emit the LLVM load instruction to actually
+ * fetch the field's value. Otherwise, just emit the
+ * GEP code to address the field.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_sampler_member(const struct lp_sampler_dynamic_state *base,
+ struct gallivm_state *gallivm,
+ LLVMValueRef context_ptr,
+ unsigned sampler_unit,
+ unsigned member_index,
+ const char *member_name,
+ boolean emit_load)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(sampler_unit < PIPE_MAX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = lp_build_const_int32(gallivm, 0);
+ /* context[0].samplers */
+ indices[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_SAMPLERS);
+ /* context[0].samplers[unit] */
+ indices[2] = lp_build_const_int32(gallivm, sampler_unit);
+ /* context[0].samplers[unit].member */
+ indices[3] = lp_build_const_int32(gallivm, member_index);
+
+ ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), "");
+
+ if (emit_load)
+ res = LLVMBuildLoad(builder, ptr, "");
+ else
+ res = ptr;
+
+ lp_build_name(res, "context.sampler%u.%s", sampler_unit, member_name);
+
+ return res;
+}
+
+
/**
 * Helper macro to instantiate the functions that fetch the members of
 * lp_jit_sampler, mirroring LP_LLVM_TEXTURE_MEMBER above.
 */
#define LP_LLVM_SAMPLER_MEMBER(_name, _index, _emit_load) \
   static LLVMValueRef \
   lp_llvm_sampler_##_name( const struct lp_sampler_dynamic_state *base, \
                            struct gallivm_state *gallivm, \
                            LLVMValueRef context_ptr, \
                            unsigned sampler_unit) \
   { \
      return lp_llvm_sampler_member(base, gallivm, context_ptr, \
                                    sampler_unit, _index, #_name, _emit_load ); \
   }


/* One accessor per lp_jit_sampler field; border_color only computes the
 * address (FALSE) since it is array-valued. */
LP_LLVM_SAMPLER_MEMBER(min_lod,      LP_JIT_SAMPLER_MIN_LOD, TRUE)
LP_LLVM_SAMPLER_MEMBER(max_lod,      LP_JIT_SAMPLER_MAX_LOD, TRUE)
LP_LLVM_SAMPLER_MEMBER(lod_bias,     LP_JIT_SAMPLER_LOD_BIAS, TRUE)
LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE)
+
+
/**
 * Destroy a lp_llvm_sampler_soa. No state beyond the allocation itself
 * is owned (static_state is borrowed), so a plain FREE suffices.
 */
static void
lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
{
   FREE(sampler);
}
+
+
+/**
+ * Fetch filtered values from texture.
+ * The 'texel' parameter returns four vectors corresponding to R, G, B, A.
+ */
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+ struct gallivm_state *gallivm,
+ const struct lp_sampler_params *params)
+{
+ struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+ unsigned texture_index = params->texture_index;
+ unsigned sampler_index = params->sampler_index;
+
+ assert(sampler_index < PIPE_MAX_SAMPLERS);
+ assert(texture_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+ if (LP_PERF & PERF_NO_TEX) {
+ lp_build_sample_nop(gallivm, params->type, params->coords, params->texel);
+ return;
+ }
+
+ lp_build_sample_soa(&sampler->dynamic_state.static_state[texture_index].texture_state,
+ &sampler->dynamic_state.static_state[sampler_index].sampler_state,
+ &sampler->dynamic_state.base,
+ gallivm, params);
+}
+
+/**
+ * Fetch the texture size.
+ */
+static void
+lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base,
+ struct gallivm_state *gallivm,
+ struct lp_type type,
+ unsigned texture_unit,
+ unsigned target,
+ LLVMValueRef context_ptr,
+ boolean is_sviewinfo,
+ enum lp_sampler_lod_property lod_property,
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef *sizes_out)
+{
+ struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+ assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+
+ lp_build_size_query_soa(gallivm,
+ &sampler->dynamic_state.static_state[texture_unit].texture_state,
+ &sampler->dynamic_state.base,
+ type,
+ texture_unit,
+ target,
+ context_ptr,
+ is_sviewinfo,
+ lod_property,
+ explicit_lod,
+ sizes_out);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state)
+{
+ struct lp_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+ sampler->base.emit_tex_sample = lp_llvm_sampler_soa_emit_fetch_texel;
+ sampler->base.emit_size_query = lp_llvm_sampler_soa_emit_size_query;
+ sampler->dynamic_state.base.width = lp_llvm_texture_width;
+ sampler->dynamic_state.base.height = lp_llvm_texture_height;
+ sampler->dynamic_state.base.depth = lp_llvm_texture_depth;
+ sampler->dynamic_state.base.first_level = lp_llvm_texture_first_level;
+ sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level;
+ sampler->dynamic_state.base.base_ptr = lp_llvm_texture_base_ptr;
+ sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride;
+ sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride;
+ sampler->dynamic_state.base.mip_offsets = lp_llvm_texture_mip_offsets;
+ sampler->dynamic_state.base.min_lod = lp_llvm_sampler_min_lod;
+ sampler->dynamic_state.base.max_lod = lp_llvm_sampler_max_lod;
+ sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias;
+ sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color;
+
+ sampler->dynamic_state.static_state = static_state;
+
+ return &sampler->base;
+}
+
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h
new file mode 100644
index 000000000..f4aff226c
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -0,0 +1,46 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_TEX_SAMPLE_H
+#define LP_TEX_SAMPLE_H
+
+
+#include "gallivm/lp_bld.h"
+
+
struct lp_sampler_static_state;


/**
 * Pure-LLVM texture sampling code generator.
 *
 * \param key  per-unit static sampler/texture state the generated code is
 *             specialized for; stored by reference, so it must outlive the
 *             returned object.
 */
struct lp_build_sampler_soa *
lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key);
+
+
+#endif /* LP_TEX_SAMPLE_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c
new file mode 100644
index 000000000..af46342fd
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -0,0 +1,815 @@
+/**************************************************************************
+ *
+ * Copyright 2006 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keithw@vmware.com>
+ * Michel Dänzer <daenzer@vmware.com>
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_cpu_detect.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/simple_list.h"
+#include "util/u_transfer.h"
+
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_screen.h"
+#include "lp_texture.h"
+#include "lp_setup.h"
+#include "lp_state.h"
+#include "lp_rast.h"
+
+#include "state_tracker/sw_winsys.h"
+
+
+#ifdef DEBUG
+static struct llvmpipe_resource resource_list;
+#endif
+static unsigned id_counter = 0;
+
+
/**
 * Conventional allocation path for non-display textures:
 * Compute per-level strides and mip offsets and (optionally) allocate
 * the backing storage.
 *
 * \param screen    unused here (kept for symmetry with the DT path)
 * \param lpr       resource whose row_stride/img_stride/mip_offsets get filled
 * \param allocate  if TRUE, also allocate and zero lpr->tex_data
 * \return TRUE on success; FALSE if any level or the running total would
 *         exceed LP_MAX_TEXTURE_SIZE, or if allocation fails.
 */
static boolean
llvmpipe_texture_layout(struct llvmpipe_screen *screen,
                        struct llvmpipe_resource *lpr,
                        boolean allocate)
{
   struct pipe_resource *pt = &lpr->base;
   unsigned level;
   unsigned width = pt->width0;
   unsigned height = pt->height0;
   unsigned depth = pt->depth0;
   uint64_t total_size = 0;
   unsigned layers = pt->array_size;
   /* XXX:
    * This alignment here (same for displaytarget) was added for the purpose of
    * ARB_map_buffer_alignment. I am not convinced it's needed for non-buffer
    * resources. Otherwise we'd want the max of cacheline size and 16 (max size
    * of a block for all formats) though this should not be strictly necessary
    * either. In any case it can only affect compressed or 1d textures.
    */
   unsigned mip_align = MAX2(64, util_cpu_caps.cacheline);

   assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
   assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS);

   for (level = 0; level <= pt->last_level; level++) {
      uint64_t mipsize;
      unsigned align_x, align_y, nblocksx, nblocksy, block_size, num_slices;

      /* Row stride and image stride */

      /* For non-compressed formats we need 4x4 pixel alignment
       * so we can read/write LP_RASTER_BLOCK_SIZE when rendering to them.
       * We also want cache line size in x direction,
       * otherwise same cache line could end up in multiple threads.
       * For explicit 1d resources however we reduce this to 4x1 and
       * handle specially in render output code (as we need to do special
       * handling there for buffers in any case).
       */
      if (util_format_is_compressed(pt->format))
         align_x = align_y = 1;
      else {
         align_x = LP_RASTER_BLOCK_SIZE;
         if (llvmpipe_resource_is_1d(&lpr->base))
            align_y = 1;
         else
            align_y = LP_RASTER_BLOCK_SIZE;
      }

      nblocksx = util_format_get_nblocksx(pt->format,
                                          align(width, align_x));
      nblocksy = util_format_get_nblocksy(pt->format,
                                          align(height, align_y));
      block_size = util_format_get_blocksize(pt->format);

      if (util_format_is_compressed(pt->format))
         lpr->row_stride[level] = nblocksx * block_size;
      else
         lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline);

      /* if row_stride * height > LP_MAX_TEXTURE_SIZE */
      if ((uint64_t)lpr->row_stride[level] * nblocksy > LP_MAX_TEXTURE_SIZE) {
         /* image too large */
         goto fail;
      }

      lpr->img_stride[level] = lpr->row_stride[level] * nblocksy;

      /* Number of 3D image slices, cube faces or texture array layers */
      if (lpr->base.target == PIPE_TEXTURE_CUBE) {
         assert(layers == 6);
      }

      if (lpr->base.target == PIPE_TEXTURE_3D)
         num_slices = depth;
      else if (lpr->base.target == PIPE_TEXTURE_1D_ARRAY ||
               lpr->base.target == PIPE_TEXTURE_2D_ARRAY ||
               lpr->base.target == PIPE_TEXTURE_CUBE ||
               lpr->base.target == PIPE_TEXTURE_CUBE_ARRAY)
         num_slices = layers;
      else
         num_slices = 1;

      /* if img_stride * num_slices_faces > LP_MAX_TEXTURE_SIZE */
      mipsize = (uint64_t)lpr->img_stride[level] * num_slices;
      if (mipsize > LP_MAX_TEXTURE_SIZE) {
         /* volume too large */
         goto fail;
      }

      /* Byte offset of this mip level within tex_data. */
      lpr->mip_offsets[level] = total_size;

      total_size += align((unsigned)mipsize, mip_align);
      if (total_size > LP_MAX_TEXTURE_SIZE) {
         goto fail;
      }

      /* Compute size of next mipmap level */
      width = u_minify(width, 1);
      height = u_minify(height, 1);
      depth = u_minify(depth, 1);
   }

   if (allocate) {
      lpr->tex_data = align_malloc(total_size, mip_align);
      if (!lpr->tex_data) {
         return FALSE;
      }
      else {
         /* Zero the storage so uninitialized texels read back as zeros. */
         memset(lpr->tex_data, 0, total_size);
      }
   }

   return TRUE;

fail:
   return FALSE;
}
+
+
+/**
+ * Check the size of the texture specified by 'res'.
+ * \return TRUE if OK, FALSE if too large.
+ */
+static boolean
+llvmpipe_can_create_resource(struct pipe_screen *screen,
+ const struct pipe_resource *res)
+{
+ struct llvmpipe_resource lpr;
+ memset(&lpr, 0, sizeof(lpr));
+ lpr.base = *res;
+ return llvmpipe_texture_layout(llvmpipe_screen(screen), &lpr, false);
+}
+
+
/**
 * Allocate storage for a displayable surface through the sw winsys and
 * clear it to zeros.
 * \return TRUE on success, FALSE if the winsys allocation fails.
 */
static boolean
llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
                              struct llvmpipe_resource *lpr)
{
   struct sw_winsys *winsys = screen->winsys;

   /* Round up the surface size to a multiple of the tile size to
    * avoid tile clipping.
    */
   const unsigned width = MAX2(1, align(lpr->base.width0, TILE_SIZE));
   const unsigned height = MAX2(1, align(lpr->base.height0, TILE_SIZE));

   /* 64-byte alignment, matching the mip alignment used for textures. */
   lpr->dt = winsys->displaytarget_create(winsys,
                                          lpr->base.bind,
                                          lpr->base.format,
                                          width, height,
                                          64,
                                          &lpr->row_stride[0] );

   if (lpr->dt == NULL)
      return FALSE;

   /* Map once to clear the new surface; a NULL map is tolerated here. */
   {
      void *map = winsys->displaytarget_map(winsys, lpr->dt,
                                            PIPE_TRANSFER_WRITE);

      if (map)
         memset(map, 0, height * lpr->row_stride[0]);

      winsys->displaytarget_unmap(winsys, lpr->dt);
   }

   return TRUE;
}
+
+
/**
 * Create a resource: a displayable surface, a regular texture, or a
 * linear buffer, depending on the template's target and bind flags.
 * Returns NULL on allocation/layout failure.
 */
static struct pipe_resource *
llvmpipe_resource_create(struct pipe_screen *_screen,
                         const struct pipe_resource *templat)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
   struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
   if (!lpr)
      return NULL;

   lpr->base = *templat;
   pipe_reference_init(&lpr->base.reference, 1);
   lpr->base.screen = &screen->base;

   /* assert(lpr->base.bind); */

   if (llvmpipe_resource_is_texture(&lpr->base)) {
      if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET |
                            PIPE_BIND_SCANOUT |
                            PIPE_BIND_SHARED)) {
         /* displayable surface */
         if (!llvmpipe_displaytarget_layout(screen, lpr))
            goto fail;
      }
      else {
         /* texture map */
         if (!llvmpipe_texture_layout(screen, lpr, true))
            goto fail;
      }
   }
   else {
      /* other data (vertex buffer, const buffer, etc) */
      const uint bytes = templat->width0;
      assert(util_format_get_blocksize(templat->format) == 1);
      assert(templat->height0 == 1);
      assert(templat->depth0 == 1);
      assert(templat->last_level == 0);
      /*
       * Reserve some extra storage since if we'd render to a buffer we
       * read/write always LP_RASTER_BLOCK_SIZE pixels, but the element
       * offset doesn't need to be aligned to LP_RASTER_BLOCK_SIZE.
       */
      lpr->data = align_malloc(bytes + (LP_RASTER_BLOCK_SIZE - 1) * 4 * sizeof(float), 64);

      /*
       * buffers don't really have stride but it's probably safer
       * (for code doing same calculations for buffers and textures)
       * to put something sane in there.
       */
      lpr->row_stride[0] = bytes;
      if (!lpr->data)
         goto fail;
      memset(lpr->data, 0, bytes);
   }

   /* Unique id, used for debug output. */
   lpr->id = id_counter++;

#ifdef DEBUG
   insert_at_tail(&resource_list, lpr);
#endif

   return &lpr->base;

 fail:
   FREE(lpr);
   return NULL;
}
+
+
/**
 * Destroy a resource, releasing whichever kind of storage it owns:
 * a winsys display target, linear texture data, or buffer data
 * (unless the buffer memory is user-owned).
 */
static void
llvmpipe_resource_destroy(struct pipe_screen *pscreen,
                          struct pipe_resource *pt)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(pscreen);
   struct llvmpipe_resource *lpr = llvmpipe_resource(pt);

   if (lpr->dt) {
      /* display target */
      struct sw_winsys *winsys = screen->winsys;
      winsys->displaytarget_destroy(winsys, lpr->dt);
   }
   else if (llvmpipe_resource_is_texture(pt)) {
      /* free linear image data */
      if (lpr->tex_data) {
         align_free(lpr->tex_data);
         lpr->tex_data = NULL;
      }
   }
   else if (!lpr->userBuffer) {
      /* buffer storage allocated in llvmpipe_resource_create() */
      assert(lpr->data);
      align_free(lpr->data);
   }

#ifdef DEBUG
   /* Remove from the debug tracking list if it was ever inserted. */
   if (lpr->next)
      remove_from_list(lpr);
#endif

   FREE(lpr);
}
+
+
/**
 * Map a resource for read/write.
 *
 * \param level      mip level to map
 * \param layer      cube face, array layer or 3D slice to map
 * \param tex_usage  one of LP_TEX_USAGE_READ / READ_WRITE / WRITE_ALL
 * \return pointer to the start of the requested image
 */
void *
llvmpipe_resource_map(struct pipe_resource *resource,
                      unsigned level,
                      unsigned layer,
                      enum lp_texture_usage tex_usage)
{
   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
   uint8_t *map;

   assert(level < LP_MAX_TEXTURE_LEVELS);
   assert(layer < (u_minify(resource->depth0, level) + resource->array_size - 1));

   assert(tex_usage == LP_TEX_USAGE_READ ||
          tex_usage == LP_TEX_USAGE_READ_WRITE ||
          tex_usage == LP_TEX_USAGE_WRITE_ALL);

   if (lpr->dt) {
      /* display target */
      struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen);
      struct sw_winsys *winsys = screen->winsys;
      unsigned dt_usage;

      /* Any non-READ usage maps read/write through the winsys. */
      if (tex_usage == LP_TEX_USAGE_READ) {
         dt_usage = PIPE_TRANSFER_READ;
      }
      else {
         dt_usage = PIPE_TRANSFER_READ_WRITE;
      }

      /* Display targets have a single level/layer. */
      assert(level == 0);
      assert(layer == 0);

      /* FIXME: keep map count? */
      map = winsys->displaytarget_map(winsys, lpr->dt, dt_usage);

      /* install this linear image in texture data structure */
      lpr->tex_data = map;

      return map;
   }
   else if (llvmpipe_resource_is_texture(resource)) {
      /* regular texture: address the requested image directly */
      map = llvmpipe_get_texture_image_address(lpr, layer, level);
      return map;
   }
   else {
      /* linear buffer */
      return lpr->data;
   }
}
+
+
+/**
+ * Unmap a resource.
+ */
+void
+llvmpipe_resource_unmap(struct pipe_resource *resource,
+ unsigned level,
+ unsigned layer)
+{
+ struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+ if (lpr->dt) {
+ /* display target */
+ struct llvmpipe_screen *lp_screen = llvmpipe_screen(resource->screen);
+ struct sw_winsys *winsys = lp_screen->winsys;
+
+ assert(level == 0);
+ assert(layer == 0);
+
+ winsys->displaytarget_unmap(winsys, lpr->dt);
+ }
+}
+
+
+void *
+llvmpipe_resource_data(struct pipe_resource *resource)
+{
+ struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+ assert(!llvmpipe_resource_is_texture(resource));
+
+ return lpr->data;
+}
+
+
/**
 * Wrap an externally shared winsys handle as a display-target resource.
 * Returns NULL on allocation failure or if the winsys rejects the handle.
 */
static struct pipe_resource *
llvmpipe_resource_from_handle(struct pipe_screen *screen,
                              const struct pipe_resource *template,
                              struct winsys_handle *whandle)
{
   struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
   struct llvmpipe_resource *lpr;

   /* XXX depth textures created from a handle don't seem to work that well */

   lpr = CALLOC_STRUCT(llvmpipe_resource);
   if (!lpr) {
      goto no_lpr;
   }

   lpr->base = *template;
   pipe_reference_init(&lpr->base.reference, 1);
   lpr->base.screen = screen;

   /*
    * Looks like unaligned displaytargets work just fine,
    * at least sampler/render ones.
    */
#if 0
   assert(lpr->base.width0 == width);
   assert(lpr->base.height0 == height);
#endif

   lpr->dt = winsys->displaytarget_from_handle(winsys,
                                               template,
                                               whandle,
                                               &lpr->row_stride[0]);
   if (!lpr->dt) {
      goto no_dt;
   }

   /* Unique id, used for debug output. */
   lpr->id = id_counter++;

#ifdef DEBUG
   insert_at_tail(&resource_list, lpr);
#endif

   return &lpr->base;

no_dt:
   FREE(lpr);
no_lpr:
   return NULL;
}
+
+
+static boolean
+llvmpipe_resource_get_handle(struct pipe_screen *screen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle)
+{
+ struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
+ struct llvmpipe_resource *lpr = llvmpipe_resource(pt);
+
+ assert(lpr->dt);
+ if (!lpr->dt)
+ return FALSE;
+
+ return winsys->displaytarget_get_handle(winsys, lpr->dt, whandle);
+}
+
+
/**
 * Map a region of a resource for CPU access and create the transfer
 * object describing the mapping. Pending rendering touching the resource
 * is flushed first (unless PIPE_TRANSFER_UNSYNCHRONIZED).
 *
 * \return pointer to the requested region, or NULL if the flush would
 *         block under PIPE_TRANSFER_DONTBLOCK or allocation fails.
 */
static void *
llvmpipe_transfer_map( struct pipe_context *pipe,
                       struct pipe_resource *resource,
                       unsigned level,
                       unsigned usage,
                       const struct pipe_box *box,
                       struct pipe_transfer **transfer )
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
   struct llvmpipe_transfer *lpt;
   struct pipe_transfer *pt;
   ubyte *map;
   enum pipe_format format;
   enum lp_texture_usage tex_usage;
   const char *mode;

   assert(resource);
   assert(level <= resource->last_level);

   /*
    * Transfers, like other pipe operations, must happen in order, so flush the
    * context if necessary.
    */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      boolean read_only = !(usage & PIPE_TRANSFER_WRITE);
      boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK);
      if (!llvmpipe_flush_resource(pipe, resource,
                                   level,
                                   read_only,
                                   TRUE, /* cpu_access */
                                   do_not_block,
                                   __FUNCTION__)) {
         /*
          * It would have blocked, but the state tracker requested not to.
          */
         assert(do_not_block);
         return NULL;
      }
   }

   /* Check if we're mapping the current constant buffer */
   if ((usage & PIPE_TRANSFER_WRITE) &&
       (resource->bind & PIPE_BIND_CONSTANT_BUFFER)) {
      unsigned i;
      for (i = 0; i < Elements(llvmpipe->constants[PIPE_SHADER_FRAGMENT]); ++i) {
         if (resource == llvmpipe->constants[PIPE_SHADER_FRAGMENT][i].buffer) {
            /* constants may have changed */
            llvmpipe->dirty |= LP_NEW_CONSTANTS;
            break;
         }
      }
   }

   lpt = CALLOC_STRUCT(llvmpipe_transfer);
   if (!lpt)
      return NULL;
   pt = &lpt->base;
   pipe_resource_reference(&pt->resource, resource);
   pt->box = *box;
   pt->level = level;
   pt->stride = lpr->row_stride[level];
   pt->layer_stride = lpr->img_stride[level];
   pt->usage = usage;
   *transfer = pt;

   assert(level < LP_MAX_TEXTURE_LEVELS);

   /*
   printf("tex_transfer_map(%d, %d  %d x %d of %d x %d,  usage %d )\n",
          transfer->x, transfer->y, transfer->width, transfer->height,
          transfer->texture->width0,
          transfer->texture->height0,
          transfer->usage);
   */

   /* Pure reads map READ; anything else maps READ_WRITE. */
   if (usage == PIPE_TRANSFER_READ) {
      tex_usage = LP_TEX_USAGE_READ;
      mode = "read";
   }
   else {
      tex_usage = LP_TEX_USAGE_READ_WRITE;
      mode = "read/write";
   }

   if (0) {
      printf("transfer map tex %u  mode %s\n", lpr->id, mode);
   }

   format = lpr->base.format;

   map = llvmpipe_resource_map(resource,
                               level,
                               box->z,
                               tex_usage);


   /* May want to do different things here depending on read/write nature
    * of the map:
    */
   if (usage & PIPE_TRANSFER_WRITE) {
      /* Do something to notify sharing contexts of a texture change.
       */
      screen->timestamp++;
   }

   /* Advance to the requested box origin, in format blocks. */
   map +=
      box->y / util_format_get_blockheight(format) * pt->stride +
      box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);

   return map;
}
+
+
+/**
+ * Unmap a transfer previously returned by llvmpipe_transfer_map(),
+ * release the resource reference taken at map time and free the
+ * transfer wrapper.  (Fix: the original asserted transfer->resource
+ * twice; the duplicate assert is removed.)
+ */
+static void
+llvmpipe_transfer_unmap(struct pipe_context *pipe,
+                        struct pipe_transfer *transfer)
+{
+   assert(transfer->resource);
+
+   llvmpipe_resource_unmap(transfer->resource,
+                           transfer->level,
+                           transfer->box.z);
+
+   /* Effectively do the texture_update work here - if texture images
+    * needed post-processing to put them into hardware layout, this is
+    * where it would happen. For llvmpipe, nothing to do.
+    */
+
+   /* drop the reference taken in transfer_map and free the wrapper */
+   pipe_resource_reference(&transfer->resource, NULL);
+   FREE(transfer);
+}
+
+/**
+ * Report whether the rasterizer still references the given resource
+ * (LP_UNREFERENCED / LP_REFERENCED_FOR_READ / LP_REFERENCED_FOR_WRITE).
+ */
+unsigned int
+llvmpipe_is_resource_referenced( struct pipe_context *pipe,
+                                 struct pipe_resource *presource,
+                                 unsigned level)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+   const unsigned mask = PIPE_BIND_DEPTH_STENCIL |
+                         PIPE_BIND_RENDER_TARGET |
+                         PIPE_BIND_SAMPLER_VIEW;
+
+   /*
+    * XXX only resources with the right bind flags are checked, which is
+    * unsafe with the opengl state tracker: resources can end up bound in
+    * places they weren't meant for (e.g. buffers bound as sampler views).
+    */
+   if ((presource->bind & mask) == 0)
+      return LP_UNREFERENCED;
+
+   return lp_setup_is_resource_referenced(llvmpipe->setup, presource);
+}
+
+
+/**
+ * Returns the largest possible alignment (in bytes) for a format in
+ * llvmpipe, derived from the sum of the channel bit sizes.
+ */
+unsigned
+llvmpipe_get_format_alignment( enum pipe_format format )
+{
+   const struct util_format_description *desc = util_format_description(format);
+   unsigned bits = 0;
+   unsigned chan;
+   unsigned bytes;
+
+   /* total bits across all channels */
+   for (chan = 0; chan < desc->nr_channels; chan++)
+      bits += desc->channel[chan].size;
+
+   bytes = bits / 8;
+
+   /* fall back to the per-channel size when the total isn't a power of two */
+   if (!util_is_power_of_two(bytes))
+      bytes /= desc->nr_channels;
+
+   /* odd or zero byte counts can only be byte-aligned */
+   if (bytes < 1 || (bytes & 1))
+      return 1;
+
+   return bytes;
+}
+
+
+/**
+ * Create a pipe_resource that wraps caller-owned user-space memory.
+ * The data is not copied; the caller retains ownership of ptr.
+ */
+struct pipe_resource *
+llvmpipe_user_buffer_create(struct pipe_screen *screen,
+                            void *ptr,
+                            unsigned bytes,
+                            unsigned bind_flags)
+{
+   struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
+
+   if (!lpr)
+      return NULL;
+
+   pipe_reference_init(&lpr->base.reference, 1);
+   lpr->base.screen = screen;
+   lpr->base.format = PIPE_FORMAT_R8_UNORM; /* arbitrary; buffers are untyped */
+   lpr->base.bind = bind_flags;
+   lpr->base.usage = PIPE_USAGE_IMMUTABLE;
+   lpr->base.flags = 0;
+   lpr->base.width0 = bytes;
+   lpr->base.height0 = 1;
+   lpr->base.depth0 = 1;
+   lpr->base.array_size = 1;
+   lpr->userBuffer = TRUE;
+   lpr->data = ptr;
+
+   return &lpr->base;
+}
+
+
+/**
+ * Size in bytes of one image of a texture mipmap level: a single cube
+ * face, array layer or 3D slice.  This is simply the image stride at
+ * that level.
+ */
+static unsigned
+tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned lvl)
+{
+   return lpr->img_stride[lvl];
+}
+
+
+/**
+ * Return pointer to a 2D texture image/face/slice.
+ * No tiled/linear conversion is done.
+ */
+ubyte *
+llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr,
+                                   unsigned face_slice, unsigned level)
+{
+   assert(llvmpipe_resource_is_texture(&lpr->base));
+
+   /* start of the mip level, plus whole images for preceding faces/slices
+    * (the second term is zero for face_slice == 0)
+    */
+   return (ubyte *) lpr->tex_data
+          + lpr->mip_offsets[level]
+          + face_slice * tex_image_face_size(lpr, level);
+}
+
+
+/**
+ * Return size of resource in bytes.
+ */
+unsigned
+llvmpipe_resource_size(const struct pipe_resource *resource)
+{
+   const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource);
+
+   /* plain buffers: the width is the byte size */
+   if (!llvmpipe_resource_is_texture(resource))
+      return resource->width0;
+
+   /* Note this will always return 0 for displaytarget resources */
+   return lpr->total_alloc_size;
+}
+
+
+#ifdef DEBUG
+/**
+ * Debug helper: dump every live llvmpipe resource (walks the global
+ * resource_list) together with its dimensions, size and refcount.
+ */
+void
+llvmpipe_print_resources(void)
+{
+   struct llvmpipe_resource *lpr;
+   unsigned count = 0, total_bytes = 0;
+
+   debug_printf("LLVMPIPE: current resources:\n");
+   foreach(lpr, &resource_list) {
+      const unsigned sz = llvmpipe_resource_size(&lpr->base);
+      debug_printf("resource %u at %p, size %ux%ux%u: %u bytes, refcount %u\n",
+                   lpr->id, (void *) lpr,
+                   lpr->base.width0, lpr->base.height0, lpr->base.depth0,
+                   sz, lpr->base.reference.count);
+      total_bytes += sz;
+      count++;
+   }
+   debug_printf("LLVMPIPE: total size of %u resources: %u\n", count, total_bytes);
+}
+#endif
+
+
+/**
+ * Plug llvmpipe's resource management entry points into the screen.
+ */
+void
+llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
+{
+#ifdef DEBUG
+   /* one-time init of the linked list used to track live resources */
+   static boolean list_ready = FALSE;
+   if (!list_ready) {
+      memset(&resource_list, 0, sizeof(resource_list));
+      make_empty_list(&resource_list);
+      list_ready = TRUE;
+   }
+#endif
+
+   screen->resource_create = llvmpipe_resource_create;
+   screen->resource_destroy = llvmpipe_resource_destroy;
+   screen->resource_from_handle = llvmpipe_resource_from_handle;
+   screen->resource_get_handle = llvmpipe_resource_get_handle;
+   screen->can_create_resource = llvmpipe_can_create_resource;
+}
+
+
+/**
+ * Plug llvmpipe's transfer entry points into the context.
+ */
+void
+llvmpipe_init_context_resource_funcs(struct pipe_context *pipe)
+{
+   /* map/unmap are implemented by llvmpipe itself */
+   pipe->transfer_map = llvmpipe_transfer_map;
+   pipe->transfer_unmap = llvmpipe_transfer_unmap;
+
+   /* the remaining transfer hooks use the shared default helpers */
+   pipe->transfer_flush_region = u_default_transfer_flush_region;
+   pipe->transfer_inline_write = u_default_transfer_inline_write;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h
new file mode 100644
index 000000000..3d315bb9a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -0,0 +1,239 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_TEXTURE_H
+#define LP_TEXTURE_H
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "lp_limits.h"
+
+
+/**
+ * How a texture image will be accessed via llvmpipe_resource_map().
+ * Values start at 100, presumably to make mixed-up/uninitialized enum
+ * values easy to spot while debugging — TODO confirm.
+ */
+enum lp_texture_usage
+{
+   LP_TEX_USAGE_READ = 100,      /* map for reading only */
+   LP_TEX_USAGE_READ_WRITE,      /* map for reading and writing */
+   LP_TEX_USAGE_WRITE_ALL        /* caller overwrites the whole image — name-implied, confirm */
+};
+
+
+struct pipe_context;
+struct pipe_screen;
+struct llvmpipe_context;
+
+struct sw_displaytarget;
+
+
+/**
+ * llvmpipe subclass of pipe_resource.  A texture, drawing surface,
+ * vertex buffer, const buffer, etc.
+ * Textures are stored differently than other types of objects such as
+ * vertex buffers and const buffers.
+ * The latter are simple malloc'd blocks of memory.
+ */
+struct llvmpipe_resource
+{
+   struct pipe_resource base;
+
+   /** Row stride in bytes */
+   unsigned row_stride[LP_MAX_TEXTURE_LEVELS];
+   /** Image stride (for cube maps, array or 3D textures) in bytes */
+   unsigned img_stride[LP_MAX_TEXTURE_LEVELS];
+   /** Offset to start of mipmap level, in bytes */
+   unsigned mip_offsets[LP_MAX_TEXTURE_LEVELS];
+   /** allocated total size (for non-display target texture resources only) */
+   unsigned total_alloc_size;
+
+   /**
+    * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET
+    * usage.
+    */
+   struct sw_displaytarget *dt;
+
+   /**
+    * Malloc'ed data for regular textures, or a mapping to dt above.
+    */
+   void *tex_data;
+
+   /**
+    * Data for non-texture resources (buffers).  For user buffers this
+    * points at caller-owned memory (see userBuffer).
+    */
+   void *data;
+
+   boolean userBuffer;  /** Is this a user-space buffer? (not owned/freed by us) */
+   /* NOTE(review): timestamp is never updated in the code visible here —
+    * confirm who bumps it (cf. the screen-level timestamp in transfer_map). */
+   unsigned timestamp;
+
+   unsigned id;  /**< temporary, for debugging */
+
+#ifdef DEBUG
+   /** for linked list of all resources (llvmpipe_print_resources) */
+   struct llvmpipe_resource *prev, *next;
+#endif
+};
+
+
+/** llvmpipe subclass of pipe_transfer */
+struct llvmpipe_transfer
+{
+   struct pipe_transfer base;
+
+   /* NOTE(review): not assigned anywhere in the visible code — confirm purpose */
+   unsigned long offset;
+};
+
+
+/** cast wrappers: pipe_* base types to the llvmpipe subclasses */
+static inline struct llvmpipe_resource *
+llvmpipe_resource(struct pipe_resource *pt)
+{
+   return (struct llvmpipe_resource *) pt;
+}
+
+
+/** const-qualified variant of llvmpipe_resource() */
+static inline const struct llvmpipe_resource *
+llvmpipe_resource_const(const struct pipe_resource *pt)
+{
+   return (const struct llvmpipe_resource *) pt;
+}
+
+
+/** downcast a pipe_transfer to the llvmpipe transfer wrapper */
+static inline struct llvmpipe_transfer *
+llvmpipe_transfer(struct pipe_transfer *pt)
+{
+   return (struct llvmpipe_transfer *) pt;
+}
+
+
+/* hook-installation entry points, defined in lp_texture.c */
+void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen);
+void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe);
+
+
+/**
+ * Is the resource a texture (any image target) rather than a buffer?
+ * Asserts on unknown targets.
+ */
+static inline boolean
+llvmpipe_resource_is_texture(const struct pipe_resource *resource)
+{
+   switch (resource->target) {
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return TRUE;
+   case PIPE_BUFFER:
+      return FALSE;
+   default:
+      assert(0);
+      return FALSE;
+   }
+}
+
+
+/**
+ * Is the resource laid out as a single row: a buffer, 1D texture or
+ * 1D array texture?  Asserts on unknown targets.
+ */
+static inline boolean
+llvmpipe_resource_is_1d(const struct pipe_resource *resource)
+{
+   switch (resource->target) {
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return FALSE;
+   case PIPE_BUFFER:
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      return TRUE;
+   default:
+      assert(0);
+      return FALSE;
+   }
+}
+
+
+/** Byte stride between array layers / slices at the given mip level. */
+static inline unsigned
+llvmpipe_layer_stride(struct pipe_resource *resource,
+                      unsigned level)
+{
+   assert(level < LP_MAX_TEXTURE_2D_LEVELS);
+   return llvmpipe_resource(resource)->img_stride[level];
+}
+
+
+/** Byte stride between rows at the given mip level. */
+static inline unsigned
+llvmpipe_resource_stride(struct pipe_resource *resource,
+                         unsigned level)
+{
+   assert(level < LP_MAX_TEXTURE_2D_LEVELS);
+   return llvmpipe_resource(resource)->row_stride[level];
+}
+
+
+/* Map one image (level/layer) of a resource; see enum lp_texture_usage. */
+void *
+llvmpipe_resource_map(struct pipe_resource *resource,
+                      unsigned level,
+                      unsigned layer,
+                      enum lp_texture_usage tex_usage);
+
+/* Counterpart to llvmpipe_resource_map(). */
+void
+llvmpipe_resource_unmap(struct pipe_resource *resource,
+                        unsigned level,
+                        unsigned layer);
+
+
+void *
+llvmpipe_resource_data(struct pipe_resource *resource);
+
+
+unsigned
+llvmpipe_resource_size(const struct pipe_resource *resource);
+
+
+ubyte *
+llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr,
+                                   unsigned face_slice, unsigned level);
+
+
+extern void
+llvmpipe_print_resources(void);
+
+
+/* bitmask results of llvmpipe_is_resource_referenced() */
+#define LP_UNREFERENCED         0
+#define LP_REFERENCED_FOR_READ  (1 << 0)
+#define LP_REFERENCED_FOR_WRITE (1 << 1)
+
+unsigned int
+llvmpipe_is_resource_referenced( struct pipe_context *pipe,
+                                 struct pipe_resource *presource,
+                                 unsigned level);
+
+unsigned
+llvmpipe_get_format_alignment(enum pipe_format format);
+
+#endif /* LP_TEXTURE_H */