diff options
Diffstat (limited to 'lib/mesa/src/gallium/drivers/llvmpipe')
82 files changed, 27704 insertions, 0 deletions
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc b/lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc new file mode 100644 index 000000000..0a0aa34e7 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/Automake.inc @@ -0,0 +1,7 @@ +if HAVE_GALLIUM_LLVMPIPE + +TARGET_CPPFLAGS += -DGALLIUM_LLVMPIPE +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la + +endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am new file mode 100644 index 000000000..1d3853e41 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.am @@ -0,0 +1,79 @@ +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(LLVM_CFLAGS) \ + $(MSVC2008_COMPAT_CFLAGS) +AM_CXXFLAGS= \ + $(GALLIUM_DRIVER_CXXFLAGS) \ + $(LLVM_CXXFLAGS) \ + $(MSVC2008_COMPAT_CXXFLAGS) + +noinst_LTLIBRARIES = libllvmpipe.la + +libllvmpipe_la_SOURCES = $(C_SOURCES) + +libllvmpipe_la_LDFLAGS = $(LLVM_LDFLAGS) + +noinst_HEADERS = lp_test.h + +check_PROGRAMS = \ + lp_test_format \ + lp_test_arit \ + lp_test_blend \ + lp_test_conv \ + lp_test_printf +TESTS = $(check_PROGRAMS) + +TEST_LIBS = \ + libllvmpipe.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(LLVM_LIBS) \ + $(DLOPEN_LIBS) \ + $(PTHREAD_LIBS) + +lp_test_format_SOURCES = lp_test_format.c lp_test_main.c +lp_test_format_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_format_SOURCES = dummy.cpp + +lp_test_arit_SOURCES = lp_test_arit.c lp_test_main.c +lp_test_arit_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_arit_SOURCES = dummy.cpp + +lp_test_blend_SOURCES = lp_test_blend.c lp_test_main.c +lp_test_blend_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_blend_SOURCES = dummy.cpp + +lp_test_conv_SOURCES = lp_test_conv.c lp_test_main.c +lp_test_conv_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_conv_SOURCES = dummy.cpp + +lp_test_printf_SOURCES = lp_test_printf.c lp_test_main.c +lp_test_printf_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_printf_SOURCES = dummy.cpp + +EXTRA_DIST = SConscript diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in new file mode 100644 index 000000000..0274f7e87 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.in @@ -0,0 +1,1529 @@ +# Makefile.in generated by automake 1.15 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2014 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +@HAVE_DRISW_TRUE@am__append_1 = \ +@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la + +check_PROGRAMS = lp_test_format$(EXEEXT) lp_test_arit$(EXEEXT) \ + 
lp_test_blend$(EXEEXT) lp_test_conv$(EXEEXT) \ + lp_test_printf$(EXEEXT) +subdir = src/gallium/drivers/llvmpipe +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \ + $(top_srcdir)/m4/ax_check_python_mako_module.m4 \ + $(top_srcdir)/m4/ax_gcc_builtin.m4 \ + $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4/ax_prog_bison.m4 \ + $(top_srcdir)/m4/ax_prog_flex.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/VERSION $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libllvmpipe_la_LIBADD = +am__objects_1 = lp_bld_alpha.lo lp_bld_blend_aos.lo lp_bld_blend.lo \ + lp_bld_blend_logicop.lo lp_bld_depth.lo lp_bld_interp.lo \ + lp_clear.lo lp_context.lo lp_draw_arrays.lo lp_fence.lo \ + lp_flush.lo lp_jit.lo lp_memory.lo lp_perf.lo lp_query.lo \ + lp_rast.lo lp_rast_debug.lo lp_rast_tri.lo lp_scene.lo \ + lp_scene_queue.lo lp_screen.lo lp_setup.lo lp_setup_line.lo \ + lp_setup_point.lo lp_setup_tri.lo lp_setup_vbuf.lo \ + lp_state_blend.lo lp_state_clip.lo lp_state_derived.lo \ + lp_state_fs.lo lp_state_gs.lo lp_state_rasterizer.lo \ + lp_state_sampler.lo lp_state_setup.lo lp_state_so.lo \ + lp_state_surface.lo lp_state_vertex.lo lp_state_vs.lo \ + lp_surface.lo lp_tex_sample.lo lp_texture.lo +am_libllvmpipe_la_OBJECTS = $(am__objects_1) +libllvmpipe_la_OBJECTS = $(am_libllvmpipe_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libllvmpipe_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libllvmpipe_la_LDFLAGS) $(LDFLAGS) -o \ + $@ +am_lp_test_arit_OBJECTS = lp_test_arit.$(OBJEXT) \ + lp_test_main.$(OBJEXT) +lp_test_arit_OBJECTS = $(am_lp_test_arit_OBJECTS) +am__DEPENDENCIES_1 = +am__DEPENDENCIES_2 = libllvmpipe.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +lp_test_arit_DEPENDENCIES = $(am__DEPENDENCIES_2) +am_lp_test_blend_OBJECTS = lp_test_blend.$(OBJEXT) \ + lp_test_main.$(OBJEXT) +lp_test_blend_OBJECTS = $(am_lp_test_blend_OBJECTS) +lp_test_blend_DEPENDENCIES = $(am__DEPENDENCIES_2) +am_lp_test_conv_OBJECTS = lp_test_conv.$(OBJEXT) \ + lp_test_main.$(OBJEXT) +lp_test_conv_OBJECTS = $(am_lp_test_conv_OBJECTS) +lp_test_conv_DEPENDENCIES = $(am__DEPENDENCIES_2) +am_lp_test_format_OBJECTS = lp_test_format.$(OBJEXT) \ + lp_test_main.$(OBJEXT) +lp_test_format_OBJECTS = $(am_lp_test_format_OBJECTS) +lp_test_format_DEPENDENCIES = $(am__DEPENDENCIES_2) +am_lp_test_printf_OBJECTS = lp_test_printf.$(OBJEXT) \ + lp_test_main.$(OBJEXT) +lp_test_printf_OBJECTS = $(am_lp_test_printf_OBJECTS) +lp_test_printf_DEPENDENCIES = $(am__DEPENDENCIES_2) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/bin/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) 
$(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +am__v_CXX_1 = +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +am__v_CXXLD_1 = +SOURCES = $(libllvmpipe_la_SOURCES) $(lp_test_arit_SOURCES) \ + $(nodist_EXTRA_lp_test_arit_SOURCES) $(lp_test_blend_SOURCES) \ + $(nodist_EXTRA_lp_test_blend_SOURCES) $(lp_test_conv_SOURCES) \ + $(nodist_EXTRA_lp_test_conv_SOURCES) $(lp_test_format_SOURCES) \ + $(nodist_EXTRA_lp_test_format_SOURCES) \ + $(lp_test_printf_SOURCES) \ + $(nodist_EXTRA_lp_test_printf_SOURCES) +DIST_SOURCES = $(libllvmpipe_la_SOURCES) $(lp_test_arit_SOURCES) \ + $(lp_test_blend_SOURCES) $(lp_test_conv_SOURCES) \ + $(lp_test_format_SOURCES) $(lp_test_printf_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(noinst_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of 
newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red='[0;31m'; \ + grn='[0;32m'; \ + lgn='[1;32m'; \ + blu='[1;34m'; \ + mgn='[0;35m'; \ + brg='[1m'; \ + std='[m'; \ + fi; \ +} +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print 
$$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer- defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# thy have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the tests scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problem with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \ + $(top_srcdir)/bin/depcomp $(top_srcdir)/bin/test-driver \ + $(top_srcdir)/src/gallium/Automake.inc +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMDGPU_CFLAGS = @AMDGPU_CFLAGS@ +AMDGPU_LIBS = @AMDGPU_LIBS@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BSYMBOLIC = @BSYMBOLIC@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@ +CLOCK_LIB = @CLOCK_LIB@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@ +DEFINES = @DEFINES@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DLOPEN_LIBS = @DLOPEN_LIBS@ +DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@ +DRI2PROTO_LIBS = @DRI2PROTO_LIBS@ +DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@ +DRI3PROTO_LIBS = @DRI3PROTO_LIBS@ +DRIGL_CFLAGS = 
@DRIGL_CFLAGS@ +DRIGL_LIBS = @DRIGL_LIBS@ +DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@ +DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@ +DRI_LIB_DEPS = @DRI_LIB_DEPS@ +DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGL_CFLAGS = @EGL_CFLAGS@ +EGL_CLIENT_APIS = @EGL_CLIENT_APIS@ +EGL_LIB_DEPS = @EGL_LIB_DEPS@ +EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@ +EGREP = @EGREP@ +ELF_LIB = @ELF_LIB@ +EXEEXT = @EXEEXT@ +EXPAT_CFLAGS = @EXPAT_CFLAGS@ +EXPAT_LIBS = @EXPAT_LIBS@ +FGREP = @FGREP@ +FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@ +FREEDRENO_LIBS = @FREEDRENO_LIBS@ +GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@ +GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@ +GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@ +GC_SECTIONS = @GC_SECTIONS@ +GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@ +GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@ +GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@ +GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@ +GLPROTO_CFLAGS = @GLPROTO_CFLAGS@ +GLPROTO_LIBS = @GLPROTO_LIBS@ +GLX_TLS = @GLX_TLS@ +GL_LIB = @GL_LIB@ +GL_LIB_DEPS = @GL_LIB_DEPS@ +GL_PC_CFLAGS = @GL_PC_CFLAGS@ +GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@ +GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@ +GREP = @GREP@ +HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@ +INDENT = @INDENT@ +INDENT_FLAGS = @INDENT_FLAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTEL_CFLAGS = @INTEL_CFLAGS@ +INTEL_LIBS = @INTEL_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LD_NO_UNDEFINED = @LD_NO_UNDEFINED@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@ +LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@ +LIBDRM_CFLAGS = @LIBDRM_CFLAGS@ +LIBDRM_LIBS = @LIBDRM_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@ +LIBUDEV_LIBS = @LIBUDEV_LIBS@ +LIB_DIR = @LIB_DIR@ +LIB_EXT 
= @LIB_EXT@ +LIPO = @LIPO@ +LLVM_BINDIR = @LLVM_BINDIR@ +LLVM_CFLAGS = @LLVM_CFLAGS@ +LLVM_CONFIG = @LLVM_CONFIG@ +LLVM_CPPFLAGS = @LLVM_CPPFLAGS@ +LLVM_CXXFLAGS = @LLVM_CXXFLAGS@ +LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@ +LLVM_LDFLAGS = @LLVM_LDFLAGS@ +LLVM_LIBDIR = @LLVM_LIBDIR@ +LLVM_LIBS = @LLVM_LIBS@ +LLVM_VERSION = @LLVM_VERSION@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MESA_LLVM = @MESA_LLVM@ +MKDIR_P = @MKDIR_P@ +MSVC2008_COMPAT_CFLAGS = @MSVC2008_COMPAT_CFLAGS@ +MSVC2008_COMPAT_CXXFLAGS = @MSVC2008_COMPAT_CXXFLAGS@ +MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@ +MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@ +NINE_MAJOR = @NINE_MAJOR@ +NINE_MINOR = @NINE_MINOR@ +NINE_TINY = @NINE_TINY@ +NINE_VERSION = @NINE_VERSION@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@ +NOUVEAU_LIBS = @NOUVEAU_LIBS@ +NVVIEUX_CFLAGS = @NVVIEUX_CFLAGS@ +NVVIEUX_LIBS = @NVVIEUX_LIBS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OMX_CFLAGS = @OMX_CFLAGS@ +OMX_LIBS = @OMX_LIBS@ +OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@ +OPENCL_LIBNAME = @OPENCL_LIBNAME@ +OPENCL_VERSION = @OPENCL_VERSION@ +OSMESA_LIB = @OSMESA_LIB@ +OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@ +OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@ +OSMESA_PC_REQ = @OSMESA_PC_REQ@ +OSMESA_VERSION = @OSMESA_VERSION@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSIX_SHELL = @POSIX_SHELL@ +PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@ +PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = 
@PTHREAD_LIBS@ +PYTHON2 = @PYTHON2@ +RADEON_CFLAGS = @RADEON_CFLAGS@ +RADEON_LIBS = @RADEON_LIBS@ +RANLIB = @RANLIB@ +RM = @RM@ +SED = @SED@ +SELINUX_CFLAGS = @SELINUX_CFLAGS@ +SELINUX_LIBS = @SELINUX_LIBS@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SSE41_CFLAGS = @SSE41_CFLAGS@ +STRIP = @STRIP@ +VA_CFLAGS = @VA_CFLAGS@ +VA_LIBS = @VA_LIBS@ +VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@ +VA_MAJOR = @VA_MAJOR@ +VA_MINOR = @VA_MINOR@ +VDPAU_CFLAGS = @VDPAU_CFLAGS@ +VDPAU_LIBS = @VDPAU_LIBS@ +VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@ +VDPAU_MAJOR = @VDPAU_MAJOR@ +VDPAU_MINOR = @VDPAU_MINOR@ +VERSION = @VERSION@ +VG_LIB_DEPS = @VG_LIB_DEPS@ +VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@ +VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@ +VL_CFLAGS = @VL_CFLAGS@ +VL_LIBS = @VL_LIBS@ +WAYLAND_CFLAGS = @WAYLAND_CFLAGS@ +WAYLAND_LIBS = @WAYLAND_LIBS@ +WAYLAND_SCANNER = @WAYLAND_SCANNER@ +WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@ +WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@ +X11_INCLUDES = @X11_INCLUDES@ +XA_MAJOR = @XA_MAJOR@ +XA_MINOR = @XA_MINOR@ +XA_TINY = @XA_TINY@ +XA_VERSION = @XA_VERSION@ +XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@ +XCB_DRI2_LIBS = @XCB_DRI2_LIBS@ +XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@ +XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@ +XLIBGL_CFLAGS = @XLIBGL_CFLAGS@ +XLIBGL_LIBS = @XLIBGL_LIBS@ +XVMC_CFLAGS = @XVMC_CFLAGS@ +XVMC_LIBS = @XVMC_LIBS@ +XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@ +XVMC_MAJOR = @XVMC_MAJOR@ +XVMC_MINOR = @XVMC_MINOR@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acv_mako_found = @acv_mako_found@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ 
+build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +ifGNUmake = @ifGNUmake@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +C_SOURCES := \ + lp_bld_alpha.c \ + lp_bld_alpha.h \ + lp_bld_blend_aos.c \ + lp_bld_blend.c \ + lp_bld_blend.h \ + lp_bld_blend_logicop.c \ + lp_bld_depth.c \ + lp_bld_depth.h \ + lp_bld_interp.c \ + lp_bld_interp.h \ + lp_clear.c \ + lp_clear.h \ + lp_context.c \ + lp_context.h \ + lp_debug.h \ + lp_draw_arrays.c \ + lp_fence.c \ + lp_fence.h \ + lp_flush.c \ + lp_flush.h \ + lp_jit.c \ + lp_jit.h \ + lp_limits.h \ + lp_memory.c \ + lp_memory.h \ + lp_perf.c \ + lp_perf.h \ + lp_public.h \ + lp_query.c \ + lp_query.h \ + lp_rast.c \ + lp_rast_debug.c \ + lp_rast.h \ + lp_rast_priv.h \ + lp_rast_tri.c \ + lp_rast_tri_tmp.h \ + lp_scene.c \ + lp_scene.h \ + lp_scene_queue.c \ + lp_scene_queue.h \ + lp_screen.c \ + lp_screen.h \ + lp_setup.c \ + lp_setup_context.h \ + lp_setup.h \ + lp_setup_line.c \ + lp_setup_point.c \ + lp_setup_tri.c \ + lp_setup_vbuf.c \ + lp_state_blend.c \ + 
lp_state_clip.c \ + lp_state_derived.c \ + lp_state_fs.c \ + lp_state_fs.h \ + lp_state_gs.c \ + lp_state.h \ + lp_state_rasterizer.c \ + lp_state_sampler.c \ + lp_state_setup.c \ + lp_state_setup.h \ + lp_state_so.c \ + lp_state_surface.c \ + lp_state_vertex.c \ + lp_state_vs.c \ + lp_surface.c \ + lp_surface.h \ + lp_tex_sample.c \ + lp_tex_sample.h \ + lp_texture.c \ + lp_texture.h + +GALLIUM_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) + + +# src/gallium/auxiliary must appear before src/gallium/drivers +# because there are stupidly two rbug_context.h files in +# different directories, and which one is included by the +# preprocessor is determined by the ordering of the -I flags. +GALLIUM_DRIVER_CFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_DRIVER_CXXFLAGS = \ + -I$(srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(VISIBILITY_CXXFLAGS) + +GALLIUM_TARGET_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/loader \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/winsys \ + $(DEFINES) \ + $(PTHREAD_CFLAGS) \ + $(LIBDRM_CFLAGS) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_COMMON_LIB_DEPS = \ + -lm \ + $(CLOCK_LIB) \ + $(PTHREAD_LIBS) \ + $(DLOPEN_LIBS) + +GALLIUM_WINSYS_CFLAGS = \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + 
-I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) \ + $(VISIBILITY_CFLAGS) + +GALLIUM_PIPE_LOADER_WINSYS_LIBS = \ + $(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \ + $(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \ + $(am__append_1) +AM_CFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) \ + $(LLVM_CFLAGS) \ + $(MSVC2008_COMPAT_CFLAGS) + +AM_CXXFLAGS = \ + $(GALLIUM_DRIVER_CXXFLAGS) \ + $(LLVM_CXXFLAGS) \ + $(MSVC2008_COMPAT_CXXFLAGS) + +noinst_LTLIBRARIES = libllvmpipe.la +libllvmpipe_la_SOURCES = $(C_SOURCES) +libllvmpipe_la_LDFLAGS = $(LLVM_LDFLAGS) +noinst_HEADERS = lp_test.h +TESTS = $(check_PROGRAMS) +TEST_LIBS = \ + libllvmpipe.la \ + $(top_builddir)/src/gallium/auxiliary/libgallium.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(LLVM_LIBS) \ + $(DLOPEN_LIBS) \ + $(PTHREAD_LIBS) + +lp_test_format_SOURCES = lp_test_format.c lp_test_main.c +lp_test_format_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_format_SOURCES = dummy.cpp +lp_test_arit_SOURCES = lp_test_arit.c lp_test_main.c +lp_test_arit_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_arit_SOURCES = dummy.cpp +lp_test_blend_SOURCES = lp_test_blend.c lp_test_main.c +lp_test_blend_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_blend_SOURCES = dummy.cpp +lp_test_conv_SOURCES = lp_test_conv.c lp_test_main.c +lp_test_conv_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_conv_SOURCES = dummy.cpp +lp_test_printf_SOURCES = lp_test_printf.c lp_test_main.c +lp_test_printf_LDADD = $(TEST_LIBS) +nodist_EXTRA_lp_test_printf_SOURCES = dummy.cpp +EXTRA_DIST = SConscript +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cpp .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' 
cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/llvmpipe/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/gallium/drivers/llvmpipe/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty): + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libllvmpipe.la: $(libllvmpipe_la_OBJECTS) $(libllvmpipe_la_DEPENDENCIES) $(EXTRA_libllvmpipe_la_DEPENDENCIES) + $(AM_V_CCLD)$(libllvmpipe_la_LINK) $(libllvmpipe_la_OBJECTS) $(libllvmpipe_la_LIBADD) $(LIBS) + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +lp_test_arit$(EXEEXT): $(lp_test_arit_OBJECTS) $(lp_test_arit_DEPENDENCIES) $(EXTRA_lp_test_arit_DEPENDENCIES) + @rm -f lp_test_arit$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) 
$(lp_test_arit_OBJECTS) $(lp_test_arit_LDADD) $(LIBS) + +lp_test_blend$(EXEEXT): $(lp_test_blend_OBJECTS) $(lp_test_blend_DEPENDENCIES) $(EXTRA_lp_test_blend_DEPENDENCIES) + @rm -f lp_test_blend$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(lp_test_blend_OBJECTS) $(lp_test_blend_LDADD) $(LIBS) + +lp_test_conv$(EXEEXT): $(lp_test_conv_OBJECTS) $(lp_test_conv_DEPENDENCIES) $(EXTRA_lp_test_conv_DEPENDENCIES) + @rm -f lp_test_conv$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(lp_test_conv_OBJECTS) $(lp_test_conv_LDADD) $(LIBS) + +lp_test_format$(EXEEXT): $(lp_test_format_OBJECTS) $(lp_test_format_DEPENDENCIES) $(EXTRA_lp_test_format_DEPENDENCIES) + @rm -f lp_test_format$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(lp_test_format_OBJECTS) $(lp_test_format_LDADD) $(LIBS) + +lp_test_printf$(EXEEXT): $(lp_test_printf_OBJECTS) $(lp_test_printf_DEPENDENCIES) $(EXTRA_lp_test_printf_DEPENDENCIES) + @rm -f lp_test_printf$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(lp_test_printf_OBJECTS) $(lp_test_printf_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dummy.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_alpha.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_blend.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_blend_aos.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_blend_logicop.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_depth.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_bld_interp.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_clear.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_context.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_draw_arrays.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_fence.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/lp_flush.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_jit.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_memory.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_perf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_query.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_rast.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_rast_debug.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_rast_tri.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_scene.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_scene_queue.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_screen.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_line.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_point.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_tri.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_setup_vbuf.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_blend.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_clip.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_derived.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_fs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_gs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_rasterizer.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_sampler.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_setup.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_so.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_surface.Plo@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_vertex.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_state_vs.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_surface.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_arit.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_blend.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_conv.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_format.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_main.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_test_printf.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_tex_sample.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lp_texture.Plo@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ 
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +.cpp.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cpp.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\ +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cpp.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\ +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\ +@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o 
$@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. 
+am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; 
\ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; 
$(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all $(check_PROGRAMS) + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? +lp_test_format.log: lp_test_format$(EXEEXT) + @p='lp_test_format$(EXEEXT)'; \ + b='lp_test_format'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lp_test_arit.log: lp_test_arit$(EXEEXT) + @p='lp_test_arit$(EXEEXT)'; \ + b='lp_test_arit'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lp_test_blend.log: lp_test_blend$(EXEEXT) + @p='lp_test_blend$(EXEEXT)'; \ + b='lp_test_blend'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +lp_test_conv.log: lp_test_conv$(EXEEXT) + @p='lp_test_conv$(EXEEXT)'; \ + b='lp_test_conv'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) 
+lp_test_printf.log: lp_test_printf$(EXEEXT) + @p='lp_test_printf$(EXEEXT)'; \ + b='lp_test_printf'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ + clean-noinstLTLIBRARIES mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am check check-TESTS check-am clean \ + clean-checkPROGRAMS clean-generic clean-libtool \ + clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-libtool \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am recheck tags tags-am uninstall \ + uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. 
+# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources new file mode 100644 index 000000000..d928ccba4 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/Makefile.sources @@ -0,0 +1,71 @@ +C_SOURCES := \ + lp_bld_alpha.c \ + lp_bld_alpha.h \ + lp_bld_blend_aos.c \ + lp_bld_blend.c \ + lp_bld_blend.h \ + lp_bld_blend_logicop.c \ + lp_bld_depth.c \ + lp_bld_depth.h \ + lp_bld_interp.c \ + lp_bld_interp.h \ + lp_clear.c \ + lp_clear.h \ + lp_context.c \ + lp_context.h \ + lp_debug.h \ + lp_draw_arrays.c \ + lp_fence.c \ + lp_fence.h \ + lp_flush.c \ + lp_flush.h \ + lp_jit.c \ + lp_jit.h \ + lp_limits.h \ + lp_memory.c \ + lp_memory.h \ + lp_perf.c \ + lp_perf.h \ + lp_public.h \ + lp_query.c \ + lp_query.h \ + lp_rast.c \ + lp_rast_debug.c \ + lp_rast.h \ + lp_rast_priv.h \ + lp_rast_tri.c \ + lp_rast_tri_tmp.h \ + lp_scene.c \ + lp_scene.h \ + lp_scene_queue.c \ + lp_scene_queue.h \ + lp_screen.c \ + lp_screen.h \ + lp_setup.c \ + lp_setup_context.h \ + lp_setup.h \ + lp_setup_line.c \ + lp_setup_point.c \ + lp_setup_tri.c \ + lp_setup_vbuf.c \ + lp_state_blend.c \ + lp_state_clip.c \ + lp_state_derived.c \ + lp_state_fs.c \ + lp_state_fs.h \ + lp_state_gs.c \ + lp_state.h \ + lp_state_rasterizer.c \ + lp_state_sampler.c \ + lp_state_setup.c \ + lp_state_setup.h \ + lp_state_so.c \ + lp_state_surface.c \ + lp_state_vertex.c \ + lp_state_vs.c \ + lp_surface.c \ + lp_surface.h \ + lp_tex_sample.c \ + lp_tex_sample.h \ + lp_texture.c \ + lp_texture.h diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/SConscript b/lib/mesa/src/gallium/drivers/llvmpipe/SConscript new file mode 100644 index 000000000..3a51efcd5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/SConscript @@ -0,0 +1,49 @@ +from sys import executable as python_cmd +import distutils.version + +Import('*') + +if not env['llvm']: + print 'warning: 
LLVM disabled: not building llvmpipe' + Return() + +env = env.Clone() + +env.MSVC2008Compat() + +llvmpipe = env.ConvenienceLibrary( + target = 'llvmpipe', + source = env.ParseSourceList('Makefile.sources', 'C_SOURCES') + ) + +env.Alias('llvmpipe', llvmpipe) + + +if not env['embedded']: + env = env.Clone() + + env.Prepend(LIBS = [llvmpipe, gallium, mesautil]) + + tests = [ + 'format', + 'blend', + 'conv', + 'printf', + ] + + if not env['msvc']: + tests.append('arit') + + for test in tests: + testname = 'lp_test_' + test + target = env.Program( + target = testname, + source = [testname + '.c', 'lp_test_main.c'], + ) + env.InstallProgram(target) + + # http://www.scons.org/wiki/UnitTests + alias = env.Alias(testname, [target], target[0].abspath) + AlwaysBuild(alias) + +Export('llvmpipe') diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c new file mode 100644 index 000000000..6e2d0376d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Alpha testing to LLVM IR translation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" + +#include "lp_bld_alpha.h" + + +void +lp_build_alpha_test(struct gallivm_state *gallivm, + unsigned func, + struct lp_type type, + const struct util_format_description *cbuf_format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + LLVMValueRef ref, + boolean do_branch) +{ + struct lp_build_context bld; + LLVMValueRef test; + + lp_build_context_init(&bld, gallivm, type); + + /* + * Alpha testing needs to be done in the color buffer precision. + * + * TODO: Ideally, instead of duplicating the color conversion code, we would do + * alpha testing after converting the output colors, but that's not very + * convenient, because it needs to be done before depth testing. Hopefully + * LLVM will detect and remove the duplicate expression. + * + * FIXME: This should be generalized to formats other than rgba8 variants. 
+ */ + if (type.floating && + util_format_is_rgba8_variant(cbuf_format_desc)) { + const unsigned dst_width = 8; + + alpha = lp_build_clamp(&bld, alpha, bld.zero, bld.one); + ref = lp_build_clamp(&bld, ref, bld.zero, bld.one); + + alpha = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, alpha); + ref = lp_build_clamped_float_to_unsigned_norm(gallivm, type, dst_width, ref); + + type.floating = 0; + lp_build_context_init(&bld, gallivm, type); + } + + test = lp_build_cmp(&bld, func, alpha, ref); + + lp_build_name(test, "alpha_mask"); + + lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h new file mode 100644 index 000000000..15f1284c5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Alpha testing to LLVM IR translation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef LP_BLD_ALPHA_H +#define LP_BLD_ALPHA_H + +#include "pipe/p_compiler.h" + +#include "gallivm/lp_bld.h" + +struct pipe_alpha_state; +struct util_format_description; +struct gallivm_state; +struct lp_type; +struct lp_build_mask_context; + + +void +lp_build_alpha_test(struct gallivm_state *gallivm, + unsigned func, + struct lp_type type, + const struct util_format_description *cbuf_format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + LLVMValueRef ref, + boolean do_branch); + + +#endif /* !LP_BLD_ALPHA_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c new file mode 100644 index 000000000..1feb415c9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -0,0 +1,223 @@ +/************************************************************************** + * + * Copyright 2012 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" + +#include "lp_bld_blend.h" + +/** + * Is (a OP b) == (b OP a)? + */ +boolean +lp_build_blend_func_commutative(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + case PIPE_BLEND_MIN: + case PIPE_BLEND_MAX: + return TRUE; + case PIPE_BLEND_SUBTRACT: + case PIPE_BLEND_REVERSE_SUBTRACT: + return FALSE; + default: + assert(0); + return TRUE; + } +} + + +/** + * Whether the blending functions are the reverse of each other. 
+ */ +boolean +lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) +{ + if(rgb_func == alpha_func) + return FALSE; + if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) + return TRUE; + if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) + return TRUE; + return FALSE; +} + + +/** + * Whether the blending factors are complementary of each other. + */ +static inline boolean +lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) +{ + return dst_factor == (src_factor ^ 0x10); +} + + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml + */ +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2) +{ + switch (func) { + case PIPE_BLEND_ADD: + return lp_build_add(bld, term1, term2); + case PIPE_BLEND_SUBTRACT: + return lp_build_sub(bld, term1, term2); + case PIPE_BLEND_REVERSE_SUBTRACT: + return lp_build_sub(bld, term2, term1); + case PIPE_BLEND_MIN: + return lp_build_min(bld, term1, term2); + case PIPE_BLEND_MAX: + return lp_build_max(bld, term1, term2); + default: + assert(0); + return bld->zero; + } +} + + +/** + * Performs optimisations and blending independent of SoA/AoS + * + * @param func the blend function + * @param factor_src PIPE_BLENDFACTOR_xxx + * @param factor_dst PIPE_BLENDFACTOR_xxx + * @param src source rgba + * @param dst dest rgba + * @param src_factor src factor computed value + * @param dst_factor dst factor computed value + * @param not_alpha_dependent same factors accross all channels of src/dst + * + * not_alpha_dependent should be: + * SoA: always true as it is only one channel at a time + * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor + * + * Note that pretty much every possible optimisation can only be done on non-unorm targets + * due to unorm values not going above 1.0 meaning factorisation can change results. + * e.g. 
(0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1. + */ +LLVMValueRef +lp_build_blend(struct lp_build_context *bld, + unsigned func, + unsigned factor_src, + unsigned factor_dst, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef src_factor, + LLVMValueRef dst_factor, + boolean not_alpha_dependent, + boolean optimise_only) +{ + LLVMValueRef result, src_term, dst_term; + + /* If we are not alpha dependent we can mess with the src/dst factors */ + if (not_alpha_dependent) { + if (lp_build_blend_factor_complementary(factor_src, factor_dst)) { + if (func == PIPE_BLEND_ADD) { + if (factor_src < factor_dst) { + return lp_build_lerp(bld, src_factor, dst, src, 0); + } else { + return lp_build_lerp(bld, dst_factor, src, dst, 0); + } + } else if(bld->type.floating && func == PIPE_BLEND_SUBTRACT) { + result = lp_build_add(bld, src, dst); + + if (factor_src < factor_dst) { + result = lp_build_mul(bld, result, src_factor); + return lp_build_sub(bld, result, dst); + } else { + result = lp_build_mul(bld, result, dst_factor); + return lp_build_sub(bld, src, result); + } + } else if(bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { + result = lp_build_add(bld, src, dst); + + if (factor_src < factor_dst) { + result = lp_build_mul(bld, result, src_factor); + return lp_build_sub(bld, dst, result); + } else { + result = lp_build_mul(bld, result, dst_factor); + return lp_build_sub(bld, result, src); + } + } + } + + if (bld->type.floating && factor_src == factor_dst) { + if (func == PIPE_BLEND_ADD || + func == PIPE_BLEND_SUBTRACT || + func == PIPE_BLEND_REVERSE_SUBTRACT) { + LLVMValueRef result; + result = lp_build_blend_func(bld, func, src, dst); + return lp_build_mul(bld, result, src_factor); + } + } + } + + if (optimise_only) + return NULL; + + src_term = lp_build_mul(bld, src, src_factor); + dst_term = lp_build_mul(bld, dst, dst_factor); + return lp_build_blend_func(bld, func, src_term, dst_term); +} + +void +lp_build_alpha_to_coverage(struct 
gallivm_state *gallivm, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + boolean do_branch) +{ + struct lp_build_context bld; + LLVMValueRef test; + LLVMValueRef alpha_ref_value; + + lp_build_context_init(&bld, gallivm, type); + + alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5); + + test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value); + + lp_build_name(test, "alpha_to_coverage"); + + lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h new file mode 100644 index 000000000..adfab85dc --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_BLD_BLEND_H +#define LP_BLD_BLEND_H + + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" + +#include "pipe/p_format.h" + + +struct pipe_blend_state; +struct lp_type; +struct lp_build_context; +struct lp_build_mask_context; + + +LLVMValueRef +lp_build_blend(struct lp_build_context *bld, + unsigned func, + unsigned factor_src, + unsigned factor_dst, + LLVMValueRef src, + LLVMValueRef dst, + LLVMValueRef src_factor, + LLVMValueRef dst_factor, + boolean not_alpha_dependent, + boolean optimise_only); + + +LLVMValueRef +lp_build_blend_aos(struct gallivm_state *gallivm, + const struct pipe_blend_state *blend, + enum pipe_format cbuf_format, + struct lp_type type, + unsigned rt, + LLVMValueRef src, + LLVMValueRef src_alpha, + LLVMValueRef src1, + LLVMValueRef src1_alpha, + LLVMValueRef dst, + LLVMValueRef mask, + LLVMValueRef const_, + LLVMValueRef const_alpha, + const unsigned char swizzle[4], + int nr_channels); + + +/** + * Apply a logic op. + * + * src/dst parameters are packed values. It should work regardless the inputs + * are scalars, or a vector. 
+ */ +LLVMValueRef +lp_build_logicop(LLVMBuilderRef builder, + unsigned logicop_func, + LLVMValueRef src, + LLVMValueRef dst); + + +LLVMValueRef +lp_build_blend_func(struct lp_build_context *bld, + unsigned func, + LLVMValueRef term1, + LLVMValueRef term2); + + +boolean +lp_build_blend_func_reverse(unsigned rgb_func, + unsigned alpha_func); + + +boolean +lp_build_blend_func_commutative(unsigned func); + +void +lp_build_alpha_to_coverage(struct gallivm_state *gallivm, + struct lp_type type, + struct lp_build_mask_context *mask, + LLVMValueRef alpha, + boolean do_branch); + +#endif /* !LP_BLD_BLEND_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c new file mode 100644 index 000000000..564e19a15 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -0,0 +1,423 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- AoS layout. + * + * AoS blending is in general much slower than SoA, but there are some cases + * where it might be faster. In particular, if a pixel is rendered only once + * then the overhead of tiling and untiling will dominate over the speedup that + * SoA gives. So we might want to detect such cases and fallback to AoS in the + * future, but for now this function is here for historical/benchmarking + * purposes. + * + * Run lp_blend_test after any change to this file. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "util/u_format.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_bitarit.h" +#include "gallivm/lp_bld_debug.h" + +#include "lp_bld_blend.h" + + +/** + * We may the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications + * that LLVM optimization passes wouldn't normally be able to do. 
+ */ +struct lp_build_blend_aos_context +{ + struct lp_build_context base; + + LLVMValueRef src; + LLVMValueRef src_alpha; + LLVMValueRef src1; + LLVMValueRef src1_alpha; + LLVMValueRef dst; + LLVMValueRef const_; + LLVMValueRef const_alpha; + + LLVMValueRef inv_src; + LLVMValueRef inv_src_alpha; + LLVMValueRef inv_dst; + LLVMValueRef inv_const; + LLVMValueRef inv_const_alpha; + LLVMValueRef saturate; + + LLVMValueRef rgb_src_factor; + LLVMValueRef alpha_src_factor; + LLVMValueRef rgb_dst_factor; + LLVMValueRef alpha_dst_factor; +}; + + +static LLVMValueRef +lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, + unsigned factor, + boolean alpha) +{ + LLVMValueRef src_alpha = bld->src_alpha ? bld->src_alpha : bld->src; + LLVMValueRef src1_alpha = bld->src1_alpha ? bld->src1_alpha : bld->src1; + LLVMValueRef const_alpha = bld->const_alpha ? bld->const_alpha : bld->const_; + + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return bld->base.zero; + case PIPE_BLENDFACTOR_ONE: + return bld->base.one; + case PIPE_BLENDFACTOR_SRC_COLOR: + return bld->src; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return src_alpha; + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_DST_ALPHA: + return bld->dst; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if(alpha) + return bld->base.one; + else { + /* + * if there's separate src_alpha there's no dst alpha hence the complement + * is zero but for unclamped float inputs min can be non-zero (negative). 
+ */ + if (bld->src_alpha) { + if (!bld->saturate) + bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero); + } + else { + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + if(!bld->saturate) + bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst); + } + return bld->saturate; + } + case PIPE_BLENDFACTOR_CONST_COLOR: + return bld->const_; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return const_alpha; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return bld->src1; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return src1_alpha; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + if(!bld->inv_src) + bld->inv_src = lp_build_comp(&bld->base, bld->src); + return bld->inv_src; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + if(!bld->inv_src_alpha) + bld->inv_src_alpha = lp_build_comp(&bld->base, src_alpha); + return bld->inv_src_alpha; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + return bld->inv_dst; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + if(!bld->inv_const) + bld->inv_const = lp_build_comp(&bld->base, bld->const_); + return bld->inv_const; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + if(!bld->inv_const_alpha) + bld->inv_const_alpha = lp_build_comp(&bld->base, const_alpha); + return bld->inv_const_alpha; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return lp_build_comp(&bld->base, bld->src1); + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return lp_build_comp(&bld->base, src1_alpha); + default: + assert(0); + return bld->base.zero; + } +} + + +enum lp_build_blend_swizzle { + LP_BUILD_BLEND_SWIZZLE_RGBA = 0, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 +}; + + +/** + * How should we shuffle the base factor. 
+ */ +static enum lp_build_blend_swizzle +lp_build_blend_factor_swizzle(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + case PIPE_BLENDFACTOR_ZERO: + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return LP_BUILD_BLEND_SWIZZLE_RGBA; + case PIPE_BLENDFACTOR_SRC_ALPHA: + case PIPE_BLENDFACTOR_DST_ALPHA: + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return LP_BUILD_BLEND_SWIZZLE_AAAA; + default: + assert(0); + return LP_BUILD_BLEND_SWIZZLE_RGBA; + } +} + + +static LLVMValueRef +lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld, + LLVMValueRef rgb, + LLVMValueRef alpha, + enum lp_build_blend_swizzle rgb_swizzle, + unsigned alpha_swizzle, + unsigned num_channels) +{ + LLVMValueRef swizzled_rgb; + + switch (rgb_swizzle) { + case LP_BUILD_BLEND_SWIZZLE_RGBA: + swizzled_rgb = rgb; + break; + case LP_BUILD_BLEND_SWIZZLE_AAAA: + swizzled_rgb = lp_build_swizzle_scalar_aos(&bld->base, rgb, alpha_swizzle, num_channels); + break; + default: + assert(0); + swizzled_rgb = bld->base.undef; + } + + if (rgb != alpha) { + swizzled_rgb = lp_build_select_aos(&bld->base, 1 << alpha_swizzle, + alpha, swizzled_rgb, + num_channels); + } + + return swizzled_rgb; +} + +/** + * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml + */ +static LLVMValueRef +lp_build_blend_factor(struct lp_build_blend_aos_context *bld, + unsigned rgb_factor, + unsigned alpha_factor, + unsigned alpha_swizzle, + unsigned num_channels) +{ + LLVMValueRef rgb_factor_, alpha_factor_; + enum 
lp_build_blend_swizzle rgb_swizzle; + + if (alpha_swizzle == UTIL_FORMAT_SWIZZLE_X && num_channels == 1) { + return lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + } + + rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); + + if (alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { + rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); + alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); + return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels); + } else { + return rgb_factor_; + } +} + + +/** + * Performs blending of src and dst pixels + * + * @param blend the blend state of the shader variant + * @param cbuf_format format of the colour buffer + * @param type data type of the pixel vector + * @param rt render target index + * @param src blend src + * @param src_alpha blend src alpha (if not included in src) + * @param src1 second blend src (for dual source blend) + * @param src1_alpha second blend src alpha (if not included in src1) + * @param dst blend dst + * @param mask optional mask to apply to the blending result + * @param const_ const blend color + * @param const_alpha const blend color alpha (if not included in const_) + * @param swizzle swizzle values for RGBA + * + * @return the result of blending src and dst + */ +LLVMValueRef +lp_build_blend_aos(struct gallivm_state *gallivm, + const struct pipe_blend_state *blend, + enum pipe_format cbuf_format, + struct lp_type type, + unsigned rt, + LLVMValueRef src, + LLVMValueRef src_alpha, + LLVMValueRef src1, + LLVMValueRef src1_alpha, + LLVMValueRef dst, + LLVMValueRef mask, + LLVMValueRef const_, + LLVMValueRef const_alpha, + const unsigned char swizzle[4], + int nr_channels) +{ + const struct pipe_rt_blend_state * state = &blend->rt[rt]; + const struct util_format_description * desc; + struct lp_build_blend_aos_context bld; + LLVMValueRef src_factor, dst_factor; + LLVMValueRef result; + unsigned alpha_swizzle 
= UTIL_FORMAT_SWIZZLE_NONE; + unsigned i; + + desc = util_format_description(cbuf_format); + + /* Setup build context */ + memset(&bld, 0, sizeof bld); + lp_build_context_init(&bld.base, gallivm, type); + bld.src = src; + bld.src1 = src1; + bld.dst = dst; + bld.const_ = const_; + bld.src_alpha = src_alpha; + bld.src1_alpha = src1_alpha; + bld.const_alpha = const_alpha; + + /* Find the alpha channel if not provided seperately */ + if (!src_alpha) { + for (i = 0; i < 4; ++i) { + if (swizzle[i] == 3) { + alpha_swizzle = i; + } + } + } + + if (blend->logicop_enable) { + if(!type.floating) { + result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst); + } + else { + result = src; + } + } else if (!state->blend_enable) { + result = src; + } else { + boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1; + + src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor, + state->alpha_src_factor, + alpha_swizzle, + nr_channels); + + dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor, + state->alpha_dst_factor, + alpha_swizzle, + nr_channels); + + result = lp_build_blend(&bld.base, + state->rgb_func, + state->rgb_src_factor, + state->rgb_dst_factor, + src, + dst, + src_factor, + dst_factor, + rgb_alpha_same, + false); + + if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) { + LLVMValueRef alpha; + + alpha = lp_build_blend(&bld.base, + state->alpha_func, + state->alpha_src_factor, + state->alpha_dst_factor, + src, + dst, + src_factor, + dst_factor, + rgb_alpha_same, + false); + + result = lp_build_blend_swizzle(&bld, + result, + alpha, + LP_BUILD_BLEND_SWIZZLE_RGBA, + alpha_swizzle, + nr_channels); + } + } + + /* Check if color mask is necessary */ + if (!util_format_colormask_full(desc, state->colormask)) { + LLVMValueRef color_mask; + + color_mask = lp_build_const_mask_aos_swizzled(gallivm, 
bld.base.type, state->colormask, nr_channels, swizzle); + lp_build_name(color_mask, "color_mask"); + + /* Combine with input mask if necessary */ + if (mask) { + /* We can be blending floating values but masks are always integer... */ + unsigned floating = bld.base.type.floating; + bld.base.type.floating = 0; + + mask = lp_build_and(&bld.base, color_mask, mask); + + bld.base.type.floating = floating; + } else { + mask = color_mask; + } + } + + /* Apply mask, if one exists */ + if (mask) { + result = lp_build_select(&bld.base, mask, result, dst); + } + + return result; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c new file mode 100644 index 000000000..1eac0a5c8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Blend LLVM IR generation -- logic ops. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "pipe/p_state.h" +#include "util/u_debug.h" + +#include "lp_bld_blend.h" + + +LLVMValueRef +lp_build_logicop(LLVMBuilderRef builder, + unsigned logicop_func, + LLVMValueRef src, + LLVMValueRef dst) +{ + LLVMTypeRef type; + LLVMValueRef res; + + type = LLVMTypeOf(src); + + switch (logicop_func) { + case PIPE_LOGICOP_CLEAR: + res = LLVMConstNull(type); + break; + case PIPE_LOGICOP_NOR: + res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_AND_INVERTED: + res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, ""); + break; + case PIPE_LOGICOP_COPY_INVERTED: + res = LLVMBuildNot(builder, src, ""); + break; + case PIPE_LOGICOP_AND_REVERSE: + res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), ""); + break; + case PIPE_LOGICOP_INVERT: + res = LLVMBuildNot(builder, dst, ""); + break; + case PIPE_LOGICOP_XOR: + res = LLVMBuildXor(builder, src, dst, ""); + break; + case PIPE_LOGICOP_NAND: + res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_AND: + res = LLVMBuildAnd(builder, src, dst, ""); + break; + case PIPE_LOGICOP_EQUIV: + res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), ""); + break; + case PIPE_LOGICOP_NOOP: + res = dst; + break; + case PIPE_LOGICOP_OR_INVERTED: + res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, ""); + break; + case PIPE_LOGICOP_COPY: + res = src; + break; + case PIPE_LOGICOP_OR_REVERSE: + res = LLVMBuildOr(builder, src, 
LLVMBuildNot(builder, dst, ""), ""); + break; + case PIPE_LOGICOP_OR: + res = LLVMBuildOr(builder, src, dst, ""); + break; + case PIPE_LOGICOP_SET: + res = LLVMConstAllOnes(type); + break; + default: + assert(0); + res = src; + } + + return res; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c new file mode 100644 index 000000000..b25e04137 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -0,0 +1,1118 @@ +/************************************************************************** + * + * Copyright 2009-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Depth/stencil testing to LLVM IR translation. 
+ * + * To be done accurately/efficiently the depth/stencil test must be done with + * the same type/format of the depth/stencil buffer, which implies massaging + * the incoming depths to fit into place. Using a more straightforward + * type/format for depth/stencil values internally and only convert when + * flushing would avoid this, but it would most likely result in depth fighting + * artifacts. + * + * Since we're using linear layout for everything, but we need to deal with + * 2x2 quads, we need to load/store multiple values and swizzle them into + * place (we could avoid this by doing depth/stencil testing in linear format, + * which would be easy for late depth/stencil test as we could do that after + * the fragment shader loop just as we do for color buffers, but more tricky + * for early depth test as we'd need both masks and interpolated depth in + * linear format). + * + * + * @author Jose Fonseca <jfonseca@vmware.com> + * @author Brian Paul <jfonseca@vmware.com> + */ + +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_cpu_detect.h" + +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_bitarit.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_pack.h" + +#include "lp_bld_depth.h" + + +/** Used to select fields from pipe_stencil_state */ +enum stencil_op { + S_FAIL_OP, + Z_FAIL_OP, + Z_PASS_OP +}; + + + +/** + * Do the stencil test comparison (compare FB stencil values against ref value). + * This will be used twice when generating two-sided stencil code. 
+ * \param stencil the front/back stencil state + * \param stencilRef the stencil reference value, replicated as a vector + * \param stencilVals vector of stencil values from framebuffer + * \return vector mask of pass/fail values (~0 or 0) + */ +static LLVMValueRef +lp_build_stencil_test_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) +{ + LLVMBuilderRef builder = bld->gallivm->builder; + const unsigned stencilMax = 255; /* XXX fix */ + struct lp_type type = bld->type; + LLVMValueRef res; + + /* + * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values + * are between 0..255 so ensure we generate the fastest comparisons for + * wider elements. + */ + if (type.width <= 8) { + assert(!type.sign); + } else { + assert(type.sign); + } + + assert(stencil->enabled); + + if (stencil->valuemask != stencilMax) { + /* compute stencilRef = stencilRef & valuemask */ + LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); + stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); + /* compute stencilVals = stencilVals & valuemask */ + stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); + } + + res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); + + return res; +} + + +/** + * Do the one or two-sided stencil test comparison. + * \sa lp_build_stencil_test_single + * \param front_facing an integer vector mask, indicating front (~0) or back + * (0) facing polygon. If NULL, assume front-facing. 
+ */ +static LLVMValueRef +lp_build_stencil_test(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef front_facing) +{ + LLVMValueRef res; + + assert(stencil[0].enabled); + + /* do front face test */ + res = lp_build_stencil_test_single(bld, &stencil[0], + stencilRefs[0], stencilVals); + + if (stencil[1].enabled && front_facing != NULL) { + /* do back face test */ + LLVMValueRef back_res; + + back_res = lp_build_stencil_test_single(bld, &stencil[1], + stencilRefs[1], stencilVals); + + res = lp_build_select(bld, front_facing, res, back_res); + } + + return res; +} + + +/** + * Apply the stencil operator (add/sub/keep/etc) to the given vector + * of stencil values. + * \return new stencil values vector + */ +static LLVMValueRef +lp_build_stencil_op_single(struct lp_build_context *bld, + const struct pipe_stencil_state *stencil, + enum stencil_op op, + LLVMValueRef stencilRef, + LLVMValueRef stencilVals) + +{ + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type type = bld->type; + LLVMValueRef res; + LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); + unsigned stencil_op; + + assert(type.sign); + + switch (op) { + case S_FAIL_OP: + stencil_op = stencil->fail_op; + break; + case Z_FAIL_OP: + stencil_op = stencil->zfail_op; + break; + case Z_PASS_OP: + stencil_op = stencil->zpass_op; + break; + default: + assert(0 && "Invalid stencil_op mode"); + stencil_op = PIPE_STENCIL_OP_KEEP; + } + + switch (stencil_op) { + case PIPE_STENCIL_OP_KEEP: + res = stencilVals; + /* we can return early for this case */ + return res; + case PIPE_STENCIL_OP_ZERO: + res = bld->zero; + break; + case PIPE_STENCIL_OP_REPLACE: + res = stencilRef; + break; + case PIPE_STENCIL_OP_INCR: + res = lp_build_add(bld, stencilVals, bld->one); + res = lp_build_min(bld, res, max); + break; + case PIPE_STENCIL_OP_DECR: + res = lp_build_sub(bld, stencilVals, bld->one); + res = 
lp_build_max(bld, res, bld->zero); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + res = lp_build_add(bld, stencilVals, bld->one); + res = LLVMBuildAnd(builder, res, max, ""); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + res = lp_build_sub(bld, stencilVals, bld->one); + res = LLVMBuildAnd(builder, res, max, ""); + break; + case PIPE_STENCIL_OP_INVERT: + res = LLVMBuildNot(builder, stencilVals, ""); + res = LLVMBuildAnd(builder, res, max, ""); + break; + default: + assert(0 && "bad stencil op mode"); + res = bld->undef; + } + + return res; +} + + +/** + * Do the one or two-sided stencil test op/update. + */ +static LLVMValueRef +lp_build_stencil_op(struct lp_build_context *bld, + const struct pipe_stencil_state stencil[2], + enum stencil_op op, + LLVMValueRef stencilRefs[2], + LLVMValueRef stencilVals, + LLVMValueRef mask, + LLVMValueRef front_facing) + +{ + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef res; + + assert(stencil[0].enabled); + + /* do front face op */ + res = lp_build_stencil_op_single(bld, &stencil[0], op, + stencilRefs[0], stencilVals); + + if (stencil[1].enabled && front_facing != NULL) { + /* do back face op */ + LLVMValueRef back_res; + + back_res = lp_build_stencil_op_single(bld, &stencil[1], op, + stencilRefs[1], stencilVals); + + res = lp_build_select(bld, front_facing, res, back_res); + } + + if (stencil[0].writemask != 0xff || + (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) { + /* mask &= stencil[0].writemask */ + LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, + stencil[0].writemask); + if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) { + LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type, + stencil[1].writemask); + writemask = lp_build_select(bld, front_facing, writemask, back_writemask); + } + + mask = LLVMBuildAnd(builder, mask, writemask, ""); + /* res = (res & mask) | (stencilVals & ~mask) */ + 
res = lp_build_select_bitwise(bld, mask, res, stencilVals); + } + else { + /* res = mask ? res : stencilVals */ + res = lp_build_select(bld, mask, res, stencilVals); + } + + return res; +} + + + +/** + * Return a type that matches the depth/stencil format. + */ +struct lp_type +lp_depth_type(const struct util_format_description *format_desc, + unsigned length) +{ + struct lp_type type; + unsigned z_swizzle; + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + memset(&type, 0, sizeof type); + type.width = format_desc->block.bits; + + z_swizzle = format_desc->swizzle[0]; + if (z_swizzle < 4) { + if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { + type.floating = TRUE; + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].size == 32); + } + else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { + assert(format_desc->block.bits <= 32); + assert(format_desc->channel[z_swizzle].normalized); + if (format_desc->channel[z_swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } + } + else + assert(0); + } + + type.length = length; + + return type; +} + + +/** + * Compute bitmask and bit shift to apply to the incoming fragment Z values + * and the Z buffer values needed before doing the Z comparison. + * + * Note that we leave the Z bits in the position that we find them + * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us + * get by with fewer bit twiddling steps. 
+ */ +static boolean +get_z_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *width, unsigned *mask) +{ + unsigned total_bits; + unsigned z_swizzle; + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + /* 64bit d/s format is special already extracted 32 bits */ + total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits; + + z_swizzle = format_desc->swizzle[0]; + + if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + *width = format_desc->channel[z_swizzle].size; + /* & 31 is for the same reason as the 32-bit limit above */ + *shift = format_desc->channel[z_swizzle].shift & 31; + + if (*width == total_bits) { + *mask = 0xffffffff; + } else { + *mask = ((1 << *width) - 1) << *shift; + } + + return TRUE; +} + + +/** + * Compute bitmask and bit shift to apply to the framebuffer pixel values + * to put the stencil bits in the least significant position. + * (i.e. 0x000000ff) + */ +static boolean +get_s_shift_and_mask(const struct util_format_description *format_desc, + unsigned *shift, unsigned *mask) +{ + unsigned s_swizzle; + unsigned sz; + + s_swizzle = format_desc->swizzle[1]; + + if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; + + /* just special case 64bit d/s format */ + if (format_desc->block.bits > 32) { + /* XXX big-endian? */ + assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); + *shift = 0; + *mask = 0xff; + return TRUE; + } + + *shift = format_desc->channel[s_swizzle].shift; + sz = format_desc->channel[s_swizzle].size; + *mask = (1U << sz) - 1U; + + return TRUE; +} + + +/** + * Perform the occlusion test and increase the counter. + * Test the depth mask. Add the number of channel which has none zero mask + * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. + * The counter will add 4. + * TODO: could get that out of the fs loop. 
+ * + * \param type holds element type of the mask vector. + * \param maskvalue is the depth test mask. + * \param counter is a pointer of the uint32 counter. + */ +void +lp_build_occlusion_count(struct gallivm_state *gallivm, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMContextRef context = gallivm->context; + LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); + LLVMValueRef count, newcount; + + assert(type.length <= 16); + assert(type.floating); + + if(util_cpu_caps.has_sse && type.length == 4) { + const char *movmskintr = "llvm.x86.sse.movmsk.ps"; + const char *popcntintr = "llvm.ctpop.i32"; + LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, + lp_build_vec_type(gallivm, type), ""); + bits = lp_build_intrinsic_unary(builder, movmskintr, + LLVMInt32TypeInContext(context), bits); + count = lp_build_intrinsic_unary(builder, popcntintr, + LLVMInt32TypeInContext(context), bits); + count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); + } + else if(util_cpu_caps.has_avx && type.length == 8) { + const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; + const char *popcntintr = "llvm.ctpop.i32"; + LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, + lp_build_vec_type(gallivm, type), ""); + bits = lp_build_intrinsic_unary(builder, movmskintr, + LLVMInt32TypeInContext(context), bits); + count = lp_build_intrinsic_unary(builder, popcntintr, + LLVMInt32TypeInContext(context), bits); + count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); + } + else { + unsigned i; + LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); + LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8); + LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4); + LLVMValueRef shufflev, countd; + LLVMValueRef shuffles[16]; + const char *popcntintr = NULL; + + countv = 
LLVMBuildBitCast(builder, countv, i8vntype, ""); + + for (i = 0; i < type.length; i++) { + shuffles[i] = lp_build_const_int32(gallivm, 4*i); + } + + shufflev = LLVMConstVector(shuffles, type.length); + countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, ""); + countd = LLVMBuildBitCast(builder, countd, counttype, "countd"); + + /* + * XXX FIXME + * this is bad on cpus without popcount (on x86 supported by intel + * nehalem, amd barcelona, and up - not tied to sse42). + * Would be much faster to just sum the 4 elements of the vector with + * some horizontal add (shuffle/add/shuffle/add after the initial and). + */ + switch (type.length) { + case 4: + popcntintr = "llvm.ctpop.i32"; + break; + case 8: + popcntintr = "llvm.ctpop.i64"; + break; + case 16: + popcntintr = "llvm.ctpop.i128"; + break; + default: + assert(0); + } + count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); + + if (type.length > 8) { + count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), ""); + } + else if (type.length < 8) { + count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); + } + } + newcount = LLVMBuildLoad(builder, counter, "origcount"); + newcount = LLVMBuildAdd(builder, newcount, count, "newcount"); + LLVMBuildStore(builder, newcount, counter); +} + + +/** + * Load depth/stencil values. + * The stored values are linear, swizzle them. 
+ * + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param is_1d whether this resource has only one dimension + * \param loop_counter the current loop iteration + * \param depth_ptr pointer to the depth/stencil values of this 4x4 block + * \param depth_stride stride of the depth/stencil buffer + * \param z_fb contains z values loaded from fb (may include padding) + * \param s_fb contains s values loaded from fb (may include padding) + */ +void +lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + boolean is_1d, + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef *z_fb, + LLVMValueRef *s_fb, + LLVMValueRef loop_counter) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef zs_dst1, zs_dst2; + LLVMValueRef zs_dst_ptr; + LLVMValueRef depth_offset1, depth_offset2; + LLVMTypeRef load_ptr_type; + unsigned depth_bytes = format_desc->block.bits / 8; + struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); + struct lp_type zs_load_type = zs_type; + + zs_load_type.length = zs_load_type.length / 2; + load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); + + if (z_src_type.length == 4) { + unsigned i; + LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 2), ""); + LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, + depth_stride, ""); + depth_offset1 = LLVMBuildMul(builder, looplsb, + lp_build_const_int32(gallivm, depth_bytes * 2), ""); + depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); + + /* just concatenate the loaded 2x2 values into 4-wide vector */ + for (i = 0; i < 4; i++) { + shuffles[i] = 
lp_build_const_int32(gallivm, i); + } + } + else { + unsigned i; + LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + assert(z_src_type.length == 8); + depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); + /* + * We load 2x4 values, and need to swizzle them (order + * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. + */ + for (i = 0; i < 8; i++) { + shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); + } + } + + depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); + + /* Load current z/stencil values from z/stencil buffer */ + zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); + zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); + if (is_1d) { + zs_dst2 = lp_build_undef(gallivm, zs_load_type); + } + else { + zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); + zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); + } + + *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, + LLVMConstVector(shuffles, zs_type.length), ""); + *s_fb = *z_fb; + + if (format_desc->block.bits < z_src_type.width) { + /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ + *z_fb = LLVMBuildZExt(builder, *z_fb, + lp_build_int_vec_type(gallivm, z_src_type), ""); + } + + else if (format_desc->block.bits > 32) { + /* rely on llvm to handle too wide vector we have here nicely */ + unsigned i; + struct lp_type typex2 = zs_type; + struct lp_type s_type = zs_type; + LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; + LLVMValueRef tmp; + + typex2.width = typex2.width / 2; + typex2.length = typex2.length * 2; + s_type.width = s_type.width / 2; + s_type.floating = 0; + + tmp = LLVMBuildBitCast(builder, *z_fb, + lp_build_vec_type(gallivm, 
typex2), ""); + + for (i = 0; i < zs_type.length; i++) { + shuffles1[i] = lp_build_const_int32(gallivm, i * 2); + shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); + } + *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, + LLVMConstVector(shuffles1, zs_type.length), ""); + *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, + LLVMConstVector(shuffles2, zs_type.length), ""); + *s_fb = LLVMBuildBitCast(builder, *s_fb, + lp_build_vec_type(gallivm, s_type), ""); + lp_build_name(*s_fb, "s_dst"); + } + + lp_build_name(*z_fb, "z_dst"); + lp_build_name(*s_fb, "s_dst"); + lp_build_name(*z_fb, "z_dst"); +} + +/** + * Store depth/stencil values. + * Incoming values are swizzled (typically n 2x2 quads), stored linear. + * If there's a mask it will do select/store otherwise just store. + * + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param is_1d whether this resource has only one dimension + * \param mask the alive/dead pixel mask for the quad (vector) + * \param z_fb z values read from fb (with padding) + * \param s_fb s values read from fb (with padding) + * \param loop_counter the current loop iteration + * \param depth_ptr pointer to the depth/stencil values of this 4x4 block + * \param depth_stride stride of the depth/stencil buffer + * \param z_value the depth values to store (with padding) + * \param s_value the stencil values to store (with padding) + */ +void +lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + boolean is_1d, + struct lp_build_mask_context *mask, + LLVMValueRef z_fb, + LLVMValueRef s_fb, + LLVMValueRef loop_counter, + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef z_value, + LLVMValueRef s_value) +{ + struct lp_build_context z_bld; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; + LLVMBuilderRef builder = gallivm->builder; + 
LLVMValueRef mask_value = NULL; + LLVMValueRef zs_dst1, zs_dst2; + LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; + LLVMValueRef depth_offset1, depth_offset2; + LLVMTypeRef load_ptr_type; + unsigned depth_bytes = format_desc->block.bits / 8; + struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); + struct lp_type z_type = zs_type; + struct lp_type zs_load_type = zs_type; + + zs_load_type.length = zs_load_type.length / 2; + load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); + + z_type.width = z_src_type.width; + + lp_build_context_init(&z_bld, gallivm, z_type); + + /* + * This is far from ideal, at least for late depth write we should do this + * outside the fs loop to avoid all the swizzle stuff. + */ + if (z_src_type.length == 4) { + LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, + lp_build_const_int32(gallivm, 2), ""); + LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, + depth_stride, ""); + depth_offset1 = LLVMBuildMul(builder, looplsb, + lp_build_const_int32(gallivm, depth_bytes * 2), ""); + depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); + } + else { + unsigned i; + LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, + lp_build_const_int32(gallivm, 1), ""); + assert(z_src_type.length == 8); + depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); + /* + * We load 2x4 values, and need to swizzle them (order + * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. 
+ */ + for (i = 0; i < 8; i++) { + shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); + } + } + + depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); + + zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); + zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); + zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); + zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); + + if (format_desc->block.bits > 32) { + s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); + } + + if (mask) { + mask_value = lp_build_mask_value(mask); + z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); + if (format_desc->block.bits > 32) { + s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); + s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); + } + } + + if (zs_type.width < z_src_type.width) { + /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ + z_value = LLVMBuildTrunc(builder, z_value, + lp_build_int_vec_type(gallivm, zs_type), ""); + } + + if (format_desc->block.bits <= 32) { + if (z_src_type.length == 4) { + zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); + zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); + } + else { + assert(z_src_type.length == 8); + zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, + LLVMConstVector(&shuffles[0], + zs_load_type.length), ""); + zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value, + LLVMConstVector(&shuffles[4], + zs_load_type.length), ""); + } + } + else { + if (z_src_type.length == 4) { + zs_dst1 = lp_build_interleave2(gallivm, z_type, + z_value, s_value, 0); + zs_dst2 = lp_build_interleave2(gallivm, z_type, + z_value, s_value, 1); + } + else { + unsigned i; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; + assert(z_src_type.length == 8); + for (i = 0; i < 8; i++) { + shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 
2); + shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + + z_src_type.length); + } + zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, + LLVMConstVector(&shuffles[0], + z_src_type.length), ""); + zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, + LLVMConstVector(&shuffles[8], + z_src_type.length), ""); + } + zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, + lp_build_vec_type(gallivm, zs_load_type), ""); + zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, + lp_build_vec_type(gallivm, zs_load_type), ""); + } + + LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); + if (!is_1d) { + LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); + } +} + +/** + * Generate code for performing depth and/or stencil tests. + * We operate on a vector of values (typically n 2x2 quads). + * + * \param depth the depth test state + * \param stencil the front/back stencil state + * \param type the data type of the fragment depth/stencil values + * \param format_desc description of the depth/stencil surface + * \param mask the alive/dead pixel mask for the quad (vector) + * \param stencil_refs the front/back stencil ref values (scalar) + * \param z_src the incoming depth/stencil values (n 2x2 quad values, float32) + * \param zs_dst the depth/stencil values in framebuffer + * \param face contains boolean value indicating front/back facing polygon + */ +void +lp_build_depth_stencil_test(struct gallivm_state *gallivm, + const struct pipe_depth_state *depth, + const struct pipe_stencil_state stencil[2], + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef stencil_refs[2], + LLVMValueRef z_src, + LLVMValueRef z_fb, + LLVMValueRef s_fb, + LLVMValueRef face, + LLVMValueRef *z_value, + LLVMValueRef *s_value, + boolean do_branch) +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; + struct lp_type 
s_type; + unsigned z_shift = 0, z_width = 0, z_mask = 0; + LLVMValueRef z_dst = NULL; + LLVMValueRef stencil_vals = NULL; + LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; + LLVMValueRef z_pass = NULL, s_pass_mask = NULL; + LLVMValueRef current_mask = lp_build_mask_value(mask); + LLVMValueRef front_facing = NULL; + boolean have_z, have_s; + + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; + } + else { + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the type matching the depth-stencil format. */ + z_type = lp_depth_type(format_desc, z_src_type.length); + + /* Pick the intermediate type for depth operations. */ + z_type.width = z_src_type.width; + assert(z_type.length == z_src_type.length); + + /* FIXME: for non-float depth/stencil might generate better code + * if we'd always split it up to use 128bit operations. + * For stencil we'd almost certainly want to pack to 8xi16 values, + * for z just run twice. 
+ */ + + /* Sanity checking */ + { + const unsigned z_swizzle = format_desc->swizzle[0]; + const unsigned s_swizzle = format_desc->swizzle[1]; + + assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || + s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + assert(depth->enabled || stencil[0].enabled); + + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + if (stencil[0].enabled) { + assert(s_swizzle < 4); + assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[s_swizzle].pure_integer); + assert(!format_desc->channel[s_swizzle].normalized); + assert(format_desc->channel[s_swizzle].size == 8); + } + + if (depth->enabled) { + assert(z_swizzle < 4); + if (z_type.floating) { + assert(z_swizzle == 0); + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_FLOAT); + assert(format_desc->channel[z_swizzle].size == 32); + } + else { + assert(format_desc->channel[z_swizzle].type == + UTIL_FORMAT_TYPE_UNSIGNED); + assert(format_desc->channel[z_swizzle].normalized); + assert(!z_type.fixed); + } + } + } + + + /* Setup build context for Z vals */ + lp_build_context_init(&z_bld, gallivm, z_type); + + /* Setup build context for stencil vals */ + s_type = lp_int_type(z_type); + lp_build_context_init(&s_bld, gallivm, s_type); + + /* Compute and apply the Z/stencil bitmasks and shifts. + */ + { + unsigned s_shift, s_mask; + + z_dst = z_fb; + stencil_vals = s_fb; + + have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); + have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); + + if (have_z) { + if (z_mask != 0xffffffff) { + z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); + } + + /* + * Align the framebuffer Z 's LSB to the right. 
+ */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); + z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); + } else if (z_bitmask) { + z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); + } else { + lp_build_name(z_dst, "z_dst"); + } + } + + if (have_s) { + if (s_shift) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); + stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); + stencil_shift = shift; /* used below */ + } + + if (s_mask != 0xffffffff) { + LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); + stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); + } + + lp_build_name(stencil_vals, "s_dst"); + } + } + + if (stencil[0].enabled) { + + if (face) { + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); + front_facing = LLVMBuildSExt(builder, front_facing, + LLVMIntTypeInContext(gallivm->context, + s_bld.type.length*s_bld.type.width), + ""); + front_facing = LLVMBuildBitCast(builder, front_facing, + s_bld.int_vec_type, ""); + } + + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, + stencil_refs, stencil_vals, + front_facing); + + /* apply stencil-fail operator */ + { + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, + stencil_refs, stencil_vals, + s_fail_mask, front_facing); + } + } + + if (depth->enabled) { + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. 
+ */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + + /* compare src Z to dst Z, returning 'pass' mask */ + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); + + /* mask off bits that failed stencil test */ + if (s_pass_mask) { + current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); + } + + if (!stencil[0].enabled) { + /* We can potentially skip all remaining operations here, but only + * if stencil is disabled because we still need to update the stencil + * buffer values. Don't need to update Z buffer values. + */ + lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + } + } + + if (depth->writemask) { + LLVMValueRef z_pass_mask; + + /* mask off bits that failed Z test */ + z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, ""); + + /* Mix the old and new Z buffer values. + * z_dst[i] = zselectmask[i] ? 
z_src[i] : z_dst[i] + */ + z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst); + } + + if (stencil[0].enabled) { + /* update stencil buffer values according to z pass/fail result */ + LLVMValueRef z_fail_mask, z_pass_mask; + + /* apply Z-fail operator */ + z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, + stencil_refs, stencil_vals, + z_fail_mask, front_facing); + + /* apply Z-pass operator */ + z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + z_pass_mask, front_facing); + } + } + else { + /* No depth test: apply Z-pass operator to stencil buffer values which + * passed the stencil test. + */ + s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, + stencil_refs, stencil_vals, + s_pass_mask, front_facing); + } + + /* Put Z and stencil bits in the right place */ + if (have_z && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } + if (stencil_vals && stencil_shift) + stencil_vals = LLVMBuildShl(builder, stencil_vals, + stencil_shift, ""); + + /* Finally, merge the z/stencil values */ + if (format_desc->block.bits <= 32) { + if (have_z && have_s) + *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); + else if (have_z) + *z_value = z_dst; + else + *z_value = stencil_vals; + *s_value = *z_value; + } + else { + *z_value = z_dst; + *s_value = stencil_vals; + } + + if (s_pass_mask) + lp_build_mask_update(mask, s_pass_mask); + + if (depth->enabled && stencil[0].enabled) + lp_build_mask_update(mask, z_pass); +} + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h new file mode 100644 index 000000000..d169c8967 --- /dev/null +++ 
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -0,0 +1,105 @@
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


/**
 * Depth/stencil testing to LLVM IR translation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

#ifndef LP_BLD_DEPTH_H
#define LP_BLD_DEPTH_H


#include "pipe/p_compiler.h"
#include "pipe/p_state.h"

#include "gallivm/lp_bld.h"


struct pipe_depth_state;
struct gallivm_state;
struct util_format_description;
struct lp_type;
struct lp_build_mask_context;


/*
 * Return the lp_type used to hold depth values for the given Z/S format
 * and vector length.
 * NOTE(review): semantics inferred from the prototype; confirm against the
 * definition in lp_bld_depth.c.
 */
struct lp_type
lp_depth_type(const struct util_format_description *format_desc,
              unsigned length);


/*
 * Generate code that performs the depth and/or stencil tests.
 * Compares the incoming fragment Z (z_src) and framebuffer Z/stencil
 * (z_fb/s_fb), updates the fragment mask accordingly, and returns the
 * merged values to be written back through *z_value and *s_value.
 * 'face' selects front/back stencil state; do_branch allows an early
 * mask-check branch when all fragments are killed.
 */
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef z_fb,
                            LLVMValueRef s_fb,
                            LLVMValueRef face,
                            LLVMValueRef *z_value,
                            LLVMValueRef *s_value,
                            boolean do_branch);

/*
 * Load swizzled Z/stencil values from the depth buffer into *z_fb / *s_fb.
 * NOTE(review): "swizzled" refers to llvmpipe's tiled layout — confirm with
 * the definition; is_1d selects the 1D resource path.
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     boolean is_1d,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter);

/*
 * Store the (masked) Z/stencil results produced by
 * lp_build_depth_stencil_test() back to the depth buffer.
 */
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      boolean is_1d,
                                      struct lp_build_mask_context *mask,
                                      LLVMValueRef z_fb,
                                      LLVMValueRef s_fb,
                                      LLVMValueRef loop_counter,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef z_value,
                                      LLVMValueRef s_value);


/*
 * Accumulate the number of live fragments in maskvalue into *counter
 * (occlusion query support). NOTE(review): inferred from the name —
 * confirm against the definition.
 */
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter);

#endif /* !LP_BLD_DEPTH_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c
new file mode 100644
index 000000000..ceac86abe
--- /dev/null
+++
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -0,0 +1,819 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007-2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Position and shader input interpolation. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_shader_tokens.h" +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "tgsi/tgsi_scan.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" +#include "lp_bld_interp.h" + + +/* + * The shader JIT function operates on blocks of quads. 
+ * Each block has 2x2 quads and each quad has 2x2 pixels. + * + * We iterate over the quads in order 0, 1, 2, 3: + * + * ################# + * # | # | # + * #---0---#---1---# + * # | # | # + * ################# + * # | # | # + * #---2---#---3---# + * # | # | # + * ################# + * + * If we iterate over multiple quads at once, quads 01 and 23 are processed + * together. + * + * Within each quad, we have four pixels which are represented in SOA + * order: + * + * ######### + * # 0 | 1 # + * #---+---# + * # 2 | 3 # + * ######### + * + * So the green channel (for example) of the four pixels is stored in + * a single vector register: {g0, g1, g2, g3}. + * The order stays the same even with multiple quads: + * 0 1 4 5 + * 2 3 6 7 + * is stored as g0..g7 + */ + + +/** + * Do one perspective divide per quad. + * + * For perspective interpolation, the final attribute value is given + * + * a' = a/w = a * oow + * + * where + * + * a = a0 + dadx*x + dady*y + * w = w0 + dwdx*x + dwdy*y + * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y) + * + * Instead of computing the division per pixel, with this macro we compute the + * division on the upper left pixel of each quad, and use a linear + * approximation in the remaining pixels, given by: + * + * da'dx = (dadx - dwdx*a)*oow + * da'dy = (dady - dwdy*a)*oow + * + * Ironically, this actually makes things slower -- probably because the + * divide hardware unit is rarely used, whereas the multiply unit is typically + * already saturated. 
+ */ +#define PERSPECTIVE_DIVIDE_PER_QUAD 0 + + +static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3}; +static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3}; + + +static void +attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) +{ + if(attrib == 0) + lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix); + else + lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); +} + +static void +calc_offsets(struct lp_build_context *coeff_bld, + unsigned quad_start_index, + LLVMValueRef *pixoffx, + LLVMValueRef *pixoffy) +{ + unsigned i; + unsigned num_pix = coeff_bld->type.length; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = coeff_bld->gallivm->builder; + LLVMValueRef nr, pixxf, pixyf; + + *pixoffx = coeff_bld->undef; + *pixoffy = coeff_bld->undef; + + for (i = 0; i < num_pix; i++) { + nr = lp_build_const_int32(gallivm, i); + pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + + (quad_start_index & 1) * 2); + pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + + (quad_start_index & 2)); + *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, ""); + *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, ""); + } +} + + +/* Much easier, and significantly less instructions in the per-stamp + * part (less than half) but overall more instructions so a loss if + * most quads are active. Might be a win though with larger vectors. + * No ability to do per-quad divide (doable but not implemented) + * Could be made to work with passed in pixel offsets (i.e. active quad merging). 
 */
/*
 * Load the interpolation coefficients (a0, dadx, dady) for every attribute
 * from the arrays passed into the JIT function ("simple" single-step path).
 * Results are kept in AoS form (one 4-channel vector per attribute) in
 * bld->a0aos / dadxaos / dadyaos.
 */
static void
coeffs_init_simple(struct lp_build_interp_soa_context *bld,
                   LLVMValueRef a0_ptr,
                   LLVMValueRef dadx_ptr,
                   LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned attrib;

   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      /*
       * always fetch all 4 values for performance/simplicity
       * Note: we do that here because it seems to generate better
       * code. It generates a lot of moves initially but less
       * moves later. As far as I can tell this looks like a
       * llvm issue, instead of simply reloading the values from
       * the passed in pointers it if it runs out of registers
       * it spills/reloads them. Maybe some optimization passes
       * would help.
       * Might want to investigate this again later.
       */
      const unsigned interp = bld->interp[attrib];
      /* attributes are stored as consecutive float[TGSI_NUM_CHANNELS] */
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         /* linear/perspective attributes need the x/y derivatives too */
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
                                LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
                                LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         /* all modes except POSITION need the base value a0 */
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
                                LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }
      bld->a0aos[attrib] = a0aos;
      bld->dadxaos[attrib] = dadxaos;
      bld->dadyaos[attrib] = dadyaos;
   }
}


/**
 * Interpolate the shader input attribute values.
 * This is called for each (group of) quad(s).
 * Evaluates a = a0 + x*dadx + y*dady per lane using the per-lane pixel
 * offsets stored by lp_build_interp_soa_init(), with an optional
 * perspective divide; attribs [start, end) are updated in bld->attribs.
 */
static void
attribs_update_simple(struct lp_build_interp_soa_context *bld,
                      struct gallivm_state *gallivm,
                      LLVMValueRef loop_iter,
                      int start,
                      int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   LLVMValueRef oow = NULL;   /* 1/w, computed lazily, shared by all attribs */
   unsigned attrib;
   LLVMValueRef pixoffx;
   LLVMValueRef pixoffy;
   LLVMValueRef ptr;

   /* could do this with code-generated passed in pixel offsets too */

   assert(loop_iter);
   ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
   pixoffx = LLVMBuildLoad(builder, ptr, "");
   ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
   pixoffy = LLVMBuildLoad(builder, ptr, "");

   /* absolute pixel coords = block origin + per-lane offset */
   pixoffx = LLVMBuildFAdd(builder, pixoffx,
                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
   pixoffy = LLVMBuildFAdd(builder, pixoffy,
                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");

   for (attrib = start; attrib < end; attrib++) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      unsigned chan;

      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
         if (mask & (1 << chan)) {
            LLVMValueRef index;
            LLVMValueRef dadx = coeff_bld->zero;
            LLVMValueRef dady = coeff_bld->zero;
            LLVMValueRef a = coeff_bld->zero;

            index = lp_build_const_int32(gallivm, chan);
            switch (interp) {
            case LP_INTERP_PERSPECTIVE:
               /* fall-through */

            case LP_INTERP_LINEAR:
               /* position x/y have implicit coefficients (dadx=1 resp. dady=1) */
               if (attrib == 0 && chan == 0) {
                  dadx = coeff_bld->one;
                  if (bld->pos_offset) {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else if (attrib == 0 && chan == 1) {
                  dady = coeff_bld->one;
                  if (bld->pos_offset) {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else {
                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadxaos[attrib],
                                                    index);
                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadyaos[attrib],
                                                    index);
                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, bld->a0aos[attrib],
                                                 index);
               }
               /*
                * a = a0 + (x * dadx + y * dady)
                */
               dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
               dady = LLVMBuildFMul(builder, dady, pixoffy, "");
               a = LLVMBuildFAdd(builder, a, dadx, "");
               a = LLVMBuildFAdd(builder, a, dady, "");

               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     /* w is interpolated first (attrib 0), so reuse it here */
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
               break;

            case LP_INTERP_CONSTANT:
            case LP_INTERP_FACING:
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, bld->a0aos[attrib],
                                              index);
               break;

            case LP_INTERP_POSITION:
               assert(attrib > 0);
               a = bld->attribs[0][chan];
               break;

            default:
               assert(0);
               break;
            }

            if ((attrib == 0) && (chan == 2)){
               /* FIXME: Depth values can exceed 1.0, due to the fact that
                * setup interpolation coefficients refer to (0,0) which causes
                * precision loss. So we must clamp to 1.0 here to avoid artifacts
                */
               a = lp_build_min(coeff_bld, a, coeff_bld->one);
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}

/**
 * Initialize the bld->a, dadq fields. This involves fetching
 * those values from the arrays which are passed into the JIT function.
 * Used by the (non-simple) stepped interpolation path: per-attribute base
 * values are stored in allocas (bld->a) and per-element deltas in bld->dadq.
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   /* per-lane pixel offsets within the block (from the static tables) */
   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }


   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
                                LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
                                LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
                                LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (interp != LP_INTERP_CONSTANT &&
          interp != LP_INTERP_FACING) {
         LLVMValueRef axaos, ayaos;
         axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
                               dadxaos, "");
         ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
                               dadyaos, "");
         a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
         a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) this is (per the quad_offset tables):
       * {0, dadx, dady, dadx+dady, 2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* this generates a CRAPLOAD of shuffles... */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               /* position x: implicit dadx = 1 */
               a = bld->x;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            }
            else if (attrib == 0 && chan == 1) {
               /* position y: implicit dady = 1 */
               a = bld->y;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            }
            else {
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, a0aos, chan_index);
            }

            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads.
             * Note that if we process 2 quads at once this doesn't
             * really exactly to what we want.
             * We need to access elem 0 and 2 respectively later if we process
             * 2 quads at once.
             */

            if (interp != LP_INTERP_CONSTANT &&
                interp != LP_INTERP_FACING) {
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
            }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             * NOTE(review): this branch is compiled out
             * (PERSPECTIVE_DIVIDE_PER_QUAD is 0) and references bld->oow —
             * verify that field exists before ever enabling it.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               if (!bld->oow) {
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            /* base value lives in memory so it can be indexed per quad later */
            bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                   LLVMTypeOf(a), "");
            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}


/**
 * Increment the shader input attribute values.
 * This is called when we move from one quad to the next.
 * Reloads the per-quad base value from bld->a (indexed by loop_iter),
 * adds the precomputed per-element deltas, and applies the perspective
 * divide for LP_INTERP_PERSPECTIVE attributes.
 */
static void
attribs_update(struct lp_build_interp_soa_context *bld,
               struct gallivm_state *gallivm,
               LLVMValueRef loop_iter,
               int start,
               int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   LLVMValueRef oow = NULL;   /* 1/w, computed lazily, shared by all attribs */
   unsigned attrib;
   unsigned chan;

   for(attrib = start; attrib < end; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         if(mask & (1 << chan)) {
            LLVMValueRef a;
            if (interp == LP_INTERP_CONSTANT ||
                interp == LP_INTERP_FACING) {
               a = LLVMBuildLoad(builder, bld->a[attrib][chan], "");
            }
            else if (interp == LP_INTERP_POSITION) {
               assert(attrib > 0);
               a = bld->attribs[0][chan];
            }
            else {
               LLVMValueRef dadq;

               a = bld->a[attrib][chan];

               /*
                * Broadcast the attribute value for this quad into all elements
                */

               {
                  /* stored as vector load as float */
                  LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
                        gallivm->context), 0);
                  LLVMValueRef ptr;
                  a = LLVMBuildBitCast(builder, a, ptr_type, "");
                  ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
                  a = LLVMBuildLoad(builder, ptr, "");
                  a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
               }

               /*
                * Get the derivatives.
                */

               dadq = bld->dadq[attrib][chan];

#if PERSPECTIVE_DIVIDE_PER_QUAD
               /*
                * NOTE(review): dead branch (macro is 0); it references an
                * undeclared 'shuffle' and bld->oow, so it would not compile
                * as-is if enabled.
                */
               if (interp == LP_INTERP_PERSPECTIVE) {
                  LLVMValueRef dwdq = bld->dadq[0][3];

                  if (oow == NULL) {
                     assert(bld->oow);
                     oow = LLVMBuildShuffleVector(coeff_bld->builder,
                                                  bld->oow, coeff_bld->undef,
                                                  shuffle, "");
                  }

                  dadq = lp_build_sub(coeff_bld,
                                      dadq,
                                      lp_build_mul(coeff_bld, a, dwdq));
                  dadq = lp_build_mul(coeff_bld, dadq, oow);
               }
#endif

               /*
                * Add the derivatives
                */

               a = lp_build_add(coeff_bld, a, dadq);

#if !PERSPECTIVE_DIVIDE_PER_QUAD
               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     /* w was interpolated first (attrib 0), so reuse it */
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
#endif

               if (attrib == 0 && chan == 2) {
                  /* FIXME: Depth values can exceed 1.0, due to the fact that
                   * setup interpolation coefficients refer to (0,0) which causes
                   * precision loss. So we must clamp to 1.0 here to avoid artifacts
                   */
                  a = lp_build_min(coeff_bld, a, coeff_bld->one);
               }

               attrib_name(a, attrib, chan, "");
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}


/**
 * Generate the position vectors.
 *
 * Parameter x0, y0 are the integer values with upper left coordinates.
 * Converted to float and stored in bld->x / bld->y for later use.
 */
static void
pos_init(struct lp_build_interp_soa_context *bld,
         LLVMValueRef x0,
         LLVMValueRef y0)
{
   LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;

   bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
   bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
}


/**
 * Initialize fragment shader input attribute info.
 *
 * Zeroes *bld, sets up the coefficient/setup build contexts, records the
 * per-attribute interpolation modes and usage masks (slot 0 is the
 * position), precomputes per-loop pixel offset vectors, and loads the
 * interpolation coefficients via coeffs_init_simple()/coeffs_init().
 */
void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                         struct gallivm_state *gallivm,
                         unsigned num_inputs,
                         const struct lp_shader_input *inputs,
                         boolean pixel_center_integer,
                         LLVMBuilderRef builder,
                         struct lp_type type,
                         LLVMValueRef a0_ptr,
                         LLVMValueRef dadx_ptr,
                         LLVMValueRef dady_ptr,
                         LLVMValueRef x0,
                         LLVMValueRef y0)
{
   struct lp_type coeff_type;
   struct lp_type setup_type;
   unsigned attrib;
   unsigned chan;

   memset(bld, 0, sizeof *bld);

   /* per-lane interpolation type: float vector of the caller's length */
   memset(&coeff_type, 0, sizeof coeff_type);
   coeff_type.floating = TRUE;
   coeff_type.sign = TRUE;
   coeff_type.width = 32;
   coeff_type.length = type.length;

   /* setup type: one 4-channel (xyzw) float vector per attribute */
   memset(&setup_type, 0, sizeof setup_type);
   setup_type.floating = TRUE;
   setup_type.sign = TRUE;
   setup_type.width = 32;
   setup_type.length = TGSI_NUM_CHANNELS;


   /* XXX: we don't support interpolating into any other types */
   assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);

   lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);

   /* For convenience */
   bld->pos = bld->attribs[0];
   bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];

   /* Position */
   bld->mask[0] = TGSI_WRITEMASK_XYZW;
   bld->interp[0] = LP_INTERP_LINEAR;

   /* Inputs */
   for (attrib = 0; attrib < num_inputs; ++attrib) {
      bld->mask[1 + attrib] = inputs[attrib].usage_mask;
      bld->interp[1 + attrib] = inputs[attrib].interp;
   }
   bld->num_attribs = 1 + num_inputs;

   /* Ensure all masked out input channels have a valid value */
   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         bld->attribs[attrib][chan] = bld->coeff_bld.undef;
      }
   }

   /* sample at pixel centers (0.5 offset) unless the center is integral */
   if (pixel_center_integer) {
      bld->pos_offset = 0.0;
   } else {
      bld->pos_offset = 0.5;
   }

   pos_init(bld, x0, y0);

   /*
    * Simple method (single step interpolation) may be slower if vector length
    * is just 4, but the results are different (generally less accurate) with
    * the other method, so always use more accurate version.
    */
   if (1) {
      bld->simple_interp = TRUE;
      {
         /* XXX this should use a global static table */
         unsigned i;
         /* one offset vector per loop over the 4x4 block (16 pixels) */
         unsigned num_loops = 16 / type.length;
         LLVMValueRef pixoffx, pixoffy, index;
         LLVMValueRef ptr;

         bld->xoffset_store = lp_build_array_alloca(gallivm,
                                                    lp_build_vec_type(gallivm, type),
                                                    lp_build_const_int32(gallivm, num_loops),
                                                    "");
         bld->yoffset_store = lp_build_array_alloca(gallivm,
                                                    lp_build_vec_type(gallivm, type),
                                                    lp_build_const_int32(gallivm, num_loops),
                                                    "");
         for (i = 0; i < num_loops; i++) {
            index = lp_build_const_int32(gallivm, i);
            calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
            ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
            LLVMBuildStore(builder, pixoffx, ptr);
            ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
            LLVMBuildStore(builder, pixoffy, ptr);
         }
      }
      coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
   }
   else {
      bld->simple_interp = FALSE;
      coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
   }

}


/*
 * Advance the position and inputs to the given quad within the block.
+ */ + +void +lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index) +{ + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs); + } + else { + attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs); + } +} + +void +lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index) +{ + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, quad_start_index, 0, 1); + } + else { + attribs_update(bld, gallivm, quad_start_index, 0, 1); + } +} + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h new file mode 100644 index 000000000..9029d2a41 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Position and shader input interpolation. + * + * Special attention is given to the interpolation of side by side quads. + * Multiplications are made only for the first quad. Interpolation of + * inputs for posterior quads are done exclusively with additions, and + * perspective divide if necessary. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef LP_BLD_INTERP_H +#define LP_BLD_INTERP_H + + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_type.h" + +#include "tgsi/tgsi_exec.h" + +/** + * Describes how to compute the interpolation coefficients (a0, dadx, dady) + * from the vertices passed into our triangle/line/point functions by the + * draw module. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. + * + * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or + * PERSPECTIVE depending on flatshade state. 
+ */ +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_COLOR, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + +struct lp_shader_input { + uint interp:4; /* enum lp_interp */ + uint usage_mask:4; /* bitmask of TGSI_WRITEMASK_x flags */ + uint src_index:8; /* where to find values in incoming vertices */ + uint cyl_wrap:4; /* TGSI_CYLINDRICAL_WRAP_x flags */ + uint padding:12; +}; + + +struct lp_build_interp_soa_context +{ + /* TGSI_QUAD_SIZE x float */ + struct lp_build_context coeff_bld; + struct lp_build_context setup_bld; + + unsigned num_attribs; + unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */ + enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS]; + boolean simple_interp; + + double pos_offset; + + LLVMValueRef x; + LLVMValueRef y; + + LLVMValueRef a[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef a0aos[1 + PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef dadxaos[1 + PIPE_MAX_SHADER_INPUTS]; + LLVMValueRef dadyaos[1 + PIPE_MAX_SHADER_INPUTS]; + + LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + + LLVMValueRef xoffset_store; + LLVMValueRef yoffset_store; + + /* + * Convenience pointers. Callers may access this one. 
+ */ + const LLVMValueRef *pos; + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; +}; + + +void +lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + unsigned num_inputs, + const struct lp_shader_input *inputs, + boolean pixel_center_integer, + LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef a0_ptr, + LLVMValueRef dadx_ptr, + LLVMValueRef dady_ptr, + LLVMValueRef x, + LLVMValueRef y); + +void +lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index); + +void +lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index); + +#endif /* LP_BLD_INTERP_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c new file mode 100644 index 000000000..064206fc2 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.c @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * Copyright 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Michel Dänzer + */ + + +#include "pipe/p_defines.h" +#include "lp_clear.h" +#include "lp_context.h" +#include "lp_setup.h" +#include "lp_query.h" +#include "lp_debug.h" + + +/** + * Clear the given buffers to the specified values. + * No masking, no scissor (clear entire buffer). + */ +void +llvmpipe_clear(struct pipe_context *pipe, + unsigned buffers, + const union pipe_color_union *color, + double depth, + unsigned stencil) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (!llvmpipe_check_render_cond(llvmpipe)) + return; + + if (LP_PERF & PERF_NO_DEPTH) + buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; + + lp_setup_clear( llvmpipe->setup, color, depth, stencil, buffers ); +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h new file mode 100644 index 000000000..7249929cb --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_clear.h @@ -0,0 +1,44 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + +#ifndef LP_CLEAR_H +#define LP_CLEAR_H + +#include "pipe/p_state.h" +struct pipe_context; + +extern void +llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil); + + +#endif /* LP_CLEAR_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c new file mode 100644 index 000000000..80cb6578b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.c @@ -0,0 +1,226 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keithw@vmware.com> + */ + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/simple_list.h" +#include "lp_clear.h" +#include "lp_context.h" +#include "lp_flush.h" +#include "lp_perf.h" +#include "lp_state.h" +#include "lp_surface.h" +#include "lp_query.h" +#include "lp_setup.h" + +/* This is only safe if there's just one concurrent context */ +#ifdef PIPE_SUBSYSTEM_EMBEDDED +#define USE_GLOBAL_LLVM_CONTEXT +#endif + +static void llvmpipe_destroy( struct pipe_context *pipe ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + uint i, j; + + lp_print_counters(); + + if (llvmpipe->blitter) { + util_blitter_destroy(llvmpipe->blitter); + } + + /* This will also destroy llvmpipe->setup: + */ + if (llvmpipe->draw) + draw_destroy( llvmpipe->draw ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL); + } + + pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL); + + for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) { + pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL); + } + + for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) { + pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_VERTEX][i], NULL); + } + + for (i = 0; i < Elements(llvmpipe->sampler_views[0]); i++) { + pipe_sampler_view_reference(&llvmpipe->sampler_views[PIPE_SHADER_GEOMETRY][i], NULL); + } + + for (i = 0; i < Elements(llvmpipe->constants); i++) { + for (j = 0; j < Elements(llvmpipe->constants[i]); j++) { + pipe_resource_reference(&llvmpipe->constants[i][j].buffer, NULL); + } + } + + for (i = 0; i < llvmpipe->num_vertex_buffers; i++) { + pipe_resource_reference(&llvmpipe->vertex_buffer[i].buffer, NULL); + } + + 
lp_delete_setup_variants(llvmpipe); + +#ifndef USE_GLOBAL_LLVM_CONTEXT + LLVMContextDispose(llvmpipe->context); +#endif + llvmpipe->context = NULL; + + align_free( llvmpipe ); +} + +static void +do_flush( struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + llvmpipe_flush(pipe, fence, __FUNCTION__); +} + + +static void +llvmpipe_render_condition ( struct pipe_context *pipe, + struct pipe_query *query, + boolean condition, + uint mode ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + + llvmpipe->render_cond_query = query; + llvmpipe->render_cond_mode = mode; + llvmpipe->render_cond_cond = condition; +} + +struct pipe_context * +llvmpipe_create_context( struct pipe_screen *screen, void *priv ) +{ + struct llvmpipe_context *llvmpipe; + + llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16); + if (!llvmpipe) + return NULL; + + util_init_math(); + + memset(llvmpipe, 0, sizeof *llvmpipe); + + make_empty_list(&llvmpipe->fs_variants_list); + + make_empty_list(&llvmpipe->setup_variants_list); + + + llvmpipe->pipe.screen = screen; + llvmpipe->pipe.priv = priv; + + /* Init the pipe context methods */ + llvmpipe->pipe.destroy = llvmpipe_destroy; + llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state; + llvmpipe->pipe.clear = llvmpipe_clear; + llvmpipe->pipe.flush = do_flush; + + llvmpipe->pipe.render_condition = llvmpipe_render_condition; + + llvmpipe_init_blend_funcs(llvmpipe); + llvmpipe_init_clip_funcs(llvmpipe); + llvmpipe_init_draw_funcs(llvmpipe); + llvmpipe_init_sampler_funcs(llvmpipe); + llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_vertex_funcs(llvmpipe); + llvmpipe_init_so_funcs(llvmpipe); + llvmpipe_init_fs_funcs(llvmpipe); + llvmpipe_init_vs_funcs(llvmpipe); + llvmpipe_init_gs_funcs(llvmpipe); + llvmpipe_init_rasterizer_funcs(llvmpipe); + llvmpipe_init_context_resource_funcs( &llvmpipe->pipe ); + llvmpipe_init_surface_functions(llvmpipe); + +#ifdef USE_GLOBAL_LLVM_CONTEXT + 
llvmpipe->context = LLVMGetGlobalContext(); +#else + llvmpipe->context = LLVMContextCreate(); +#endif + + if (!llvmpipe->context) + goto fail; + + /* + * Create drawing context and plug our rendering stage into it. + */ + llvmpipe->draw = draw_create_with_llvm_context(&llvmpipe->pipe, + llvmpipe->context); + if (!llvmpipe->draw) + goto fail; + + /* FIXME: devise alternative to draw_texture_samplers */ + + llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, + llvmpipe->draw ); + if (!llvmpipe->setup) + goto fail; + + llvmpipe->blitter = util_blitter_create(&llvmpipe->pipe); + if (!llvmpipe->blitter) { + goto fail; + } + + /* must be done before installing Draw stages */ + util_blitter_cache_all_shaders(llvmpipe->blitter); + + /* plug in AA line/point stages */ + draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe); + draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe); + draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe); + + /* convert points and lines into triangles: + * (otherwise, draw points and lines natively) + */ + draw_wide_point_sprites(llvmpipe->draw, FALSE); + draw_enable_point_sprites(llvmpipe->draw, FALSE); + draw_wide_point_threshold(llvmpipe->draw, 10000.0); + draw_wide_line_threshold(llvmpipe->draw, 10000.0); + + lp_reset_counters(); + + return &llvmpipe->pipe; + + fail: + llvmpipe_destroy(&llvmpipe->pipe); + return NULL; +} + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h new file mode 100644 index 000000000..c273b25f0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_context.h @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keithw@vmware.com> + */ + +#ifndef LP_CONTEXT_H +#define LP_CONTEXT_H + +#include "pipe/p_context.h" + +#include "draw/draw_vertex.h" +#include "util/u_blitter.h" + +#include "lp_tex_sample.h" +#include "lp_jit.h" +#include "lp_setup.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" + + +struct llvmpipe_vbuf_render; +struct draw_context; +struct draw_stage; +struct draw_vertex_shader; +struct lp_fragment_shader; +struct lp_blend_state; +struct lp_setup_context; +struct lp_setup_variant; +struct lp_velems_state; + +struct llvmpipe_context { + struct pipe_context pipe; /**< base class */ + + /** Constant state objects */ + const struct pipe_blend_state *blend; + struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + struct lp_fragment_shader *fs; + struct draw_vertex_shader *vs; + const struct lp_geometry_shader *gs; + const struct lp_velems_state *velems; + const struct lp_so_state *so; + + /** Other rendering state */ + unsigned sample_mask; + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES][LP_MAX_TGSI_CONST_BUFFERS]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS]; + struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; + struct pipe_resource *mapped_vs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct pipe_resource *mapped_gs_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + unsigned num_samplers[PIPE_SHADER_TYPES]; + 
unsigned num_sampler_views[PIPE_SHADER_TYPES]; + + unsigned num_vertex_buffers; + + struct draw_so_target *so_targets[PIPE_MAX_SO_BUFFERS]; + int num_so_targets; + struct pipe_query_data_so_statistics so_stats; + + struct pipe_query_data_pipeline_statistics pipeline_statistics; + unsigned active_statistics_queries; + + unsigned active_occlusion_queries; + + unsigned dirty; /**< Mask of LP_NEW_x flags */ + + /** Mapped vertex buffers */ + ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; + + /** Vertex format */ + struct vertex_info vertex_info; + + /** Which vertex shader output slot contains color */ + int color_slot[2]; + + /** Which vertex shader output slot contains bcolor */ + int bcolor_slot[2]; + + /** Which vertex shader output slot contains point size */ + int psize_slot; + + /** Which vertex shader output slot contains viewport index */ + int viewport_index_slot; + + /** Which geometry shader output slot contains layer */ + int layer_slot; + + /** A fake frontface output for unfilled primitives */ + int face_slot; + + /** Depth format and bias settings. 
*/ + boolean floating_point_depth; + double mrd; /**< minimum resolvable depth value, for polygon offset */ + + /** The tiling engine */ + struct lp_setup_context *setup; + struct lp_setup_variant setup_variant; + + /** The primitive drawing context */ + struct draw_context *draw; + + struct blitter_context *blitter; + + unsigned tex_timestamp; + boolean no_rast; + + /** List of all fragment shader variants */ + struct lp_fs_variant_list_item fs_variants_list; + unsigned nr_fs_variants; + unsigned nr_fs_instrs; + + struct lp_setup_variant_list_item setup_variants_list; + unsigned nr_setup_variants; + + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + boolean render_cond_cond; + + /** The LLVMContext to use for LLVM related work */ + LLVMContextRef context; +}; + + +struct pipe_context * +llvmpipe_create_context( struct pipe_screen *screen, void *priv ); + +struct pipe_resource * +llvmpipe_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes, + unsigned bind_flags); + + +static inline struct llvmpipe_context * +llvmpipe_context( struct pipe_context *pipe ) +{ + return (struct llvmpipe_context *)pipe; +} + +#endif /* LP_CONTEXT_H */ + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h new file mode 100644 index 000000000..1038c5fe1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_debug.h @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef LP_DEBUG_H +#define LP_DEBUG_H + +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + +extern void +st_print_current(void); + + +#define DEBUG_PIPE 0x1 +#define DEBUG_TGSI 0x2 +#define DEBUG_TEX 0x4 +#define DEBUG_SETUP 0x10 +#define DEBUG_RAST 0x20 +#define DEBUG_QUERY 0x40 +#define DEBUG_SCREEN 0x80 +#define DEBUG_COUNTERS 0x800 +#define DEBUG_SCENE 0x1000 +#define DEBUG_FENCE 0x2000 +#define DEBUG_MEM 0x4000 +#define DEBUG_FS 0x8000 + +/* Performance flags. These are active even on release builds. 
+ */ +#define PERF_TEX_MEM 0x1 /* minimize texture cache footprint */ +#define PERF_NO_MIP_LINEAR 0x2 /* MIP_FILTER_LINEAR ==> _NEAREST */ +#define PERF_NO_MIPMAPS 0x4 /* MIP_FILTER_NONE always */ +#define PERF_NO_LINEAR 0x8 /* FILTER_NEAREST always */ +#define PERF_NO_TEX 0x10 /* sample white always */ +#define PERF_NO_BLEND 0x20 /* disable blending */ +#define PERF_NO_DEPTH 0x40 /* disable depth buffering entirely */ +#define PERF_NO_ALPHATEST 0x80 /* disable alpha testing */ + + +extern int LP_PERF; + +#ifdef DEBUG +extern int LP_DEBUG; +#else +#define LP_DEBUG 0 +#endif + +void st_debug_init( void ); + +static inline void +LP_DBG( unsigned flag, const char *fmt, ... ) +{ + if (LP_DEBUG & flag) + { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + + +#endif /* LP_DEBUG_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c new file mode 100644 index 000000000..edfb20409 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -0,0 +1,169 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "util/u_draw.h" +#include "util/u_prim.h" + +#include "lp_context.h" +#include "lp_state.h" +#include "lp_query.h" + +#include "draw/draw_context.h" + + + +/** + * Draw vertex arrays, with optional indexing, optional instancing. + * All the other drawing functions are implemented in terms of this function. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. 
+ */ +static void +llvmpipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + struct draw_context *draw = lp->draw; + const void *mapped_indices = NULL; + unsigned i; + + if (!llvmpipe_check_render_cond(lp)) + return; + + if (info->indirect) { + util_draw_indirect(pipe, info); + return; + } + + if (lp->dirty) + llvmpipe_update_derived( lp ); + + /* + * Map vertex buffers + */ + for (i = 0; i < lp->num_vertex_buffers; i++) { + const void *buf = lp->vertex_buffer[i].user_buffer; + size_t size = ~0; + if (!buf) { + if (!lp->vertex_buffer[i].buffer) { + continue; + } + buf = llvmpipe_resource_data(lp->vertex_buffer[i].buffer); + size = lp->vertex_buffer[i].buffer->width0; + } + draw_set_mapped_vertex_buffer(draw, i, buf, size); + } + + /* Map index buffer, if present */ + if (info->indexed) { + unsigned available_space = ~0; + mapped_indices = lp->index_buffer.user_buffer; + if (!mapped_indices) { + mapped_indices = llvmpipe_resource_data(lp->index_buffer.buffer); + if (lp->index_buffer.buffer->width0 > lp->index_buffer.offset) + available_space = + (lp->index_buffer.buffer->width0 - lp->index_buffer.offset); + else + available_space = 0; + } + draw_set_indexes(draw, + (ubyte *) mapped_indices + lp->index_buffer.offset, + lp->index_buffer.index_size, available_space); + } + + for (i = 0; i < lp->num_so_targets; i++) { + void *buf = 0; + if (lp->so_targets[i]) { + buf = llvmpipe_resource(lp->so_targets[i]->target.buffer)->data; + lp->so_targets[i]->mapping = buf; + } + } + draw_set_mapped_so_targets(draw, lp->num_so_targets, + lp->so_targets); + + llvmpipe_prepare_vertex_sampling(lp, + lp->num_sampler_views[PIPE_SHADER_VERTEX], + lp->sampler_views[PIPE_SHADER_VERTEX]); + llvmpipe_prepare_geometry_sampling(lp, + lp->num_sampler_views[PIPE_SHADER_GEOMETRY], + lp->sampler_views[PIPE_SHADER_GEOMETRY]); + if (lp->gs && lp->gs->no_tokens) { + /* we have an empty geometry shader with stream 
output, so + attach the stream output info to the current vertex shader */ + if (lp->vs) { + draw_vs_attach_so(lp->vs, &lp->gs->stream_output); + } + } + draw_collect_pipeline_statistics(draw, + lp->active_statistics_queries > 0); + + /* draw! */ + draw_vbo(draw, info); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < lp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL, 0); + } + if (mapped_indices) { + draw_set_indexes(draw, NULL, 0, 0); + } + draw_set_mapped_so_targets(draw, 0, NULL); + + if (lp->gs && lp->gs->no_tokens) { + /* we have attached stream output to the vs for rendering, + now lets reset it */ + if (lp->vs) { + draw_vs_reset_so(lp->vs); + } + } + + llvmpipe_cleanup_vertex_sampling(lp); + llvmpipe_cleanup_geometry_sampling(lp); + + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); +} + + +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.draw_vbo = llvmpipe_draw_vbo; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c new file mode 100644 index 000000000..a21a3c744 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.c @@ -0,0 +1,127 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "lp_debug.h" +#include "lp_fence.h" + + +/** + * Create a new fence object. + * + * The rank will be the number of bins in the scene. Whenever a rendering + * thread hits a fence command, it'll increment the fence counter. When + * the counter == the rank, the fence is finished. + * + * \param rank the expected finished value of the fence counter. 
+ */ +struct lp_fence * +lp_fence_create(unsigned rank) +{ + static int fence_id; + struct lp_fence *fence = CALLOC_STRUCT(lp_fence); + + if (!fence) + return NULL; + + pipe_reference_init(&fence->reference, 1); + + pipe_mutex_init(fence->mutex); + pipe_condvar_init(fence->signalled); + + fence->id = fence_id++; + fence->rank = rank; + + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + + return fence; +} + + +/** Destroy a fence. Called when refcount hits zero. */ +void +lp_fence_destroy(struct lp_fence *fence) +{ + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + + pipe_mutex_destroy(fence->mutex); + pipe_condvar_destroy(fence->signalled); + FREE(fence); +} + + +/** + * Called by the rendering threads to increment the fence counter. + * When the counter == the rank, the fence is finished. + */ +void +lp_fence_signal(struct lp_fence *fence) +{ + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, fence->id); + + pipe_mutex_lock(fence->mutex); + + fence->count++; + assert(fence->count <= fence->rank); + + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s count=%u rank=%u\n", __FUNCTION__, + fence->count, fence->rank); + + /* Wakeup all threads waiting on the mutex: + */ + pipe_condvar_broadcast(fence->signalled); + + pipe_mutex_unlock(fence->mutex); +} + +boolean +lp_fence_signalled(struct lp_fence *f) +{ + return f->count == f->rank; +} + +void +lp_fence_wait(struct lp_fence *f) +{ + if (LP_DEBUG & DEBUG_FENCE) + debug_printf("%s %d\n", __FUNCTION__, f->id); + + pipe_mutex_lock(f->mutex); + assert(f->issued); + while (f->count < f->rank) { + pipe_condvar_wait(f->signalled, f->mutex); + } + pipe_mutex_unlock(f->mutex); +} + + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h new file mode 100644 index 000000000..d7f0c153e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_fence.h @@ -0,0 +1,95 @@ 
+/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_FENCE_H +#define LP_FENCE_H + + +#include "os/os_thread.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + + +struct pipe_screen; + + +struct lp_fence +{ + struct pipe_reference reference; + unsigned id; + + pipe_mutex mutex; + pipe_condvar signalled; + + boolean issued; + unsigned rank; + unsigned count; +}; + + +struct lp_fence * +lp_fence_create(unsigned rank); + + +void +lp_fence_signal(struct lp_fence *fence); + +boolean +lp_fence_signalled(struct lp_fence *fence); + +void +lp_fence_wait(struct lp_fence *fence); + +void +llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen); + + +void +lp_fence_destroy(struct lp_fence *fence); + +static inline void +lp_fence_reference(struct lp_fence **ptr, + struct lp_fence *f) +{ + struct lp_fence *old = *ptr; + + if (pipe_reference(&old->reference, &f->reference)) { + lp_fence_destroy(old); + } + + *ptr = f; +} + +static inline boolean +lp_fence_issued(const struct lp_fence *fence) +{ + return fence->issued; +} + + +#endif /* LP_FENCE_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c new file mode 100644 index 000000000..268aab26c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.c @@ -0,0 +1,131 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keithw@vmware.com> + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "util/u_string.h" +#include "draw/draw_context.h" +#include "lp_flush.h" +#include "lp_context.h" +#include "lp_setup.h" + + +/** + * \param fence if non-null, returns pointer to a fence which can be waited on + */ +void +llvmpipe_flush( struct pipe_context *pipe, + struct pipe_fence_handle **fence, + const char *reason) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + draw_flush(llvmpipe->draw); + + /* ask the setup module to flush */ + lp_setup_flush(llvmpipe->setup, fence, reason); + + /* Enable to dump BMPs of the color/depth buffers each frame */ + if (0) { + static unsigned frame_no = 1; + char filename[256]; + unsigned i; + + for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { + util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[i]); + } + + if (0) { + util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); + } + + ++frame_no; + } +} + +void +llvmpipe_finish( struct pipe_context *pipe, + const char *reason ) +{ + struct pipe_fence_handle *fence = NULL; + llvmpipe_flush(pipe, &fence, reason); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, PIPE_TIMEOUT_INFINITE); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } +} + +/** + * Flush context if necessary. + * + * Returns FALSE if it would have block, but do_not_block was set, TRUE + * otherwise. + * + * TODO: move this logic to an auxiliary library? 
+ */ +boolean +llvmpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + boolean read_only, + boolean cpu_access, + boolean do_not_block, + const char *reason) +{ + unsigned referenced; + + referenced = llvmpipe_is_resource_referenced(pipe, resource, level); + + if ((referenced & LP_REFERENCED_FOR_WRITE) || + ((referenced & LP_REFERENCED_FOR_READ) && !read_only)) { + + if (cpu_access) { + /* + * Flush and wait. + */ + if (do_not_block) + return FALSE; + + llvmpipe_finish(pipe, reason); + } else { + /* + * Just flush. + */ + + llvmpipe_flush(pipe, NULL, reason); + } + } + + return TRUE; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h new file mode 100644 index 000000000..68f513028 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_flush.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_FLUSH_H +#define LP_FLUSH_H + +#include "pipe/p_compiler.h" + +struct pipe_context; +struct pipe_fence_handle; +struct pipe_resource; + +void +llvmpipe_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + const char *reason); + +void +llvmpipe_finish( struct pipe_context *pipe, + const char *reason ); + +boolean +llvmpipe_flush_resource(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + boolean read_only, + boolean cpu_access, + boolean do_not_block, + const char *reason); + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c new file mode 100644 index 000000000..9acde4f1b --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.c @@ -0,0 +1,246 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * C - JIT interfaces + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_memory.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_context.h" +#include "lp_jit.h" + + +static void +lp_jit_create_types(struct lp_fragment_shader_variant *lp) +{ + struct gallivm_state *gallivm = lp->gallivm; + LLVMContextRef lc = gallivm->context; + LLVMTypeRef viewport_type, texture_type, sampler_type; + + /* struct lp_jit_viewport */ + { + LLVMTypeRef elem_types[LP_JIT_VIEWPORT_NUM_FIELDS]; + + elem_types[LP_JIT_VIEWPORT_MIN_DEPTH] = + elem_types[LP_JIT_VIEWPORT_MAX_DEPTH] = LLVMFloatTypeInContext(lc); + + viewport_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, min_depth, + gallivm->target, viewport_type, + LP_JIT_VIEWPORT_MIN_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_viewport, max_depth, + gallivm->target, viewport_type, + LP_JIT_VIEWPORT_MAX_DEPTH); + LP_CHECK_STRUCT_SIZE(struct lp_jit_viewport, + gallivm->target, viewport_type); + } + + /* struct lp_jit_texture */ + { + LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS]; + + elem_types[LP_JIT_TEXTURE_WIDTH] = + elem_types[LP_JIT_TEXTURE_HEIGHT] = + elem_types[LP_JIT_TEXTURE_DEPTH] = + elem_types[LP_JIT_TEXTURE_FIRST_LEVEL] = + elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32TypeInContext(lc); + 
elem_types[LP_JIT_TEXTURE_BASE] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + elem_types[LP_JIT_TEXTURE_ROW_STRIDE] = + elem_types[LP_JIT_TEXTURE_IMG_STRIDE] = + elem_types[LP_JIT_TEXTURE_MIP_OFFSETS] = + LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TEXTURE_LEVELS); + + texture_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width, + gallivm->target, texture_type, + LP_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, + gallivm->target, texture_type, + LP_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth, + gallivm->target, texture_type, + LP_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, first_level, + gallivm->target, texture_type, + LP_JIT_TEXTURE_FIRST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level, + gallivm->target, texture_type, + LP_JIT_TEXTURE_LAST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, base, + gallivm->target, texture_type, + LP_JIT_TEXTURE_BASE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride, + gallivm->target, texture_type, + LP_JIT_TEXTURE_ROW_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride, + gallivm->target, texture_type, + LP_JIT_TEXTURE_IMG_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, mip_offsets, + gallivm->target, texture_type, + LP_JIT_TEXTURE_MIP_OFFSETS); + LP_CHECK_STRUCT_SIZE(struct lp_jit_texture, + gallivm->target, texture_type); + } + + /* struct lp_jit_sampler */ + { + LLVMTypeRef elem_types[LP_JIT_SAMPLER_NUM_FIELDS]; + elem_types[LP_JIT_SAMPLER_MIN_LOD] = + elem_types[LP_JIT_SAMPLER_MAX_LOD] = + elem_types[LP_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(lc); + elem_types[LP_JIT_SAMPLER_BORDER_COLOR] = + LLVMArrayType(LLVMFloatTypeInContext(lc), 4); + + sampler_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, min_lod, + 
gallivm->target, sampler_type, + LP_JIT_SAMPLER_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, max_lod, + gallivm->target, sampler_type, + LP_JIT_SAMPLER_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, lod_bias, + gallivm->target, sampler_type, + LP_JIT_SAMPLER_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_sampler, border_color, + gallivm->target, sampler_type, + LP_JIT_SAMPLER_BORDER_COLOR); + LP_CHECK_STRUCT_SIZE(struct lp_jit_sampler, + gallivm->target, sampler_type); + } + + /* struct lp_jit_context */ + { + LLVMTypeRef elem_types[LP_JIT_CTX_COUNT]; + LLVMTypeRef context_type; + + elem_types[LP_JIT_CTX_CONSTANTS] = + LLVMArrayType(LLVMPointerType(LLVMFloatTypeInContext(lc), 0), LP_MAX_TGSI_CONST_BUFFERS); + elem_types[LP_JIT_CTX_NUM_CONSTANTS] = + LLVMArrayType(LLVMInt32TypeInContext(lc), LP_MAX_TGSI_CONST_BUFFERS); + elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatTypeInContext(lc); + elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = + elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32TypeInContext(lc); + elem_types[LP_JIT_CTX_U8_BLEND_COLOR] = LLVMPointerType(LLVMInt8TypeInContext(lc), 0); + elem_types[LP_JIT_CTX_F_BLEND_COLOR] = LLVMPointerType(LLVMFloatTypeInContext(lc), 0); + elem_types[LP_JIT_CTX_VIEWPORTS] = LLVMPointerType(viewport_type, 0); + elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type, + PIPE_MAX_SHADER_SAMPLER_VIEWS); + elem_types[LP_JIT_CTX_SAMPLERS] = LLVMArrayType(sampler_type, + PIPE_MAX_SAMPLERS); + + context_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); + + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants, + gallivm->target, context_type, + LP_JIT_CTX_CONSTANTS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, num_constants, + gallivm->target, context_type, + LP_JIT_CTX_NUM_CONSTANTS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value, + gallivm->target, context_type, + LP_JIT_CTX_ALPHA_REF); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front, + 
gallivm->target, context_type, + LP_JIT_CTX_STENCIL_REF_FRONT); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back, + gallivm->target, context_type, + LP_JIT_CTX_STENCIL_REF_BACK); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, u8_blend_color, + gallivm->target, context_type, + LP_JIT_CTX_U8_BLEND_COLOR); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, f_blend_color, + gallivm->target, context_type, + LP_JIT_CTX_F_BLEND_COLOR); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, viewports, + gallivm->target, context_type, + LP_JIT_CTX_VIEWPORTS); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures, + gallivm->target, context_type, + LP_JIT_CTX_TEXTURES); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers, + gallivm->target, context_type, + LP_JIT_CTX_SAMPLERS); + LP_CHECK_STRUCT_SIZE(struct lp_jit_context, + gallivm->target, context_type); + + lp->jit_context_ptr_type = LLVMPointerType(context_type, 0); + } + + /* struct lp_jit_thread_data */ + { + LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT]; + LLVMTypeRef thread_data_type; + + elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); + elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] = + LLVMInt32TypeInContext(lc); + + thread_data_type = LLVMStructTypeInContext(lc, elem_types, + Elements(elem_types), 0); + + lp->jit_thread_data_ptr_type = LLVMPointerType(thread_data_type, 0); + } + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + LLVMDumpModule(gallivm->module); + } +} + + +void +lp_jit_screen_cleanup(struct llvmpipe_screen *screen) +{ + /* nothing */ +} + + +boolean +lp_jit_screen_init(struct llvmpipe_screen *screen) +{ + return lp_build_init(); +} + + +void +lp_jit_init_types(struct lp_fragment_shader_variant *lp) +{ + if (!lp->jit_context_ptr_type) + lp_jit_create_types(lp); +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h new file mode 100644 index 000000000..097fa7dce --- /dev/null +++ 
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_jit.h @@ -0,0 +1,263 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * C - JIT interfaces + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#ifndef LP_JIT_H +#define LP_JIT_H + + +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_limits.h" + +#include "pipe/p_state.h" +#include "lp_texture.h" + + +struct lp_fragment_shader_variant; +struct llvmpipe_screen; + + +struct lp_jit_texture +{ + uint32_t width; /* same as number of elements */ + uint32_t height; + uint32_t depth; /* doubles as array size */ + uint32_t first_level; + uint32_t last_level; + const void *base; + uint32_t row_stride[LP_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[LP_MAX_TEXTURE_LEVELS]; + uint32_t mip_offsets[LP_MAX_TEXTURE_LEVELS]; +}; + + +struct lp_jit_sampler +{ + float min_lod; + float max_lod; + float lod_bias; + float border_color[4]; +}; + + +struct lp_jit_viewport +{ + float min_depth; + float max_depth; +}; + + +enum { + LP_JIT_TEXTURE_WIDTH = 0, + LP_JIT_TEXTURE_HEIGHT, + LP_JIT_TEXTURE_DEPTH, + LP_JIT_TEXTURE_FIRST_LEVEL, + LP_JIT_TEXTURE_LAST_LEVEL, + LP_JIT_TEXTURE_BASE, + LP_JIT_TEXTURE_ROW_STRIDE, + LP_JIT_TEXTURE_IMG_STRIDE, + LP_JIT_TEXTURE_MIP_OFFSETS, + LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */ +}; + + +enum { + LP_JIT_SAMPLER_MIN_LOD, + LP_JIT_SAMPLER_MAX_LOD, + LP_JIT_SAMPLER_LOD_BIAS, + LP_JIT_SAMPLER_BORDER_COLOR, + LP_JIT_SAMPLER_NUM_FIELDS /* number of fields above */ +}; + + +enum { + LP_JIT_VIEWPORT_MIN_DEPTH, + LP_JIT_VIEWPORT_MAX_DEPTH, + LP_JIT_VIEWPORT_NUM_FIELDS /* number of fields above */ +}; + + +/** + * This structure is passed directly to the generated fragment shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the lp_jit_context_* macros and + * lp_jit_init_types function. Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. 
+ */ +struct lp_jit_context +{ + const float *constants[LP_MAX_TGSI_CONST_BUFFERS]; + int num_constants[LP_MAX_TGSI_CONST_BUFFERS]; + + float alpha_ref_value; + + uint32_t stencil_ref_front, stencil_ref_back; + + uint8_t *u8_blend_color; + float *f_blend_color; + + struct lp_jit_viewport *viewports; + + struct lp_jit_texture textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct lp_jit_sampler samplers[PIPE_MAX_SAMPLERS]; +}; + + +/** + * These enum values must match the position of the fields in the + * lp_jit_context struct above. + */ +enum { + LP_JIT_CTX_CONSTANTS = 0, + LP_JIT_CTX_NUM_CONSTANTS, + LP_JIT_CTX_ALPHA_REF, + LP_JIT_CTX_STENCIL_REF_FRONT, + LP_JIT_CTX_STENCIL_REF_BACK, + LP_JIT_CTX_U8_BLEND_COLOR, + LP_JIT_CTX_F_BLEND_COLOR, + LP_JIT_CTX_VIEWPORTS, + LP_JIT_CTX_TEXTURES, + LP_JIT_CTX_SAMPLERS, + LP_JIT_CTX_COUNT +}; + + +#define lp_jit_context_constants(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_CONSTANTS, "constants") + +#define lp_jit_context_num_constants(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_NUM_CONSTANTS, "num_constants") + +#define lp_jit_context_alpha_ref_value(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value") + +#define lp_jit_context_stencil_ref_front_value(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front") + +#define lp_jit_context_stencil_ref_back_value(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back") + +#define lp_jit_context_u8_blend_color(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_U8_BLEND_COLOR, "u8_blend_color") + +#define lp_jit_context_f_blend_color(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_F_BLEND_COLOR, "f_blend_color") + +#define lp_jit_context_viewports(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, LP_JIT_CTX_VIEWPORTS, "viewports") + +#define 
lp_jit_context_textures(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_TEXTURES, "textures") + +#define lp_jit_context_samplers(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_CTX_SAMPLERS, "samplers") + + +struct lp_jit_thread_data +{ + uint64_t vis_counter; + + /* + * Non-interpolated rasterizer state passed through to the fragment shader. + */ + struct { + uint32_t viewport_index; + } raster_state; +}; + + +enum { + LP_JIT_THREAD_DATA_COUNTER = 0, + LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, + LP_JIT_THREAD_DATA_COUNT +}; + + +#define lp_jit_thread_data_counter(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter") + +#define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, \ + LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \ + "raster_state.viewport_index") + +/** + * typedef for fragment shader function + * + * @param context jit context + * @param x block start x + * @param y block start y + * @param facing is front facing + * @param a0 shader input a0 + * @param dadx shader input dadx + * @param dady shader input dady + * @param color color buffer + * @param depth depth buffer + * @param mask mask of visible pixels in block + * @param thread_data task thread data + * @param stride color buffer row stride in bytes + * @param depth_stride depth buffer row stride in bytes + */ +typedef void +(*lp_jit_frag_func)(const struct lp_jit_context *context, + uint32_t x, + uint32_t y, + uint32_t facing, + const void *a0, + const void *dadx, + const void *dady, + uint8_t **color, + uint8_t *depth, + uint32_t mask, + struct lp_jit_thread_data *thread_data, + unsigned *stride, + unsigned depth_stride); + + +void +lp_jit_screen_cleanup(struct llvmpipe_screen *screen); + + +boolean +lp_jit_screen_init(struct llvmpipe_screen *screen); + + +void +lp_jit_init_types(struct lp_fragment_shader_variant *lp); + + +#endif /* LP_JIT_H */ 
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h new file mode 100644 index 000000000..5294ced3c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_limits.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Implementation limits for LLVMpipe driver. + */ + +#ifndef LP_LIMITS_H +#define LP_LIMITS_H + + +/** + * Tile size (width and height). This needs to be a power of two. 
/*
 * Tile size (width and height).  This needs to be a power of two.
 */
#define TILE_ORDER 6
#define TILE_SIZE (1 << TILE_ORDER)


/*
 * Max texture sizes
 */
#define LP_MAX_TEXTURE_SIZE (1 * 1024 * 1024 * 1024ULL) /* 1GB for now */
#define LP_MAX_TEXTURE_2D_LEVELS 14  /* 8K x 8K for now */
#define LP_MAX_TEXTURE_3D_LEVELS 12  /* 2K x 2K x 2K for now */
#define LP_MAX_TEXTURE_CUBE_LEVELS 14  /* 8K x 8K for now */
#define LP_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */


/* This must be the larger of LP_MAX_TEXTURE_2D/3D_LEVELS */
#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS


/* Max drawing surface size is the max texture size */
#define LP_MAX_HEIGHT (1 << (LP_MAX_TEXTURE_LEVELS - 1))
#define LP_MAX_WIDTH (1 << (LP_MAX_TEXTURE_LEVELS - 1))


/* Upper bound on rasterizer worker threads */
#define LP_MAX_THREADS 16


/* Max bytes per scene.  This may be replaced by a runtime parameter. */
#define LP_MAX_SCENE_SIZE (512 * 1024 * 1024)

/*
 * Max number of shader variants (for all shaders combined,
 * per context) that will be kept around.
 */
#define LP_MAX_SHADER_VARIANTS 1024

/*
 * Max number of instructions (for all fragment shaders combined per context)
 * that will be kept around (counted in terms of llvm ir).
 * Note: the definition looks odd, but there's branches which use a different
 * number of max shader variants.
 */
#define LP_MAX_SHADER_INSTRUCTIONS MAX2(256*1024, 512*LP_MAX_SHADER_VARIANTS)

/*
 * Max number of setup variants that will be kept around.
 *
 * These are determined by the combination of the fragment shader
 * input signature and a small amount of rasterization state (eg
 * flatshading).  It is likely that many active fragment shaders will
 * share the same setup variant.
 */
+ */ +#define LP_MAX_SETUP_VARIANTS 64 + +#endif /* LP_LIMITS_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c new file mode 100644 index 000000000..712e28ea3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.c @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_debug.h" +#include "lp_limits.h" +#include "lp_memory.h" + +/* A single dummy tile used in a couple of out-of-memory situations. 
+ */ +PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) +uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h new file mode 100644 index 000000000..0acd4e6b8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_memory.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_MEMORY_H +#define LP_MEMORY_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "lp_limits.h" +#include "gallivm/lp_bld_type.h" + +extern PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) +uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4]; + +#endif /* LP_MEMORY_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c new file mode 100644 index 000000000..a4548bccf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.c @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_debug.h" +#include "lp_debug.h" +#include "lp_perf.h" + + + +struct lp_counters lp_count; + + +void +lp_reset_counters(void) +{ + memset(&lp_count, 0, sizeof(lp_count)); +} + + +void +lp_print_counters(void) +{ + if (LP_DEBUG & DEBUG_COUNTERS) { + unsigned total_64, total_16, total_4; + float p1, p2, p3, p4, p5, p6; + + debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris); + debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris); + + total_64 = (lp_count.nr_empty_64 + + lp_count.nr_fully_covered_64 + + lp_count.nr_partially_covered_64); + + p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64; + p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64; + p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64; + p5 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64; + p6 = 100.0 * (float) lp_count.nr_shade_64 / (float) total_64; + + debug_printf("llvmpipe: nr_64x64: %9u\n", total_64); + debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64); + debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p5, total_64); + debug_printf("llvmpipe: nr_pure_shade_opaque: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_opaque_64, 0.0, lp_count.nr_shade_opaque_64); + debug_printf("llvmpipe: nr_shade_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_64, p6, total_64); + debug_printf("llvmpipe: nr_pure_shade: %9u (%3.0f%% of %u)\n", lp_count.nr_pure_shade_64, 0.0, lp_count.nr_shade_64); + debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64); + debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64); + + total_16 = (lp_count.nr_empty_16 + + lp_count.nr_fully_covered_16 + + 
lp_count.nr_partially_covered_16); + + p1 = 100.0 * (float) lp_count.nr_empty_16 / (float) total_16; + p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16; + p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16; + + debug_printf("llvmpipe: nr_16x16: %9u\n", total_16); + debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16); + debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16); + debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16); + + total_4 = (lp_count.nr_empty_4 + + lp_count.nr_fully_covered_4 + + lp_count.nr_partially_covered_4); + + p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4; + p2 = 100.0 * (float) lp_count.nr_fully_covered_4 / (float) total_4; + p3 = 100.0 * (float) lp_count.nr_partially_covered_4 / (float) total_4; + p4 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4; + + debug_printf("llvmpipe: nr_tri_4x4: %9u\n", total_4); + debug_printf("llvmpipe: nr_fully_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_4, p2, total_4); + debug_printf("llvmpipe: nr_partially_covered_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_4, p3, total_4); + debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4); + debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p4, total_4); + + debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear); + debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load); + debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store); + + debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles); + debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0); + 
debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles); + + } +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h new file mode 100644 index 000000000..455adf7d6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_perf.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Performance / statistic counters, etc. 
+ */ + + +#ifndef LP_PERF_H +#define LP_PERF_H + +#include "pipe/p_compiler.h" + +/** + * Various counters + */ +struct lp_counters +{ + unsigned nr_tris; + unsigned nr_culled_tris; + unsigned nr_empty_64; + unsigned nr_fully_covered_64; + unsigned nr_partially_covered_64; + unsigned nr_pure_shade_opaque_64; + unsigned nr_pure_shade_64; + unsigned nr_shade_64; + unsigned nr_shade_opaque_64; + unsigned nr_empty_16; + unsigned nr_fully_covered_16; + unsigned nr_partially_covered_16; + unsigned nr_empty_4; + unsigned nr_fully_covered_4; + unsigned nr_partially_covered_4; + unsigned nr_non_empty_4; + unsigned nr_llvm_compiles; + int64_t llvm_compile_time; /**< total, in microseconds */ + + unsigned nr_color_tile_clear; + unsigned nr_color_tile_load; + unsigned nr_color_tile_store; +}; + + +extern struct lp_counters lp_count; + + +/** Increment the named counter (only for debug builds) */ +#ifdef DEBUG +#define LP_COUNT(counter) lp_count.counter++ +#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr) +#define LP_COUNT_GET(counter) (lp_count.counter) +#else +#define LP_COUNT(counter) +#define LP_COUNT_ADD(counter, incr) (void)(incr) +#define LP_COUNT_GET(counter) 0 +#endif + + +extern void +lp_reset_counters(void); + + +extern void +lp_print_counters(void); + + +#endif /* LP_PERF_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h new file mode 100644 index 000000000..27ab1baef --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_public.h @@ -0,0 +1,18 @@ +#ifndef LP_PUBLIC_H +#define LP_PUBLIC_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen; +struct sw_winsys; + +struct pipe_screen * +llvmpipe_create_screen(struct sw_winsys *winsys); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c new file mode 100644 index 000000000..fc5936706 --- /dev/null +++ 
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.c @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + * Keith Whitwell, Qicheng Christopher Li, Brian Paul + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "os/os_time.h" +#include "lp_context.h" +#include "lp_flush.h" +#include "lp_fence.h" +#include "lp_query.h" +#include "lp_screen.h" +#include "lp_state.h" +#include "lp_rast.h" + + +static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p ) +{ + return (struct llvmpipe_query *)p; +} + +static struct pipe_query * +llvmpipe_create_query(struct pipe_context *pipe, + unsigned type, + unsigned index) +{ + struct llvmpipe_query *pq; + + assert(type < PIPE_QUERY_TYPES); + + pq = CALLOC_STRUCT( llvmpipe_query ); + + if (pq) { + pq->type = type; + } + + return (struct pipe_query *) pq; +} + + +static void +llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct llvmpipe_query *pq = llvmpipe_query(q); + + /* Ideally we would refcount queries & not get destroyed until the + * last scene had finished with us. 
+ */ + if (pq->fence) { + if (!lp_fence_issued(pq->fence)) + llvmpipe_flush(pipe, NULL, __FUNCTION__); + + if (!lp_fence_signalled(pq->fence)) + lp_fence_wait(pq->fence); + + lp_fence_reference(&pq->fence, NULL); + } + + FREE(pq); +} + + +static boolean +llvmpipe_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + union pipe_query_result *vresult) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + unsigned num_threads = MAX2(1, screen->num_threads); + struct llvmpipe_query *pq = llvmpipe_query(q); + uint64_t *result = (uint64_t *)vresult; + int i; + + if (pq->fence) { + /* only have a fence if there was a scene */ + if (!lp_fence_signalled(pq->fence)) { + if (!lp_fence_issued(pq->fence)) + llvmpipe_flush(pipe, NULL, __FUNCTION__); + + if (!wait) + return FALSE; + + lp_fence_wait(pq->fence); + } + } + + /* Sum the results from each of the threads: + */ + *result = 0; + + switch (pq->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + for (i = 0; i < num_threads; i++) { + *result += pq->end[i]; + } + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + for (i = 0; i < num_threads; i++) { + /* safer (still not guaranteed) when there's an overflow */ + vresult->b = vresult->b || pq->end[i]; + } + break; + case PIPE_QUERY_TIMESTAMP: + for (i = 0; i < num_threads; i++) { + if (pq->end[i] > *result) { + *result = pq->end[i]; + } + } + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: { + struct pipe_query_data_timestamp_disjoint *td = + (struct pipe_query_data_timestamp_disjoint *)vresult; + /* os_get_time_nano return nanoseconds */ + td->frequency = UINT64_C(1000000000); + td->disjoint = FALSE; + } + break; + case PIPE_QUERY_GPU_FINISHED: + vresult->b = TRUE; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + *result = pq->num_primitives_generated; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + *result = pq->num_primitives_written; + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + vresult->b = pq->num_primitives_generated > 
pq->num_primitives_written; + break; + case PIPE_QUERY_SO_STATISTICS: { + struct pipe_query_data_so_statistics *stats = + (struct pipe_query_data_so_statistics *)vresult; + stats->num_primitives_written = pq->num_primitives_written; + stats->primitives_storage_needed = pq->num_primitives_generated; + } + break; + case PIPE_QUERY_PIPELINE_STATISTICS: { + struct pipe_query_data_pipeline_statistics *stats = + (struct pipe_query_data_pipeline_statistics *)vresult; + /* only ps_invocations come from binned query */ + for (i = 0; i < num_threads; i++) { + pq->stats.ps_invocations += pq->end[i]; + } + pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE; + *stats = pq->stats; + } + break; + default: + assert(0); + break; + } + + return TRUE; +} + + +static boolean +llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + struct llvmpipe_query *pq = llvmpipe_query(q); + + /* Check if the query is already in the scene. If so, we need to + * flush the scene now. Real apps shouldn't re-use a query in a + * frame of rendering. 
+ */ + if (pq->fence && !lp_fence_issued(pq->fence)) { + llvmpipe_finish(pipe, __FUNCTION__); + } + + + memset(pq->start, 0, sizeof(pq->start)); + memset(pq->end, 0, sizeof(pq->end)); + lp_setup_begin_query(llvmpipe->setup, pq); + + switch (pq->type) { + case PIPE_QUERY_PRIMITIVES_EMITTED: + pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed; + break; + case PIPE_QUERY_SO_STATISTICS: + pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written; + pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed; + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + pq->num_primitives_written = llvmpipe->so_stats.num_primitives_written; + pq->num_primitives_generated = llvmpipe->so_stats.primitives_storage_needed; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + /* reset our cache */ + if (llvmpipe->active_statistics_queries == 0) { + memset(&llvmpipe->pipeline_statistics, 0, + sizeof(llvmpipe->pipeline_statistics)); + } + memcpy(&pq->stats, &llvmpipe->pipeline_statistics, sizeof(pq->stats)); + llvmpipe->active_statistics_queries++; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + llvmpipe->active_occlusion_queries++; + llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY; + break; + default: + break; + } + return true; +} + + +static void +llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + struct llvmpipe_query *pq = llvmpipe_query(q); + + lp_setup_end_query(llvmpipe->setup, pq); + + switch (pq->type) { + + case PIPE_QUERY_PRIMITIVES_EMITTED: + pq->num_primitives_written = + llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + pq->num_primitives_generated = + llvmpipe->so_stats.primitives_storage_needed - 
pq->num_primitives_generated; + break; + case PIPE_QUERY_SO_STATISTICS: + pq->num_primitives_written = + llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written; + pq->num_primitives_generated = + llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated; + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + pq->num_primitives_written = + llvmpipe->so_stats.num_primitives_written - pq->num_primitives_written; + pq->num_primitives_generated = + llvmpipe->so_stats.primitives_storage_needed - pq->num_primitives_generated; + break; + case PIPE_QUERY_PIPELINE_STATISTICS: + pq->stats.ia_vertices = + llvmpipe->pipeline_statistics.ia_vertices - pq->stats.ia_vertices; + pq->stats.ia_primitives = + llvmpipe->pipeline_statistics.ia_primitives - pq->stats.ia_primitives; + pq->stats.vs_invocations = + llvmpipe->pipeline_statistics.vs_invocations - pq->stats.vs_invocations; + pq->stats.gs_invocations = + llvmpipe->pipeline_statistics.gs_invocations - pq->stats.gs_invocations; + pq->stats.gs_primitives = + llvmpipe->pipeline_statistics.gs_primitives - pq->stats.gs_primitives; + pq->stats.c_invocations = + llvmpipe->pipeline_statistics.c_invocations - pq->stats.c_invocations; + pq->stats.c_primitives = + llvmpipe->pipeline_statistics.c_primitives - pq->stats.c_primitives; + pq->stats.ps_invocations = + llvmpipe->pipeline_statistics.ps_invocations - pq->stats.ps_invocations; + + llvmpipe->active_statistics_queries--; + break; + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + assert(llvmpipe->active_occlusion_queries); + llvmpipe->active_occlusion_queries--; + llvmpipe->dirty |= LP_NEW_OCCLUSION_QUERY; + break; + default: + break; + } +} + +boolean +llvmpipe_check_render_cond(struct llvmpipe_context *lp) +{ + struct pipe_context *pipe = &lp->pipe; + boolean b, wait; + uint64_t result; + + if (!lp->render_cond_query) + return TRUE; /* no query predicate, draw normally */ + + wait = (lp->render_cond_mode == 
PIPE_RENDER_COND_WAIT || + lp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, lp->render_cond_query, wait, (void*)&result); + if (b) + return ((!result) == lp->render_cond_cond); + else + return TRUE; +} + +void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe ) +{ + llvmpipe->pipe.create_query = llvmpipe_create_query; + llvmpipe->pipe.destroy_query = llvmpipe_destroy_query; + llvmpipe->pipe.begin_query = llvmpipe_begin_query; + llvmpipe->pipe.end_query = llvmpipe_end_query; + llvmpipe->pipe.get_query_result = llvmpipe_get_query_result; +} + + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h new file mode 100644 index 000000000..797375c88 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_query.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Keith Whitwell, Qicheng Christopher Li, Brian Paul + */ + +#ifndef LP_QUERY_H +#define LP_QUERY_H + +#include <limits.h> +#include "os/os_thread.h" +#include "lp_limits.h" + + +struct llvmpipe_context; + + +struct llvmpipe_query { + uint64_t start[LP_MAX_THREADS]; /* start count value for each thread */ + uint64_t end[LP_MAX_THREADS]; /* end count value for each thread */ + struct lp_fence *fence; /* fence from last scene this was binned in */ + unsigned type; /* PIPE_QUERY_* */ + unsigned num_primitives_generated; + unsigned num_primitives_written; + + struct pipe_query_data_pipeline_statistics stats; +}; + + +extern void llvmpipe_init_query_funcs(struct llvmpipe_context * ); + +extern boolean llvmpipe_check_render_cond(struct llvmpipe_context *); + +#endif /* LP_QUERY_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c new file mode 100644 index 000000000..c726707c0 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.c @@ -0,0 +1,935 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <limits.h> +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_rect.h" +#include "util/u_surface.h" +#include "util/u_pack_color.h" +#include "util/u_string.h" + +#include "os/os_time.h" + +#include "lp_scene_queue.h" +#include "lp_context.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_perf.h" +#include "lp_query.h" +#include "lp_rast.h" +#include "lp_rast_priv.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_scene.h" +#include "lp_tex_sample.h" + + +#ifdef DEBUG +int jit_line = 0; +const struct lp_rast_state *jit_state = NULL; +const struct lp_rasterizer_task *jit_task = NULL; +#endif + + +/** + * Begin rasterizing a scene. + * Called once per scene by one thread. 
+ */ +static void +lp_rast_begin( struct lp_rasterizer *rast, + struct lp_scene *scene ) +{ + rast->curr_scene = scene; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + lp_scene_begin_rasterization( scene ); + lp_scene_bin_iter_begin( scene ); +} + + +static void +lp_rast_end( struct lp_rasterizer *rast ) +{ + lp_scene_end_rasterization( rast->curr_scene ); + + rast->curr_scene = NULL; +} + + +/** + * Beginning rasterization of a tile. + * \param x window X position of the tile, in pixels + * \param y window Y position of the tile, in pixels + */ +static void +lp_rast_tile_begin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin, + int x, int y) +{ + unsigned i; + struct lp_scene *scene = task->scene; + + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); + + task->bin = bin; + task->x = x * TILE_SIZE; + task->y = y * TILE_SIZE; + task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? + task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; + task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? + task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; + + task->thread_data.vis_counter = 0; + task->ps_invocations = 0; + + for (i = 0; i < task->scene->fb.nr_cbufs; i++) { + if (task->scene->fb.cbufs[i]) { + task->color_tiles[i] = scene->cbufs[i].map + + scene->cbufs[i].stride * task->y + + scene->cbufs[i].format_bytes * task->x; + } + } + if (task->scene->fb.zsbuf) { + task->depth_tile = scene->zsbuf.map + + scene->zsbuf.stride * task->y + + scene->zsbuf.format_bytes * task->x; + } +} + + +/** + * Clear the rasterizer's current color tile. + * This is a bin command called during bin processing. + * Clear commands always clear all bound layers. 
+ */ +static void +lp_rast_clear_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_scene *scene = task->scene; + unsigned cbuf = arg.clear_rb->cbuf; + union util_color uc; + enum pipe_format format; + + /* we never bin clear commands for non-existing buffers */ + assert(cbuf < scene->fb.nr_cbufs); + assert(scene->fb.cbufs[cbuf]); + + format = scene->fb.cbufs[cbuf]->format; + uc = arg.clear_rb->color_val; + + /* + * this is pretty rough since we have target format (bunch of bytes...) here. + * dump it as raw 4 dwords. + */ + LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n", + __FUNCTION__, format, uc.ui[0], uc.ui[1], uc.ui[2], uc.ui[3]); + + + util_fill_box(scene->cbufs[cbuf].map, + format, + scene->cbufs[cbuf].stride, + scene->cbufs[cbuf].layer_stride, + task->x, + task->y, + 0, + task->width, + task->height, + scene->fb_max_layer + 1, + &uc); + + /* this will increase for each rb which probably doesn't mean much */ + LP_COUNT(nr_color_tile_clear); +} + + +/** + * Clear the rasterizer's current z/stencil tile. + * This is a bin command called during bin processing. + * Clear commands always clear all bound layers. + */ +static void +lp_rast_clear_zstencil(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_scene *scene = task->scene; + uint64_t clear_value64 = arg.clear_zstencil.value; + uint64_t clear_mask64 = arg.clear_zstencil.mask; + uint32_t clear_value = (uint32_t) clear_value64; + uint32_t clear_mask = (uint32_t) clear_mask64; + const unsigned height = task->height; + const unsigned width = task->width; + const unsigned dst_stride = scene->zsbuf.stride; + uint8_t *dst; + unsigned i, j; + unsigned block_size; + + LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", + __FUNCTION__, clear_value, clear_mask); + + /* + * Clear the area of the depth/depth buffer matching this tile. 
+ */ + + if (scene->fb.zsbuf) { + unsigned layer; + uint8_t *dst_layer = task->depth_tile; + block_size = util_format_get_blocksize(scene->fb.zsbuf->format); + + clear_value &= clear_mask; + + for (layer = 0; layer <= scene->fb_max_layer; layer++) { + dst = dst_layer; + + switch (block_size) { + case 1: + assert(clear_mask == 0xff); + memset(dst, (uint8_t) clear_value, height * width); + break; + case 2: + if (clear_mask == 0xffff) { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) clear_value; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) { + uint16_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } + } + break; + case 4: + if (clear_mask == 0xffffffff) { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = clear_value; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) { + uint32_t tmp = ~clear_mask & *row; + *row++ = clear_value | tmp; + } + dst += dst_stride; + } + } + break; + case 8: + clear_value64 &= clear_mask64; + if (clear_mask64 == 0xffffffffffULL) { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) + *row++ = clear_value64; + dst += dst_stride; + } + } + else { + for (i = 0; i < height; i++) { + uint64_t *row = (uint64_t *)dst; + for (j = 0; j < width; j++) { + uint64_t tmp = ~clear_mask64 & *row; + *row++ = clear_value64 | tmp; + } + dst += dst_stride; + } + } + break; + + default: + assert(0); + break; + } + dst_layer += scene->zsbuf.layer_stride; + } + } +} + + + +/** + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle. + * This is a bin command called during bin processing. 
+ */ +static void +lp_rast_shade_tile(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_scene *scene = task->scene; + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + const struct lp_rast_state *state; + struct lp_fragment_shader_variant *variant; + const unsigned tile_x = task->x, tile_y = task->y; + unsigned x, y; + + if (inputs->disable) { + /* This command was partially binned and has been disabled */ + return; + } + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + state = task->state; + assert(state); + if (!state) { + return; + } + variant = state->variant; + + /* render the whole 64x64 tile in 4x4 chunks */ + for (y = 0; y < task->height; y += 4){ + for (x = 0; x < task->width; x += 4) { + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + unsigned stride[PIPE_MAX_COLOR_BUFS]; + uint8_t *depth = NULL; + unsigned depth_stride = 0; + unsigned i; + + /* color buffer */ + for (i = 0; i < scene->fb.nr_cbufs; i++){ + if (scene->fb.cbufs[i]) { + stride[i] = scene->cbufs[i].stride; + color[i] = lp_rast_get_color_block_pointer(task, i, tile_x + x, + tile_y + y, inputs->layer); + } + else { + stride[i] = 0; + color[i] = NULL; + } + } + + /* depth buffer */ + if (scene->zsbuf.map) { + depth = lp_rast_get_depth_block_pointer(task, tile_x + x, + tile_y + y, inputs->layer); + depth_stride = scene->zsbuf.stride; + } + + /* Propagate non-interpolated raster state. */ + task->thread_data.raster_state.viewport_index = inputs->viewport_index; + + /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state, task); + variant->jit_function[RAST_WHOLE]( &state->jit_context, + tile_x + x, tile_y + y, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), + color, + depth, + 0xffff, + &task->thread_data, + stride, + depth_stride); + END_JIT_CALL(); + } + } +} + + +/** + * Run the shader on all blocks in a tile. This is used when a tile is + * completely contained inside a triangle, and the shader is opaque. 
+ * This is a bin command called during bin processing. + */ +static void +lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + assert(task->state); + if (!task->state) { + return; + } + + lp_rast_shade_tile(task, arg); +} + + +/** + * Compute shading for a 4x4 block of pixels inside a triangle. + * This is a bin command called during bin processing. + * \param x X position of quad in window coords + * \param y Y position of quad in window coords + */ +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask) +{ + const struct lp_rast_state *state = task->state; + struct lp_fragment_shader_variant *variant = state->variant; + const struct lp_scene *scene = task->scene; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + unsigned stride[PIPE_MAX_COLOR_BUFS]; + uint8_t *depth = NULL; + unsigned depth_stride = 0; + unsigned i; + + assert(state); + + /* Sanity checks */ + assert(x < scene->tiles_x * TILE_SIZE); + assert(y < scene->tiles_y * TILE_SIZE); + assert(x % TILE_VECTOR_WIDTH == 0); + assert(y % TILE_VECTOR_HEIGHT == 0); + + assert((x % 4) == 0); + assert((y % 4) == 0); + + /* color buffer */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->fb.cbufs[i]) { + stride[i] = scene->cbufs[i].stride; + color[i] = lp_rast_get_color_block_pointer(task, i, x, y, + inputs->layer); + } + else { + stride[i] = 0; + color[i] = NULL; + } + } + + /* depth buffer */ + if (scene->zsbuf.map) { + depth_stride = scene->zsbuf.stride; + depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); + } + + assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); + + /* + * The rasterizer may produce fragments outside our + * allocated 4x4 blocks hence need to filter them out here. 
    */
   if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
      /* not very accurate would need a popcount on the mask */
      /* always count this not worth bothering? */
      task->ps_invocations += 1 * variant->ps_inv_multiplier;

      /* Propagate non-interpolated raster state. */
      task->thread_data.raster_state.viewport_index = inputs->viewport_index;

      /* run shader on 4x4 block */
      BEGIN_JIT_CALL(state, task);
      variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
                                            x, y,
                                            inputs->frontfacing,
                                            GET_A0(inputs),
                                            GET_DADX(inputs),
                                            GET_DADY(inputs),
                                            color,
                                            depth,
                                            mask,
                                            &task->thread_data,
                                            stride,
                                            depth_stride);
      END_JIT_CALL();
   }
}



/**
 * Begin a new occlusion query.
 * This is a bin command put in all bins.
 * Called per thread.
 */
static void
lp_rast_begin_query(struct lp_rasterizer_task *task,
                    const union lp_rast_cmd_arg arg)
{
   struct llvmpipe_query *pq = arg.query_obj;

   /* Snapshot the relevant per-thread counter; lp_rast_end_query later
    * computes the delta against this starting value.
    */
   switch (pq->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      pq->start[task->thread_index] = task->thread_data.vis_counter;
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      pq->start[task->thread_index] = task->ps_invocations;
      break;
   default:
      /* unexpected query type for a binned begin-query command */
      assert(0);
      break;
   }
}


/**
 * End the current occlusion query.
 * This is a bin command put in all bins.
 * Called per thread.
 */
static void
lp_rast_end_query(struct lp_rasterizer_task *task,
                  const union lp_rast_cmd_arg arg)
{
   struct llvmpipe_query *pq = arg.query_obj;

   switch (pq->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      /* accumulate the per-thread delta since lp_rast_begin_query */
      pq->end[task->thread_index] +=
            task->thread_data.vis_counter - pq->start[task->thread_index];
      pq->start[task->thread_index] = 0;
      break;
   case PIPE_QUERY_TIMESTAMP:
      pq->end[task->thread_index] = os_time_get_nano();
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      pq->end[task->thread_index] +=
            task->ps_invocations - pq->start[task->thread_index];
      pq->start[task->thread_index] = 0;
      break;
   default:
      /* unexpected query type for a binned end-query command */
      assert(0);
      break;
   }
}


/**
 * Bind new rasterization state for subsequent commands in this bin.
 */
void
lp_rast_set_state(struct lp_rasterizer_task *task,
                  const union lp_rast_cmd_arg arg)
{
   task->state = arg.state;
}



/**
 * Called when we're done writing to a color tile.
 */
static void
lp_rast_tile_end(struct lp_rasterizer_task *task)
{
   unsigned i;

   /* flush all still-active queries so their counters cover this tile */
   for (i = 0; i < task->scene->num_active_queries; ++i) {
      lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
   }

   /* debug */
   memset(task->color_tiles, 0, sizeof(task->color_tiles));
   task->depth_tile = NULL;

   task->bin = NULL;
}

/* Command dispatch table.  Entry order must match the LP_RAST_OP_*
 * opcode values defined in lp_rast.h.
 */
static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
{
   lp_rast_clear_color,
   lp_rast_clear_zstencil,
   lp_rast_triangle_1,
   lp_rast_triangle_2,
   lp_rast_triangle_3,
   lp_rast_triangle_4,
   lp_rast_triangle_5,
   lp_rast_triangle_6,
   lp_rast_triangle_7,
   lp_rast_triangle_8,
   lp_rast_triangle_3_4,
   lp_rast_triangle_3_16,
   lp_rast_triangle_4_16,
   lp_rast_shade_tile,
   lp_rast_shade_tile_opaque,
   lp_rast_begin_query,
   lp_rast_end_query,
   lp_rast_set_state,
   lp_rast_triangle_32_1,
   lp_rast_triangle_32_2,
   lp_rast_triangle_32_3,
   lp_rast_triangle_32_4,
   lp_rast_triangle_32_5,
   lp_rast_triangle_32_6,
   lp_rast_triangle_32_7,
   lp_rast_triangle_32_8,
   lp_rast_triangle_32_3_4,
   lp_rast_triangle_32_3_16,
   lp_rast_triangle_32_4_16
};


/**
 * Execute every command in every command block of the given bin
 * through the dispatch[] table.
 */
static void
do_rasterize_bin(struct lp_rasterizer_task *task,
                 const struct cmd_bin *bin,
                 int x, int y)
{
   const struct cmd_block *block;
   unsigned k;

   /* flip to 1 for ad-hoc bin dumping while debugging */
   if (0)
      lp_debug_bin(bin, x, y);

   for (block = bin->head; block; block = block->next) {
      for (k = 0; k < block->count; k++) {
         dispatch[block->cmd[k]]( task, block->arg[k] );
      }
   }
}



/**
 * Rasterize commands for a single bin.
 * \param x, y position of the bin's tile in the framebuffer
 * Must be called between lp_rast_begin() and lp_rast_end().
 * Called per thread.
 */
static void
rasterize_bin(struct lp_rasterizer_task *task,
              const struct cmd_bin *bin, int x, int y )
{
   lp_rast_tile_begin( task, bin, x, y );

   do_rasterize_bin(task, bin, x, y);

   lp_rast_tile_end(task);


   /* Debug/Perf flags:
    * bin->head is non-NULL here: callers skip empty bins (is_empty_bin).
    */
   if (bin->head->count == 1) {
      if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE)
         LP_COUNT(nr_pure_shade_opaque_64);
      else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE)
         LP_COUNT(nr_pure_shade_64);
   }
}


/* An empty bin is one that just loads the contents of the tile and
 * stores them again unchanged.  This typically happens when bins have
 * been flushed for some reason in the middle of a frame, or when
 * incremental updates are being made to a render target.
 *
 * Try to avoid doing pointless work in this case.
 */
static boolean
is_empty_bin( const struct cmd_bin *bin )
{
   return bin->head == NULL;
}


/**
 * Rasterize/execute all bins within a scene.
 * Called per thread.
 */
static void
rasterize_scene(struct lp_rasterizer_task *task,
                struct lp_scene *scene)
{
   task->scene = scene;

   /* Skip all work when rasterization is globally disabled (LP_NO_RAST)
    * or the scene was marked for discard; the fence is still signalled
    * below so waiters make progress.
    */
   if (!task->rast->no_rast && !scene->discard) {
      /* loop over scene bins, rasterize each */
      {
         struct cmd_bin *bin;
         int i, j;

         assert(scene);
         /* the bin iterator hands out bins to competing threads */
         while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) {
            if (!is_empty_bin( bin ))
               rasterize_bin(task, bin, i, j);
         }
      }
   }


   if (scene->fence) {
      lp_fence_signal(scene->fence);
   }

   task->scene = NULL;
}


/**
 * Called by setup module when it has something for us to render.
 */
void
lp_rast_queue_scene( struct lp_rasterizer *rast,
                     struct lp_scene *scene)
{
   LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);

   if (rast->num_threads == 0) {
      /* no threading: rasterize synchronously on the caller's thread */
      unsigned fpstate = util_fpstate_get();

      /* Make sure that denorms are treated like zeros. This is
       * the behavior required by D3D10. OpenGL doesn't care.
       */
      util_fpstate_set_denorms_to_zero(fpstate);

      lp_rast_begin( rast, scene );

      rasterize_scene( &rast->tasks[0], scene );

      lp_rast_end( rast );

      /* restore the caller's FPU state */
      util_fpstate_set(fpstate);

      rast->curr_scene = NULL;
   }
   else {
      /* threaded rendering! */
      unsigned i;

      lp_scene_enqueue( rast->full_scenes, scene );

      /* signal the threads that there's work to do */
      for (i = 0; i < rast->num_threads; i++) {
         pipe_semaphore_signal(&rast->tasks[i].work_ready);
      }
   }

   LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
}


/**
 * Block until all queued scenes have been rasterized.
 */
void
lp_rast_finish( struct lp_rasterizer *rast )
{
   if (rast->num_threads == 0) {
      /* nothing to do: synchronous path already finished in queue_scene */
   }
   else {
      int i;

      /* wait for work to complete */
      for (i = 0; i < rast->num_threads; i++) {
         pipe_semaphore_wait(&rast->tasks[i].work_done);
      }
   }
}


/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *   1. wait for work
 *   2. do work
 *   3.
 signal that we're done
 */
static PIPE_THREAD_ROUTINE( thread_function, init_data )
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   boolean debug = false;
   char thread_name[16];
   unsigned fpstate;

   util_snprintf(thread_name, sizeof thread_name, "llvmpipe-%u", task->thread_index);
   pipe_thread_setname(thread_name);

   /* Make sure that denorms are treated like zeros. This is
    * the behavior required by D3D10. OpenGL doesn't care.
    */
   fpstate = util_fpstate_get();
   util_fpstate_set_denorms_to_zero(fpstate);

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %d waiting for work\n", task->thread_index);
      pipe_semaphore_wait(&task->work_ready);

      /* exit_flag is set by lp_rast_destroy before signalling work_ready */
      if (rast->exit_flag)
         break;

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - get next scene to rasterize
          *  - map the framebuffer surfaces
          */
         lp_rast_begin( rast,
                        lp_scene_dequeue( rast->full_scenes, TRUE ) );
      }

      /* Wait for all threads to get here so that threads[1+] don't
       * get a null rast->curr_scene pointer.
       */
      pipe_barrier_wait( &rast->barrier );

      /* do work */
      if (debug)
         debug_printf("thread %d doing work\n", task->thread_index);

      rasterize_scene(task,
                      rast->curr_scene);

      /* wait for all threads to finish with this scene */
      pipe_barrier_wait( &rast->barrier );

      /* XXX: shouldn't be necessary:
       */
      if (task->thread_index == 0) {
         lp_rast_end( rast );
      }

      /* signal done with work */
      if (debug)
         debug_printf("thread %d done working\n", task->thread_index);

      pipe_semaphore_signal(&task->work_done);
   }

#ifdef _WIN32
   /* On Windows lp_rast_destroy waits on work_done instead of joining
    * the thread (see lp_rast_destroy), so signal once more on exit.
    */
   pipe_semaphore_signal(&task->work_done);
#endif

   return 0;
}


/**
 * Initialize semaphores and spawn the threads.
 */
static void
create_rast_threads(struct lp_rasterizer *rast)
{
   unsigned i;

   /* NOTE: if num_threads is zero, we won't use any threads */
   for (i = 0; i < rast->num_threads; i++) {
      /* both semaphores start unsignalled; lp_rast_queue_scene posts
       * work_ready, the worker posts work_done
       */
      pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
      pipe_semaphore_init(&rast->tasks[i].work_done, 0);
      rast->threads[i] = pipe_thread_create(thread_function,
                                            (void *) &rast->tasks[i]);
   }
}



/**
 * Create new lp_rasterizer.  If num_threads is zero, don't create any
 * new threads, do rendering synchronously.
 * \param num_threads number of rasterizer threads to create
 * \return the new rasterizer, or NULL on allocation failure
 */
struct lp_rasterizer *
lp_rast_create( unsigned num_threads )
{
   struct lp_rasterizer *rast;
   unsigned i;

   rast = CALLOC_STRUCT(lp_rasterizer);
   if (!rast) {
      goto no_rast;
   }

   rast->full_scenes = lp_scene_queue_create();
   if (!rast->full_scenes) {
      goto no_full_scenes;
   }

   /* initialize every task slot, even those beyond num_threads
    * (task[0] is used directly on the synchronous path)
    */
   for (i = 0; i < Elements(rast->tasks); i++) {
      struct lp_rasterizer_task *task = &rast->tasks[i];
      task->rast = rast;
      task->thread_index = i;
   }

   rast->num_threads = num_threads;

   rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE);

   create_rast_threads(rast);

   /* for synchronizing rasterization threads */
   pipe_barrier_init( &rast->barrier, rast->num_threads );

   memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);

   return rast;

   /* goto-based cleanup: unwind in reverse order of acquisition */
no_full_scenes:
   FREE(rast);
no_rast:
   return NULL;
}


/* Shutdown:
 */
void lp_rast_destroy( struct lp_rasterizer *rast )
{
   unsigned i;

   /* Set exit_flag and signal each thread's work_ready semaphore.
    * Each thread will be woken up, notice that the exit_flag is set and
    * break out of its main loop.  The thread will then exit.
    */
   rast->exit_flag = TRUE;
   for (i = 0; i < rast->num_threads; i++) {
      pipe_semaphore_signal(&rast->tasks[i].work_ready);
   }

   /* Wait for threads to terminate before cleaning up per-thread data.
+ * We don't actually call pipe_thread_wait to avoid dead lock on Windows + * per https://bugs.freedesktop.org/show_bug.cgi?id=76252 */ + for (i = 0; i < rast->num_threads; i++) { +#ifdef _WIN32 + pipe_semaphore_wait(&rast->tasks[i].work_done); +#else + pipe_thread_wait(rast->threads[i]); +#endif + } + + /* Clean up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_destroy(&rast->tasks[i].work_ready); + pipe_semaphore_destroy(&rast->tasks[i].work_done); + } + + /* for synchronizing rasterization threads */ + pipe_barrier_destroy( &rast->barrier ); + + lp_scene_queue_destroy(rast->full_scenes); + + FREE(rast); +} + + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h new file mode 100644 index 000000000..c19f93180 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast.h @@ -0,0 +1,324 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * The rast code is concerned with rasterization of command bins. + * Each screen tile has a bin associated with it. To render the + * scene we iterate over the tile bins and execute the commands + * in each bin. + * We'll do that with multiple threads... + */ + + +#ifndef LP_RAST_H +#define LP_RAST_H + +#include "pipe/p_compiler.h" +#include "util/u_pack_color.h" +#include "lp_jit.h" + + +struct lp_rasterizer; +struct lp_scene; +struct lp_fence; +struct cmd_bin; + +#define FIXED_TYPE_WIDTH 64 +/** For sub-pixel positioning */ +#define FIXED_ORDER 8 +#define FIXED_ONE (1<<FIXED_ORDER) +#define FIXED_SHIFT (FIXED_TYPE_WIDTH - 1) +/** Maximum length of an edge in a primitive in pixels. + * If the framebuffer is large we have to think about fixed-point + * integer overflow. Coordinates need ((FIXED_TYPE_WIDTH/2) - 1) bits + * to be able to fit product of two such coordinates inside + * FIXED_TYPE_WIDTH, any larger and we could overflow a + * FIXED_TYPE_WIDTH_-bit int. + */ +#define MAX_FIXED_LENGTH (1 << (((FIXED_TYPE_WIDTH/2) - 1) - FIXED_ORDER)) + +#define MAX_FIXED_LENGTH32 (1 << (((32/2) - 1) - FIXED_ORDER)) + +/* Rasterizer output size going to jit fs, width/height */ +#define LP_RASTER_BLOCK_SIZE 4 + +#define LP_MAX_ACTIVE_BINNED_QUERIES 64 + +#define IMUL64(a, b) (((int64_t)(a)) * ((int64_t)(b))) + +struct lp_rasterizer_task; + + +/** + * Rasterization state. + * Objects of this type are put into the shared data bin and pointed + * to by commands in the per-tile bins. + */ +struct lp_rast_state { + /* State for the shader. 
This also contains state which feeds into + * the fragment shader, such as blend color and alpha ref value. + */ + struct lp_jit_context jit_context; + + /* The shader itself. Probably we also need to pass a pointer to + * the tile color/z/stencil data somehow + */ + struct lp_fragment_shader_variant *variant; +}; + + +/** + * Coefficients necessary to run the shader at a given location. + * First coefficient is position. + * These pointers point into the bin data buffer. + */ +struct lp_rast_shader_inputs { + unsigned frontfacing:1; /** True for front-facing */ + unsigned disable:1; /** Partially binned, disable this command */ + unsigned opaque:1; /** Is opaque */ + unsigned pad0:29; /* wasted space */ + unsigned stride; /* how much to advance data between a0, dadx, dady */ + unsigned layer; /* the layer to render to (from gs, already clamped) */ + unsigned viewport_index; /* the active viewport index (from gs, already clamped) */ + /* followed by a0, dadx, dady and planes[] */ +}; + +struct lp_rast_plane { + /* edge function values at minx,miny ?? */ + int64_t c; + + int32_t dcdx; + int32_t dcdy; + + /* one-pixel sized trivial reject offsets for each plane */ + int64_t eo; +}; + +/** + * Rasterization information for a triangle known to be in this bin, + * plus inputs to run the shader: + * These fields are tile- and bin-independent. + * Objects of this type are put into the lp_setup_context::data buffer. 
+ */ +struct lp_rast_triangle { +#ifdef DEBUG + float v[3][2]; + float pad0; + float pad1; +#endif + + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + /* planes are also allocated here */ +}; + + +struct lp_rast_clear_rb { + union util_color color_val; + unsigned cbuf; +}; + + +#define GET_A0(inputs) ((float (*)[4])((inputs)+1)) +#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride)) +#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride)) +#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride)) + + + +struct lp_rasterizer * +lp_rast_create( unsigned num_threads ); + +void +lp_rast_destroy( struct lp_rasterizer * ); + +void +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ); + +void +lp_rast_finish( struct lp_rasterizer *rast ); + + +union lp_rast_cmd_arg { + const struct lp_rast_shader_inputs *shade_tile; + struct { + const struct lp_rast_triangle *tri; + unsigned plane_mask; + } triangle; + const struct lp_rast_state *set_state; + const struct lp_rast_clear_rb *clear_rb; + struct { + uint64_t value; + uint64_t mask; + } clear_zstencil; + const struct lp_rast_state *state; + struct lp_fence *fence; + struct llvmpipe_query *query_obj; +}; + + +/* Cast wrappers. Hopefully these compile to noops! + */ +static inline union lp_rast_cmd_arg +lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile ) +{ + union lp_rast_cmd_arg arg; + arg.shade_tile = shade_tile; + return arg; +} + +static inline union lp_rast_cmd_arg +lp_rast_arg_triangle( const struct lp_rast_triangle *triangle, + unsigned plane_mask) +{ + union lp_rast_cmd_arg arg; + arg.triangle.tri = triangle; + arg.triangle.plane_mask = plane_mask; + return arg; +} + +/** + * Build argument for a contained triangle. 
+ * + * All planes are enabled, so instead of the plane mask we pass the upper + * left coordinates of the a block that fully encloses the triangle. + */ +static inline union lp_rast_cmd_arg +lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle, + unsigned x, unsigned y) +{ + union lp_rast_cmd_arg arg; + arg.triangle.tri = triangle; + arg.triangle.plane_mask = x | (y << 8); + return arg; +} + +static inline union lp_rast_cmd_arg +lp_rast_arg_state( const struct lp_rast_state *state ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = state; + return arg; +} + +static inline union lp_rast_cmd_arg +lp_rast_arg_fence( struct lp_fence *fence ) +{ + union lp_rast_cmd_arg arg; + arg.fence = fence; + return arg; +} + + +static inline union lp_rast_cmd_arg +lp_rast_arg_clearzs( uint64_t value, uint64_t mask ) +{ + union lp_rast_cmd_arg arg; + arg.clear_zstencil.value = value; + arg.clear_zstencil.mask = mask; + return arg; +} + + +static inline union lp_rast_cmd_arg +lp_rast_arg_query( struct llvmpipe_query *pq ) +{ + union lp_rast_cmd_arg arg; + arg.query_obj = pq; + return arg; +} + +static inline union lp_rast_cmd_arg +lp_rast_arg_null( void ) +{ + union lp_rast_cmd_arg arg; + arg.set_state = NULL; + return arg; +} + + +/** + * Binnable Commands. + * These get put into bins by the setup code and are called when + * the bins are executed. 
+ */ +#define LP_RAST_OP_CLEAR_COLOR 0x0 +#define LP_RAST_OP_CLEAR_ZSTENCIL 0x1 +#define LP_RAST_OP_TRIANGLE_1 0x2 +#define LP_RAST_OP_TRIANGLE_2 0x3 +#define LP_RAST_OP_TRIANGLE_3 0x4 +#define LP_RAST_OP_TRIANGLE_4 0x5 +#define LP_RAST_OP_TRIANGLE_5 0x6 +#define LP_RAST_OP_TRIANGLE_6 0x7 +#define LP_RAST_OP_TRIANGLE_7 0x8 +#define LP_RAST_OP_TRIANGLE_8 0x9 +#define LP_RAST_OP_TRIANGLE_3_4 0xa +#define LP_RAST_OP_TRIANGLE_3_16 0xb +#define LP_RAST_OP_TRIANGLE_4_16 0xc +#define LP_RAST_OP_SHADE_TILE 0xd +#define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe +#define LP_RAST_OP_BEGIN_QUERY 0xf +#define LP_RAST_OP_END_QUERY 0x10 +#define LP_RAST_OP_SET_STATE 0x11 +#define LP_RAST_OP_TRIANGLE_32_1 0x12 +#define LP_RAST_OP_TRIANGLE_32_2 0x13 +#define LP_RAST_OP_TRIANGLE_32_3 0x14 +#define LP_RAST_OP_TRIANGLE_32_4 0x15 +#define LP_RAST_OP_TRIANGLE_32_5 0x16 +#define LP_RAST_OP_TRIANGLE_32_6 0x17 +#define LP_RAST_OP_TRIANGLE_32_7 0x18 +#define LP_RAST_OP_TRIANGLE_32_8 0x19 +#define LP_RAST_OP_TRIANGLE_32_3_4 0x1a +#define LP_RAST_OP_TRIANGLE_32_3_16 0x1b +#define LP_RAST_OP_TRIANGLE_32_4_16 0x1c + +#define LP_RAST_OP_MAX 0x1d +#define LP_RAST_OP_MASK 0xff + +void +lp_debug_bins( struct lp_scene *scene ); +void +lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene ); +void +lp_debug_draw_bins_by_coverage( struct lp_scene *scene ); + + +#ifdef PIPE_ARCH_SSE +#include <emmintrin.h> +#include "util/u_sse.h" + +static inline __m128i +lp_plane_to_m128i(const struct lp_rast_plane *plane) +{ + return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx, + (int32_t)plane->dcdy, (int32_t)plane->eo); +} + +#endif + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c new file mode 100644 index 000000000..b5ae9dadf --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -0,0 +1,438 @@ +#include <inttypes.h> /* for PRIu64 macro */ +#include "util/u_math.h" +#include "lp_rast_priv.h" +#include 
"lp_state_fs.h" + +struct tile { + int coverage; + int overdraw; + const struct lp_rast_state *state; + char data[TILE_SIZE][TILE_SIZE]; +}; + +static char get_label( int i ) +{ + static const char *cmd_labels = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + unsigned max_label = (2*26+10); + + if (i < max_label) + return cmd_labels[i]; + else + return '?'; +} + + + +static const char *cmd_names[LP_RAST_OP_MAX] = +{ + "clear_color", + "clear_zstencil", + "triangle_1", + "triangle_2", + "triangle_3", + "triangle_4", + "triangle_5", + "triangle_6", + "triangle_7", + "triangle_8", + "triangle_3_4", + "triangle_3_16", + "triangle_4_16", + "shade_tile", + "shade_tile_opaque", + "begin_query", + "end_query", + "set_state", + "triangle_32_1", + "triangle_32_2", + "triangle_32_3", + "triangle_32_4", + "triangle_32_5", + "triangle_32_6", + "triangle_32_7", + "triangle_32_8", + "triangle_32_3_4", + "triangle_32_3_16", + "triangle_32_4_16", +}; + +static const char *cmd_name(unsigned cmd) +{ + assert(Elements(cmd_names) > cmd); + return cmd_names[cmd]; +} + +static const struct lp_fragment_shader_variant * +get_variant( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) +{ + if (!state) + return NULL; + + if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE || + block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_7) + return state->variant; + + return NULL; +} + + +static boolean +is_blend( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) +{ + const struct lp_fragment_shader_variant *variant = get_variant(state, block, k); + + if (variant) + return variant->key.blend.rt[0].blend_enable; + + return FALSE; +} + + + 
+static void +debug_bin( const struct cmd_bin *bin, int x, int y ) +{ + const struct lp_rast_state *state = NULL; + const struct cmd_block *head = bin->head; + int i, j = 0; + + debug_printf("bin %d,%d:\n", x, y); + + while (head) { + for (i = 0; i < head->count; i++, j++) { + if (head->cmd[i] == LP_RAST_OP_SET_STATE) + state = head->arg[i].state; + + debug_printf("%d: %s %s\n", j, + cmd_name(head->cmd[i]), + is_blend(state, head, i) ? "blended" : ""); + } + head = head->next; + } +} + + +static void plot(struct tile *tile, + int x, int y, + char val, + boolean blend) +{ + if (tile->data[x][y] == ' ') + tile->coverage++; + else + tile->overdraw++; + + tile->data[x][y] = val; +} + + + + + + +static int +debug_shade_tile(int x, int y, + const union lp_rast_cmd_arg arg, + struct tile *tile, + char val) +{ + const struct lp_rast_shader_inputs *inputs = arg.shade_tile; + boolean blend; + unsigned i,j; + + if (!tile->state) + return 0; + + blend = tile->state->variant->key.blend.rt[0].blend_enable; + + if (inputs->disable) + return 0; + + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + plot(tile, i, j, val, blend); + + return TILE_SIZE * TILE_SIZE; +} + +static int +debug_clear_tile(int x, int y, + const union lp_rast_cmd_arg arg, + struct tile *tile, + char val) +{ + unsigned i,j; + + for (i = 0; i < TILE_SIZE; i++) + for (j = 0; j < TILE_SIZE; j++) + plot(tile, i, j, val, FALSE); + + return TILE_SIZE * TILE_SIZE; + +} + + +static int +debug_triangle(int tilex, int tiley, + const union lp_rast_cmd_arg arg, + struct tile *tile, + char val) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); + struct lp_rast_plane plane[8]; + int x, y; + int count = 0; + unsigned i, nr_planes = 0; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; + + if (tri->inputs.disable) { + /* This triangle was partially binned and has been 
disabled */ + return 0; + } + + while (plane_mask) { + plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; + plane[nr_planes].c = (plane[nr_planes].c + + IMUL64(plane[nr_planes].dcdy, tiley) - + IMUL64(plane[nr_planes].dcdx, tilex)); + nr_planes++; + } + + for(y = 0; y < TILE_SIZE; y++) + { + for(x = 0; x < TILE_SIZE; x++) + { + for (i = 0; i < nr_planes; i++) + if (plane[i].c <= 0) + goto out; + + plot(tile, x, y, val, blend); + count++; + + out: + for (i = 0; i < nr_planes; i++) + plane[i].c -= plane[i].dcdx; + } + + for (i = 0; i < nr_planes; i++) { + plane[i].c += IMUL64(plane[i].dcdx, TILE_SIZE); + plane[i].c += plane[i].dcdy; + } + } + return count; +} + + + + + +static void +do_debug_bin( struct tile *tile, + const struct cmd_bin *bin, + int x, int y, + boolean print_cmds) +{ + unsigned k, j = 0; + const struct cmd_block *block; + + int tx = x * TILE_SIZE; + int ty = y * TILE_SIZE; + + memset(tile->data, ' ', sizeof tile->data); + tile->coverage = 0; + tile->overdraw = 0; + tile->state = NULL; + + for (block = bin->head; block; block = block->next) { + for (k = 0; k < block->count; k++, j++) { + boolean blend = is_blend(tile->state, block, k); + char val = get_label(j); + int count = 0; + + if (print_cmds) + debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_SET_STATE) + tile->state = block->arg[k].state; + + if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || + block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) + count = debug_clear_tile(tx, ty, block->arg[k], tile, val); + + if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) + count = debug_shade_tile(tx, ty, block->arg[k], tile, val); + + if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || + block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || + block->cmd[k] == 
LP_RAST_OP_TRIANGLE_7) + count = debug_triangle(tx, ty, block->arg[k], tile, val); + + if (print_cmds) { + debug_printf(" % 5d", count); + + if (blend) + debug_printf(" blended"); + + debug_printf("\n"); + } + } + } +} + +void +lp_debug_bin( const struct cmd_bin *bin, int i, int j) +{ + struct tile tile; + int x,y; + + if (bin->head) { + do_debug_bin(&tile, bin, i, j, TRUE); + + debug_printf("------------------------------------------------------------------\n"); + for (y = 0; y < TILE_SIZE; y++) { + for (x = 0; x < TILE_SIZE; x++) { + debug_printf("%c", tile.data[y][x]); + } + debug_printf("|\n"); + } + debug_printf("------------------------------------------------------------------\n"); + + debug_printf("each pixel drawn avg %f times\n", + ((float)tile.overdraw + tile.coverage)/(float)tile.coverage); + } +} + + + + + + +/** Return number of bytes used for a single bin */ +static unsigned +lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y ) +{ + struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y); + const struct cmd_block *cmd; + unsigned size = 0; + for (cmd = bin->head; cmd; cmd = cmd->next) { + size += (cmd->count * + (sizeof(uint8_t) + sizeof(union lp_rast_cmd_arg))); + } + return size; +} + + + +void +lp_debug_draw_bins_by_coverage( struct lp_scene *scene ) +{ + unsigned x, y; + unsigned total = 0; + unsigned possible = 0; + static uint64_t _total = 0; + static uint64_t _possible = 0; + + for (x = 0; x < scene->tiles_x; x++) + debug_printf("-"); + debug_printf("\n"); + + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + const char *bits = "0123456789"; + struct tile tile; + + if (bin->head) { + //lp_debug_bin(bin, x, y); + + do_debug_bin(&tile, bin, x, y, FALSE); + + total += tile.coverage; + possible += 64*64; + + if (tile.coverage == 64*64) + debug_printf("*"); + else if (tile.coverage) { + int bit = tile.coverage/(64.0*64.0)*10; 
+ debug_printf("%c", bits[MIN2(bit,10)]); + } + else + debug_printf("?"); + } + else { + debug_printf(" "); + } + } + debug_printf("|\n"); + } + + for (x = 0; x < scene->tiles_x; x++) + debug_printf("-"); + debug_printf("\n"); + + debug_printf("this tile total: %u possible %u: percentage: %f\n", + total, + possible, + total * 100.0 / (float)possible); + + _total += total; + _possible += possible; + + + debug_printf("overall total: %" PRIu64 + " possible %" PRIu64 ": percentage: %f\n", + _total, + _possible, + (double) _total * 100.0 / (double)_possible); +} + + +void +lp_debug_draw_bins_by_cmd_length( struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + const char *bits = " ...,-~:;=o+xaw*#XAWWWWWWWWWWWWWWWW"; + unsigned sz = lp_scene_bin_size(scene, x, y); + unsigned sz2 = util_logbase2(sz); + debug_printf("%c", bits[MIN2(sz2,32)]); + } + debug_printf("\n"); + } +} + + +void +lp_debug_bins( struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + if (bin->head) { + debug_bin(bin, x, y); + } + } + } +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h new file mode 100644 index 000000000..9aa7e8746 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -0,0 +1,347 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_RAST_PRIV_H +#define LP_RAST_PRIV_H + +#include "os/os_thread.h" +#include "util/u_format.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_memory.h" +#include "lp_rast.h" +#include "lp_scene.h" +#include "lp_state.h" +#include "lp_texture.h" +#include "lp_limits.h" + + +#define TILE_VECTOR_HEIGHT 4 +#define TILE_VECTOR_WIDTH 4 + +/* If we crash in a jitted function, we can examine jit_line and jit_state + * to get some info. This is not thread-safe, however. 
 */
#ifdef DEBUG

struct lp_rasterizer_task;

/* Crash breadcrumbs (see comment above): the source line, rasterizer
 * state and task that were live when we last entered JIT code.  Shared
 * globals -- explicitly not thread-safe, debug builds only.
 */
extern int jit_line;
extern const struct lp_rast_state *jit_state;
extern const struct lp_rasterizer_task *jit_task;

/* Record where / with what state we are about to call into a
 * JIT-compiled shader, so a crash inside it can be attributed.
 */
#define BEGIN_JIT_CALL(state, task) \
   do { \
      jit_line = __LINE__; \
      jit_state = state; \
      jit_task = task; \
   } while (0)

/* Clear the breadcrumbs on return from JIT code.
 * NOTE(review): jit_task is left set here while jit_line/jit_state are
 * cleared -- looks intentional (last task remains inspectable), confirm.
 */
#define END_JIT_CALL() \
   do { \
      jit_line = 0; \
      jit_state = NULL; \
   } while (0)

#else

/* Release builds: the breadcrumb bookkeeping compiles away entirely. */
#define BEGIN_JIT_CALL(X, Y)
#define END_JIT_CALL()

#endif


struct lp_rasterizer; 
struct cmd_bin;

/**
 * Per-thread rasterization state
 */
struct lp_rasterizer_task
{
   const struct cmd_bin *bin;         /**< command bin currently being run */
   const struct lp_rast_state *state; /**< current raster state for this bin */

   struct lp_scene *scene;            /**< scene being rasterized */
   unsigned x, y;          /**< Pos of this tile in framebuffer, in pixels */
   unsigned width, height; /**< width, height of current tile, in pixels */

   /* Per-render-target pointers into the mapped surface data for the
    * tile this task is working on (see lp_rast_get_color_block_pointer).
    */
   uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
   uint8_t *depth_tile;

   /** "back" pointer */
   struct lp_rasterizer *rast;

   /** "my" index */
   unsigned thread_index;

   /** Non-interpolated passthru state and occlusion counter for visible pixels */
   struct lp_jit_thread_data thread_data;
   uint64_t ps_invocations;     /**< fragment-shader invocation count,
                                 *   scaled by ps_inv_multiplier */
   uint8_t ps_inv_multiplier;

   /* Semaphores used to hand work to / collect completion from this
    * thread -- signalled from the rasterizer core (not visible here).
    */
   pipe_semaphore work_ready;
   pipe_semaphore work_done;
};


/**
 * This is the state required while rasterizing tiles.
 * Note that this contains per-thread information too.
 * The tile size is TILE_SIZE x TILE_SIZE pixels.
+ */ +struct lp_rasterizer +{ + boolean exit_flag; + boolean no_rast; /**< For debugging/profiling */ + + /** The incoming queue of scenes ready to rasterize */ + struct lp_scene_queue *full_scenes; + + /** The scene currently being rasterized by the threads */ + struct lp_scene *curr_scene; + + /** A task object for each rasterization thread */ + struct lp_rasterizer_task tasks[LP_MAX_THREADS]; + + unsigned num_threads; + pipe_thread threads[LP_MAX_THREADS]; + + /** For synchronizing the rasterization threads */ + pipe_barrier barrier; +}; + + +void +lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y, + unsigned mask); + + +/** + * Get the pointer to a 4x4 color block (within a 64x64 tile). + * \param x, y location of 4x4 block in window coords + */ +static inline uint8_t * +lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task, + unsigned buf, unsigned x, unsigned y, + unsigned layer) +{ + unsigned px, py, pixel_offset; + uint8_t *color; + + assert(x < task->scene->tiles_x * TILE_SIZE); + assert(y < task->scene->tiles_y * TILE_SIZE); + assert((x % TILE_VECTOR_WIDTH) == 0); + assert((y % TILE_VECTOR_HEIGHT) == 0); + assert(buf < task->scene->fb.nr_cbufs); + + assert(task->color_tiles[buf]); + + /* + * We don't actually benefit from having per tile cbuf/zsbuf pointers, + * it's just extra work - the mul/add would be exactly the same anyway. + * Fortunately the extra work (modulo) here is very cheap at least... + */ + px = x % TILE_SIZE; + py = y % TILE_SIZE; + + pixel_offset = px * task->scene->cbufs[buf].format_bytes + + py * task->scene->cbufs[buf].stride; + color = task->color_tiles[buf] + pixel_offset; + + if (layer) { + color += layer * task->scene->cbufs[buf].layer_stride; + } + + assert(lp_check_alignment(color, llvmpipe_get_format_alignment(task->scene->fb.cbufs[buf]->format))); + return color; +} + + +/** + * Get the pointer to a 4x4 depth block (within a 64x64 tile). 
+ * \param x, y location of 4x4 block in window coords + */ +static inline uint8_t * +lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task, + unsigned x, unsigned y, unsigned layer) +{ + unsigned px, py, pixel_offset; + uint8_t *depth; + + assert(x < task->scene->tiles_x * TILE_SIZE); + assert(y < task->scene->tiles_y * TILE_SIZE); + assert((x % TILE_VECTOR_WIDTH) == 0); + assert((y % TILE_VECTOR_HEIGHT) == 0); + + assert(task->depth_tile); + + px = x % TILE_SIZE; + py = y % TILE_SIZE; + + pixel_offset = px * task->scene->zsbuf.format_bytes + + py * task->scene->zsbuf.stride; + depth = task->depth_tile + pixel_offset; + + if (layer) { + depth += layer * task->scene->zsbuf.layer_stride; + } + + assert(lp_check_alignment(depth, llvmpipe_get_format_alignment(task->scene->fb.zsbuf->format))); + return depth; +} + + + +/** + * Shade all pixels in a 4x4 block. The fragment code omits the + * triangle in/out tests. + * \param x, y location of 4x4 block in window coords + */ +static inline void +lp_rast_shade_quads_all( struct lp_rasterizer_task *task, + const struct lp_rast_shader_inputs *inputs, + unsigned x, unsigned y ) +{ + const struct lp_scene *scene = task->scene; + const struct lp_rast_state *state = task->state; + struct lp_fragment_shader_variant *variant = state->variant; + uint8_t *color[PIPE_MAX_COLOR_BUFS]; + unsigned stride[PIPE_MAX_COLOR_BUFS]; + uint8_t *depth = NULL; + unsigned depth_stride = 0; + unsigned i; + + /* color buffer */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->fb.cbufs[i]) { + stride[i] = scene->cbufs[i].stride; + color[i] = lp_rast_get_color_block_pointer(task, i, x, y, + inputs->layer); + } + else { + stride[i] = 0; + color[i] = NULL; + } + } + + if (scene->zsbuf.map) { + depth = lp_rast_get_depth_block_pointer(task, x, y, inputs->layer); + depth_stride = scene->zsbuf.stride; + } + + /* + * The rasterizer may produce fragments outside our + * allocated 4x4 blocks hence need to filter them out here. 
+ */ + if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { + /* not very accurate would need a popcount on the mask */ + /* always count this not worth bothering? */ + task->ps_invocations += 1 * variant->ps_inv_multiplier; + + /* Propagate non-interpolated raster state. */ + task->thread_data.raster_state.viewport_index = inputs->viewport_index; + + /* run shader on 4x4 block */ + BEGIN_JIT_CALL(state, task); + variant->jit_function[RAST_WHOLE]( &state->jit_context, + x, y, + inputs->frontfacing, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), + color, + depth, + 0xffff, + &task->thread_data, + stride, + depth_stride); + END_JIT_CALL(); + } +} + +void lp_rast_triangle_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_4( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_3_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + + +void lp_rast_triangle_32_1( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_2( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_3( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_4( struct lp_rasterizer_task *, 
+ const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_5( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_6( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_7( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); +void lp_rast_triangle_32_8( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_3_4(struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_3_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void lp_rast_triangle_32_4_16( struct lp_rasterizer_task *, + const union lp_rast_cmd_arg ); + +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + +void +lp_debug_bin( const struct cmd_bin *bin, int x, int y ); + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c new file mode 100644 index 000000000..c9b9221d8 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -0,0 +1,558 @@ +/************************************************************************** + * + * Copyright 2007-2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Rasterization for binned triangles within a tile
 */

#include <limits.h>
#include "util/u_math.h"
#include "lp_debug.h"
#include "lp_perf.h"
#include "lp_rast_priv.h"

/**
 * Shade all pixels in a 4x4 block.
 * The block is known to be fully inside the triangle, so the per-pixel
 * in/out edge tests are skipped (lp_rast_shade_quads_all uses the
 * shader variant that omits them).
 * \param x, y  window coords of the 4x4 block
 */
static void
block_full_4(struct lp_rasterizer_task *task,
             const struct lp_rast_triangle *tri,
             int x, int y)
{
   lp_rast_shade_quads_all(task, &tri->inputs, x, y);
}


/**
 * Shade all pixels in a 16x16 block.
 */
static void
block_full_16(struct lp_rasterizer_task *task,
              const struct lp_rast_triangle *tri,
              int x, int y)
{
   unsigned ix, iy;
   assert(x % 16 == 0);
   assert(y % 16 == 0);
   /* A fully-covered 16x16 block is just sixteen fully-covered 4x4 blocks. */
   for (iy = 0; iy < 16; iy += 4)
      for (ix = 0; ix < 16; ix += 4)
         block_full_4(task, tri, x + ix, y + iy);
}

/**
 * Evaluate one edge function at the 16 sample points of a 4x4 grid and
 * gather the sign bits into a coverage mask.
 *
 * \param c     fixed-point edge-function value at the grid origin
 * \param dcdx  per-step change in x
 * \param dcdy  per-step change in y
 *
 * Bit i of the result corresponds to sample (i % 4, i / 4); a set bit
 * means the edge function is negative there (sample outside the edge).
 * NOTE(review): the arithmetic right shift by FIXED_SHIFT is relied on
 * to leave bit i set exactly when the value is negative -- this depends
 * on the fixed-point ranges the plane setup guarantees (not visible in
 * this file); inherited from upstream, confirm against FIXED_SHIFT use.
 */
static inline unsigned
build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
{
   unsigned mask = 0;

   /* Edge-function value at the start of each of the four rows. */
   int64_t c0 = c;
   int64_t c1 = c0 + dcdy;
   int64_t c2 = c1 + dcdy;
   int64_t c3 = c2 + dcdy;

   mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0);
   mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1);
   mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2);
   mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3);
   mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4);
   mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5);
   mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6);
   mask |= ((c1 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 7);
   mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8);
   mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9);
   mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10);
   mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11);
   mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12);
   mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13);
   mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14);
   mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15);

   return mask;
}


/**
 * Accumulate the trivial-reject and trivial-accept sign masks for one
 * plane: *outmask collects sign bits evaluated at c (the reject-corner
 * value), *partmask collects sign bits at c + cdiff (the accept-corner
 * value).  See the callers' comments: "sign bits from c[i] + cox/cio".
 */
static inline void
build_masks(int64_t c,
            int64_t cdiff,
            int64_t dcdx,
            int64_t dcdy,
            unsigned *outmask,
            unsigned *partmask)
{
   *outmask |= build_mask_linear(c, dcdx, dcdy);
   *partmask |= build_mask_linear(c + cdiff, dcdx, dcdy);
}

/* 16x16 entry point for a 3-plane (ordinary) triangle: forward to the
 * general 3-plane rasterizer with all three planes enabled.
 */
void
lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
                      const union lp_rast_cmd_arg arg)
{
   union lp_rast_cmd_arg arg2;
   arg2.triangle.tri = arg.triangle.tri;
   arg2.triangle.plane_mask = (1<<3)-1;   /* planes 0..2 */
   lp_rast_triangle_3(task, arg2);
}

void
lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
                     const union
lp_rast_cmd_arg arg) +{ + lp_rast_triangle_3_16(task, arg); +} + +void +lp_rast_triangle_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_4(task, arg2); +} + +#if !defined(PIPE_ARCH_SSE) + +void +lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<3)-1; + lp_rast_triangle_32_3(task, arg2); +} + +void +lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_32_4(task, arg2); +} + +void +lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + lp_rast_triangle_32_3_16(task, arg); +} + +#else +#include <emmintrin.h> +#include "util/u_sse.h" + + +static inline void +build_masks_32(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + { + __m128i cstep01, cstep23, result; + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *outmask |= _mm_movemask_epi8(result); + } + + + { + __m128i cio4 = _mm_set1_epi32(cdiff); + __m128i cstep01, cstep23, result; + + cstep0 = _mm_add_epi32(cstep0, cio4); + cstep1 = _mm_add_epi32(cstep1, cio4); + cstep2 = _mm_add_epi32(cstep2, cio4); + cstep3 = _mm_add_epi32(cstep3, cio4); + + cstep01 = _mm_packs_epi32(cstep0, cstep1); + 
cstep23 = _mm_packs_epi32(cstep2, cstep3); + result = _mm_packs_epi16(cstep01, cstep23); + + *partmask |= _mm_movemask_epi8(result); + } +} + + +static inline unsigned +build_mask_linear_32(int c, int dcdx, int dcdy) +{ + __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); + __m128i xdcdy = _mm_set1_epi32(dcdy); + + /* Get values across the quad + */ + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + /* pack pairs of results into epi16 + */ + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + + /* pack into epi8, preserving sign bits + */ + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* extract sign bits to create mask + */ + return _mm_movemask_epi8(result); +} + +static inline unsigned +sign_bits4(const __m128i *cstep, int cdiff) +{ + + /* Adjust the step values + */ + __m128i cio4 = _mm_set1_epi32(cdiff); + __m128i cstep0 = _mm_add_epi32(cstep[0], cio4); + __m128i cstep1 = _mm_add_epi32(cstep[1], cio4); + __m128i cstep2 = _mm_add_epi32(cstep[2], cio4); + __m128i cstep3 = _mm_add_epi32(cstep[3], cio4); + + /* Pack down to epi8 + */ + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + return _mm_movemask_epi8(result); +} + + +#define NR_PLANES 3 + + + + + + + +void +lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + unsigned i, j; + + struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; + unsigned nr = 0; + + __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ 
+ __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + __m128i rej4; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &rej4); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + rej4 = _mm_slli_epi32(rej4, 2); + + /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ + c = _mm_sub_epi32(c, _mm_set1_epi32(1)); + rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1)); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); + + for (i = 0; i < 4; i++) { + __m128i cx = c; + + for (j = 0; j < 4; j++) { + __m128i c4rej = _mm_add_epi32(cx, rej4); + __m128i rej_masks = _mm_srai_epi32(c4rej, 31); + + /* if (is_zero(rej_masks)) */ + if (_mm_movemask_epi8(rej_masks) == 0) { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + 
__m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); + + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); + + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + out[nr].i = i; + out[nr].j = j; + out[nr].mask = mask; + if (mask != 0xffff) + nr++; + } + cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); + } + + c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); + } + + for (i = 0; i < nr; i++) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x + 4 * out[i].j, + y + 4 * out[i].i, + 0xffff & ~out[i].mask); +} + + + + + +void +lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned x = (arg.triangle.plane_mask & 0xff) + task->x; + unsigned y = (arg.triangle.plane_mask >> 8) + task->y; + + __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &unused); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = 
_mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + + /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ + c = _mm_sub_epi32(c, _mm_set1_epi32(1)); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); + + + { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); + + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); + + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + if (mask != 0xffff) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x, + y, + 0xffff & ~mask); + } +} + +#undef NR_PLANES +#endif + + +#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks(c, cdiff, dcdx, dcdy, omask, pmask) +#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear(c, dcdx, dcdy) + +#define TAG(x) x##_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_2 
+#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_3 +#define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_4 +#define NR_PLANES 4 +/*#define TRI_16 lp_rast_triangle_4_16*/ +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + +#ifdef PIPE_ARCH_SSE +#undef BUILD_MASKS +#undef BUILD_MASK_LINEAR +#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask) +#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear_32((int)c, dcdx, dcdy) +#endif + +#define TAG(x) x##_32_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_3 +#define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_4 +#define NR_PLANES 4 +#ifdef PIPE_ARCH_SSE +#define TRI_16 lp_rast_triangle_32_4_16 +#endif +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h new file mode 100644 index 000000000..52f6e9996 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -0,0 +1,380 @@ 
/**************************************************************************
 *
 * Copyright 2007-2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Rasterization for binned triangles within a tile
 */



/**
 * Prototype for an 8-plane rasterizer function.  Will code-generate
 * several of these.
 *
 * XXX: Variants for more/fewer planes.
 * XXX: Need ways of dropping planes as we descend.
+ * XXX: SIMD + */ +static void +TAG(do_block_4)(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + const struct lp_rast_plane *plane, + int x, int y, + const int64_t *c) +{ + unsigned mask = 0xffff; + int j; + + for (j = 0; j < NR_PLANES; j++) { + mask &= ~BUILD_MASK_LINEAR(c[j] - 1, + -plane[j].dcdx, + plane[j].dcdy); + } + + /* Now pass to the shader: + */ + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); +} + +/** + * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out + * of the triangle's bounds. + */ +static void +TAG(do_block_16)(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + const struct lp_rast_plane *plane, + int x, int y, + const int64_t *c) +{ + unsigned outmask, inmask, partmask, partial_mask; + unsigned j; + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + for (j = 0; j < NR_PLANES; j++) { + const int64_t dcdx = -IMUL64(plane[j].dcdx, 4); + const int64_t dcdy = IMUL64(plane[j].dcdy, 4); + const int64_t cox = IMUL64(plane[j].eo, 4); + const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int64_t cio = IMUL64(ei, 4) - 1; + + BUILD_MASKS(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ + } + + if (outmask == 0xffff) + return; + + /* Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + LP_COUNT_ADD(nr_empty_4, util_bitcount(0xffff & ~(partial_mask | inmask))); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + 
int py = y + iy; + int64_t cx[NR_PLANES]; + + partial_mask &= ~(1 << i); + + LP_COUNT(nr_partially_covered_4); + + for (j = 0; j < NR_PLANES; j++) + cx[j] = (c[j] + - IMUL64(plane[j].dcdx, ix) + + IMUL64(plane[j].dcdy, iy)); + + TAG(do_block_4)(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + + inmask &= ~(1 << i); + + LP_COUNT(nr_fully_covered_4); + block_full_4(task, tri, px, py); + } +} + + +/** + * Scan the tile in chunks and figure out which pixels to rasterize + * for this triangle. + */ +void +TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); + const int x = task->x, y = task->y; + struct lp_rast_plane plane[NR_PLANES]; + int64_t c[NR_PLANES]; + unsigned outmask, inmask, partmask, partial_mask; + unsigned j = 0; + + if (tri->inputs.disable) { + /* This triangle was partially binned and has been disabled */ + return; + } + + outmask = 0; /* outside one or more trivial reject planes */ + partmask = 0; /* outside one or more trivial accept planes */ + + while (plane_mask) { + int i = ffs(plane_mask) - 1; + plane[j] = tri_plane[i]; + plane_mask &= ~(1 << i); + c[j] = plane[j].c + IMUL64(plane[j].dcdy, y) - IMUL64(plane[j].dcdx, x); + + { + const int64_t dcdx = -IMUL64(plane[j].dcdx, 16); + const int64_t dcdy = IMUL64(plane[j].dcdy, 16); + const int64_t cox = IMUL64(plane[j].eo, 16); + const int64_t ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int64_t cio = IMUL64(ei, 16) - 1; + + BUILD_MASKS(c[j] + cox, + cio - cox, + dcdx, dcdy, + &outmask, /* sign bits from c[i][0..15] + cox */ + &partmask); /* sign bits from c[i][0..15] + cio */ + } + + j++; + } + + if (outmask == 0xffff) + return; + + /* 
Mask of sub-blocks which are inside all trivial accept planes: + */ + inmask = ~partmask & 0xffff; + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = partmask & ~outmask; + + assert((partial_mask & inmask) == 0); + + LP_COUNT_ADD(nr_empty_16, util_bitcount(0xffff & ~(partial_mask | inmask))); + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; + int64_t cx[NR_PLANES]; + + for (j = 0; j < NR_PLANES; j++) + cx[j] = (c[j] + - IMUL64(plane[j].dcdx, ix) + + IMUL64(plane[j].dcdy, iy)); + + partial_mask &= ~(1 << i); + + LP_COUNT(nr_partially_covered_16); + TAG(do_block_16)(task, tri, plane, px, py, cx); + } + + /* Iterate over fulls: + */ + while (inmask) { + int i = ffs(inmask) - 1; + int ix = (i & 3) * 16; + int iy = (i >> 2) * 16; + int px = x + ix; + int py = y + iy; + + inmask &= ~(1 << i); + + LP_COUNT(nr_fully_covered_16); + block_full_16(task, tri, px, py); + } +} + +#if defined(PIPE_ARCH_SSE) && defined(TRI_16) +/* XXX: special case this when intersection is not required. + * - tile completely within bbox, + * - bbox completely within tile. 
+ */ +void +TRI_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + unsigned outmask, partial_mask; + unsigned j; + __m128i cstep4[NR_PLANES][4]; + + int x = (mask & 0xff); + int y = (mask >> 8); + + outmask = 0; /* outside one or more trivial reject planes */ + + x += task->x; + y += task->y; + + for (j = 0; j < NR_PLANES; j++) { + const int dcdx = -plane[j].dcdx * 4; + const int dcdy = plane[j].dcdy * 4; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + cstep4[j][0] = _mm_setr_epi32(0, dcdx, dcdx*2, dcdx*3); + cstep4[j][1] = _mm_add_epi32(cstep4[j][0], xdcdy); + cstep4[j][2] = _mm_add_epi32(cstep4[j][1], xdcdy); + cstep4[j][3] = _mm_add_epi32(cstep4[j][2], xdcdy); + + { + const int c = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; + const int cox = plane[j].eo * 4; + + outmask |= sign_bits4(cstep4[j], c + cox); + } + } + + if (outmask == 0xffff) + return; + + + /* Mask of sub-blocks which are inside all trivial reject planes, + * but outside at least one trivial accept plane: + */ + partial_mask = 0xffff & ~outmask; + + /* Iterate over partials: + */ + while (partial_mask) { + int i = ffs(partial_mask) - 1; + int ix = (i & 3) * 4; + int iy = (i >> 2) * 4; + int px = x + ix; + int py = y + iy; + unsigned mask = 0xffff; + + partial_mask &= ~(1 << i); + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c - 1 + - plane[j].dcdx * px + + plane[j].dcdy * py) * 4; + + mask &= ~sign_bits4(cstep4[j], cx); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, px, py, mask); + } +} +#endif + +#if defined(PIPE_ARCH_SSE) && defined(TRI_4) +void +TRI_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = GET_PLANES(tri); + unsigned mask = arg.triangle.plane_mask; + 
const int x = task->x + (mask & 0xff); + const int y = task->y + (mask >> 8); + unsigned j; + + /* Iterate over partials: + */ + { + unsigned mask = 0xffff; + + for (j = 0; j < NR_PLANES; j++) { + const int cx = (plane[j].c + - plane[j].dcdx * x + + plane[j].dcdy * y); + + const int dcdx = -plane[j].dcdx; + const int dcdy = plane[j].dcdy; + __m128i xdcdy = _mm_set1_epi32(dcdy); + + __m128i cstep0 = _mm_setr_epi32(cx, cx + dcdx, cx + dcdx*2, cx + dcdx*3); + __m128i cstep1 = _mm_add_epi32(cstep0, xdcdy); + __m128i cstep2 = _mm_add_epi32(cstep1, xdcdy); + __m128i cstep3 = _mm_add_epi32(cstep2, xdcdy); + + __m128i cstep01 = _mm_packs_epi32(cstep0, cstep1); + __m128i cstep23 = _mm_packs_epi32(cstep2, cstep3); + __m128i result = _mm_packs_epi16(cstep01, cstep23); + + /* Extract the sign bits + */ + mask &= ~_mm_movemask_epi8(result); + } + + if (mask) + lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask); + } +} +#endif + + + +#undef TAG +#undef TRI_4 +#undef TRI_16 +#undef NR_PLANES + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c new file mode 100644 index 000000000..2441b3c0d --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.c @@ -0,0 +1,564 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_framebuffer.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/simple_list.h"
#include "util/u_format.h"
#include "lp_scene.h"
#include "lp_fence.h"
#include "lp_debug.h"


/* Number of resource pointers held by one resource_ref list node. */
#define RESOURCE_REF_SZ 32

/** One node in the scene's linked list of referenced pipe_resources. */
struct resource_ref {
   struct pipe_resource *resource[RESOURCE_REF_SZ]; /**< referenced resources */
   int count;                  /**< number of valid entries in resource[] */
   struct resource_ref *next;  /**< next node in the list, or NULL */
};


/**
 * Create a new scene object.
+ * \param queue the queue to put newly rendered/emptied scenes into + */ +struct lp_scene * +lp_scene_create( struct pipe_context *pipe ) +{ + struct lp_scene *scene = CALLOC_STRUCT(lp_scene); + if (!scene) + return NULL; + + scene->pipe = pipe; + + scene->data.head = + CALLOC_STRUCT(data_block); + + pipe_mutex_init(scene->mutex); + +#ifdef DEBUG + /* Do some scene limit sanity checks here */ + { + size_t maxBins = TILES_X * TILES_Y; + size_t maxCommandBytes = sizeof(struct cmd_block) * maxBins; + size_t maxCommandPlusData = maxCommandBytes + DATA_BLOCK_SIZE; + /* We'll need at least one command block per bin. Make sure that's + * less than the max allowed scene size. + */ + assert(maxCommandBytes < LP_SCENE_MAX_SIZE); + /* We'll also need space for at least one other data block */ + assert(maxCommandPlusData <= LP_SCENE_MAX_SIZE); + } +#endif + + return scene; +} + + +/** + * Free all data associated with the given scene, and the scene itself. + */ +void +lp_scene_destroy(struct lp_scene *scene) +{ + lp_fence_reference(&scene->fence, NULL); + pipe_mutex_destroy(scene->mutex); + assert(scene->data.head->next == NULL); + FREE(scene->data.head); + FREE(scene); +} + + +/** + * Check if the scene's bins are all empty. + * For debugging purposes. + */ +boolean +lp_scene_is_empty(struct lp_scene *scene ) +{ + unsigned x, y; + + for (y = 0; y < TILES_Y; y++) { + for (x = 0; x < TILES_X; x++) { + const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + if (bin->head) { + return FALSE; + } + } + } + return TRUE; +} + + +/* Returns true if there has ever been a failed allocation attempt in + * this scene. Used in triangle emit to avoid having to check success + * at each bin. + */ +boolean +lp_scene_is_oom(struct lp_scene *scene) +{ + return scene->alloc_failed; +} + + +/* Remove all commands from a bin. Tries to reuse some of the memory + * allocated to the bin, however. 
 */
void
lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y)
{
   struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);

   bin->last_state = NULL;
   /* Keep the tail command block (if any) as the new head so its
    * storage is reused; just mark it empty.
    */
   bin->head = bin->tail;
   if (bin->tail) {
      bin->tail->next = NULL;
      bin->tail->count = 0;
   }
}


/**
 * Map the scene's framebuffer surfaces in preparation for rasterization.
 * Fills in scene->cbufs[] and scene->zsbuf (map pointer, strides, pixel
 * size).  The mappings remain valid until lp_scene_end_rasterization().
 */
void
lp_scene_begin_rasterization(struct lp_scene *scene)
{
   const struct pipe_framebuffer_state *fb = &scene->fb;
   int i;

   //LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   for (i = 0; i < scene->fb.nr_cbufs; i++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[i];

      if (!cbuf) {
         /* Unbound color attachment: leave the slot unmapped. */
         scene->cbufs[i].stride = 0;
         scene->cbufs[i].layer_stride = 0;
         scene->cbufs[i].map = NULL;
         continue;
      }

      if (llvmpipe_resource_is_texture(cbuf->texture)) {
         scene->cbufs[i].stride = llvmpipe_resource_stride(cbuf->texture,
                                                           cbuf->u.tex.level);
         scene->cbufs[i].layer_stride = llvmpipe_layer_stride(cbuf->texture,
                                                              cbuf->u.tex.level);

         scene->cbufs[i].map = llvmpipe_resource_map(cbuf->texture,
                                                     cbuf->u.tex.level,
                                                     cbuf->u.tex.first_layer,
                                                     LP_TEX_USAGE_READ_WRITE);
         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
      }
      else {
         /* Rendering to a buffer resource: linear data, no mip levels or
          * layers.  Note the buffer width (width0) is stored in 'stride'.
          */
         struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
         unsigned pixstride = util_format_get_blocksize(cbuf->format);
         scene->cbufs[i].stride = cbuf->texture->width0;
         scene->cbufs[i].layer_stride = 0;
         scene->cbufs[i].map = lpr->data;
         scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
      }
   }

   if (fb->zsbuf) {
      struct pipe_surface *zsbuf = scene->fb.zsbuf;
      scene->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->u.tex.level);
      scene->zsbuf.layer_stride = llvmpipe_layer_stride(zsbuf->texture, zsbuf->u.tex.level);

      scene->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
                                               zsbuf->u.tex.level,
                                               zsbuf->u.tex.first_layer,
                                               LP_TEX_USAGE_READ_WRITE);
      scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
   }
}

+ + +/** + * Free all the temporary data in a scene. + */ +void +lp_scene_end_rasterization(struct lp_scene *scene ) +{ + int i, j; + + /* Unmap color buffers */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->cbufs[i].map) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + if (llvmpipe_resource_is_texture(cbuf->texture)) { + llvmpipe_resource_unmap(cbuf->texture, + cbuf->u.tex.level, + cbuf->u.tex.first_layer); + } + scene->cbufs[i].map = NULL; + } + } + + /* Unmap z/stencil buffer */ + if (scene->zsbuf.map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_resource_unmap(zsbuf->texture, + zsbuf->u.tex.level, + zsbuf->u.tex.first_layer); + scene->zsbuf.map = NULL; + } + + /* Reset all command lists: + */ + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->head = NULL; + bin->tail = NULL; + bin->last_state = NULL; + } + } + + /* If there are any bins which weren't cleared by the loop above, + * they will be caught (on debug builds at least) by this assert: + */ + assert(lp_scene_is_empty(scene)); + + /* Decrement texture ref counts + */ + { + struct resource_ref *ref; + int i, j = 0; + + for (ref = scene->resources; ref; ref = ref->next) { + for (i = 0; i < ref->count; i++) { + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("resource %d: %p %dx%d sz %d\n", + j, + (void *) ref->resource[i], + ref->resource[i]->width0, + ref->resource[i]->height0, + llvmpipe_resource_size(ref->resource[i])); + j++; + pipe_resource_reference(&ref->resource[i], NULL); + } + } + + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("scene %d resources, sz %d\n", + j, scene->resource_reference_size); + } + + /* Free all scene data blocks: + */ + { + struct data_block_list *list = &scene->data; + struct data_block *block, *tmp; + + for (block = list->head->next; block; block = tmp) { + tmp = block->next; + FREE(block); + } + + list->head->next = NULL; + list->head->used = 0; + } + + 
lp_fence_reference(&scene->fence, NULL); + + scene->resources = NULL; + scene->scene_size = 0; + scene->resource_reference_size = 0; + + scene->alloc_failed = FALSE; + + util_unreference_framebuffer_state( &scene->fb ); +} + + + + + + +struct cmd_block * +lp_scene_new_cmd_block( struct lp_scene *scene, + struct cmd_bin *bin ) +{ + struct cmd_block *block = lp_scene_alloc(scene, sizeof(struct cmd_block)); + if (block) { + if (bin->tail) { + bin->tail->next = block; + bin->tail = block; + } + else { + bin->head = block; + bin->tail = block; + } + //memset(block, 0, sizeof *block); + block->next = NULL; + block->count = 0; + } + return block; +} + + +struct data_block * +lp_scene_new_data_block( struct lp_scene *scene ) +{ + if (scene->scene_size + DATA_BLOCK_SIZE > LP_SCENE_MAX_SIZE) { + if (0) debug_printf("%s: failed\n", __FUNCTION__); + scene->alloc_failed = TRUE; + return NULL; + } + else { + struct data_block *block = MALLOC_STRUCT(data_block); + if (block == NULL) + return NULL; + + scene->scene_size += sizeof *block; + + block->used = 0; + block->next = scene->data.head; + scene->data.head = block; + + return block; + } +} + + +/** + * Return number of bytes used for all bin data within a scene. + * This does not include resources (textures) referenced by the scene. + */ +static unsigned +lp_scene_data_size( const struct lp_scene *scene ) +{ + unsigned size = 0; + const struct data_block *block; + for (block = scene->data.head; block; block = block->next) { + size += block->used; + } + return size; +} + + + +/** + * Add a reference to a resource by the scene. 
 *
 * \return TRUE on success (including when the resource was already
 *         referenced); FALSE on allocation failure or when the
 *         referenced-texture heuristic below advises a scene flush.
 */
boolean
lp_scene_add_resource_reference(struct lp_scene *scene,
                                struct pipe_resource *resource,
                                boolean initializing_scene)
{
   struct resource_ref *ref, **last = &scene->resources;
   int i;

   /* Look at existing resource blocks:
    */
   for (ref = scene->resources; ref; ref = ref->next) {
      last = &ref->next;

      /* Search for this resource:
       */
      for (i = 0; i < ref->count; i++)
         if (ref->resource[i] == resource)
            return TRUE;   /* already referenced -- nothing to do */

      if (ref->count < RESOURCE_REF_SZ) {
         /* If the block is half-empty, then append the reference here.
          */
         break;
      }
   }

   /* Create a new block if no half-empty block was found.
    */
   if (!ref) {
      assert(*last == NULL);
      *last = lp_scene_alloc(scene, sizeof *ref);
      if (*last == NULL)
          return FALSE;

      ref = *last;
      memset(ref, 0, sizeof *ref);
   }

   /* Append the reference to the reference block.
    */
   pipe_resource_reference(&ref->resource[ref->count++], resource);
   scene->resource_reference_size += llvmpipe_resource_size(resource);

   /* Heuristic to advise scene flushes.  This isn't helpful in the
    * initial setup of the scene, but after that point flush on the
    * next resource added which exceeds 64MB in referenced texture
    * data.
    */
   if (!initializing_scene &&
       scene->resource_reference_size >= LP_SCENE_MAX_RESOURCE_SIZE)
      return FALSE;

   return TRUE;
}


/**
 * Does this scene have a reference to the given resource?
 */
boolean
lp_scene_is_resource_referenced(const struct lp_scene *scene,
                                const struct pipe_resource *resource)
{
   const struct resource_ref *ref;
   int i;

   /* Linear scan over all reference blocks. */
   for (ref = scene->resources; ref; ref = ref->next) {
      for (i = 0; i < ref->count; i++)
         if (ref->resource[i] == resource)
            return TRUE;
   }

   return FALSE;
}




/** advance curr_x,y to the next bin */
static boolean
next_bin(struct lp_scene *scene)
{
   scene->curr_x++;
   if (scene->curr_x >= scene->tiles_x) {
      /* wrap to the start of the next row */
      scene->curr_x = 0;
      scene->curr_y++;
   }
   if (scene->curr_y >= scene->tiles_y) {
      /* no more bins */
      return FALSE;
   }
   return TRUE;
}


/** Reset the bin iterator; call before lp_scene_bin_iter_next(). */
void
lp_scene_bin_iter_begin( struct lp_scene *scene )
{
   scene->curr_x = scene->curr_y = -1;   /* -1 == "before the first bin" */
}


/**
 * Return pointer to next bin to be rendered.
 * The lp_scene::curr_x and ::curr_y fields will be advanced.
 * Multiple rendering threads will call this function to get a chunk
 * of work (a bin) to work on.
 * The iterator state is protected by scene->mutex, so each concurrent
 * caller receives a distinct bin (or NULL when all bins are consumed).
 */
struct cmd_bin *
lp_scene_bin_iter_next( struct lp_scene *scene , int *x, int *y)
{
   struct cmd_bin *bin = NULL;

   pipe_mutex_lock(scene->mutex);

   if (scene->curr_x < 0) {
      /* first bin */
      scene->curr_x = 0;
      scene->curr_y = 0;
   }
   else if (!next_bin(scene)) {
      /* no more bins left */
      goto end;
   }

   bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y);
   *x = scene->curr_x;
   *y = scene->curr_y;

end:
   /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/
   pipe_mutex_unlock(scene->mutex);
   return bin;
}


/**
 * Prepare an (empty) scene for binning: record the framebuffer state,
 * compute the active tile counts and the clamped maximum layer index.
 */
void lp_scene_begin_binning( struct lp_scene *scene,
                             struct pipe_framebuffer_state *fb, boolean discard )
{
   int i;
   unsigned max_layer = ~0;

   assert(lp_scene_is_empty(scene));

   scene->discard = discard;
   util_copy_framebuffer_state(&scene->fb, fb);

   scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
   scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
   assert(scene->tiles_x <= TILES_X);
   assert(scene->tiles_y <= TILES_Y);

   /*
    * Determine how many layers the fb has (used for clamping layer value).
    * OpenGL (but not d3d10) permits different amount of layers per rt, however
    * results are undefined if layer exceeds the amount of layers of ANY
    * attachment hence don't need separate per cbuf and zsbuf max.
    */
   for (i = 0; i < scene->fb.nr_cbufs; i++) {
      struct pipe_surface *cbuf = scene->fb.cbufs[i];
      if (cbuf) {
         if (llvmpipe_resource_is_texture(cbuf->texture)) {
            max_layer = MIN2(max_layer,
                             cbuf->u.tex.last_layer - cbuf->u.tex.first_layer);
         }
         else {
            /* buffer attachment: a single "layer" */
            max_layer = 0;
         }
      }
   }
   if (fb->zsbuf) {
      struct pipe_surface *zsbuf = scene->fb.zsbuf;
      max_layer = MIN2(max_layer, zsbuf->u.tex.last_layer - zsbuf->u.tex.first_layer);
   }
   scene->fb_max_layer = max_layer;
}


/** Finish binning; optionally dump scene statistics for debugging. */
void lp_scene_end_binning( struct lp_scene *scene )
{
   if (LP_DEBUG & DEBUG_SCENE) {
      debug_printf("rasterize scene:\n");
      debug_printf("  scene_size: %u\n",
                   scene->scene_size);
      debug_printf("  data size: %u\n",
                   lp_scene_data_size(scene));

      if (0)
         lp_debug_bins( scene );
   }
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
new file mode 100644
index 000000000..b1464bb54
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -0,0 +1,412 @@
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Binner data structures and bin-related functions. + * Note: the "setup" code is concerned with building scenes while + * The "rast" code is concerned with consuming/executing scenes. + */ + +#ifndef LP_SCENE_H +#define LP_SCENE_H + +#include "os/os_thread.h" +#include "lp_rast.h" +#include "lp_debug.h" + +struct lp_scene_queue; +struct lp_rast_state; + +/* We're limited to 2K by 2K for 32bit fixed point rasterization. + * Will need a 64-bit version for larger framebuffers. + */ +#define TILES_X (LP_MAX_WIDTH / TILE_SIZE) +#define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE) + + +/* Commands per command block (ideally so sizeof(cmd_block) is a power of + * two in size.) + */ +#define CMD_BLOCK_MAX 29 + +/* Bytes per data block. 
 */
#define DATA_BLOCK_SIZE (64 * 1024)

/* Scene temporary storage is clamped to this size:
 */
#define LP_SCENE_MAX_SIZE (9*1024*1024)

/* The maximum amount of texture storage referenced by a scene is
 * clamped to this size:
 */
#define LP_SCENE_MAX_RESOURCE_SIZE (64*1024*1024)


/* switch to a non-pointer value for this:
 */
typedef void (*lp_rast_cmd_func)( struct lp_rasterizer_task *,
                                  const union lp_rast_cmd_arg );


/** A fixed-capacity batch of binned commands (opcode + argument pairs). */
struct cmd_block {
   uint8_t cmd[CMD_BLOCK_MAX];                /**< command opcodes */
   union lp_rast_cmd_arg arg[CMD_BLOCK_MAX];  /**< one argument per opcode */
   unsigned count;                            /**< number of entries used */
   struct cmd_block *next;                    /**< next block in the bin's list */
};


/** One fixed-size chunk of the scene's bump-allocator storage. */
struct data_block {
   ubyte data[DATA_BLOCK_SIZE];
   unsigned used;              /**< bytes allocated so far within data[] */
   struct data_block *next;
};



/**
 * For each screen tile we have one of these bins.
 */
struct cmd_bin {
   const struct lp_rast_state *last_state;  /* most recent state set in bin */
   struct cmd_block *head;   /**< first command block in the bin */
   struct cmd_block *tail;   /**< last command block (new commands append here) */
};


/**
 * This stores bulk data which is used for all memory allocations
 * within a scene.
 *
 * Examples include triangle data and state data.  The commands in
 * the per-tile bins will point to chunks of data in this structure.
 *
 * Include the first block of data statically to ensure we can always
 * initiate a scene without relying on malloc succeeding.
 *
 * NOTE(review): lp_scene_create() appears to CALLOC a separate head
 * block rather than pointing 'head' at 'first' -- confirm whether the
 * static 'first' member is actually used anywhere.
 */
struct data_block_list {
   struct data_block first;
   struct data_block *head;   /**< block that current allocations come from */
};

struct resource_ref;

/**
 * All bins and bin data are contained here.
 * Per-bin data goes into the 'tile' bins.
 * Shared data goes into the 'data' buffer.
 *
 * When there are multiple threads, will want to double-buffer between
 * scenes:
 */
struct lp_scene {
   struct pipe_context *pipe;   /**< owning context */
   struct lp_fence *fence;      /**< fence for this scene's rasterization */

   /* The queries still active at end of scene */
   struct llvmpipe_query *active_queries[LP_MAX_ACTIVE_BINNED_QUERIES];
   unsigned num_active_queries;
   /* If queries were either active or there were begin/end query commands */
   boolean had_queries;

   /* Framebuffer mappings - valid only between begin_rasterization()
    * and end_rasterization().
    */
   struct {
      uint8_t *map;            /**< mapped surface memory */
      unsigned stride;         /**< row stride in bytes */
      unsigned layer_stride;   /**< per-layer stride in bytes */
      unsigned format_bytes;   /**< bytes per pixel/block */
   } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];

   /* The amount of layers in the fb (minimum of all attachments) */
   unsigned fb_max_layer;

   /** the framebuffer to render the scene into */
   struct pipe_framebuffer_state fb;

   /** list of resources referenced by the scene commands */
   struct resource_ref *resources;

   /** Total memory used by the scene (in bytes).  This sums all the
    * data blocks and counts all bins, state, resource references and
    * other random allocations within the scene.
    */
   unsigned scene_size;

   /** Sum of sizes of all resources referenced by the scene.  Sums
    * all the textures read by the scene:
    */
   unsigned resource_reference_size;

   boolean alloc_failed;   /**< any allocation in this scene ever failed */
   boolean discard;        /**< set from lp_scene_begin_binning()'s arg */
   /**
    * Number of active tiles in each dimension.
    * This basically the framebuffer size divided by tile size
    */
   unsigned tiles_x, tiles_y;

   int curr_x, curr_y;   /**< for iterating over bins */
   pipe_mutex mutex;     /**< protects the bin iterator state (curr_x/curr_y) */

   struct cmd_bin tile[TILES_X][TILES_Y];   /**< per-tile command bins */
   struct data_block_list data;             /**< bulk allocation pool */
};



struct lp_scene *lp_scene_create(struct pipe_context *pipe);

void lp_scene_destroy(struct lp_scene *scene);

boolean lp_scene_is_empty(struct lp_scene *scene );
boolean lp_scene_is_oom(struct lp_scene *scene );


struct data_block *lp_scene_new_data_block( struct lp_scene *scene );

struct cmd_block *lp_scene_new_cmd_block( struct lp_scene *scene,
                                          struct cmd_bin *bin );

boolean lp_scene_add_resource_reference(struct lp_scene *scene,
                                        struct pipe_resource *resource,
                                        boolean initializing_scene);

boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
                                        const struct pipe_resource *resource );


/**
 * Allocate space for a command/data in the bin's data buffer.
 * Grow the block list if needed.
 * \return pointer to 'size' fresh bytes, or NULL if the scene has hit
 *         its memory cap (LP_SCENE_MAX_SIZE).
 */
static inline void *
lp_scene_alloc( struct lp_scene *scene, unsigned size)
{
   struct data_block_list *list = &scene->data;
   struct data_block *block = list->head;

   assert(size <= DATA_BLOCK_SIZE);
   assert(block != NULL);

   if (LP_DEBUG & DEBUG_MEM)
      debug_printf("alloc %u block %u/%u tot %u/%u\n",
                   size, block->used, DATA_BLOCK_SIZE,
                   scene->scene_size, LP_SCENE_MAX_SIZE);

   if (block->used + size > DATA_BLOCK_SIZE) {
      /* current block is full -- grow the pool */
      block = lp_scene_new_data_block( scene );
      if (!block) {
         /* out of memory */
         return NULL;
      }
   }

   {
      ubyte *data = block->data + block->used;
      block->used += size;
      return data;
   }
}


/**
 * As above, but with specific alignment.
 */
static inline void *
lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
                        unsigned alignment )
{
   struct data_block_list *list = &scene->data;
   struct data_block *block = list->head;

   assert(block != NULL);

   if (LP_DEBUG & DEBUG_MEM)
      debug_printf("alloc %u block %u/%u tot %u/%u\n",
                   size + alignment - 1,
                   block->used, DATA_BLOCK_SIZE,
                   scene->scene_size, LP_SCENE_MAX_SIZE);

   /* Reserve worst-case padding so the aligned result still fits. */
   if (block->used + size + alignment - 1 > DATA_BLOCK_SIZE) {
      block = lp_scene_new_data_block( scene );
      if (!block)
         return NULL;
   }

   {
      ubyte *data = block->data + block->used;
      /* Distance from 'data' up to the next 'alignment' boundary
       * (alignment is assumed to be a power of two).
       */
      unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data;
      block->used += offset + size;
      return data + offset;
   }
}


/* Put back data if we decide not to use it, eg. culled triangles.
 * NOTE: only subtracts from the head block, so this is only valid for
 * the most recent allocation.
 */
static inline void
lp_scene_putback_data( struct lp_scene *scene, unsigned size)
{
   struct data_block_list *list = &scene->data;
   assert(list->head && list->head->used >= size);
   list->head->used -= size;
}


/** Return pointer to a particular tile's bin. */
static inline struct cmd_bin *
lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
{
   return &scene->tile[x][y];
}


/** Remove all commands from a bin */
void
lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y);


/* Add a command to bin[x][y].
 */
static inline boolean
lp_scene_bin_command( struct lp_scene *scene,
                      unsigned x, unsigned y,
                      unsigned cmd,
                      union lp_rast_cmd_arg arg )
{
   struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
   struct cmd_block *tail = bin->tail;

   assert(x < scene->tiles_x);
   assert(y < scene->tiles_y);
   assert(cmd < LP_RAST_OP_MAX);

   /* Start a new command block when the bin is empty or the tail is full. */
   if (tail == NULL || tail->count == CMD_BLOCK_MAX) {
      tail = lp_scene_new_cmd_block( scene, bin );
      if (!tail) {
         return FALSE;   /* scene out of memory */
      }
      assert(tail->count == 0);
   }

   {
      unsigned i = tail->count;
      tail->cmd[i] = cmd & LP_RAST_OP_MASK;
      tail->arg[i] = arg;
      tail->count++;
   }

   return TRUE;
}


/**
 * Bin a command, preceded by a SET_STATE command when the given state
 * differs from the last state binned in this bin (tracked via
 * bin->last_state so redundant state changes aren't binned).
 * \return FALSE on out-of-memory.
 */
static inline boolean
lp_scene_bin_cmd_with_state( struct lp_scene *scene,
                             unsigned x, unsigned y,
                             const struct lp_rast_state *state,
                             unsigned cmd,
                             union lp_rast_cmd_arg arg )
{
   struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);

   if (state != bin->last_state) {
      bin->last_state = state;
      if (!lp_scene_bin_command(scene, x, y,
                                LP_RAST_OP_SET_STATE,
                                lp_rast_arg_state(state)))
         return FALSE;
   }

   if (!lp_scene_bin_command( scene, x, y, cmd, arg ))
      return FALSE;

   return TRUE;
}


/* Add a command to all active bins.
+ */ +static inline boolean +lp_scene_bin_everywhere( struct lp_scene *scene, + unsigned cmd, + const union lp_rast_cmd_arg arg ) +{ + unsigned i, j; + for (i = 0; i < scene->tiles_x; i++) { + for (j = 0; j < scene->tiles_y; j++) { + if (!lp_scene_bin_command( scene, i, j, cmd, arg )) + return FALSE; + } + } + + return TRUE; +} + + +static inline unsigned +lp_scene_get_num_bins( const struct lp_scene *scene ) +{ + return scene->tiles_x * scene->tiles_y; +} + + +void +lp_scene_bin_iter_begin( struct lp_scene *scene ); + +struct cmd_bin * +lp_scene_bin_iter_next( struct lp_scene *scene, int *x, int *y ); + + + +/* Begin/end binning of a scene + */ +void +lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb, + boolean discard ); + +void +lp_scene_end_binning( struct lp_scene *scene ); + + +/* Begin/end rasterization of a scene + */ +void +lp_scene_begin_rasterization(struct lp_scene *scene); + +void +lp_scene_end_rasterization(struct lp_scene *scene ); + + + + + +#endif /* LP_BIN_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c new file mode 100644 index 000000000..975db43c4 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.c @@ -0,0 +1,124 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Scene queue. We'll use two queues. One contains "full" scenes which + * are produced by the "setup" code. The other contains "empty" scenes + * which are produced by the "rast" code when it finishes rendering a scene. 
 */

#include "util/u_ringbuffer.h"
#include "util/u_memory.h"
#include "lp_scene_queue.h"



/* Maximum number of scene packets the ring buffer is sized for. */
#define MAX_SCENE_QUEUE 4

/** Ring-buffer packet wrapping a scene pointer. */
struct scene_packet {
   struct util_packet header;   /**< dwords/data24 header the ringbuffer requires */
   struct lp_scene *scene;      /**< payload: the queued scene */
};

/**
 * A queue of scenes
 */
struct lp_scene_queue
{
   struct util_ringbuffer *ring;   /**< underlying ring buffer */
};



/** Allocate a new scene queue */
struct lp_scene_queue *
lp_scene_queue_create(void)
{
   struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue);
   if (queue == NULL)
      return NULL;

   /* Ring size is expressed in dwords, hence the /4. */
   queue->ring = util_ringbuffer_create( MAX_SCENE_QUEUE *
                                         sizeof( struct scene_packet ) / 4);
   if (queue->ring == NULL)
      goto fail;

   return queue;

fail:
   FREE(queue);
   return NULL;
}


/** Delete a scene queue */
void
lp_scene_queue_destroy(struct lp_scene_queue *queue)
{
   util_ringbuffer_destroy(queue->ring);
   FREE(queue);
}


/** Remove first lp_scene from head of queue */
struct lp_scene *
lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait)
{
   struct scene_packet packet;
   enum pipe_error ret;

   packet.scene = NULL;

   /* 'wait' presumably makes this block until a packet arrives --
    * see util_ringbuffer_dequeue() for the exact semantics.
    */
   ret = util_ringbuffer_dequeue(queue->ring,
                                 &packet.header,
                                 sizeof packet / 4,
                                 wait );
   if (ret != PIPE_OK)
      return NULL;

   return packet.scene;
}


/** Add an lp_scene to tail of queue */
void
lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene)
{
   struct scene_packet packet;

   /* header.dwords carries the packet size in dwords */
   packet.header.dwords = sizeof packet / 4;
   packet.header.data24 = 0;
   packet.scene = scene;

   util_ringbuffer_enqueue(queue->ring, &packet.header);
}




diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h
new file mode 100644
index 000000000..dd9ab593b
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_scene_queue.h
@@ -0,0 +1,53 @@
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef LP_SCENE_QUEUE +#define LP_SCENE_QUEUE + +#include "pipe/p_compiler.h" + +struct lp_scene_queue; +struct lp_scene; + + +struct lp_scene_queue * +lp_scene_queue_create(void); + +void +lp_scene_queue_destroy(struct lp_scene_queue *queue); + +struct lp_scene * +lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait); + +void +lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene); + + + + +#endif /* LP_BIN_QUEUE */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c new file mode 100644 index 000000000..14eeab033 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.c @@ -0,0 +1,623 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" +#include "util/u_format.h" +#include "util/u_string.h" +#include "util/u_format_s3tc.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "draw/draw_context.h" +#include "gallivm/lp_bld_type.h" + +#include "os/os_misc.h" +#include "os/os_time.h" +#include "lp_texture.h" +#include "lp_fence.h" +#include "lp_jit.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_debug.h" +#include "lp_public.h" +#include "lp_limits.h" +#include "lp_rast.h" + +#include "state_tracker/sw_winsys.h" + +#ifdef DEBUG +int LP_DEBUG = 0; + +static const struct debug_named_value lp_debug_flags[] = { + { "pipe", DEBUG_PIPE, NULL }, + { "tgsi", DEBUG_TGSI, NULL }, + { "tex", DEBUG_TEX, NULL }, + { "setup", DEBUG_SETUP, NULL }, + { "rast", DEBUG_RAST, NULL }, + { "query", DEBUG_QUERY, NULL }, + { "screen", DEBUG_SCREEN, NULL }, + { "counters", DEBUG_COUNTERS, NULL }, + { "scene", DEBUG_SCENE, NULL }, + { "fence", DEBUG_FENCE, NULL }, + { "mem", DEBUG_MEM, NULL }, + { "fs", DEBUG_FS, NULL }, + DEBUG_NAMED_VALUE_END +}; +#endif + +int LP_PERF = 0; +static const struct debug_named_value lp_perf_flags[] = { + { "texmem", PERF_TEX_MEM, NULL }, + { "no_mipmap", PERF_NO_MIPMAPS, NULL }, + { "no_linear", PERF_NO_LINEAR, NULL }, + { "no_mip_linear", PERF_NO_MIP_LINEAR, NULL }, + { "no_tex", PERF_NO_TEX, NULL }, + { "no_blend", PERF_NO_BLEND, NULL }, + { "no_depth", PERF_NO_DEPTH, NULL }, + { "no_alphatest", PERF_NO_ALPHATEST, NULL }, + DEBUG_NAMED_VALUE_END +}; + + +static const char * +llvmpipe_get_vendor(struct pipe_screen *screen) +{ + 
return "VMware, Inc."; +} + + +static const char * +llvmpipe_get_name(struct pipe_screen *screen) +{ + static char buf[100]; + util_snprintf(buf, sizeof(buf), "llvmpipe (LLVM %u.%u, %u bits)", + HAVE_LLVM >> 8, HAVE_LLVM & 0xff, + lp_native_vector_width ); + return buf; +} + + +static int +llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return PIPE_MAX_SO_BUFFERS; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return PIPE_MAX_COLOR_BUFS; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_QUERY_TIME_ELAPSED: + return 0; + case PIPE_CAP_QUERY_TIMESTAMP: + return 1; + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 0; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return 1; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return LP_MAX_TEXTURE_2D_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return LP_MAX_TEXTURE_3D_LEVELS; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return LP_MAX_TEXTURE_CUBE_LEVELS; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return LP_MAX_TEXTURE_ARRAY_LAYERS; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; + case PIPE_CAP_DEPTH_CLIP_DISABLE: + 
return 1; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + return 0; + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return 1; + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return 1; + /* this is a lie could support arbitrary large offsets */ + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -32; + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 31; + case PIPE_CAP_CONDITIONAL_RENDER: + return 1; + case PIPE_CAP_TEXTURE_BARRIER: + return 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 16*4; + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 1024; + case PIPE_CAP_MAX_VERTEX_STREAMS: + return 1; + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return 1; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + return 0; + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 330; + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + return 0; + case PIPE_CAP_COMPUTE: + return 0; + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + return 1; + case PIPE_CAP_USER_CONSTANT_BUFFERS: + return 0; + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_TGSI_TEXCOORD: + return 0; + case PIPE_CAP_DRAW_INDIRECT: + return 1; + + case PIPE_CAP_CUBE_MAP_ARRAY: + return 1; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 0; + case 
PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 1; + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + case PIPE_CAP_MAX_VIEWPORTS: + return PIPE_MAX_VIEWPORTS; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_NATIVE; + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + return 1; + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + return 1; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + return 4; + case PIPE_CAP_TEXTURE_GATHER_SM5: + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + return 0; + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + return 1; + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + return 0; + case PIPE_CAP_SAMPLER_VIEW_TARGET: + return 1; + case PIPE_CAP_FAKE_SW_MSAA: + return 1; + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + return 1; + + case PIPE_CAP_VENDOR_ID: + return 0xFFFFFFFF; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 0; + case PIPE_CAP_VIDEO_MEMORY: { + /* XXX: Do we want to return the full amount of system memory ? 
*/ + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 0; + case PIPE_CAP_CLIP_HALFZ: + return 1; + case PIPE_CAP_VERTEXID_NOBASE: + return 0; + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_DEPTH_BOUNDS_TEST: + return 0; + } + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAP %d query\n", param); + return 0; +} + +static int +llvmpipe_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) +{ + switch(shader) + { + case PIPE_SHADER_FRAGMENT: + switch (param) { + default: + return gallivm_get_shader_param(param); + } + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_GEOMETRY: + switch (param) { + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + /* At this time, the draw module and llvmpipe driver only + * support vertex shader texture lookups when LLVM is enabled in + * the draw module. 
+ */ + if (debug_get_bool_option("DRAW_USE_LLVM", TRUE)) + return PIPE_MAX_SAMPLERS; + else + return 0; + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + if (debug_get_bool_option("DRAW_USE_LLVM", TRUE)) + return PIPE_MAX_SHADER_SAMPLER_VIEWS; + else + return 0; + default: + return draw_get_shader_param(shader, param); + } + default: + return 0; + } +} + +static float +llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAPF_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0; /* not actually signficant at this time */ + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0; + } + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAP %d query\n", param); + return 0.0; +} + + +/** + * Query format support for creating a texture, drawing surface, etc. 
+ * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +llvmpipe_is_format_supported( struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bind) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct sw_winsys *winsys = screen->winsys; + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if (!format_desc) + return FALSE; + + assert(target == PIPE_BUFFER || + target == PIPE_TEXTURE_1D || + target == PIPE_TEXTURE_1D_ARRAY || + target == PIPE_TEXTURE_2D || + target == PIPE_TEXTURE_2D_ARRAY || + target == PIPE_TEXTURE_RECT || + target == PIPE_TEXTURE_3D || + target == PIPE_TEXTURE_CUBE || + target == PIPE_TEXTURE_CUBE_ARRAY); + + if (sample_count > 1) + return FALSE; + + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + /* this is a lie actually other formats COULD exist where we would fail */ + if (format_desc->nr_channels < 3) + return FALSE; + } + else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) + return FALSE; + + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN && + format != PIPE_FORMAT_R11G11B10_FLOAT) + return FALSE; + + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + + if (format_desc->is_mixed) + return FALSE; + + if (!format_desc->is_array && !format_desc->is_bitmask && + format != PIPE_FORMAT_R11G11B10_FLOAT) + return FALSE; + + /* + * XXX refuse formats known to crash in generate_unswizzled_blend(). + * These include all 3-channel 24bit RGB8 variants, plus 48bit + * (except those using floats) 3-channel RGB16 variants (the latter + * seems to be more of a llvm bug though). + * The mesa state tracker only seems to use these for SINT/UINT formats. 
+ */ + if (format_desc->is_array && format_desc->nr_channels == 3) { + if (format_desc->block.bits == 24 || (format_desc->block.bits == 48 && + !util_format_is_float(format))) { + return FALSE; + } + } + } + + if (bind & PIPE_BIND_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, bind, format)) + return FALSE; + } + + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) + return FALSE; + + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + /* TODO: Support stencil-only formats */ + if (format_desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { + return FALSE; + } + } + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { + /* Software decoding is not hooked up. */ + return FALSE; + } + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && + format != PIPE_FORMAT_ETC1_RGB8) + return FALSE; + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { + return util_format_s3tc_enabled; + } + + /* + * Everything can be supported by u_format + * (those without fetch_rgba_float might be not but shouldn't hit that) + */ + + return TRUE; +} + + + + +static void +llvmpipe_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *context_private, + struct pipe_box *sub_box) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct sw_winsys *winsys = screen->winsys; + struct llvmpipe_resource *texture = llvmpipe_resource(resource); + + assert(texture->dt); + if (texture->dt) + winsys->displaytarget_display(winsys, texture->dt, context_private, sub_box); +} + +static void +llvmpipe_destroy_screen( struct pipe_screen *_screen ) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct sw_winsys *winsys = screen->winsys; + + if (screen->rast) + lp_rast_destroy(screen->rast); + + lp_jit_screen_cleanup(screen); + + if(winsys->destroy) + winsys->destroy(winsys); + + 
pipe_mutex_destroy(screen->rast_mutex); + + FREE(screen); +} + + + + +/** + * Fence reference counting. + */ +static void +llvmpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct lp_fence **old = (struct lp_fence **) ptr; + struct lp_fence *f = (struct lp_fence *) fence; + + lp_fence_reference(old, f); +} + + +/** + * Wait for the fence to finish. + * NOTE(review): only timeout == 0 acts as a non-blocking poll; any nonzero + * timeout blocks until the fence signals -- the actual duration is ignored. + */ +static boolean +llvmpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + uint64_t timeout) +{ + struct lp_fence *f = (struct lp_fence *) fence_handle; + + if (!timeout) + return lp_fence_signalled(f); + + lp_fence_wait(f); + return TRUE; +} + +static uint64_t +llvmpipe_get_timestamp(struct pipe_screen *_screen) +{ + return os_time_get_nano(); +} + +/** + * Create a new pipe_screen object + * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). + */ +struct pipe_screen * +llvmpipe_create_screen(struct sw_winsys *winsys) +{ + struct llvmpipe_screen *screen; + + util_cpu_detect(); + +#ifdef DEBUG + LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 ); +#endif + + LP_PERF = debug_get_flags_option("LP_PERF", lp_perf_flags, 0 ); + + screen = CALLOC_STRUCT(llvmpipe_screen); + if (!screen) + return NULL; + + if (!lp_jit_screen_init(screen)) { + FREE(screen); + return NULL; + } + + screen->winsys = winsys; + + screen->base.destroy = llvmpipe_destroy_screen; + + screen->base.get_name = llvmpipe_get_name; + screen->base.get_vendor = llvmpipe_get_vendor; + screen->base.get_device_vendor = llvmpipe_get_vendor; // TODO should be the CPU vendor + screen->base.get_param = llvmpipe_get_param; + screen->base.get_shader_param = llvmpipe_get_shader_param; + screen->base.get_paramf = llvmpipe_get_paramf; + screen->base.is_format_supported = llvmpipe_is_format_supported; + + screen->base.context_create = llvmpipe_create_context; + screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; + 
screen->base.fence_reference = llvmpipe_fence_reference; + screen->base.fence_finish = llvmpipe_fence_finish; + + screen->base.get_timestamp = llvmpipe_get_timestamp; + + llvmpipe_init_screen_resource_funcs(&screen->base); + + screen->num_threads = util_cpu_caps.nr_cpus > 1 ? util_cpu_caps.nr_cpus : 0; +#ifdef PIPE_SUBSYSTEM_EMBEDDED + screen->num_threads = 0; +#endif + screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads); + screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS); + + screen->rast = lp_rast_create(screen->num_threads); + if (!screen->rast) { + lp_jit_screen_cleanup(screen); + FREE(screen); + return NULL; + } + pipe_mutex_init(screen->rast_mutex); + + util_format_s3tc_init(); + + return &screen->base; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h new file mode 100644 index 000000000..00bf20c8c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_screen.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @author Jose Fonseca <jfonseca@vmware.com> + * @author Keith Whitwell <keithw@vmware.com> + */ + +#ifndef LP_SCREEN_H +#define LP_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" +#include "os/os_thread.h" +#include "gallivm/lp_bld.h" + + +struct sw_winsys; + + +struct llvmpipe_screen +{ + struct pipe_screen base; + + struct sw_winsys *winsys; + + unsigned num_threads; + + /* Increments whenever textures are modified. Contexts can track this. + */ + unsigned timestamp; + + struct lp_rasterizer *rast; + pipe_mutex rast_mutex; +}; + + + + +static inline struct llvmpipe_screen * +llvmpipe_screen( struct pipe_screen *pipe ) +{ + return (struct llvmpipe_screen *)pipe; +} + + + +#endif /* LP_SCREEN_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c new file mode 100644 index 000000000..4c8167a9e --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.c @@ -0,0 +1,1493 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Tiling engine. + * + * Builds per-tile display lists and executes them on calls to + * lp_setup_flush(). 
+ */ + +#include <limits.h> + +#include "pipe/p_defines.h" +#include "util/u_framebuffer.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "draw/draw_pipe.h" +#include "os/os_time.h" +#include "lp_context.h" +#include "lp_memory.h" +#include "lp_scene.h" +#include "lp_texture.h" +#include "lp_debug.h" +#include "lp_fence.h" +#include "lp_query.h" +#include "lp_rast.h" +#include "lp_setup_context.h" +#include "lp_screen.h" +#include "lp_state.h" +#include "state_tracker/sw_winsys.h" + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" + + +static boolean set_scene_state( struct lp_setup_context *, enum setup_state, + const char *reason); +static boolean try_update_scene_state( struct lp_setup_context *setup ); + + +static void +lp_setup_get_empty_scene(struct lp_setup_context *setup) +{ + assert(setup->scene == NULL); + + setup->scene_idx++; + setup->scene_idx %= Elements(setup->scenes); + + setup->scene = setup->scenes[setup->scene_idx]; + + if (setup->scene->fence) { + if (LP_DEBUG & DEBUG_SETUP) + debug_printf("%s: wait for scene %d\n", + __FUNCTION__, setup->scene->fence->id); + + lp_fence_wait(setup->scene->fence); + } + + lp_scene_begin_binning(setup->scene, &setup->fb, setup->rasterizer_discard); + +} + + +static void +first_triangle( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + assert(setup->state == SETUP_ACTIVE); + lp_setup_choose_triangle( setup ); + setup->triangle( setup, v0, v1, v2 ); +} + +static void +first_line( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + assert(setup->state == SETUP_ACTIVE); + lp_setup_choose_line( setup ); + setup->line( setup, v0, v1 ); +} + +static void +first_point( struct lp_setup_context *setup, + const float (*v0)[4]) +{ + assert(setup->state == SETUP_ACTIVE); + lp_setup_choose_point( setup ); + setup->point( setup, v0 ); +} + +void lp_setup_reset( struct 
lp_setup_context *setup ) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + /* Reset derived state */ + for (i = 0; i < Elements(setup->constants); ++i) { + setup->constants[i].stored_size = 0; + setup->constants[i].stored_data = NULL; + } + setup->fs.stored = NULL; + setup->dirty = ~0; + + /* no current bin */ + setup->scene = NULL; + + /* Reset some state: + */ + memset(&setup->clear, 0, sizeof setup->clear); + + /* Have an explicit "start-binning" call and get rid of this + * pointer twiddling? + */ + setup->line = first_line; + setup->point = first_point; + setup->triangle = first_triangle; +} + + +/** Rasterize all scene's bins */ +static void +lp_setup_rasterize_scene( struct lp_setup_context *setup ) +{ + struct lp_scene *scene = setup->scene; + struct llvmpipe_screen *screen = llvmpipe_screen(scene->pipe->screen); + + scene->num_active_queries = setup->active_binned_queries; + memcpy(scene->active_queries, setup->active_queries, + scene->num_active_queries * sizeof(scene->active_queries[0])); + + lp_scene_end_binning(scene); + + lp_fence_reference(&setup->last_fence, scene->fence); + + if (setup->last_fence) + setup->last_fence->issued = TRUE; + + pipe_mutex_lock(screen->rast_mutex); + + /* FIXME: We enqueue the scene then wait on the rasterizer to finish. + * This means we never actually run any vertex stuff in parallel to + * rasterization (not in the same context at least) which is what the + * multiple scenes per setup is about - when we get a new empty scene + * any old one is already empty again because we waited here for + * raster tasks to be finished. Ideally, we shouldn't need to wait here + * and rely on fences elsewhere when waiting is necessary. + * Certainly, lp_scene_end_rasterization() would need to be deferred too + * and there's probably other bits why this doesn't actually work. 
+ */ + lp_rast_queue_scene(screen->rast, scene); + lp_rast_finish(screen->rast); + pipe_mutex_unlock(screen->rast_mutex); + + lp_scene_end_rasterization(setup->scene); + lp_setup_reset( setup ); + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + + +static boolean +begin_binning( struct lp_setup_context *setup ) +{ + struct lp_scene *scene = setup->scene; + boolean need_zsload = FALSE; + boolean ok; + + assert(scene); + assert(scene->fence == NULL); + + /* Always create a fence: + */ + scene->fence = lp_fence_create(MAX2(1, setup->num_threads)); + if (!scene->fence) + return FALSE; + + ok = try_update_scene_state(setup); + if (!ok) + return FALSE; + + if (setup->fb.zsbuf && + ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + util_format_is_depth_and_stencil(setup->fb.zsbuf->format)) + need_zsload = TRUE; + + LP_DBG(DEBUG_SETUP, "%s color clear bufs: %x depth: %s\n", __FUNCTION__, + setup->clear.flags >> 2, + need_zsload ? "clear": "load"); + + if (setup->clear.flags & PIPE_CLEAR_COLOR) { + unsigned cbuf; + for (cbuf = 0; cbuf < setup->fb.nr_cbufs; cbuf++) { + assert(PIPE_CLEAR_COLOR0 == 1 << 2); + if (setup->clear.flags & (1 << (2 + cbuf))) { + union lp_rast_cmd_arg clearrb_arg; + struct lp_rast_clear_rb *cc_scene = + (struct lp_rast_clear_rb *) + lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb)); + + if (!cc_scene) { + return FALSE; + } + + cc_scene->cbuf = cbuf; + cc_scene->color_val = setup->clear.color_val[cbuf]; + clearrb_arg.clear_rb = cc_scene; + + if (!lp_scene_bin_everywhere(scene, + LP_RAST_OP_CLEAR_COLOR, + clearrb_arg)) + return FALSE; + } + } + } + + if (setup->fb.zsbuf) { + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) { + ok = lp_scene_bin_everywhere( scene, + LP_RAST_OP_CLEAR_ZSTENCIL, + lp_rast_arg_clearzs( + setup->clear.zsvalue, + setup->clear.zsmask)); + if (!ok) + return FALSE; + } + } + + setup->clear.flags = 0; + setup->clear.zsmask = 0; + setup->clear.zsvalue = 0; + + scene->had_queries = 
!!setup->active_binned_queries; + + LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__); + return TRUE; +} + + +/* This basically bins and then flushes any outstanding full-screen + * clears. + * + * TODO: fast path for fullscreen clears and no triangles. + */ +static boolean +execute_clears( struct lp_setup_context *setup ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + return begin_binning( setup ); +} + +/* Debug names for enum setup_state, indexed by state value. + * NOTE(review): file-scope table with external linkage and a very generic + * name; presumably only used in this file -- consider making it static. + */ +const char *states[] = { + "FLUSHED", + "CLEARED", + "ACTIVE " +}; + + +static boolean +set_scene_state( struct lp_setup_context *setup, + enum setup_state new_state, + const char *reason) +{ + unsigned old_state = setup->state; + + if (old_state == new_state) + return TRUE; + + if (LP_DEBUG & DEBUG_SCENE) { + debug_printf("%s old %s new %s%s%s\n", + __FUNCTION__, + states[old_state], + states[new_state], + (new_state == SETUP_FLUSHED) ? ": " : "", + (new_state == SETUP_FLUSHED) ? reason : ""); + + if (new_state == SETUP_FLUSHED && setup->scene) + lp_debug_draw_bins_by_cmd_length(setup->scene); + } + + /* wait for a free/empty scene + */ + if (old_state == SETUP_FLUSHED) + lp_setup_get_empty_scene(setup); + + switch (new_state) { + case SETUP_CLEARED: + break; + + case SETUP_ACTIVE: + if (!begin_binning( setup )) + goto fail; + break; + + case SETUP_FLUSHED: + if (old_state == SETUP_CLEARED) + if (!execute_clears( setup )) + goto fail; + + lp_setup_rasterize_scene( setup ); + assert(setup->scene == NULL); + break; + + default: + assert(0 && "invalid setup state mode"); + goto fail; + } + + setup->state = new_state; + return TRUE; + +fail: + if (setup->scene) { + lp_scene_end_rasterization(setup->scene); + setup->scene = NULL; + } + + setup->state = SETUP_FLUSHED; + lp_setup_reset( setup ); + return FALSE; +} + + +void +lp_setup_flush( struct lp_setup_context *setup, + struct pipe_fence_handle **fence, + const char *reason) +{ + set_scene_state( setup, SETUP_FLUSHED, reason ); + + if (fence) { + lp_fence_reference((struct lp_fence **)fence, setup->last_fence); + 
} +} + + +void +lp_setup_bind_framebuffer( struct lp_setup_context *setup, + const struct pipe_framebuffer_state *fb ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + /* Flush any old scene. + */ + set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ ); + + /* + * Ensure the old scene is not reused. + */ + assert(!setup->scene); + + /* Set new state. This will be picked up later when we next need a + * scene. + */ + util_copy_framebuffer_state(&setup->fb, fb); + setup->framebuffer.x0 = 0; + setup->framebuffer.y0 = 0; + setup->framebuffer.x1 = fb->width-1; + setup->framebuffer.y1 = fb->height-1; + setup->dirty |= LP_SETUP_NEW_SCISSOR; +} + + +/* + * Try to clear one color buffer of the attached fb, either by binning a clear + * command or queuing up the clear for later (when binning is started). + */ +static boolean +lp_setup_try_clear_color_buffer(struct lp_setup_context *setup, + const union pipe_color_union *color, + unsigned cbuf) +{ + union lp_rast_cmd_arg clearrb_arg; + union util_color uc; + enum pipe_format format = setup->fb.cbufs[cbuf]->format; + + LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); + + if (util_format_is_pure_integer(format)) { + /* + * We expect int/uint clear values here, though some APIs + * might disagree (but in any case util_pack_color() + * couldn't handle it)... + */ + if (util_format_is_pure_sint(format)) { + util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1); + } + else { + assert(util_format_is_pure_uint(format)); + util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1); + } + } + else { + util_pack_color(color->f, format, &uc); + } + + if (setup->state == SETUP_ACTIVE) { + struct lp_scene *scene = setup->scene; + + /* Add the clear to existing scene. In the unusual case where + * both color and depth-stencil are being cleared when there's + * already been some rendering, we could discard the currently + * binned scene and start again, but I don't see that as being + * a common usage. 
+ */ + struct lp_rast_clear_rb *cc_scene = + (struct lp_rast_clear_rb *) + lp_scene_alloc_aligned(scene, sizeof(struct lp_rast_clear_rb), 8); + + if (!cc_scene) { + return FALSE; + } + + cc_scene->cbuf = cbuf; + cc_scene->color_val = uc; + clearrb_arg.clear_rb = cc_scene; + + if (!lp_scene_bin_everywhere(scene, + LP_RAST_OP_CLEAR_COLOR, + clearrb_arg)) + return FALSE; + } + else { + /* Put ourselves into the 'pre-clear' state, specifically to try + * and accumulate multiple clears to color and depth_stencil + * buffers which the app or state-tracker might issue + * separately. + */ + set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ ); + + assert(PIPE_CLEAR_COLOR0 == (1 << 2)); + setup->clear.flags |= 1 << (cbuf + 2); + setup->clear.color_val[cbuf] = uc; + } + + return TRUE; +} + +static boolean +lp_setup_try_clear_zs(struct lp_setup_context *setup, + double depth, + unsigned stencil, + unsigned flags) +{ + uint64_t zsmask = 0; + uint64_t zsvalue = 0; + uint32_t zmask32; + uint8_t smask8; + + LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state); + + zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0; + smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0; + + zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); + + zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format, + zmask32, + smask8); + + zsvalue &= zsmask; + + if (setup->state == SETUP_ACTIVE) { + struct lp_scene *scene = setup->scene; + + /* Add the clear to existing scene. In the unusual case where + * both color and depth-stencil are being cleared when there's + * already been some rendering, we could discard the currently + * binned scene and start again, but I don't see that as being + * a common usage. 
+ */ + if (!lp_scene_bin_everywhere(scene, + LP_RAST_OP_CLEAR_ZSTENCIL, + lp_rast_arg_clearzs(zsvalue, zsmask))) + return FALSE; + } + else { + /* Put ourselves into the 'pre-clear' state, specifically to try + * and accumulate multiple clears to color and depth_stencil + * buffers which the app or state-tracker might issue + * separately. + */ + set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ ); + + setup->clear.flags |= flags; + + setup->clear.zsmask |= zsmask; + setup->clear.zsvalue = + (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask); + } + + return TRUE; +} + +void +lp_setup_clear( struct lp_setup_context *setup, + const union pipe_color_union *color, + double depth, + unsigned stencil, + unsigned flags ) +{ + unsigned i; + + /* + * Note any of these (max 9) clears could fail (but at most there should + * be just one failure!). This avoids doing the previous succeeded + * clears again (we still clear tiles twice if a clear command succeeded + * partially for one buffer). + */ + if (flags & PIPE_CLEAR_DEPTHSTENCIL) { + unsigned flagszs = flags & PIPE_CLEAR_DEPTHSTENCIL; + if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs)) { + lp_setup_flush(setup, NULL, __FUNCTION__); + + if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs)) + assert(0); + } + } + + if (flags & PIPE_CLEAR_COLOR) { + assert(PIPE_CLEAR_COLOR0 == (1 << 2)); + for (i = 0; i < setup->fb.nr_cbufs; i++) { + if ((flags & (1 << (2 + i))) && setup->fb.cbufs[i]) { + if (!lp_setup_try_clear_color_buffer(setup, color, i)) { + lp_setup_flush(setup, NULL, __FUNCTION__); + + if (!lp_setup_try_clear_color_buffer(setup, color, i)) + assert(0); + } + } + } + } +} + + + +void +lp_setup_set_triangle_state( struct lp_setup_context *setup, + unsigned cull_mode, + boolean ccw_is_frontface, + boolean scissor, + boolean half_pixel_center, + boolean bottom_edge_rule) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->ccw_is_frontface = ccw_is_frontface; + setup->cullmode = cull_mode; + 
setup->triangle = first_triangle; + setup->pixel_offset = half_pixel_center ? 0.5f : 0.0f; + setup->bottom_edge_rule = bottom_edge_rule; + + if (setup->scissor_test != scissor) { + setup->dirty |= LP_SETUP_NEW_SCISSOR; + setup->scissor_test = scissor; + } +} + +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->line_width = line_width; +} + +void +lp_setup_set_point_state( struct lp_setup_context *setup, + float point_size, + boolean point_size_per_vertex, + uint sprite_coord_enable, + uint sprite_coord_origin) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->point_size = point_size; + setup->sprite_coord_enable = sprite_coord_enable; + setup->sprite_coord_origin = sprite_coord_origin; + setup->point_size_per_vertex = point_size_per_vertex; +} + +void +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->setup.variant = variant; +} + +void +lp_setup_set_fs_variant( struct lp_setup_context *setup, + struct lp_fragment_shader_variant *variant) +{ + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, + variant); + /* FIXME: reference count */ + + setup->fs.current.variant = variant; + setup->dirty |= LP_SETUP_NEW_FS; +} + +void +lp_setup_set_fs_constants(struct lp_setup_context *setup, + unsigned num, + struct pipe_constant_buffer *buffers) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffers); + + assert(num <= Elements(setup->constants)); + + for (i = 0; i < num; ++i) { + util_copy_constant_buffer(&setup->constants[i].current, &buffers[i]); + } + for (; i < Elements(setup->constants); i++) { + util_copy_constant_buffer(&setup->constants[i].current, NULL); + } + setup->dirty |= LP_SETUP_NEW_CONSTANTS; +} + + +void +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, + float alpha_ref_value ) +{ + LP_DBG(DEBUG_SETUP, "%s %f\n", 
__FUNCTION__, alpha_ref_value); + + if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) { + setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value; + setup->dirty |= LP_SETUP_NEW_FS; + } +} + +void +lp_setup_set_stencil_ref_values( struct lp_setup_context *setup, + const ubyte refs[2] ) +{ + LP_DBG(DEBUG_SETUP, "%s %d %d\n", __FUNCTION__, refs[0], refs[1]); + + if (setup->fs.current.jit_context.stencil_ref_front != refs[0] || + setup->fs.current.jit_context.stencil_ref_back != refs[1]) { + setup->fs.current.jit_context.stencil_ref_front = refs[0]; + setup->fs.current.jit_context.stencil_ref_back = refs[1]; + setup->dirty |= LP_SETUP_NEW_FS; + } +} + +void +lp_setup_set_blend_color( struct lp_setup_context *setup, + const struct pipe_blend_color *blend_color ) +{ + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(blend_color); + + if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) { + memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color); + setup->dirty |= LP_SETUP_NEW_BLEND_COLOR; + } +} + + +void +lp_setup_set_scissors( struct lp_setup_context *setup, + const struct pipe_scissor_state *scissors ) +{ + unsigned i; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(scissors); + + for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) { + setup->scissors[i].x0 = scissors[i].minx; + setup->scissors[i].x1 = scissors[i].maxx-1; + setup->scissors[i].y0 = scissors[i].miny; + setup->scissors[i].y1 = scissors[i].maxy-1; + } + setup->dirty |= LP_SETUP_NEW_SCISSOR; +} + + +void +lp_setup_set_flatshade_first( struct lp_setup_context *setup, + boolean flatshade_first ) +{ + setup->flatshade_first = flatshade_first; +} + +void +lp_setup_set_rasterizer_discard( struct lp_setup_context *setup, + boolean rasterizer_discard ) +{ + if (setup->rasterizer_discard != rasterizer_discard) { + setup->rasterizer_discard = rasterizer_discard; + set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ ); + } +} + +void 
+lp_setup_set_vertex_info( struct lp_setup_context *setup, + struct vertex_info *vertex_info ) +{ + /* XXX: just silently holding onto the pointer: + */ + setup->vertex_info = vertex_info; +} + + +/** + * Called during state validation when LP_NEW_VIEWPORT is set. + */ +void +lp_setup_set_viewports(struct lp_setup_context *setup, + unsigned num_viewports, + const struct pipe_viewport_state *viewports) +{ + struct llvmpipe_context *lp = llvmpipe_context(setup->pipe); + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num_viewports <= PIPE_MAX_VIEWPORTS); + assert(viewports); + + /* + * For use in lp_state_fs.c, propagate the viewport values for all viewports. + */ + for (i = 0; i < num_viewports; i++) { + float min_depth; + float max_depth; + + if (lp->rasterizer->clip_halfz == 0) { + float half_depth = viewports[i].scale[2]; + min_depth = viewports[i].translate[2] - half_depth; + max_depth = min_depth + half_depth * 2.0f; + } else { + min_depth = viewports[i].translate[2]; + max_depth = min_depth + viewports[i].scale[2]; + } + + if (setup->viewports[i].min_depth != min_depth || + setup->viewports[i].max_depth != max_depth) { + setup->viewports[i].min_depth = min_depth; + setup->viewports[i].max_depth = max_depth; + setup->dirty |= LP_SETUP_NEW_VIEWPORTS; + } + } +} + + +/** + * Called during state validation when LP_NEW_SAMPLER_VIEW is set. + */ +void +lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, + unsigned num, + struct pipe_sampler_view **views) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + + for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { + struct pipe_sampler_view *view = i < num ? 
views[i] : NULL; + + if (view) { + struct pipe_resource *res = view->texture; + struct llvmpipe_resource *lp_tex = llvmpipe_resource(res); + struct lp_jit_texture *jit_tex; + jit_tex = &setup->fs.current.jit_context.textures[i]; + + /* We're referencing the texture's internal data, so save a + * reference to it. + */ + pipe_resource_reference(&setup->fs.current_tex[i], res); + + if (!lp_tex->dt) { + /* regular texture - setup array of mipmap level offsets */ + int j; + unsigned first_level = 0; + unsigned last_level = 0; + + if (llvmpipe_resource_is_texture(res)) { + first_level = view->u.tex.first_level; + last_level = view->u.tex.last_level; + assert(first_level <= last_level); + assert(last_level <= res->last_level); + jit_tex->base = lp_tex->tex_data; + } + else { + jit_tex->base = lp_tex->data; + } + + if (LP_PERF & PERF_TEX_MEM) { + /* use dummy tile memory */ + jit_tex->base = lp_dummy_tile; + jit_tex->width = TILE_SIZE/8; + jit_tex->height = TILE_SIZE/8; + jit_tex->depth = 1; + jit_tex->first_level = 0; + jit_tex->last_level = 0; + jit_tex->mip_offsets[0] = 0; + jit_tex->row_stride[0] = 0; + jit_tex->img_stride[0] = 0; + } + else { + jit_tex->width = res->width0; + jit_tex->height = res->height0; + jit_tex->depth = res->depth0; + jit_tex->first_level = first_level; + jit_tex->last_level = last_level; + + if (llvmpipe_resource_is_texture(res)) { + for (j = first_level; j <= last_level; j++) { + jit_tex->mip_offsets[j] = lp_tex->mip_offsets[j]; + jit_tex->row_stride[j] = lp_tex->row_stride[j]; + jit_tex->img_stride[j] = lp_tex->img_stride[j]; + } + + if (view->target == PIPE_TEXTURE_1D_ARRAY || + view->target == PIPE_TEXTURE_2D_ARRAY || + view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { + /* + * For array textures, we don't have first_layer, instead + * adjust last_layer (stored as depth) plus the mip level offsets + * (as we have mip-first layout can't just adjust base ptr). + * XXX For mip levels, could do something similar. 
+ */ + jit_tex->depth = view->u.tex.last_layer - view->u.tex.first_layer + 1; + for (j = first_level; j <= last_level; j++) { + jit_tex->mip_offsets[j] += view->u.tex.first_layer * + lp_tex->img_stride[j]; + } + if (view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { + assert(jit_tex->depth % 6 == 0); + } + assert(view->u.tex.first_layer <= view->u.tex.last_layer); + assert(view->u.tex.last_layer < res->array_size); + } + } + else { + /* + * For buffers, we don't have first_element, instead adjust + * last_element (stored as width) plus the base pointer. + */ + unsigned view_blocksize = util_format_get_blocksize(view->format); + /* probably don't really need to fill that out */ + jit_tex->mip_offsets[0] = 0; + jit_tex->row_stride[0] = 0; + jit_tex->img_stride[0] = 0; + + /* everything specified in number of elements here. */ + jit_tex->width = view->u.buf.last_element - view->u.buf.first_element + 1; + jit_tex->base = (uint8_t *)jit_tex->base + view->u.buf.first_element * + view_blocksize; + /* XXX Unsure if we need to sanitize parameters? */ + assert(view->u.buf.first_element <= view->u.buf.last_element); + assert(view->u.buf.last_element * view_blocksize < res->width0); + } + } + } + else { + /* display target texture/surface */ + /* + * XXX: Where should this be unmapped? + */ + struct llvmpipe_screen *screen = llvmpipe_screen(res->screen); + struct sw_winsys *winsys = screen->winsys; + jit_tex->base = winsys->displaytarget_map(winsys, lp_tex->dt, + PIPE_TRANSFER_READ); + jit_tex->row_stride[0] = lp_tex->row_stride[0]; + jit_tex->img_stride[0] = lp_tex->img_stride[0]; + jit_tex->mip_offsets[0] = 0; + jit_tex->width = res->width0; + jit_tex->height = res->height0; + jit_tex->depth = res->depth0; + jit_tex->first_level = jit_tex->last_level = 0; + assert(jit_tex->base); + } + } + } + + setup->dirty |= LP_SETUP_NEW_FS; +} + + +/** + * Called during state validation when LP_NEW_SAMPLER is set. 
+ */ +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + struct pipe_sampler_state **samplers) +{ + unsigned i; + + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + assert(num <= PIPE_MAX_SAMPLERS); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL; + + if (sampler) { + struct lp_jit_sampler *jit_sam; + jit_sam = &setup->fs.current.jit_context.samplers[i]; + + jit_sam->min_lod = sampler->min_lod; + jit_sam->max_lod = sampler->max_lod; + jit_sam->lod_bias = sampler->lod_bias; + COPY_4V(jit_sam->border_color, sampler->border_color.f); + } + } + + setup->dirty |= LP_SETUP_NEW_FS; +} + + +/** + * Is the given texture referenced by any scene? + * Note: we have to check all scenes including any scenes currently + * being rendered and the current scene being built. + */ +unsigned +lp_setup_is_resource_referenced( const struct lp_setup_context *setup, + const struct pipe_resource *texture ) +{ + unsigned i; + + /* check the render targets */ + for (i = 0; i < setup->fb.nr_cbufs; i++) { + if (setup->fb.cbufs[i] && setup->fb.cbufs[i]->texture == texture) + return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE; + } + if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) { + return LP_REFERENCED_FOR_READ | LP_REFERENCED_FOR_WRITE; + } + + /* check textures referenced by the scene */ + for (i = 0; i < Elements(setup->scenes); i++) { + if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) { + return LP_REFERENCED_FOR_READ; + } + } + + return LP_UNREFERENCED; +} + + +/** + * Called by vbuf code when we're about to draw something. + * + * This function stores all dirty state in the current scene's display list + * memory, via lp_scene_alloc(). We can not pass pointers of mutable state to + * the JIT functions, as the JIT functions will be called later on, most likely + * on a different thread. 
+ * + * When processing dirty state it is imperative that we don't refer to any + * pointers previously allocated with lp_scene_alloc() in this function (or any + * function) as they may belong to a scene freed since then. + */ +static boolean +try_update_scene_state( struct lp_setup_context *setup ) +{ + static const float fake_const_buf[4]; + boolean new_scene = (setup->fs.stored == NULL); + struct lp_scene *scene = setup->scene; + unsigned i; + + assert(scene); + + if (setup->dirty & LP_SETUP_NEW_VIEWPORTS) { + /* + * Record new depth range state for changes due to viewport updates. + * + * TODO: Collapse the existing viewport and depth range information + * into one structure, for access by JIT. + */ + struct lp_jit_viewport *stored; + + stored = (struct lp_jit_viewport *) + lp_scene_alloc(scene, sizeof setup->viewports); + + if (!stored) { + assert(!new_scene); + return FALSE; + } + + memcpy(stored, setup->viewports, sizeof setup->viewports); + + setup->fs.current.jit_context.viewports = stored; + setup->dirty |= LP_SETUP_NEW_FS; + } + + if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) { + uint8_t *stored; + float* fstored; + unsigned i, j; + unsigned size; + + /* Alloc u8_blend_color (16 x i8) and f_blend_color (4 or 8 x f32) */ + size = 4 * 16 * sizeof(uint8_t); + size += (LP_MAX_VECTOR_LENGTH / 4) * sizeof(float); + stored = lp_scene_alloc_aligned(scene, size, LP_MIN_VECTOR_ALIGN); + + if (!stored) { + assert(!new_scene); + return FALSE; + } + + /* Store floating point colour */ + fstored = (float*)(stored + 4*16); + for (i = 0; i < (LP_MAX_VECTOR_LENGTH / 4); ++i) { + fstored[i] = setup->blend_color.current.color[i % 4]; + } + + /* smear each blend color component across 16 ubyte elements */ + for (i = 0; i < 4; ++i) { + uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]); + for (j = 0; j < 16; ++j) + stored[i*16 + j] = c; + } + + setup->blend_color.stored = stored; + setup->fs.current.jit_context.u8_blend_color = stored; + 
setup->fs.current.jit_context.f_blend_color = fstored; + setup->dirty |= LP_SETUP_NEW_FS; + } + + if (setup->dirty & LP_SETUP_NEW_CONSTANTS) { + for (i = 0; i < Elements(setup->constants); ++i) { + struct pipe_resource *buffer = setup->constants[i].current.buffer; + const unsigned current_size = MIN2(setup->constants[i].current.buffer_size, + LP_MAX_TGSI_CONST_BUFFER_SIZE); + const ubyte *current_data = NULL; + int num_constants; + + STATIC_ASSERT(DATA_BLOCK_SIZE >= LP_MAX_TGSI_CONST_BUFFER_SIZE); + + if (buffer) { + /* resource buffer */ + current_data = (ubyte *) llvmpipe_resource_data(buffer); + } + else if (setup->constants[i].current.user_buffer) { + /* user-space buffer */ + current_data = (ubyte *) setup->constants[i].current.user_buffer; + } + + if (current_data) { + current_data += setup->constants[i].current.buffer_offset; + + /* TODO: copy only the actually used constants? */ + + if (setup->constants[i].stored_size != current_size || + !setup->constants[i].stored_data || + memcmp(setup->constants[i].stored_data, + current_data, + current_size) != 0) { + void *stored; + + stored = lp_scene_alloc(scene, current_size); + if (!stored) { + assert(!new_scene); + return FALSE; + } + + memcpy(stored, + current_data, + current_size); + setup->constants[i].stored_size = current_size; + setup->constants[i].stored_data = stored; + } + setup->fs.current.jit_context.constants[i] = + setup->constants[i].stored_data; + } + else { + setup->constants[i].stored_size = 0; + setup->constants[i].stored_data = NULL; + setup->fs.current.jit_context.constants[i] = fake_const_buf; + } + + num_constants = + setup->constants[i].stored_size / (sizeof(float) * 4); + setup->fs.current.jit_context.num_constants[i] = num_constants; + setup->dirty |= LP_SETUP_NEW_FS; + } + } + + + if (setup->dirty & LP_SETUP_NEW_FS) { + if (!setup->fs.stored || + memcmp(setup->fs.stored, + &setup->fs.current, + sizeof setup->fs.current) != 0) + { + struct lp_rast_state *stored; + + /* The fs state that's 
been stored in the scene is different from + * the new, current state. So allocate a new lp_rast_state object + * and append it to the bin's setup data buffer. + */ + stored = (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored); + if (!stored) { + assert(!new_scene); + return FALSE; + } + + memcpy(stored, + &setup->fs.current, + sizeof setup->fs.current); + setup->fs.stored = stored; + + /* The scene now references the textures in the rasterization + * state record. Note that now. + */ + for (i = 0; i < Elements(setup->fs.current_tex); i++) { + if (setup->fs.current_tex[i]) { + if (!lp_scene_add_resource_reference(scene, + setup->fs.current_tex[i], + new_scene)) { + assert(!new_scene); + return FALSE; + } + } + } + } + } + + if (setup->dirty & LP_SETUP_NEW_SCISSOR) { + unsigned i; + for (i = 0; i < PIPE_MAX_VIEWPORTS; ++i) { + setup->draw_regions[i] = setup->framebuffer; + if (setup->scissor_test) { + u_rect_possible_intersection(&setup->scissors[i], + &setup->draw_regions[i]); + } + } + } + + setup->dirty = 0; + + assert(setup->fs.stored); + return TRUE; +} + +boolean +lp_setup_update_state( struct lp_setup_context *setup, + boolean update_scene ) +{ + /* Some of the 'draw' pipeline stages may have changed some driver state. + * Make sure we've processed those state changes before anything else. + * + * XXX this is the only place where llvmpipe_context is used in the + * setup code. This may get refactored/changed... + */ + { + struct llvmpipe_context *lp = llvmpipe_context(setup->pipe); + if (lp->dirty) { + llvmpipe_update_derived(lp); + } + + if (lp->setup->dirty) { + llvmpipe_update_setup(lp); + } + + assert(setup->setup.variant); + + /* Will probably need to move this somewhere else, just need + * to know about vertex shader point size attribute. 
+ */ + setup->psize = lp->psize_slot; + setup->viewport_index_slot = lp->viewport_index_slot; + setup->layer_slot = lp->layer_slot; + setup->face_slot = lp->face_slot; + + assert(lp->dirty == 0); + + assert(lp->setup_variant.key.size == + setup->setup.variant->key.size); + + assert(memcmp(&lp->setup_variant.key, + &setup->setup.variant->key, + setup->setup.variant->key.size) == 0); + } + + if (update_scene && setup->state != SETUP_ACTIVE) { + if (!set_scene_state( setup, SETUP_ACTIVE, __FUNCTION__ )) + return FALSE; + } + + /* Only call into update_scene_state() if we already have a + * scene: + */ + if (update_scene && setup->scene) { + assert(setup->state == SETUP_ACTIVE); + + if (try_update_scene_state(setup)) + return TRUE; + + /* Update failed, try to restart the scene. + * + * Cannot call lp_setup_flush_and_restart() directly here + * because of potential recursion. + */ + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!set_scene_state(setup, SETUP_ACTIVE, __FUNCTION__)) + return FALSE; + + if (!setup->scene) + return FALSE; + + return try_update_scene_state(setup); + } + + return TRUE; +} + + + +/* Only caller is lp_setup_vbuf_destroy() + */ +void +lp_setup_destroy( struct lp_setup_context *setup ) +{ + uint i; + + lp_setup_reset( setup ); + + util_unreference_framebuffer_state(&setup->fb); + + for (i = 0; i < Elements(setup->fs.current_tex); i++) { + pipe_resource_reference(&setup->fs.current_tex[i], NULL); + } + + for (i = 0; i < Elements(setup->constants); i++) { + pipe_resource_reference(&setup->constants[i].current.buffer, NULL); + } + + /* free the scenes in the 'empty' queue */ + for (i = 0; i < Elements(setup->scenes); i++) { + struct lp_scene *scene = setup->scenes[i]; + + if (scene->fence) + lp_fence_wait(scene->fence); + + lp_scene_destroy(scene); + } + + lp_fence_reference(&setup->last_fence, NULL); + + FREE( setup ); +} + + +/** + * Create a new primitive tiling engine. 
Plug it into the backend of + * the draw module. Currently also creates a rasterizer to use with + * it. + */ +struct lp_setup_context * +lp_setup_create( struct pipe_context *pipe, + struct draw_context *draw ) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct lp_setup_context *setup; + unsigned i; + + setup = CALLOC_STRUCT(lp_setup_context); + if (!setup) { + goto no_setup; + } + + lp_setup_init_vbuf(setup); + + /* Used only in update_state(): + */ + setup->pipe = pipe; + + + setup->num_threads = screen->num_threads; + setup->vbuf = draw_vbuf_stage(draw, &setup->base); + if (!setup->vbuf) { + goto no_vbuf; + } + + draw_set_rasterize_stage(draw, setup->vbuf); + draw_set_render(draw, &setup->base); + + /* create some empty scenes */ + for (i = 0; i < MAX_SCENES; i++) { + setup->scenes[i] = lp_scene_create( pipe ); + if (!setup->scenes[i]) { + goto no_scenes; + } + } + + setup->triangle = first_triangle; + setup->line = first_line; + setup->point = first_point; + + setup->dirty = ~0; + + return setup; + +no_scenes: + for (i = 0; i < MAX_SCENES; i++) { + if (setup->scenes[i]) { + lp_scene_destroy(setup->scenes[i]); + } + } + + setup->vbuf->destroy(setup->vbuf); +no_vbuf: + FREE(setup); +no_setup: + return NULL; +} + + +/** + * Put a BeginQuery command into all bins. 
+ */ +void +lp_setup_begin_query(struct lp_setup_context *setup, + struct llvmpipe_query *pq) +{ + + set_scene_state(setup, SETUP_ACTIVE, "begin_query"); + + if (!(pq->type == PIPE_QUERY_OCCLUSION_COUNTER || + pq->type == PIPE_QUERY_OCCLUSION_PREDICATE || + pq->type == PIPE_QUERY_PIPELINE_STATISTICS)) + return; + + /* init the query to its beginning state */ + assert(setup->active_binned_queries < LP_MAX_ACTIVE_BINNED_QUERIES); + /* exceeding list size so just ignore the query */ + if (setup->active_binned_queries >= LP_MAX_ACTIVE_BINNED_QUERIES) { + return; + } + assert(setup->active_queries[setup->active_binned_queries] == NULL); + setup->active_queries[setup->active_binned_queries] = pq; + setup->active_binned_queries++; + + assert(setup->scene); + if (setup->scene) { + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_BEGIN_QUERY, + lp_rast_arg_query(pq))) { + + if (!lp_setup_flush_and_restart(setup)) + return; + + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_BEGIN_QUERY, + lp_rast_arg_query(pq))) { + return; + } + } + setup->scene->had_queries |= TRUE; + } +} + + +/** + * Put an EndQuery command into all bins. + */ +void +lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) +{ + set_scene_state(setup, SETUP_ACTIVE, "end_query"); + + assert(setup->scene); + if (setup->scene) { + /* pq->fence should be the fence of the *last* scene which + * contributed to the query result. + */ + lp_fence_reference(&pq->fence, setup->scene->fence); + + if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER || + pq->type == PIPE_QUERY_OCCLUSION_PREDICATE || + pq->type == PIPE_QUERY_PIPELINE_STATISTICS || + pq->type == PIPE_QUERY_TIMESTAMP) { + if (pq->type == PIPE_QUERY_TIMESTAMP && + !(setup->scene->tiles_x | setup->scene->tiles_y)) { + /* + * If there's a zero width/height framebuffer, there's no bins and + * hence no rast task is ever run. So fill in something here instead. 
+ */ + pq->end[0] = os_time_get_nano(); + } + + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_END_QUERY, + lp_rast_arg_query(pq))) { + if (!lp_setup_flush_and_restart(setup)) + goto fail; + + if (!lp_scene_bin_everywhere(setup->scene, + LP_RAST_OP_END_QUERY, + lp_rast_arg_query(pq))) { + goto fail; + } + } + setup->scene->had_queries |= TRUE; + } + } + else { + lp_fence_reference(&pq->fence, setup->last_fence); + } + +fail: + /* Need to do this now not earlier since it still needs to be marked as + * active when binning it would cause a flush. + */ + if (pq->type == PIPE_QUERY_OCCLUSION_COUNTER || + pq->type == PIPE_QUERY_OCCLUSION_PREDICATE || + pq->type == PIPE_QUERY_PIPELINE_STATISTICS) { + unsigned i; + + /* remove from active binned query list */ + for (i = 0; i < setup->active_binned_queries; i++) { + if (setup->active_queries[i] == pq) + break; + } + assert(i < setup->active_binned_queries); + if (i == setup->active_binned_queries) + return; + setup->active_binned_queries--; + setup->active_queries[i] = setup->active_queries[setup->active_binned_queries]; + setup->active_queries[setup->active_binned_queries] = NULL; + } +} + + +boolean +lp_setup_flush_and_restart(struct lp_setup_context *setup) +{ + if (0) debug_printf("%s\n", __FUNCTION__); + + assert(setup->state == SETUP_ACTIVE); + + if (!set_scene_state(setup, SETUP_FLUSHED, __FUNCTION__)) + return FALSE; + + if (!lp_setup_update_state(setup, TRUE)) + return FALSE; + + return TRUE; +} + + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h new file mode 100644 index 000000000..a42df2dc9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup.h @@ -0,0 +1,168 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ +#ifndef LP_SETUP_H +#define LP_SETUP_H + +#include "pipe/p_compiler.h" +#include "lp_jit.h" + +struct draw_context; +struct vertex_info; + + +struct pipe_resource; +struct pipe_query; +struct pipe_surface; +struct pipe_blend_color; +struct pipe_screen; +struct pipe_framebuffer_state; +struct lp_fragment_shader_variant; +struct lp_jit_context; +struct llvmpipe_query; +struct pipe_fence_handle; +struct lp_setup_variant; +struct lp_setup_context; + +void lp_setup_reset( struct lp_setup_context *setup ); + +struct lp_setup_context * +lp_setup_create( struct pipe_context *pipe, + struct draw_context *draw ); + +void +lp_setup_clear(struct lp_setup_context *setup, + const union pipe_color_union *clear_color, + double clear_depth, + unsigned clear_stencil, + unsigned flags); + + + +void +lp_setup_flush( struct lp_setup_context *setup, + struct pipe_fence_handle **fence, + const char *reason); + + +void +lp_setup_bind_framebuffer( struct lp_setup_context *setup, + const struct pipe_framebuffer_state *fb ); + +void +lp_setup_set_triangle_state( struct lp_setup_context *setup, + unsigned cullmode, + boolean front_is_ccw, + boolean scissor, + boolean half_pixel_center, + boolean bottom_edge_rule); + +void +lp_setup_set_line_state( struct lp_setup_context *setup, + float line_width); + +void +lp_setup_set_point_state( struct lp_setup_context *setup, + float point_size, + boolean point_size_per_vertex, + uint sprite_coord_enable, + uint sprite_coord_origin); + +void +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant ); + +void +lp_setup_set_fs_variant( struct lp_setup_context *setup, + struct lp_fragment_shader_variant *variant ); + +void +lp_setup_set_fs_constants(struct lp_setup_context *setup, + unsigned num, + struct pipe_constant_buffer *buffers); + +void +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, + float alpha_ref_value 
); + +void +lp_setup_set_stencil_ref_values( struct lp_setup_context *setup, + const ubyte refs[2] ); + +void +lp_setup_set_blend_color( struct lp_setup_context *setup, + const struct pipe_blend_color *blend_color ); + +void +lp_setup_set_scissors( struct lp_setup_context *setup, + const struct pipe_scissor_state *scissors ); + +void +lp_setup_set_viewports(struct lp_setup_context *setup, + unsigned num_viewports, + const struct pipe_viewport_state *viewports); + +void +lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup, + unsigned num, + struct pipe_sampler_view **views); + +void +lp_setup_set_fragment_sampler_state(struct lp_setup_context *setup, + unsigned num, + struct pipe_sampler_state **samplers); + +unsigned +lp_setup_is_resource_referenced( const struct lp_setup_context *setup, + const struct pipe_resource *texture ); + +void +lp_setup_set_flatshade_first( struct lp_setup_context *setup, + boolean flatshade_first ); + +void +lp_setup_set_rasterizer_discard( struct lp_setup_context *setup, + boolean rasterizer_discard ); + +void +lp_setup_set_vertex_info( struct lp_setup_context *setup, + struct vertex_info *info ); + +void +lp_setup_begin_query(struct lp_setup_context *setup, + struct llvmpipe_query *pq); + +void +lp_setup_end_query(struct lp_setup_context *setup, + struct llvmpipe_query *pq); + +static inline unsigned +lp_clamp_viewport_idx(int idx) +{ + return (PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0; +} + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h new file mode 100644 index 000000000..2410e2384 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2007-2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * The setup code is concerned with point/line/triangle setup and + * putting commands/data into the bins. 
+ */ + + +#ifndef LP_SETUP_CONTEXT_H +#define LP_SETUP_CONTEXT_H + +#include "lp_setup.h" +#include "lp_rast.h" +#include "lp_scene.h" +#include "lp_bld_interp.h" /* for struct lp_shader_input */ + +#include "draw/draw_vbuf.h" +#include "util/u_rect.h" +#include "util/u_pack_color.h" + +#define LP_SETUP_NEW_FS 0x01 +#define LP_SETUP_NEW_CONSTANTS 0x02 +#define LP_SETUP_NEW_BLEND_COLOR 0x04 +#define LP_SETUP_NEW_SCISSOR 0x08 +#define LP_SETUP_NEW_VIEWPORTS 0x10 + + +struct lp_setup_variant; + + +/** Max number of scenes */ +/* XXX: make multiple scenes per context work, see lp_setup_rasterize_scene */ +#define MAX_SCENES 1 + + + +/** + * Point/line/triangle setup context. + * Note: "stored" below indicates data which is stored in the bins, + * not arbitrary malloc'd memory. + * + * + * Subclass of vbuf_render, plugged directly into the draw module as + * the rendering backend. + */ +struct lp_setup_context +{ + struct vbuf_render base; + + struct pipe_context *pipe; + struct vertex_info *vertex_info; + uint prim; + uint vertex_size; + uint nr_vertices; + uint sprite_coord_enable, sprite_coord_origin; + uint vertex_buffer_size; + void *vertex_buffer; + + /* Final pipeline stage for draw module. Draw module should + * create/install this itself now. 
+ */ + struct draw_stage *vbuf; + unsigned num_threads; + unsigned scene_idx; + struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */ + struct lp_scene *scene; /**< current scene being built */ + + struct lp_fence *last_fence; + struct llvmpipe_query *active_queries[LP_MAX_ACTIVE_BINNED_QUERIES]; + unsigned active_binned_queries; + + boolean flatshade_first; + boolean ccw_is_frontface; + boolean scissor_test; + boolean point_size_per_vertex; + boolean rasterizer_discard; + unsigned cullmode; + unsigned bottom_edge_rule; + float pixel_offset; + float line_width; + float point_size; + float psize; + unsigned viewport_index_slot; + unsigned layer_slot; + int face_slot; + + struct pipe_framebuffer_state fb; + struct u_rect framebuffer; + struct u_rect scissors[PIPE_MAX_VIEWPORTS]; + struct u_rect draw_regions[PIPE_MAX_VIEWPORTS]; /* intersection of fb & scissor */ + struct lp_jit_viewport viewports[PIPE_MAX_VIEWPORTS]; + + struct { + unsigned flags; + union util_color color_val[PIPE_MAX_COLOR_BUFS]; + uint64_t zsmask; + uint64_t zsvalue; /**< lp_rast_clear_zstencil() cmd */ + } clear; + + enum setup_state { + SETUP_FLUSHED, /**< scene is null */ + SETUP_CLEARED, /**< scene exists but has only clears */ + SETUP_ACTIVE /**< scene exists and has at least one draw/query */ + } state; + + struct { + const struct lp_rast_state *stored; /**< what's in the scene */ + struct lp_rast_state current; /**< currently set state */ + struct pipe_resource *current_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + } fs; + + /** fragment shader constants */ + struct { + struct pipe_constant_buffer current; + unsigned stored_size; + const void *stored_data; + } constants[LP_MAX_TGSI_CONST_BUFFERS]; + + struct { + struct pipe_blend_color current; + uint8_t *stored; + } blend_color; + + + struct { + const struct lp_setup_variant *variant; + } setup; + + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ + + void (*point)( struct lp_setup_context *, + const float (*v0)[4]); + + void (*line)( 
struct lp_setup_context *, + const float (*v0)[4], + const float (*v1)[4]); + + void (*triangle)( struct lp_setup_context *, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); +}; + +void lp_setup_choose_triangle( struct lp_setup_context *setup ); +void lp_setup_choose_line( struct lp_setup_context *setup ); +void lp_setup_choose_point( struct lp_setup_context *setup ); + +void lp_setup_init_vbuf(struct lp_setup_context *setup); + +boolean lp_setup_update_state( struct lp_setup_context *setup, + boolean update_scene); + +void lp_setup_destroy( struct lp_setup_context *setup ); + +boolean lp_setup_flush_and_restart(struct lp_setup_context *setup); + +void +lp_setup_print_triangle(struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]); + +void +lp_setup_print_vertex(struct lp_setup_context *setup, + const char *name, + const float (*v)[4]); + + +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned num_inputs, + unsigned nr_planes, + unsigned *tri_size); + +boolean +lp_setup_bin_triangle( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bbox, + int nr_planes, + unsigned scissor_index ); + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c new file mode 100644 index 000000000..a190254d9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -0,0 +1,748 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for lines + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_perf.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" +#include "lp_context.h" +#include "draw/draw_context.h" + +#define NUM_CHANNELS 4 + +struct lp_line_info { + + float dx; + float dy; + float oneoverarea; + boolean frontfacing; + + const float (*v1)[4]; + const float (*v2)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
+ */ +static void constant_coef( struct lp_setup_context *setup, + struct lp_line_info *info, + unsigned slot, + const float value, + unsigned i ) +{ + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void linear_coef( struct lp_setup_context *setup, + struct lp_line_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + float a1 = info->v1[vert_attr][i]; + float a2 = info->v2[vert_attr][i]; + + float da21 = a1 - a2; + float dadx = da21 * info->dx * info->oneoverarea; + float dady = da21 * info->dy * info->oneoverarea; + + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; + + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. 
+ */ +static void perspective_coef( struct lp_setup_context *setup, + struct lp_line_info *info, + unsigned slot, + unsigned vert_attr, + unsigned i) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = info->v1[vert_attr][i] * info->v1[0][3]; + float a2 = info->v2[vert_attr][i] * info->v2[0][3]; + + float da21 = a1 - a2; + float dadx = da21 * info->dx * info->oneoverarea; + float dady = da21 * info->dy * info->oneoverarea; + + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; + + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); +} + +static void +setup_fragcoord_coef( struct lp_setup_context *setup, + struct lp_line_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + linear_coef(setup, info, slot, 0, 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + linear_coef(setup, info, slot, 0, 3); + } +} + +/** + * Compute the tri->coef[] array dadx, dady, a0 values. 
+ */ +static void setup_line_coefficients( struct lp_setup_context *setup, + struct lp_line_info *info) +{ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; + unsigned i; + + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + if (key->flatshade_first) { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); + } + else { + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i); + } + break; + + case LP_INTERP_LINEAR: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + linear_coef(setup, info, slot+1, vert_attr, i); + break; + + case LP_INTERP_PERSPECTIVE: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + perspective_coef(setup, info, slot+1, vert_attr, i); + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + case LP_INTERP_FACING: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, + info->frontfacing ? 1.0f : -1.0f, i); + break; + + default: + assert(0); + } + } + + /* The internal position input is in slot zero: + */ + setup_fragcoord_coef(setup, info, 0, + fragcoord_usage_mask); +} + + + +static inline int subpixel_snap( float a ) +{ + return util_iround(FIXED_ONE * a); +} + + +/** + * Print line vertex attribs (for debug). 
+ */ +static void +print_line(struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + uint i; + + debug_printf("llvmpipe line\n"); + for (i = 0; i < 1 + key->num_inputs; i++) { + debug_printf(" v1[%d]: %f %f %f %f\n", i, + v1[i][0], v1[i][1], v1[i][2], v1[i][3]); + } + for (i = 0; i < 1 + key->num_inputs; i++) { + debug_printf(" v2[%d]: %f %f %f %f\n", i, + v2[i][0], v2[i][1], v2[i][2], v2[i][3]); + } +} + + +static inline boolean sign(float x){ + return x >= 0; +} + + +/* Used on positive floats only: + */ +static inline float fracf(float f) +{ + return f - floorf(f); +} + + + +static boolean +try_setup_line( struct lp_setup_context *setup, + const float (*v1)[4], + const float (*v2)[4]) +{ + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + struct lp_rast_triangle *line; + struct lp_rast_plane *plane; + struct lp_line_info info; + float width = MAX2(1.0, setup->line_width); + struct u_rect bbox; + unsigned tri_bytes; + int x[4]; + int y[4]; + int i; + int nr_planes = 4; + unsigned viewport_index = 0; + unsigned layer = 0; + + /* linewidth should be interpreted as integer */ + int fixed_width = util_iround(width) * FIXED_ONE; + + float x_offset=0; + float y_offset=0; + float x_offset_end=0; + float y_offset_end=0; + + float x1diff; + float y1diff; + float x2diff; + float y2diff; + float dx, dy; + float area; + + boolean draw_start; + boolean draw_end; + boolean will_draw_start; + boolean will_draw_end; + + if (0) + print_line(setup, v1, v2); + + if (setup->scissor_test) { + nr_planes = 8; + if (setup->viewport_index_slot > 0) { + unsigned *udata = (unsigned*)v1[setup->viewport_index_slot]; + viewport_index = lp_clamp_viewport_idx(*udata); + } + } + else { + nr_planes = 4; + } + + if (setup->layer_slot > 0) { + layer = 
*(unsigned*)v1[setup->layer_slot]; + layer = MIN2(layer, scene->fb_max_layer); + } + + dx = v1[0][0] - v2[0][0]; + dy = v1[0][1] - v2[0][1]; + area = (dx * dx + dy * dy); + if (area == 0) { + LP_COUNT(nr_culled_tris); + return TRUE; + } + + info.oneoverarea = 1.0f / area; + info.dx = dx; + info.dy = dy; + info.v1 = v1; + info.v2 = v2; + + + /* X-MAJOR LINE */ + if (fabsf(dx) >= fabsf(dy)) { + float dydx = dy / dx; + + x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; + y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; + x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; + y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + + if (y2diff==-0.5 && dy<0){ + y2diff = 0.5; + } + + /* + * Diamond exit rule test for starting point + */ + if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { + draw_start = TRUE; + } + else if (sign(x1diff) == sign(-dx)) { + draw_start = FALSE; + } + else if (sign(-y1diff) != sign(dy)) { + draw_start = TRUE; + } + else { + /* do intersection test */ + float yintersect = fracf(v1[0][1]) + x1diff * dydx; + draw_start = (yintersect < 1.0 && yintersect > 0.0); + } + + + /* + * Diamond exit rule test for ending point + */ + if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { + draw_end = FALSE; + } + else if (sign(x2diff) != sign(-dx)) { + draw_end = FALSE; + } + else if (sign(-y2diff) == sign(dy)) { + draw_end = TRUE; + } + else { + /* do intersection test */ + float yintersect = fracf(v2[0][1]) + x2diff * dydx; + draw_end = (yintersect < 1.0 && yintersect > 0.0); + } + + /* Are we already drawing start/end? 
+ */ + will_draw_start = sign(-x1diff) != sign(dx); + will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0; + + if (dx < 0) { + /* if v2 is to the right of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + dx = -dx; + dy = -dy; + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + x_offset_end = - x1diff - 0.5; + y_offset_end = x_offset_end * dydx; + + } + if (will_draw_end != draw_end) { + x_offset = - x2diff - 0.5; + y_offset = x_offset * dydx; + } + + } + else{ + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + x_offset = - x1diff + 0.5; + y_offset = x_offset * dydx; + } + if (will_draw_end != draw_end) { + x_offset_end = - x2diff + 0.5; + y_offset_end = x_offset_end * dydx; + } + } + + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); + x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset); + x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset); + + y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) - fixed_width/2; + y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2; + y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2; + y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset) + fixed_width/2; + + } + else { + const float dxdy = dx / dy; + + /* Y-MAJOR LINE */ + x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5; + y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5; + x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5; + y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5; + + if (x2diff==-0.5 && dx<0) { + x2diff = 0.5; + } + + /* + * Diamond exit rule test for starting point + */ + if (fabsf(x1diff) + fabsf(y1diff) < 0.5) { + draw_start = TRUE; + } + else if (sign(-y1diff) == sign(dy)) { + draw_start = FALSE; + } + else if 
(sign(x1diff) != sign(-dx)) { + draw_start = TRUE; + } + else { + /* do intersection test */ + float xintersect = fracf(v1[0][0]) + y1diff * dxdy; + draw_start = (xintersect < 1.0 && xintersect > 0.0); + } + + /* + * Diamond exit rule test for ending point + */ + if (fabsf(x2diff) + fabsf(y2diff) < 0.5) { + draw_end = FALSE; + } + else if (sign(-y2diff) != sign(dy) ) { + draw_end = FALSE; + } + else if (sign(x2diff) == sign(-dx) ) { + draw_end = TRUE; + } + else { + /* do intersection test */ + float xintersect = fracf(v2[0][0]) + y2diff * dxdy; + draw_end = (xintersect < 1.0 && xintersect >= 0.0); + } + + /* Are we already drawing start/end? + */ + will_draw_start = sign(y1diff) == sign(dy); + will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0; + + if (dy > 0) { + /* if v2 is on top of v1, swap pointers */ + const float (*temp)[4] = v1; + v1 = v2; + v2 = temp; + dx = -dx; + dy = -dy; + + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + y_offset_end = - y1diff + 0.5; + x_offset_end = y_offset_end * dxdy; + } + if (will_draw_end != draw_end) { + y_offset = - y2diff + 0.5; + x_offset = y_offset * dxdy; + } + } + else { + /* Otherwise shift planes appropriately */ + if (will_draw_start != draw_start) { + y_offset = - y1diff - 0.5; + x_offset = y_offset * dxdy; + + } + if (will_draw_end != draw_end) { + y_offset_end = - y2diff - 0.5; + x_offset_end = y_offset_end * dxdy; + } + } + + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; + x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; + x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2; + x[3] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) + fixed_width/2; + + y[0] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); + y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] + 
y_offset_end - setup->pixel_offset); + y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); + } + + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; + + bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } + + if (bbox.x1 < bbox.x0 || + bbox.y1 < bbox.y0) { + if (0) debug_printf("empty bounding box\n"); + LP_COUNT(nr_culled_tris); + return TRUE; + } + + if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return TRUE; + } + + /* Can safely discard negative regions: + */ + bbox.x0 = MAX2(bbox.x0, 0); + bbox.y0 = MAX2(bbox.y0, 0); + + line = lp_setup_alloc_triangle(scene, + key->num_inputs, + nr_planes, + &tri_bytes); + if (!line) + return FALSE; + +#ifdef DEBUG + line->v[0][0] = v1[0][0]; + line->v[1][0] = v2[0][0]; + line->v[0][1] = v1[0][1]; + line->v[1][1] = v2[0][1]; +#endif + + LP_COUNT(nr_tris); + + if (lp_context->active_statistics_queries && + !llvmpipe_rasterization_disabled(lp_context)) { + lp_context->pipeline_statistics.c_primitives++; + } + + /* calculate the deltas */ + plane = GET_PLANES(line); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[3]; + plane[3].dcdy = x[3] - x[0]; + + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[3]; + plane[3].dcdx = y[3] - y[0]; + + if 
(draw_will_inject_frontface(lp_context->draw) && + setup->face_slot > 0) { + line->inputs.frontfacing = v1[setup->face_slot][0]; + } else { + line->inputs.frontfacing = TRUE; + } + + /* Setup parameter interpolants: + */ + info.a0 = GET_A0(&line->inputs); + info.dadx = GET_DADX(&line->inputs); + info.dady = GET_DADY(&line->inputs); + info.frontfacing = line->inputs.frontfacing; + setup_line_coefficients(setup, &info); + + line->inputs.disable = FALSE; + line->inputs.opaque = FALSE; + line->inputs.layer = layer; + line->inputs.viewport_index = viewport_index; + + for (i = 0; i < 4; i++) { + + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]); + + + /* correct for top-left vs. bottom-left fill convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; + } + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } + } + + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; + + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + + + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. 
+ * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. + */ + if (nr_planes == 8) { + const struct u_rect *scissor = + &setup->scissors[viewport_index]; + + plane[4].dcdx = -1; + plane[4].dcdy = 0; + plane[4].c = 1-scissor->x0; + plane[4].eo = 1; + + plane[5].dcdx = 1; + plane[5].dcdy = 0; + plane[5].c = scissor->x1+1; + plane[5].eo = 0; + + plane[6].dcdx = 0; + plane[6].dcdy = 1; + plane[6].c = 1-scissor->y0; + plane[6].eo = 1; + + plane[7].dcdx = 0; + plane[7].dcdy = -1; + plane[7].c = scissor->y1+1; + plane[7].eo = 0; + } + + return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index); +} + + +static void lp_setup_line( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4] ) +{ + if (!try_setup_line( setup, v0, v1 )) + { + if (!lp_setup_flush_and_restart(setup)) + return; + + if (!try_setup_line( setup, v0, v1 )) + return; + } +} + + +void lp_setup_choose_line( struct lp_setup_context *setup ) +{ + setup->line = lp_setup_line; +} + + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c new file mode 100644 index 000000000..75544b524 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -0,0 +1,541 @@ +/************************************************************************** + * + * Copyright 2010, VMware Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Binning code for points + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_setup_context.h" +#include "lp_perf.h" +#include "lp_rast.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" +#include "lp_context.h" +#include "tgsi/tgsi_scan.h" +#include "draw/draw_context.h" + +#define NUM_CHANNELS 4 + +struct point_info { + /* x,y deltas */ + int dy01, dy12; + int dx01, dx12; + + const float (*v0)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; + + boolean frontfacing; +}; + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). 
+ */ +static void +constant_coef(struct lp_setup_context *setup, + struct point_info *info, + unsigned slot, + const float value, + unsigned i) +{ + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +static void +point_persp_coeff(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i) +{ + /* + * Fragment shader expects pre-multiplied w for LP_INTERP_PERSPECTIVE. A + * better stratergy would be to take the primitive in consideration when + * generating the fragment shader key, and therefore avoid the per-fragment + * perspective divide. + */ + + float w0 = info->v0[0][3]; + + assert(i < 4); + + info->a0[slot][i] = info->v0[slot][i]*w0; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; +} + + +/** + * Setup automatic texcoord coefficients (for sprite rendering). + * \param slot the vertex attribute slot to setup + * \param i the attribute channel in [0,3] + * \param sprite_coord_origin one of PIPE_SPRITE_COORD_x + * \param perspective does the shader expects pre-multiplied w, i.e., + * LP_INTERP_PERSPECTIVE is specified in the shader key + */ +static void +texcoord_coef(struct lp_setup_context *setup, + const struct point_info *info, + unsigned slot, + unsigned i, + unsigned sprite_coord_origin, + boolean perspective) +{ + float w0 = info->v0[0][3]; + + assert(i < 4); + + if (i == 0) { + float dadx = FIXED_ONE / (float)info->dx12; + float dady = 0.0f; + float x0 = info->v0[0][0] - setup->pixel_offset; + float y0 = info->v0[0][1] - setup->pixel_offset; + + info->dadx[slot][0] = dadx; + info->dady[slot][0] = dady; + info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); + + if (perspective) { + info->dadx[slot][0] *= w0; + info->dady[slot][0] *= w0; + info->a0[slot][0] *= w0; + } + } + else if (i == 1) { + float dadx = 0.0f; + float dady = FIXED_ONE / (float)info->dx12; + float x0 = info->v0[0][0] - setup->pixel_offset; + float y0 = info->v0[0][1] - setup->pixel_offset; + + if 
(sprite_coord_origin == PIPE_SPRITE_COORD_LOWER_LEFT) { + dady = -dady; + } + + info->dadx[slot][1] = dadx; + info->dady[slot][1] = dady; + info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); + + if (perspective) { + info->dadx[slot][1] *= w0; + info->dady[slot][1] *= w0; + info->a0[slot][1] *= w0; + } + } + else if (i == 2) { + info->a0[slot][2] = 0.0f; + info->dadx[slot][2] = 0.0f; + info->dady[slot][2] = 0.0f; + } + else { + info->a0[slot][3] = perspective ? w0 : 1.0f; + info->dadx[slot][3] = 0.0f; + info->dady[slot][3] = 0.0f; + } +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial + * Z and W are copied from position_coef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_point_fragcoord_coef(struct lp_setup_context *setup, + struct point_info *info, + unsigned slot, + unsigned usage_mask) +{ + /*X*/ + if (usage_mask & TGSI_WRITEMASK_X) { + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; + } + + /*Y*/ + if (usage_mask & TGSI_WRITEMASK_Y) { + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; + } + + /*Z*/ + if (usage_mask & TGSI_WRITEMASK_Z) { + constant_coef(setup, info, slot, info->v0[0][2], 2); + } + + /*W*/ + if (usage_mask & TGSI_WRITEMASK_W) { + constant_coef(setup, info, slot, info->v0[0][3], 3); + } +} + + +/** + * Compute the point->coef[] array dadx, dady, a0 values. 
+ */ +static void +setup_point_coefficients( struct lp_setup_context *setup, + struct point_info *info) +{ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; + unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; + unsigned slot; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; + enum lp_interp interp = key->inputs[slot].interp; + boolean perspective = !!(interp == LP_INTERP_PERSPECTIVE); + unsigned i; + + if (perspective & usage_mask) { + fragcoord_usage_mask |= TGSI_WRITEMASK_W; + } + + switch (interp) { + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0, so all need to ensure that the usage mask is covers all + * usages. + */ + fragcoord_usage_mask |= usage_mask; + break; + + case LP_INTERP_LINEAR: + /* Sprite tex coords may use linear interpolation someday */ + /* fall-through */ + case LP_INTERP_PERSPECTIVE: + /* check if the sprite coord flag is set for this attribute. + * If so, set it up so it up so x and y vary from 0 to 1. + */ + if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { + unsigned semantic_index = shader->info.base.input_semantic_index[slot]; + /* Note that sprite_coord enable is a bitfield of + * PIPE_MAX_SHADER_OUTPUTS bits. 
+ */ + if (semantic_index < PIPE_MAX_SHADER_OUTPUTS && + (setup->sprite_coord_enable & (1 << semantic_index))) { + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + texcoord_coef(setup, info, slot + 1, i, + setup->sprite_coord_origin, + perspective); + } + } + break; + } + } + /* fall-through */ + case LP_INTERP_CONSTANT: + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + if (perspective) { + point_persp_coeff(setup, info, slot+1, i); + } + else { + constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i); + } + } + } + break; + + case LP_INTERP_FACING: + for (i = 0; i < NUM_CHANNELS; i++) + if (usage_mask & (1 << i)) + constant_coef(setup, info, slot+1, + info->frontfacing ? 1.0f : -1.0f, i); + break; + + default: + assert(0); + break; + } + } + + /* The internal position input is in slot zero: + */ + setup_point_fragcoord_coef(setup, info, 0, + fragcoord_usage_mask); +} + + +static inline int +subpixel_snap(float a) +{ + return util_iround(FIXED_ONE * a); +} + +/** + * Print point vertex attribs (for debug). + */ +static void +print_point(struct lp_setup_context *setup, + const float (*v0)[4], + const float size) +{ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + uint i; + + debug_printf("llvmpipe point, width %f\n", size); + for (i = 0; i < 1 + key->num_inputs; i++) { + debug_printf(" v0[%d]: %f %f %f %f\n", i, + v0[i][0], v0[i][1], v0[i][2], v0[i][3]); + } +} + + +static boolean +try_setup_point( struct lp_setup_context *setup, + const float (*v0)[4] ) +{ + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + /* x/y positions in fixed point */ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; + const int sizeAttr = setup->psize; + const float size + = (setup->point_size_per_vertex && sizeAttr > 0) ? 
v0[sizeAttr][0] + : setup->point_size; + + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->bottom_edge_rule != 0) ? 1 : 0; + + struct lp_scene *scene = setup->scene; + struct lp_rast_triangle *point; + unsigned bytes; + struct u_rect bbox; + unsigned nr_planes = 4; + struct point_info info; + unsigned viewport_index = 0; + unsigned layer = 0; + int fixed_width; + + if (setup->viewport_index_slot > 0) { + unsigned *udata = (unsigned*)v0[setup->viewport_index_slot]; + viewport_index = lp_clamp_viewport_idx(*udata); + } + if (setup->layer_slot > 0) { + layer = *(unsigned*)v0[setup->layer_slot]; + layer = MIN2(layer, scene->fb_max_layer); + } + + if (0) + print_point(setup, v0, size); + + /* Bounding rectangle (in pixels) */ + if (!lp_context->rasterizer || + lp_context->rasterizer->point_quad_rasterization) { + /* + * Rasterize points as quads. + */ + int x0, y0; + /* Point size as fixed point integer, remove rounding errors + * and gives minimum width for very small points. + */ + fixed_width = MAX2(FIXED_ONE, subpixel_snap(size)); + + x0 = subpixel_snap(v0[0][0] - setup->pixel_offset) - fixed_width/2; + y0 = subpixel_snap(v0[0][1] - setup->pixel_offset) - fixed_width/2; + + bbox.x0 = (x0 + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (x0 + fixed_width + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (y0 + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (y0 + fixed_width + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; + } else { + /* + * OpenGL legacy rasterization rules for non-sprite points. + * + * Per OpenGL 2.1 spec, section 3.3.1, "Basic Point Rasterization". + * + * This type of point rasterization is only available in pre 3.0 contexts + * (or compatibilility contexts which we don't support) anyway. 
+ */ + + const int x0 = subpixel_snap(v0[0][0]); + const int y0 = subpixel_snap(v0[0][1]) - adj; + + int int_width; + /* Point size as fixed point integer. For GL legacy points + * the point size is always a whole integer. + */ + fixed_width = MAX2(FIXED_ONE, + (subpixel_snap(size) + FIXED_ONE/2 - 1) & ~(FIXED_ONE-1)); + int_width = fixed_width >> FIXED_ORDER; + + assert(setup->pixel_offset != 0); + + if (int_width == 1) { + bbox.x0 = x0 >> FIXED_ORDER; + bbox.y0 = y0 >> FIXED_ORDER; + bbox.x1 = bbox.x0; + bbox.y1 = bbox.y0; + } else { + if (int_width & 1) { + /* Odd width */ + bbox.x0 = (x0 >> FIXED_ORDER) - (int_width - 1)/2; + bbox.y0 = (y0 >> FIXED_ORDER) - (int_width - 1)/2; + } else { + /* Even width */ + bbox.x0 = ((x0 + FIXED_ONE/2) >> FIXED_ORDER) - int_width/2; + bbox.y0 = ((y0 + FIXED_ONE/2) >> FIXED_ORDER) - int_width/2; + } + + bbox.x1 = bbox.x0 + int_width - 1; + bbox.y1 = bbox.y0 + int_width - 1; + } + } + + if (0) { + debug_printf(" bbox: (%i, %i) - (%i, %i)\n", + bbox.x0, bbox.y0, + bbox.x1, bbox.y1); + } + + if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) { + if (0) debug_printf("offscreen\n"); + LP_COUNT(nr_culled_tris); + return TRUE; + } + + u_rect_find_intersection(&setup->draw_regions[viewport_index], &bbox); + + point = lp_setup_alloc_triangle(scene, + key->num_inputs, + nr_planes, + &bytes); + if (!point) + return FALSE; + +#ifdef DEBUG + point->v[0][0] = v0[0][0]; + point->v[0][1] = v0[0][1]; +#endif + + LP_COUNT(nr_tris); + + if (lp_context->active_statistics_queries && + !llvmpipe_rasterization_disabled(lp_context)) { + lp_context->pipeline_statistics.c_primitives++; + } + + if (draw_will_inject_frontface(lp_context->draw) && + setup->face_slot > 0) { + point->inputs.frontfacing = v0[setup->face_slot][0]; + } else { + point->inputs.frontfacing = TRUE; + } + + info.v0 = v0; + info.dx01 = 0; + info.dx12 = fixed_width; + info.dy01 = fixed_width; + info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = 
GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); + info.frontfacing = point->inputs.frontfacing; + + /* Setup parameter interpolants: + */ + setup_point_coefficients(setup, &info); + + point->inputs.disable = FALSE; + point->inputs.opaque = FALSE; + point->inputs.layer = layer; + point->inputs.viewport_index = viewport_index; + + { + struct lp_rast_plane *plane = GET_PLANES(point); + + plane[0].dcdx = -1; + plane[0].dcdy = 0; + plane[0].c = 1-bbox.x0; + plane[0].eo = 1; + + plane[1].dcdx = 1; + plane[1].dcdy = 0; + plane[1].c = bbox.x1+1; + plane[1].eo = 0; + + plane[2].dcdx = 0; + plane[2].dcdy = 1; + plane[2].c = 1-bbox.y0; + plane[2].eo = 1; + + plane[3].dcdx = 0; + plane[3].dcdy = -1; + plane[3].c = bbox.y1+1; + plane[3].eo = 0; + } + + return lp_setup_bin_triangle(setup, point, &bbox, nr_planes, viewport_index); +} + + +static void +lp_setup_point(struct lp_setup_context *setup, + const float (*v0)[4]) +{ + if (!try_setup_point( setup, v0 )) + { + if (!lp_setup_flush_and_restart(setup)) + return; + + if (!try_setup_point( setup, v0 )) + return; + } +} + + +void +lp_setup_choose_point( struct lp_setup_context *setup ) +{ + setup->point = lp_setup_point; +} + + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c new file mode 100644 index 000000000..98a9d4bc2 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -0,0 +1,1027 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Binning code for triangles
 */

#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_rect.h"
#include "util/u_sse.h"
#include "lp_perf.h"
#include "lp_setup_context.h"
#include "lp_rast.h"
#include "lp_state_fs.h"
#include "lp_state_setup.h"
#include "lp_context.h"

#include <inttypes.h>

#define NUM_CHANNELS 4

#if defined(PIPE_ARCH_SSE)
#include <emmintrin.h>
#endif

/* Convert a float coordinate into sub-pixel fixed point (FIXED_ONE units). */
static inline int
subpixel_snap(float a)
{
   return util_iround(FIXED_ONE * a);
}

/* Inverse of subpixel_snap(): sub-pixel fixed point back to float. */
static inline float
fixed_to_float(int a)
{
   return a * (1.0f / FIXED_ONE);
}


/* Position and area in fixed point coordinates */
struct fixed_position {
   int32_t x[4];        /* vertex x, fixed point; x[3] is padding (zero) */
   int32_t y[4];        /* vertex y, fixed point; y[3] is padding (zero) */
   int64_t area;        /* signed area: dx01*dy20 - dx20*dy01 (64-bit) */
   int32_t dx01;        /* x[0] - x[1] */
   int32_t dy01;        /* y[0] - y[1] */
   int32_t dx20;        /* x[2] - x[0] */
   int32_t dy20;        /* y[2] - y[0] */
};


/**
 * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
 * immediately after it.
 * The memory is allocated from the per-scene pool, not per-tile.
 * \param tri_size returns number of bytes allocated
 * \param num_inputs number of fragment shader inputs
 * \return pointer to triangle space
 */
struct lp_rast_triangle *
lp_setup_alloc_triangle(struct lp_scene *scene,
                        unsigned nr_inputs,
                        unsigned nr_planes,
                        unsigned *tri_size)
{
   /* Each of the three coefficient arrays holds (nr_inputs + 1) vec4s. */
   unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
   unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
   struct lp_rast_triangle *tri;

   /* Triangle header, then a0/dadx/dady arrays, then the edge planes. */
   *tri_size = (sizeof(struct lp_rast_triangle) +
                3 * input_array_sz +
                plane_sz);

   tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
   if (tri == NULL)
      return NULL;

   tri->inputs.stride = input_array_sz;

   {
      /* Sanity check: the plane array must end exactly *tri_size bytes in. */
      char *a = (char *)tri;
      char *b = (char *)&GET_PLANES(tri)[nr_planes];
      assert(b - a == *tri_size);
   }

   return tri;
}

/**
 * Print a vertex's window position and the interpolated attribute values
 * named by the current setup variant key (for debug).
 */
void
lp_setup_print_vertex(struct lp_setup_context *setup,
                      const char *name,
                      const float (*v)[4])
{
   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
   int i, j;

   debug_printf("   wpos (%s[0]) xyzw %f %f %f %f\n",
                name,
                v[0][0], v[0][1], v[0][2], v[0][3]);

   for (i = 0; i < key->num_inputs; i++) {
      const float *in = v[key->inputs[i].src_index];

      debug_printf("  in[%d] (%s[%d]) %s%s%s%s ",
                   i,
                   name, key->inputs[i].src_index,
                   (key->inputs[i].usage_mask & 0x1) ? "x" : " ",
                   (key->inputs[i].usage_mask & 0x2) ? "y" : " ",
                   (key->inputs[i].usage_mask & 0x4) ? "z" : " ",
                   (key->inputs[i].usage_mask & 0x8) ? "w" : " ");

      for (j = 0; j < 4; j++)
         if (key->inputs[i].usage_mask & (1<<j))
            debug_printf("%.5f ", in[j]);

      debug_printf("\n");
   }
}


/**
 * Print triangle vertex attribs (for debug).
 */
void
lp_setup_print_triangle(struct lp_setup_context *setup,
                        const float (*v0)[4],
                        const float (*v1)[4],
                        const float (*v2)[4])
{
   debug_printf("triangle\n");

   {
      const float ex = v0[0][0] - v2[0][0];
      const float ey = v0[0][1] - v2[0][1];
      const float fx = v1[0][0] - v2[0][0];
      const float fy = v1[0][1] - v2[0][1];

      /* det = cross(e,f).z */
      const float det = ex * fy - ey * fx;
      if (det < 0.0f)
         debug_printf("   - ccw\n");
      else if (det > 0.0f)
         debug_printf("   - cw\n");
      else
         debug_printf("   - zero area\n");
   }

   lp_setup_print_vertex(setup, "v0", v0);
   lp_setup_print_vertex(setup, "v1", v1);
   lp_setup_print_vertex(setup, "v2", v2);
}


#define MAX_PLANES 8

/* Rasterizer triangle opcode indexed by plane count (1..MAX_PLANES). */
static unsigned
lp_rast_tri_tab[MAX_PLANES+1] = {
   0,               /* should be impossible */
   LP_RAST_OP_TRIANGLE_1,
   LP_RAST_OP_TRIANGLE_2,
   LP_RAST_OP_TRIANGLE_3,
   LP_RAST_OP_TRIANGLE_4,
   LP_RAST_OP_TRIANGLE_5,
   LP_RAST_OP_TRIANGLE_6,
   LP_RAST_OP_TRIANGLE_7,
   LP_RAST_OP_TRIANGLE_8
};

/* Same, for the *_32_* opcode variants (chosen via use_32bits below). */
static unsigned
lp_rast_32_tri_tab[MAX_PLANES+1] = {
   0,               /* should be impossible */
   LP_RAST_OP_TRIANGLE_32_1,
   LP_RAST_OP_TRIANGLE_32_2,
   LP_RAST_OP_TRIANGLE_32_3,
   LP_RAST_OP_TRIANGLE_32_4,
   LP_RAST_OP_TRIANGLE_32_5,
   LP_RAST_OP_TRIANGLE_32_6,
   LP_RAST_OP_TRIANGLE_32_7,
   LP_RAST_OP_TRIANGLE_32_8
};



/**
 * The primitive covers the whole tile- shade whole tile.
 *
 * \param tx, ty  the tile position in tiles, not pixels
 */
static boolean
lp_setup_whole_tile(struct lp_setup_context *setup,
                    const struct lp_rast_shader_inputs *inputs,
                    int tx, int ty)
{
   struct lp_scene *scene = setup->scene;

   LP_COUNT(nr_fully_covered_64);

   /* if variant is opaque and scissor doesn't effect the tile */
   if (inputs->opaque) {
      /* Several things prevent this optimization from working:
       * - For layered rendering we can't determine if this covers the same layer
       * as previous rendering (or in case of clears those actually always cover
       * all layers so optimization is impossible). Need to use fb_max_layer and
       * not setup->layer_slot to determine this since even if there's currently
       * no slot assigned previous rendering could have used one.
       * - If there were any Begin/End query commands in the scene then those
       * would get removed which would be very wrong. Furthermore, if queries
       * were just active we also can't do the optimization since to get
       * accurate query results we unfortunately need to execute the rendering
       * commands.
       */
      if (!scene->fb.zsbuf && scene->fb_max_layer == 0 && !scene->had_queries) {
         /*
          * All previous rendering will be overwritten so reset the bin.
          */
         lp_scene_bin_reset( scene, tx, ty );
      }

      LP_COUNT(nr_shade_opaque_64);
      return lp_scene_bin_cmd_with_state( scene, tx, ty,
                                          setup->fs.stored,
                                          LP_RAST_OP_SHADE_TILE_OPAQUE,
                                          lp_rast_arg_inputs(inputs) );
   } else {
      LP_COUNT(nr_shade_64);
      return lp_scene_bin_cmd_with_state( scene, tx, ty,
                                          setup->fs.stored,
                                          LP_RAST_OP_SHADE_TILE,
                                          lp_rast_arg_inputs(inputs) );
   }
}


/**
 * Do basic setup for triangle rasterization and determine which
 * framebuffer tiles are touched.  Put the triangle in the scene's
 * bins for the tiles which we overlap.
 */
static boolean
do_triangle_ccw(struct lp_setup_context *setup,
                struct fixed_position* position,
                const float (*v0)[4],
                const float (*v1)[4],
                const float (*v2)[4],
                boolean frontfacing )
{
   struct lp_scene *scene = setup->scene;
   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
   struct lp_rast_triangle *tri;
   struct lp_rast_plane *plane;
   struct u_rect bbox;
   unsigned tri_bytes;
   int nr_planes = 3;
   unsigned viewport_index = 0;
   unsigned layer = 0;

   /* Area should always be positive here */
   assert(position->area > 0);

   if (0)
      lp_setup_print_triangle(setup, v0, v1, v2);

   /* Scissoring adds four extra clip planes (3 edges + 4 scissor sides). */
   if (setup->scissor_test) {
      nr_planes = 7;
      if (setup->viewport_index_slot > 0) {
         unsigned *udata = (unsigned*)v0[setup->viewport_index_slot];
         viewport_index = lp_clamp_viewport_idx(*udata);
      }
   }
   else {
      nr_planes = 3;
   }
   if (setup->layer_slot > 0) {
      layer = *(unsigned*)v1[setup->layer_slot];
      layer = MIN2(layer, scene->fb_max_layer);
   }

   /* Bounding rectangle (in pixels) */
   {
      /* Yes this is necessary to accurately calculate bounding boxes
       * with the two fill-conventions we support.  GL (normally) ends
       * up needing a bottom-left fill convention, which requires
       * slightly different rounding.
       */
      int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;

      /* Inclusive x0, exclusive x1 */
      bbox.x0 = MIN3(position->x[0], position->x[1], position->x[2]) >> FIXED_ORDER;
      bbox.x1 = (MAX3(position->x[0], position->x[1], position->x[2]) - 1) >> FIXED_ORDER;

      /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
      bbox.y0 = (MIN3(position->y[0], position->y[1], position->y[2]) + adj) >> FIXED_ORDER;
      bbox.y1 = (MAX3(position->y[0], position->y[1], position->y[2]) - 1 + adj) >> FIXED_ORDER;
   }

   if (bbox.x1 < bbox.x0 ||
       bbox.y1 < bbox.y0) {
      if (0) debug_printf("empty bounding box\n");
      LP_COUNT(nr_culled_tris);
      return TRUE;
   }

   if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
      if (0) debug_printf("offscreen\n");
      LP_COUNT(nr_culled_tris);
      return TRUE;
   }

   /* Can safely discard negative regions, but need to keep hold of
    * information about when the triangle extends past screen
    * boundaries.  See trimmed_box in lp_setup_bin_triangle().
    */
   bbox.x0 = MAX2(bbox.x0, 0);
   bbox.y0 = MAX2(bbox.y0, 0);

   tri = lp_setup_alloc_triangle(scene,
                                 key->num_inputs,
                                 nr_planes,
                                 &tri_bytes);
   if (!tri)
      return FALSE;

#if 0
   tri->v[0][0] = v0[0][0];
   tri->v[1][0] = v1[0][0];
   tri->v[2][0] = v2[0][0];
   tri->v[0][1] = v0[0][1];
   tri->v[1][1] = v1[0][1];
   tri->v[2][1] = v2[0][1];
#endif

   LP_COUNT(nr_tris);

   /* Setup parameter interpolants:
    */
   setup->setup.variant->jit_function( v0,
                                       v1,
                                       v2,
                                       frontfacing,
                                       GET_A0(&tri->inputs),
                                       GET_DADX(&tri->inputs),
                                       GET_DADY(&tri->inputs) );

   tri->inputs.frontfacing = frontfacing;
   tri->inputs.disable = FALSE;
   tri->inputs.opaque = setup->fs.current.variant->opaque;
   tri->inputs.layer = layer;
   tri->inputs.viewport_index = viewport_index;

   if (0)
      lp_dump_setup_coef(&setup->setup.variant->key,
                         (const float (*)[4])GET_A0(&tri->inputs),
                         (const float (*)[4])GET_DADX(&tri->inputs),
                         (const float (*)[4])GET_DADY(&tri->inputs));

   plane = GET_PLANES(tri);

#if defined(PIPE_ARCH_SSE)
   if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
       setup->fb.height <= MAX_FIXED_LENGTH32 &&
       (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
       (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
      __m128i vertx, verty;
      __m128i shufx, shufy;
      __m128i dcdx, dcdy, c;
      __m128i unused;
      __m128i dcdx_neg_mask;
      __m128i dcdy_neg_mask;
      __m128i dcdx_zero_mask;
      __m128i top_left_flag;
      __m128i c_inc_mask, c_inc;
      __m128i eo, p0, p1, p2;
      __m128i zero = _mm_setzero_si128();
      PIPE_ALIGN_VAR(16) int32_t temp_vec[4];

      vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
      verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */

      shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
      shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));

      dcdx = _mm_sub_epi32(verty, shufy);
      dcdy = _mm_sub_epi32(vertx, shufx);

      dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
      dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
      dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);

      top_left_flag = _mm_set1_epi32((setup->bottom_edge_rule == 0) ? ~0 : 0);

      c_inc_mask = _mm_or_si128(dcdx_neg_mask,
                                _mm_and_si128(dcdx_zero_mask,
                                              _mm_xor_si128(dcdy_neg_mask,
                                                            top_left_flag)));

      c_inc = _mm_srli_epi32(c_inc_mask, 31);

      c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
                        mm_mullo_epi32(dcdy, verty));

      c = _mm_add_epi32(c, c_inc);

      /* Scale up to match c:
       */
      dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
      dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);

      /* Calculate trivial reject values:
       */
      eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
                         _mm_and_si128(dcdx_neg_mask, dcdx));

      /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */

      /* Pointless transpose which gets undone immediately in
       * rasterization:
       */
      transpose4_epi32(&c, &dcdx, &dcdy, &eo,
                       &p0, &p1, &p2, &unused);

#define STORE_PLANE(plane, vec) do {                 \
         _mm_store_si128((__m128i *)&temp_vec, vec); \
         plane.c = (int64_t)temp_vec[0];             \
         plane.dcdx = temp_vec[1];                   \
         plane.dcdy = temp_vec[2];                   \
         plane.eo = temp_vec[3];                     \
      } while(0)

      STORE_PLANE(plane[0], p0);
      STORE_PLANE(plane[1], p1);
      STORE_PLANE(plane[2], p2);
#undef STORE_PLANE
   } else
#endif
   {
      int i;
      plane[0].dcdy = position->dx01;
      plane[1].dcdy = position->x[1] - position->x[2];
      plane[2].dcdy = position->dx20;
      plane[0].dcdx = position->dy01;
      plane[1].dcdx = position->y[1] - position->y[2];
      plane[2].dcdx = position->dy20;

      for (i = 0; i < 3; i++) {
         /* half-edge constants, will be iterated over the whole render
          * target.
          */
         plane[i].c = IMUL64(plane[i].dcdx, position->x[i]) -
                      IMUL64(plane[i].dcdy, position->y[i]);

         /* correct for top-left vs. bottom-left fill convention.
          */
         if (plane[i].dcdx < 0) {
            /* both fill conventions want this - adjust for left edges */
            plane[i].c++;
         }
         else if (plane[i].dcdx == 0) {
            if (setup->bottom_edge_rule == 0){
               /* correct for top-left fill convention:
                */
               if (plane[i].dcdy > 0) plane[i].c++;
            }
            else {
               /* correct for bottom-left fill convention:
                */
               if (plane[i].dcdy < 0) plane[i].c++;
            }
         }

         /* Scale up to match c:
          */
         assert((plane[i].dcdx << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdx);
         assert((plane[i].dcdy << FIXED_ORDER) >> FIXED_ORDER == plane[i].dcdy);
         plane[i].dcdx <<= FIXED_ORDER;
         plane[i].dcdy <<= FIXED_ORDER;

         /* find trivial reject offsets for each edge for a single-pixel
          * sized block.  These will be scaled up at each recursive level to
          * match the active blocksize.  Scaling in this way works best if
          * the blocks are square.
          */
         plane[i].eo = 0;
         if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
         if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
      }
   }

   if (0) {
      debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
                   plane[0].c,
                   plane[0].dcdx,
                   plane[0].dcdy,
                   plane[0].eo);

      debug_printf("p1: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
                   plane[1].c,
                   plane[1].dcdx,
                   plane[1].dcdy,
                   plane[1].eo);

      debug_printf("p2: %"PRIx64"/%08x/%08x/%"PRIx64"\n",
                   plane[2].c,
                   plane[2].dcdx,
                   plane[2].dcdy,
                   plane[2].eo);
   }


   /*
    * When rasterizing scissored tris, use the intersection of the
    * triangle bounding box and the scissor rect to generate the
    * scissor planes.
    *
    * This permits us to cut off the triangle "tails" that are present
    * in the intermediate recursive levels caused when two of the
    * triangles edges don't diverge quickly enough to trivially reject
    * exterior blocks from the triangle.
    *
    * It's not really clear if it's worth worrying about these tails,
    * but since we generate the planes for each scissored tri, it's
    * free to trim them in this case.
    *
    * Note that otherwise, the scissor planes only vary in 'C' value,
    * and even then only on state-changes.  Could alternatively store
    * these planes elsewhere.
    */
   if (nr_planes == 7) {
      const struct u_rect *scissor = &setup->scissors[viewport_index];

      plane[3].dcdx = -1;
      plane[3].dcdy = 0;
      plane[3].c = 1-scissor->x0;
      plane[3].eo = 1;

      plane[4].dcdx = 1;
      plane[4].dcdy = 0;
      plane[4].c = scissor->x1+1;
      plane[4].eo = 0;

      plane[5].dcdx = 0;
      plane[5].dcdy = 1;
      plane[5].c = 1-scissor->y0;
      plane[5].eo = 1;

      plane[6].dcdx = 0;
      plane[6].dcdy = -1;
      plane[6].c = scissor->y1+1;
      plane[6].eo = 0;
   }

   return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
}

/*
 * Round to nearest less or equal power of two of the input.
 *
 * Undefined if no bit set exists, so code should check against 0 first.
 */
static inline uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   if (n == 0)
      return 0;

   __asm__("bsr %1,%0"
           : "=r" (n)
           : "rm" (n));
   return 1 << n;
#else
   n |= (n >> 1);
   n |= (n >> 2);
   n |= (n >> 4);
   n |= (n >> 8);
   n |= (n >> 16);
   return n - (n >> 1);
#endif
}


/* Bin an already set-up triangle into the scene's tile bins. */
boolean
lp_setup_bin_triangle( struct lp_setup_context *setup,
                       struct lp_rast_triangle *tri,
                       const struct u_rect *bbox,
                       int nr_planes,
                       unsigned viewport_index )
{
   struct lp_scene *scene = setup->scene;
   struct u_rect trimmed_box = *bbox;
   int i;
   /* What is the largest power-of-two boundary this triangle crosses:
    */
   int dx = floor_pot((bbox->x0 ^ bbox->x1) |
                      (bbox->y0 ^ bbox->y1));

   /* The largest dimension of the rasterized area of the triangle
    * (aligned to a 4x4 grid), rounded down to the nearest power of two:
    */
   int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) |
                 (bbox->y1 - (bbox->y0 & ~3)));
   int sz = floor_pot(max_sz);
   boolean use_32bits = max_sz <= MAX_FIXED_LENGTH32;

   /* Now apply scissor, etc to the bounding box.  Could do this
    * earlier, but it confuses the logic for tri-16 and would force
    * the rasterizer to also respect scissor, etc, just for the rare
    * cases where a small triangle extends beyond the scissor.
    */
   u_rect_find_intersection(&setup->draw_regions[viewport_index],
                            &trimmed_box);

   /* Determine which tile(s) intersect the triangle's bounding box
    */
   if (dx < TILE_SIZE)
   {
      /* Fast path: the bbox fits within one tile. */
      int ix0 = bbox->x0 / TILE_SIZE;
      int iy0 = bbox->y0 / TILE_SIZE;
      unsigned px = bbox->x0 & 63 & ~3;
      unsigned py = bbox->y0 & 63 & ~3;

      assert(iy0 == bbox->y1 / TILE_SIZE &&
             ix0 == bbox->x1 / TILE_SIZE);

      if (nr_planes == 3) {
         if (sz < 4)
         {
            /* Triangle is contained in a single 4x4 stamp:
             */
            assert(px + 4 <= TILE_SIZE);
            assert(py + 4 <= TILE_SIZE);
            return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
                                                setup->fs.stored,
                                                use_32bits ?
                                                LP_RAST_OP_TRIANGLE_32_3_4 :
                                                LP_RAST_OP_TRIANGLE_3_4,
                                                lp_rast_arg_triangle_contained(tri, px, py) );
         }

         if (sz < 16)
         {
            /* Triangle is contained in a single 16x16 block:
             */

            /*
             * The 16x16 block is only 4x4 aligned, and can exceed the tile
             * dimensions if the triangle is 16 pixels in one dimension but 4
             * in the other.  So budge the 16x16 back inside the tile.
             */
            px = MIN2(px, TILE_SIZE - 16);
            py = MIN2(py, TILE_SIZE - 16);

            assert(px + 16 <= TILE_SIZE);
            assert(py + 16 <= TILE_SIZE);

            return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
                                                setup->fs.stored,
                                                use_32bits ?
                                                LP_RAST_OP_TRIANGLE_32_3_16 :
                                                LP_RAST_OP_TRIANGLE_3_16,
                                                lp_rast_arg_triangle_contained(tri, px, py) );
         }
      }
      else if (nr_planes == 4 && sz < 16)
      {
         px = MIN2(px, TILE_SIZE - 16);
         py = MIN2(py, TILE_SIZE - 16);

         assert(px + 16 <= TILE_SIZE);
         assert(py + 16 <= TILE_SIZE);

         return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
                                            setup->fs.stored,
                                            use_32bits ?
                                            LP_RAST_OP_TRIANGLE_32_4_16 :
                                            LP_RAST_OP_TRIANGLE_4_16,
                                            lp_rast_arg_triangle_contained(tri, px, py));
      }


      /* Triangle is contained in a single tile:
       */
      return lp_scene_bin_cmd_with_state(
         scene, ix0, iy0, setup->fs.stored,
         use_32bits ? lp_rast_32_tri_tab[nr_planes] : lp_rast_tri_tab[nr_planes],
         lp_rast_arg_triangle(tri, (1<<nr_planes)-1));
   }
   else
   {
      /* General path: walk every tile the trimmed bbox touches and
       * classify it as empty / partial / fully covered using the
       * incremental edge-function values.
       */
      struct lp_rast_plane *plane = GET_PLANES(tri);
      int64_t c[MAX_PLANES];
      int64_t ei[MAX_PLANES];

      int64_t eo[MAX_PLANES];
      int64_t xstep[MAX_PLANES];
      int64_t ystep[MAX_PLANES];
      int x, y;

      int ix0 = trimmed_box.x0 / TILE_SIZE;
      int iy0 = trimmed_box.y0 / TILE_SIZE;
      int ix1 = trimmed_box.x1 / TILE_SIZE;
      int iy1 = trimmed_box.y1 / TILE_SIZE;

      for (i = 0; i < nr_planes; i++) {
         c[i] = (plane[i].c +
                 IMUL64(plane[i].dcdy, iy0) * TILE_SIZE -
                 IMUL64(plane[i].dcdx, ix0) * TILE_SIZE);

         ei[i] = (plane[i].dcdy -
                  plane[i].dcdx -
                  plane[i].eo) << TILE_ORDER;

         eo[i] = plane[i].eo << TILE_ORDER;
         xstep[i] = -(((int64_t)plane[i].dcdx) << TILE_ORDER);
         ystep[i] = ((int64_t)plane[i].dcdy) << TILE_ORDER;
      }



      /* Test tile-sized blocks against the triangle.
       * Discard blocks fully outside the tri.  If the block is fully
       * contained inside the tri, bin an lp_rast_shade_tile command.
       * Else, bin a lp_rast_triangle command.
       */
      for (y = iy0; y <= iy1; y++)
      {
         boolean in = FALSE;  /* are we inside the triangle? */
         int64_t cx[MAX_PLANES];

         for (i = 0; i < nr_planes; i++)
            cx[i] = c[i];

         for (x = ix0; x <= ix1; x++)
         {
            int out = 0;
            int partial = 0;

            for (i = 0; i < nr_planes; i++) {
               int64_t planeout = cx[i] + eo[i];
               int64_t planepartial = cx[i] + ei[i] - 1;
               out |= (int) (planeout >> 63);
               partial |= ((int) (planepartial >> 63)) & (1<<i);
            }

            if (out) {
               /* do nothing */
               if (in)
                  break;  /* exiting triangle, all done with this row */
               LP_COUNT(nr_empty_64);
            }
            else if (partial) {
               /* Not trivially accepted by at least one plane -
                * rasterize/shade partial tile
                */
               int count = util_bitcount(partial);
               in = TRUE;

               if (!lp_scene_bin_cmd_with_state( scene, x, y,
                                                 setup->fs.stored,
                                                 use_32bits ?
                                                 lp_rast_32_tri_tab[count] :
                                                 lp_rast_tri_tab[count],
                                                 lp_rast_arg_triangle(tri, partial) ))
                  goto fail;

               LP_COUNT(nr_partially_covered_64);
            }
            else {
               /* triangle covers the whole tile- shade whole tile */
               LP_COUNT(nr_fully_covered_64);
               in = TRUE;
               if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
                  goto fail;
            }

            /* Iterate cx values across the region: */
            for (i = 0; i < nr_planes; i++)
               cx[i] += xstep[i];
         }

         /* Iterate c values down the region: */
         for (i = 0; i < nr_planes; i++)
            c[i] += ystep[i];
      }
   }

   return TRUE;

fail:
   /* Need to disable any partially binned triangle.  This is easier
    * than trying to locate all the triangle, shade-tile, etc,
    * commands which may have been binned.
    */
   tri->inputs.disable = TRUE;
   return FALSE;
}


/**
 * Try to draw the triangle, restart the scene on failure.
+ */ +static void retry_triangle_ccw( struct lp_setup_context *setup, + struct fixed_position* position, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front) +{ + if (!do_triangle_ccw( setup, position, v0, v1, v2, front )) + { + if (!lp_setup_flush_and_restart(setup)) + return; + + if (!do_triangle_ccw( setup, position, v0, v1, v2, front )) + return; + } +} + +/** + * Calculate fixed position data for a triangle + */ +static inline void +calc_fixed_position( struct lp_setup_context *setup, + struct fixed_position* position, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + position->x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); + position->x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); + position->x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + position->x[3] = 0; + + position->y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); + position->y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); + position->y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); + position->y[3] = 0; + + position->dx01 = position->x[0] - position->x[1]; + position->dy01 = position->y[0] - position->y[1]; + + position->dx20 = position->x[2] - position->x[0]; + position->dy20 = position->y[2] - position->y[0]; + + position->area = IMUL64(position->dx01, position->dy20) - + IMUL64(position->dx20, position->dy01); +} + + +/** + * Rotate a triangle, flipping its clockwise direction, + * Swaps values for xy[0] and xy[1] + */ +static inline void +rotate_fixed_position_01( struct fixed_position* position ) +{ + int x, y; + + x = position->x[1]; + y = position->y[1]; + position->x[1] = position->x[0]; + position->y[1] = position->y[0]; + position->x[0] = x; + position->y[0] = y; + + position->dx01 = -position->dx01; + position->dy01 = -position->dy01; + position->dx20 = position->x[2] - position->x[0]; + position->dy20 = position->y[2] - position->y[0]; + + position->area = -position->area; +} + + +/** + * 
Rotate a triangle, flipping its clockwise direction, + * Swaps values for xy[1] and xy[2] + */ +static inline void +rotate_fixed_position_12( struct fixed_position* position ) +{ + int x, y; + + x = position->x[2]; + y = position->y[2]; + position->x[2] = position->x[1]; + position->y[2] = position->y[1]; + position->x[1] = x; + position->y[1] = y; + + x = position->dx01; + y = position->dy01; + position->dx01 = -position->dx20; + position->dy01 = -position->dy20; + position->dx20 = -x; + position->dy20 = -y; + + position->area = -position->area; +} + + +/** + * Draw triangle if it's CW, cull otherwise. + */ +static void triangle_cw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + struct fixed_position position; + + calc_fixed_position(setup, &position, v0, v1, v2); + + if (position.area < 0) { + if (setup->flatshade_first) { + rotate_fixed_position_12(&position); + retry_triangle_ccw(setup, &position, v0, v2, v1, !setup->ccw_is_frontface); + } else { + rotate_fixed_position_01(&position); + retry_triangle_ccw(setup, &position, v1, v0, v2, !setup->ccw_is_frontface); + } + } +} + + +static void triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + struct fixed_position position; + + calc_fixed_position(setup, &position, v0, v1, v2); + + if (position.area > 0) + retry_triangle_ccw(setup, &position, v0, v1, v2, setup->ccw_is_frontface); +} + +/** + * Draw triangle whether it's CW or CCW. 
 */
static void triangle_both( struct lp_setup_context *setup,
                           const float (*v0)[4],
                           const float (*v1)[4],
                           const float (*v2)[4] )
{
   struct fixed_position position;
   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;

   /* Pipeline-statistics queries count every primitive, whatever its
    * winding, as long as rasterization isn't disabled.
    */
   if (lp_context->active_statistics_queries &&
       !llvmpipe_rasterization_disabled(lp_context)) {
      lp_context->pipeline_statistics.c_primitives++;
   }

   calc_fixed_position(setup, &position, v0, v1, v2);

   if (0) {
      assert(!util_is_inf_or_nan(v0[0][0]));
      assert(!util_is_inf_or_nan(v0[0][1]));
      assert(!util_is_inf_or_nan(v1[0][0]));
      assert(!util_is_inf_or_nan(v1[0][1]));
      assert(!util_is_inf_or_nan(v2[0][0]));
      assert(!util_is_inf_or_nan(v2[0][1]));
   }

   /* CCW goes straight to the binner; CW is rotated to CCW first.
    * Zero-area triangles are dropped.
    */
   if (position.area > 0)
      retry_triangle_ccw( setup, &position, v0, v1, v2, setup->ccw_is_frontface );
   else if (position.area < 0) {
      if (setup->flatshade_first) {
         rotate_fixed_position_12( &position );
         retry_triangle_ccw( setup, &position, v0, v2, v1, !setup->ccw_is_frontface );
      } else {
         rotate_fixed_position_01( &position );
         retry_triangle_ccw( setup, &position, v1, v0, v2, !setup->ccw_is_frontface );
      }
   }
}


/* No-op triangle callback: bins nothing (used when everything is culled). */
static void triangle_nop( struct lp_setup_context *setup,
                          const float (*v0)[4],
                          const float (*v1)[4],
                          const float (*v2)[4] )
{
}


/* Select the triangle binning callback from the current cull state. */
void
lp_setup_choose_triangle( struct lp_setup_context *setup )
{
   switch (setup->cullmode) {
   case PIPE_FACE_NONE:
      setup->triangle = triangle_both;
      break;
   case PIPE_FACE_BACK:
      setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
      break;
   case PIPE_FACE_FRONT:
      setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
      break;
   default:
      setup->triangle = triangle_nop;
      break;
   }
}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
new file mode 100644
index 000000000..534c5f48a
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -0,0 +1,602 @@
/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * Interface between 'draw' module's output and the llvmpipe rasterizer/setup
 * code.  When the 'draw' module has finished filling a vertex buffer, the
 * draw_arrays() functions below will be called.  Loop over the vertices and
 * call the point/line/tri setup functions.
+ * + * Authors + * Brian Paul + */ + + +#include "lp_setup_context.h" +#include "lp_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "util/u_memory.h" + + +#define LP_MAX_VBUF_INDEXES 1024 +#define LP_MAX_VBUF_SIZE 4096 + + + +/** cast wrapper */ +static struct lp_setup_context * +lp_setup_context(struct vbuf_render *vbr) +{ + return (struct lp_setup_context *) vbr; +} + + + +static const struct vertex_info * +lp_setup_get_vertex_info(struct vbuf_render *vbr) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + + /* Vertex size/info depends on the latest state. + * The draw module may have issued additional state-change commands. + */ + lp_setup_update_state(setup, FALSE); + + return setup->vertex_info; +} + + +static boolean +lp_setup_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + unsigned size = vertex_size * nr_vertices; + + if (setup->vertex_buffer_size < size) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = align_malloc(size, 16); + setup->vertex_buffer_size = size; + } + + setup->vertex_size = vertex_size; + setup->nr_vertices = nr_vertices; + + return setup->vertex_buffer != NULL; +} + +static void +lp_setup_release_vertices(struct vbuf_render *vbr) +{ + /* keep the old allocation for next time */ +} + +static void * +lp_setup_map_vertices(struct vbuf_render *vbr) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + return setup->vertex_buffer; +} + +static void +lp_setup_unmap_vertices(struct vbuf_render *vbr, + ushort min_index, + ushort max_index ) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size ); + /* do nothing */ +} + + +static void +lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + lp_setup_context(vbr)->prim = prim; +} + +typedef const float (*const_float4_ptr)[4]; + +static 
inline const_float4_ptr get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (const_float4_ptr)((char *)vertex_buffer + index * stride); +} + +/** + * draw elements / indexed primitives + */ +static void +lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = setup->vertex_buffer; + const boolean flatshade_first = setup->flatshade_first; + unsigned i; + + assert(setup->setup.variant); + + if (!lp_setup_update_state(setup, TRUE)) + return; + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatshade_first) { + for (i = 2; i < nr; i += 1) { + /* emit first triangle vertex as first triangle vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, 
indices[i-2], stride), + get_vert(vertex_buffer, indices[i+(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-(i&1)], stride) ); + + } + } + else { + for (i = 2; i < nr; i += 1) { + /* emit last triangle vertex as last triangle vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (flatshade_first) { + for (i = 2; i < nr; i += 1) { + /* emit first non-spoke vertex as first vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + /* emit last non-spoke vertex as last vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + /* GL quads don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], 
stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + /* GL quad strips don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. + */ + if (flatshade_first) { + /* emit first polygon vertex as first triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + } + else { + /* emit first polygon vertex as last triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + } + break; + + default: + assert(0); + } +} + + +/** + * This function is hit when the draw module is working in pass-through mode. 
+ * It's up to us to convert the vertex array into point/line/tri prims. + */ +static void +lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + const unsigned stride = setup->vertex_info->size * sizeof(float); + const void *vertex_buffer = + (void *) get_vert(setup->vertex_buffer, start, stride); + const boolean flatshade_first = setup->flatshade_first; + unsigned i; + + if (!lp_setup_update_state(setup, TRUE)) + return; + + switch (setup->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup->point( setup, + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup->line( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup->line( setup, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatshade_first) { + for (i = 2; i < nr; i++) { + /* emit first triangle vertex as first triangle vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i+(i&1)-1, stride), + get_vert(vertex_buffer, i-(i&1), stride) ); + } + } + else { + for (i = 2; i < nr; i++) { + /* emit last triangle vertex as last triangle vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, 
i+(i&1)-2, stride), + get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (flatshade_first) { + for (i = 2; i < nr; i += 1) { + /* emit first non-spoke vertex as first vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride) ); + } + } + else { + for (i = 2; i < nr; i += 1) { + /* emit last non-spoke vertex as last vertex */ + setup->triangle( setup, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUADS: + /* GL quads don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + /* GL quad strips don't follow provoking vertex convention */ + if (flatshade_first) { + /* emit last quad vertex as first triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-0, 
stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride) ); + } + } + else { + /* emit last quad vertex as last triangle vertex */ + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + break; + + case PIPE_PRIM_POLYGON: + /* Almost same as tri fan but the _first_ vertex specifies the flat + * shading color. + */ + if (flatshade_first) { + /* emit first polygon vertex as first triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + } + else { + /* emit first polygon vertex as last triangle vertex */ + for (i = 2; i < nr; i += 1) { + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride), + get_vert(vertex_buffer, 0, stride) ); + } + } + break; + + default: + assert(0); + } +} + + + +static void +lp_setup_vbuf_destroy(struct vbuf_render *vbr) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + if (setup->vertex_buffer) { + align_free(setup->vertex_buffer); + setup->vertex_buffer = NULL; + } + lp_setup_destroy(setup); +} + +/* + * FIXME: it is unclear if primitives_storage_needed (which is generally + * the same as pipe query num_primitives_generated) should increase + * if SO is disabled for d3d10, but for GL we definitely need to + * increase num_primitives_generated and this is only called for active + * SO. If it must not increase for d3d10 need to disambiguate the counters + * in the driver and do some work for getting correct values, if it should + * increase too should call this from outside streamout code. 
+ */ +static void +lp_setup_so_info(struct vbuf_render *vbr, uint primitives, uint prim_generated) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + struct llvmpipe_context *lp = llvmpipe_context(setup->pipe); + + lp->so_stats.num_primitives_written += primitives; + lp->so_stats.primitives_storage_needed += prim_generated; +} + +static void +lp_setup_pipeline_statistics( + struct vbuf_render *vbr, + const struct pipe_query_data_pipeline_statistics *stats) +{ + struct lp_setup_context *setup = lp_setup_context(vbr); + struct llvmpipe_context *llvmpipe = llvmpipe_context(setup->pipe); + + llvmpipe->pipeline_statistics.ia_vertices += + stats->ia_vertices; + llvmpipe->pipeline_statistics.ia_primitives += + stats->ia_primitives; + llvmpipe->pipeline_statistics.vs_invocations += + stats->vs_invocations; + llvmpipe->pipeline_statistics.gs_invocations += + stats->gs_invocations; + llvmpipe->pipeline_statistics.gs_primitives += + stats->gs_primitives; + if (!llvmpipe_rasterization_disabled(llvmpipe)) { + llvmpipe->pipeline_statistics.c_invocations += + stats->c_invocations; + } else { + llvmpipe->pipeline_statistics.c_invocations = 0; + } +} + +/** + * Create the post-transform vertex handler for the given context. 
+ */ +void +lp_setup_init_vbuf(struct lp_setup_context *setup) +{ + setup->base.max_indices = LP_MAX_VBUF_INDEXES; + setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; + + setup->base.get_vertex_info = lp_setup_get_vertex_info; + setup->base.allocate_vertices = lp_setup_allocate_vertices; + setup->base.map_vertices = lp_setup_map_vertices; + setup->base.unmap_vertices = lp_setup_unmap_vertices; + setup->base.set_primitive = lp_setup_set_primitive; + setup->base.draw_elements = lp_setup_draw_elements; + setup->base.draw_arrays = lp_setup_draw_arrays; + setup->base.release_vertices = lp_setup_release_vertices; + setup->base.destroy = lp_setup_vbuf_destroy; + setup->base.set_stream_output_info = lp_setup_so_info; + setup->base.pipeline_statistics = lp_setup_pipeline_statistics; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h new file mode 100644 index 000000000..2da6caaef --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state.h @@ -0,0 +1,145 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keithw@vmware.com> + */ + +#ifndef LP_STATE_H +#define LP_STATE_H + +#include "pipe/p_state.h" +#include "lp_jit.h" +#include "lp_state_fs.h" +#include "gallivm/lp_bld.h" + + +#define LP_NEW_VIEWPORT 0x1 +#define LP_NEW_RASTERIZER 0x2 +#define LP_NEW_FS 0x4 +#define LP_NEW_BLEND 0x8 +#define LP_NEW_CLIP 0x10 +#define LP_NEW_SCISSOR 0x20 +#define LP_NEW_STIPPLE 0x40 +#define LP_NEW_FRAMEBUFFER 0x80 +#define LP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define LP_NEW_CONSTANTS 0x200 +#define LP_NEW_SAMPLER 0x400 +#define LP_NEW_SAMPLER_VIEW 0x800 +#define LP_NEW_VERTEX 0x1000 +#define LP_NEW_VS 0x2000 +#define LP_NEW_OCCLUSION_QUERY 0x4000 +#define LP_NEW_BLEND_COLOR 0x8000 +#define LP_NEW_GS 0x10000 +#define LP_NEW_SO 0x20000 +#define LP_NEW_SO_BUFFERS 0x40000 + + + +struct vertex_info; +struct pipe_context; +struct llvmpipe_context; + + + +struct lp_geometry_shader { + boolean no_tokens; + struct pipe_stream_output_info stream_output; + struct draw_geometry_shader *dgs; +}; + +/** Vertex element state */ +struct lp_velems_state +{ + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +}; + +struct lp_so_state { + struct pipe_stream_output_info base; +}; + + +void +llvmpipe_set_framebuffer_state(struct pipe_context *, + const struct pipe_framebuffer_state *); + +void +llvmpipe_update_fs(struct llvmpipe_context *lp); + +void +llvmpipe_update_setup(struct 
llvmpipe_context *lp); + +void +llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe); + +void +llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *ctx, + unsigned num, + struct pipe_sampler_view **views); +void +llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx); + + +void +llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *ctx, + unsigned num, + struct pipe_sampler_view **views); +void +llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx); + + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c new file mode 100644 index 000000000..e38de9aca --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @author Jose Fonseca <jfonseca@vmware.com> + * @author Keith Whitwell <keithw@vmware.com> + */ + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_dump.h" +#include "draw/draw_context.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_state.h" +#include "lp_debug.h" + + +static void * +llvmpipe_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct pipe_blend_state *state = mem_dup(blend, sizeof *blend); + int i; + + if (LP_PERF & PERF_NO_BLEND) { + state->independent_blend_enable = 0; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + state->rt[i].blend_enable = 0; + } + + return state; +} + + +static void +llvmpipe_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->blend = blend; + + llvmpipe->dirty |= LP_NEW_BLEND; +} + + +static void +llvmpipe_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE( blend ); +} + + +static void +llvmpipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *blend_color) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if(!blend_color) + return; + + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); + + llvmpipe->dirty |= LP_NEW_BLEND_COLOR; +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. 
+ */ + + +static void * +llvmpipe_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct pipe_depth_stencil_alpha_state *state; + + state = mem_dup(depth_stencil, sizeof *depth_stencil); + + if (LP_PERF & PERF_NO_DEPTH) { + state->depth.enabled = 0; + state->depth.writemask = 0; + state->stencil[0].enabled = 0; + state->stencil[1].enabled = 0; + } + + if (LP_PERF & PERF_NO_ALPHATEST) { + state->alpha.enabled = 0; + } + + return state; +} + + +static void +llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; + + llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; +} + + +static void +llvmpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE( depth ); +} + + +static void +llvmpipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *stencil_ref) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if(!stencil_ref) + return; + + if(memcmp(&llvmpipe->stencil_ref, stencil_ref, sizeof *stencil_ref) == 0) + return; + + draw_flush(llvmpipe->draw); + + memcpy(&llvmpipe->stencil_ref, stencil_ref, sizeof *stencil_ref); + + /* not sure. want new flag? 
*/ + llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA; +} + +static void +llvmpipe_set_sample_mask(struct pipe_context *pipe, + unsigned sample_mask) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (sample_mask != llvmpipe->sample_mask) { + llvmpipe->sample_mask = sample_mask; + + llvmpipe->dirty |= LP_NEW_RASTERIZER; + } +} + +void +llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state; + llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state; + llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state; + + llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state; + llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state; + llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state; + + llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; + + llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; + llvmpipe->pipe.set_sample_mask = llvmpipe_set_sample_mask; + + llvmpipe->sample_mask = ~0; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c new file mode 100644 index 000000000..1b9b84c08 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_clip.c @@ -0,0 +1,105 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keithw@vmware.com> + */ +#include "lp_context.h" +#include "lp_state.h" +#include "draw/draw_context.h" + + +static void +llvmpipe_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(llvmpipe->draw, clip); +} + + +static void +llvmpipe_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *viewports) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + /* pass the viewport info to the draw module */ + draw_set_viewport_states(llvmpipe->draw, start_slot, num_viewports, + viewports); + + memcpy(llvmpipe->viewports + start_slot, viewports, + sizeof(struct pipe_viewport_state) * num_viewports); + llvmpipe->dirty |= LP_NEW_VIEWPORT; +} + + +static void +llvmpipe_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *scissors) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + draw_flush(llvmpipe->draw); + + debug_assert(start_slot < PIPE_MAX_VIEWPORTS); + debug_assert((start_slot + num_scissors) <= PIPE_MAX_VIEWPORTS); + + memcpy(llvmpipe->scissors + start_slot, scissors, + sizeof(struct pipe_scissor_state) * num_scissors); + + llvmpipe->dirty |= LP_NEW_SCISSOR; +} + + +static void +llvmpipe_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + draw_flush(llvmpipe->draw); + + llvmpipe->poly_stipple = *stipple; /* struct copy */ + llvmpipe->dirty |= LP_NEW_STIPPLE; +} + + + +void +llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; + 
llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple; + llvmpipe->pipe.set_scissor_states = llvmpipe_set_scissor_states; + llvmpipe->pipe.set_viewport_states = llvmpipe_set_viewport_states; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c new file mode 100644 index 000000000..a25e83261 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2003 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "lp_context.h" +#include "lp_screen.h" +#include "lp_setup.h" +#include "lp_state.h" + + + +/** + * The vertex info describes how to convert the post-transformed vertices + * (simple float[][4]) used by the 'draw' module into vertices for + * rasterization. + * + * This function validates the vertex layout. + */ +static void +compute_vertex_info(struct llvmpipe_context *llvmpipe) +{ + const struct lp_fragment_shader *lpfs = llvmpipe->fs; + struct vertex_info *vinfo = &llvmpipe->vertex_info; + int vs_index; + uint i; + + draw_prepare_shader_outputs(llvmpipe->draw); + + llvmpipe->color_slot[0] = -1; + llvmpipe->color_slot[1] = -1; + llvmpipe->bcolor_slot[0] = -1; + llvmpipe->bcolor_slot[1] = -1; + + /* + * Match FS inputs against VS outputs, emitting the necessary + * attributes. Could cache these structs and look them up with a + * combination of fragment shader, vertex shader ids. 
+ */ + + vinfo->num_attribs = 0; + + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_POSITION, + 0); + + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); + + for (i = 0; i < lpfs->info.base.num_inputs; i++) { + /* + * Search for each input in current vs output: + */ + + vs_index = draw_find_shader_output(llvmpipe->draw, + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); + + if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR && + lpfs->info.base.input_semantic_index[i] < 2) { + int idx = lpfs->info.base.input_semantic_index[i]; + llvmpipe->color_slot[idx] = (int)vinfo->num_attribs; + } + + if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) { + llvmpipe->face_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } else { + /* + * Emit the requested fs attribute for all but position. + */ + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); + } + } + /* Figure out if we need bcolor as well. + */ + for (i = 0; i < 2; i++) { + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_BCOLOR, i); + + if (vs_index >= 0) { + llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); + } + } + + + /* Figure out if we need pointsize as well. 
+ */ + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); + + if (vs_index >= 0) { + llvmpipe->psize_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } + + /* Figure out if we need viewport index */ + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_VIEWPORT_INDEX, + 0); + if (vs_index >= 0) { + llvmpipe->viewport_index_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } else { + llvmpipe->viewport_index_slot = 0; + } + + /* Figure out if we need layer */ + vs_index = draw_find_shader_output(llvmpipe->draw, + TGSI_SEMANTIC_LAYER, + 0); + if (vs_index >= 0) { + llvmpipe->layer_slot = vinfo->num_attribs; + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); + } else { + llvmpipe->layer_slot = 0; + } + + draw_compute_vertex_size(vinfo); + lp_setup_set_vertex_info(llvmpipe->setup, vinfo); +} + + +/** + * Handle state changes. + * Called just prior to drawing anything (pipe::draw_arrays(), etc). + * + * Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) +{ + struct llvmpipe_screen *lp_screen = llvmpipe_screen(llvmpipe->pipe.screen); + + /* Check for updated textures. 
+ */ + if (llvmpipe->tex_timestamp != lp_screen->timestamp) { + llvmpipe->tex_timestamp = lp_screen->timestamp; + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; + } + + if (llvmpipe->dirty & (LP_NEW_RASTERIZER | + LP_NEW_FS | + LP_NEW_VS)) + compute_vertex_info( llvmpipe ); + + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_FRAMEBUFFER | + LP_NEW_BLEND | + LP_NEW_SCISSOR | + LP_NEW_DEPTH_STENCIL_ALPHA | + LP_NEW_RASTERIZER | + LP_NEW_SAMPLER | + LP_NEW_SAMPLER_VIEW | + LP_NEW_OCCLUSION_QUERY)) + llvmpipe_update_fs( llvmpipe ); + + if (llvmpipe->dirty & (LP_NEW_RASTERIZER)) { + boolean discard = + (llvmpipe->sample_mask & 1) == 0 || + (llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE); + + lp_setup_set_rasterizer_discard(llvmpipe->setup, discard); + } + + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_FRAMEBUFFER | + LP_NEW_RASTERIZER)) + llvmpipe_update_setup( llvmpipe ); + + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) + lp_setup_set_blend_color(llvmpipe->setup, + &llvmpipe->blend_color); + + if (llvmpipe->dirty & LP_NEW_SCISSOR) + lp_setup_set_scissors(llvmpipe->setup, llvmpipe->scissors); + + if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) { + lp_setup_set_alpha_ref_value(llvmpipe->setup, + llvmpipe->depth_stencil->alpha.ref_value); + lp_setup_set_stencil_ref_values(llvmpipe->setup, + llvmpipe->stencil_ref.ref_value); + } + + if (llvmpipe->dirty & LP_NEW_CONSTANTS) + lp_setup_set_fs_constants(llvmpipe->setup, + Elements(llvmpipe->constants[PIPE_SHADER_FRAGMENT]), + llvmpipe->constants[PIPE_SHADER_FRAGMENT]); + + if (llvmpipe->dirty & (LP_NEW_SAMPLER_VIEW)) + lp_setup_set_fragment_sampler_views(llvmpipe->setup, + llvmpipe->num_sampler_views[PIPE_SHADER_FRAGMENT], + llvmpipe->sampler_views[PIPE_SHADER_FRAGMENT]); + + if (llvmpipe->dirty & (LP_NEW_SAMPLER)) + lp_setup_set_fragment_sampler_state(llvmpipe->setup, + llvmpipe->num_samplers[PIPE_SHADER_FRAGMENT], + llvmpipe->samplers[PIPE_SHADER_FRAGMENT]); + + if (llvmpipe->dirty & LP_NEW_VIEWPORT) { + /* + * 
Update setup and fragment's view of the active viewport state. + * + * XXX TODO: It is possible to only loop over the active viewports + * instead of all viewports (PIPE_MAX_VIEWPORTS). + */ + lp_setup_set_viewports(llvmpipe->setup, + PIPE_MAX_VIEWPORTS, + llvmpipe->viewports); + } + + llvmpipe->dirty = 0; +} + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c new file mode 100644 index 000000000..fd6c49aac --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -0,0 +1,3217 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * Code generate the whole fragment pipeline. + * + * The fragment pipeline consists of the following stages: + * - early depth test + * - fragment shader + * - alpha test + * - depth/stencil test + * - blending + * + * This file has only the glue to assemble the fragment pipeline. The actual + * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the + * lp_bld_*.[ch] files, and in a completely generic and reusable way. Here we + * muster the LLVM JIT execution engine to create a function that follows an + * established binary interface and that can be called from C directly. + * + * A big source of complexity here is that we often want to run different + * stages with different data types and precisions. For example, + * the fragment shader typically needs to be done in floats, but the + * depth/stencil test and blending is better done in the type that most closely + * matches the depth/stencil and color buffer respectively. + * + * Since the width of a SIMD vector register stays the same regardless of the + * element type, different types imply different numbers of elements, so we must + * code generate more instances of the stages with larger types to be able to + * feed/consume the stages with smaller types.
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include <limits.h> +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_pointer.h" +#include "util/u_format.h" +#include "util/u_dump.h" +#include "util/u_string.h" +#include "util/simple_list.h" +#include "util/u_dual_blend.h" +#include "os/os_time.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_pack.h" +#include "gallivm/lp_bld_format.h" +#include "gallivm/lp_bld_quad.h" + +#include "lp_bld_alpha.h" +#include "lp_bld_blend.h" +#include "lp_bld_depth.h" +#include "lp_bld_interp.h" +#include "lp_context.h" +#include "lp_debug.h" +#include "lp_perf.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "lp_tex_sample.h" +#include "lp_flush.h" +#include "lp_state_fs.h" +#include "lp_rast.h" + + +/** Fragment shader number (for debugging) */ +static unsigned fs_no = 0; + + +/** + * Expand the relevant bits of mask_input to a n*4-dword mask for the + * n*four pixels in n 2x2 quads. This will set the n*four elements of the + * quad mask vector to 0 or ~0. + * Grouping is 01, 23 for 2 quad mode hence only 0 and 2 are valid + * quad arguments with fs length 8. 
+ * + * \param first_quad which quad(s) of the quad group to test, in [0,3] + * \param mask_input bitwise mask for the whole 4x4 stamp + */ +static LLVMValueRef +generate_quad_mask(struct gallivm_state *gallivm, + struct lp_type fs_type, + unsigned first_quad, + LLVMValueRef mask_input) /* int32 */ +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_type mask_type; + LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef bits[16]; + LLVMValueRef mask; + int shift, i; + + /* + * XXX: We'll need a different path for 16 x u8 + */ + assert(fs_type.width == 32); + assert(fs_type.length <= Elements(bits)); + mask_type = lp_int_type(fs_type); + + /* + * mask_input >>= (quad * 4) + */ + switch (first_quad) { + case 0: + shift = 0; + break; + case 1: + assert(fs_type.length == 4); + shift = 2; + break; + case 2: + shift = 8; + break; + case 3: + assert(fs_type.length == 4); + shift = 10; + break; + default: + assert(0); + shift = 0; + } + + mask_input = LLVMBuildLShr(builder, + mask_input, + LLVMConstInt(i32t, shift, 0), + ""); + + /* + * mask = { mask_input & (1 << i), for i in [0,3] } + */ + mask = lp_build_broadcast(gallivm, + lp_build_vec_type(gallivm, mask_type), + mask_input); + + for (i = 0; i < fs_type.length / 4; i++) { + unsigned j = 2 * (i % 2) + (i / 2) * 8; + bits[4*i + 0] = LLVMConstInt(i32t, 1ULL << (j + 0), 0); + bits[4*i + 1] = LLVMConstInt(i32t, 1ULL << (j + 1), 0); + bits[4*i + 2] = LLVMConstInt(i32t, 1ULL << (j + 4), 0); + bits[4*i + 3] = LLVMConstInt(i32t, 1ULL << (j + 5), 0); + } + mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), ""); + + /* + * mask = mask != 0 ? 
~0 : 0 + */ + mask = lp_build_compare(gallivm, + mask_type, PIPE_FUNC_NOTEQUAL, + mask, + lp_build_const_int_vec(gallivm, mask_type, 0)); + + return mask; +} + + +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + + +/** + * Fetch the specified lp_jit_viewport structure for a given viewport_index. + */ +static LLVMValueRef +lp_llvm_viewport(LLVMValueRef context_ptr, + struct gallivm_state *gallivm, + LLVMValueRef viewport_index) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr; + LLVMValueRef res; + struct lp_type viewport_type = + lp_type_float_vec(32, 32 * LP_JIT_VIEWPORT_NUM_FIELDS); + + ptr = lp_jit_context_viewports(gallivm, context_ptr); + ptr = LLVMBuildPointerCast(builder, ptr, + LLVMPointerType(lp_build_vec_type(gallivm, viewport_type), 0), ""); + + res = lp_build_pointer_get(builder, ptr, viewport_index); + + return res; +} + + +/** + * Generate the fragment shader, depth/stencil test, and alpha tests. 
+ */ +static void +generate_fs_loop(struct gallivm_state *gallivm, + struct lp_fragment_shader *shader, + const struct lp_fragment_shader_variant_key *key, + LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef context_ptr, + LLVMValueRef num_loop, + struct lp_build_interp_soa_context *interp, + struct lp_build_sampler_soa *sampler, + LLVMValueRef mask_store, + LLVMValueRef (*out_color)[4], + LLVMValueRef depth_ptr, + LLVMValueRef depth_stride, + LLVMValueRef facing, + LLVMValueRef thread_data_ptr) +{ + const struct util_format_description *zs_format_desc = NULL; + const struct tgsi_token *tokens = shader->base.tokens; + struct lp_type int_type = lp_int_type(type); + LLVMTypeRef vec_type, int_vec_type; + LLVMValueRef mask_ptr, mask_val; + LLVMValueRef consts_ptr, num_consts_ptr; + LLVMValueRef z; + LLVMValueRef z_value, s_value; + LLVMValueRef z_fb, s_fb; + LLVMValueRef stencil_refs[2]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + struct lp_build_for_loop_state loop_state; + struct lp_build_mask_context mask; + /* + * TODO: figure out if simple_shader optimization is really worthwhile to + * keep. Disabled because it may hide some real bugs in the (depth/stencil) + * code since tests tend to take another codepath than real shaders.
+ */ + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8) && 0; + const boolean dual_source_blend = key->blend.rt[0].blend_enable && + util_blend_state_is_dual(&key->blend, 0); + unsigned attrib; + unsigned chan; + unsigned cbuf; + unsigned depth_mode; + + struct lp_bld_tgsi_system_values system_values; + + memset(&system_values, 0, sizeof(system_values)); + + if (key->depth.enabled || + key->stencil[0].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.base.writes_z && !shader->info.base.writes_stencil) { + if (key->alpha.enabled || + key->blend.alpha_to_coverage || + shader->info.base.uses_kill) { + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. This only works though if there's either no + * stencil test or the stencil value isn't written. 
+ */ + if (key->stencil[0].enabled && (key->stencil[0].writemask || + (key->stencil[1].enabled && + key->stencil[1].writemask))) + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + } + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && (key->stencil[0].writemask || + (key->stencil[1].enabled && + key->stencil[1].writemask)))) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } + + vec_type = lp_build_vec_type(gallivm, type); + int_vec_type = lp_build_vec_type(gallivm, int_type); + + stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr); + stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr); + /* convert scalar stencil refs into vectors */ + stencil_refs[0] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast(gallivm, int_vec_type, stencil_refs[1]); + + consts_ptr = lp_jit_context_constants(gallivm, context_ptr); + num_consts_ptr = lp_jit_context_num_constants(gallivm, context_ptr); + + lp_build_for_loop_begin(&loop_state, gallivm, + lp_build_const_int32(gallivm, 0), + LLVMIntULT, + num_loop, + lp_build_const_int32(gallivm, 1)); + + mask_ptr = LLVMBuildGEP(builder, mask_store, + &loop_state.counter, 1, "mask_ptr"); + mask_val = LLVMBuildLoad(builder, mask_ptr, ""); + + memset(outputs, 0, sizeof outputs); + + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + out_color[cbuf][chan] = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, + type), + num_loop, "color"); + } + } + if (dual_source_blend) { + assert(key->nr_cbufs <= 1); + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + out_color[1][chan] = lp_build_array_alloca(gallivm, + 
lp_build_vec_type(gallivm, + type), + num_loop, "color1"); + } + } + + + /* 'mask' will control execution based on quad's pixel alive/killed state */ + lp_build_mask_begin(&mask, gallivm, type, mask_val); + + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter); + z = interp->pos[2]; + + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, + depth_ptr, depth_stride, + &z_fb, &s_fb, loop_state.counter); + lp_build_depth_stencil_test(gallivm, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, z_fb, s_fb, + facing, + &z_value, &s_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_stencil_write_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, + NULL, NULL, NULL, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); + } + /* + * Note mask check if stencil is enabled must be after ds write not after + * stencil test otherwise new stencil values may not get written if all + * fragments got killed by depth/stencil test. 
+ */ + if (!simple_shader && key->stencil[0].enabled) + lp_build_mask_check(&mask); + } + + lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter); + + /* Build the actual shader */ + lp_build_tgsi_soa(gallivm, tokens, type, &mask, + consts_ptr, num_consts_ptr, &system_values, + interp->inputs, + outputs, context_ptr, + sampler, &shader->info.base, NULL); + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + const struct util_format_description *cbuf_format_desc; + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr); + alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value); + + cbuf_format_desc = util_format_description(key->cbuf_format[0]); + + lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); + } + } + + /* Emulate Alpha to Coverage with Alpha test */ + if (key->blend.alpha_to_coverage) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + + lp_build_alpha_to_coverage(gallivm, type, + &mask, alpha, + (depth_mode & LATE_DEPTH_TEST) != 0); + } + } + + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + int s_out = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_STENCIL, + 0); + if (pos0 != -1 && outputs[pos0][2]) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + + /* + * Clamp according to ARB_depth_clamp semantics. 
+ */ + if (key->depth_clamp) { + LLVMValueRef viewport, min_depth, max_depth; + LLVMValueRef viewport_index; + struct lp_build_context f32_bld; + + assert(type.floating); + lp_build_context_init(&f32_bld, gallivm, type); + + /* + * Assumes clamping of the viewport index will occur in setup/gs. Value + * is passed through the rasterization stage via lp_rast_shader_inputs. + * + * See: draw_clamp_viewport_idx and lp_clamp_viewport_idx for clamping + * semantics. + */ + viewport_index = lp_jit_thread_data_raster_state_viewport_index(gallivm, + thread_data_ptr); + + /* + * Load the min and max depth from the lp_jit_context.viewports + * array of lp_jit_viewport structures. + */ + viewport = lp_llvm_viewport(context_ptr, gallivm, viewport_index); + + /* viewports[viewport_index].min_depth */ + min_depth = LLVMBuildExtractElement(builder, viewport, + lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MIN_DEPTH), + ""); + min_depth = lp_build_broadcast_scalar(&f32_bld, min_depth); + + /* viewports[viewport_index].max_depth */ + max_depth = LLVMBuildExtractElement(builder, viewport, + lp_build_const_int32(gallivm, LP_JIT_VIEWPORT_MAX_DEPTH), + ""); + max_depth = lp_build_broadcast_scalar(&f32_bld, max_depth); + + /* + * Clamp to the min and max depth values for the given viewport. 
+ */ + z = lp_build_clamp(&f32_bld, z, min_depth, max_depth); + } + } + + if (s_out != -1 && outputs[s_out][1]) { + /* there's only one value, and spec says to discard additional bits */ + LLVMValueRef s_max_mask = lp_build_const_int_vec(gallivm, int_type, 255); + stencil_refs[0] = LLVMBuildLoad(builder, outputs[s_out][1], "output.s"); + stencil_refs[0] = LLVMBuildBitCast(builder, stencil_refs[0], int_vec_type, ""); + stencil_refs[0] = LLVMBuildAnd(builder, stencil_refs[0], s_max_mask, ""); + stencil_refs[1] = stencil_refs[0]; + } + + lp_build_depth_stencil_load_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, + depth_ptr, depth_stride, + &z_fb, &s_fb, loop_state.counter); + + lp_build_depth_stencil_test(gallivm, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, z_fb, s_fb, + facing, + &z_value, &s_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_stencil_write_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, + NULL, NULL, NULL, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); + } + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. 
+ */ + lp_build_depth_stencil_write_swizzled(gallivm, type, + zs_format_desc, key->resource_1d, + &mask, z_fb, s_fb, loop_state.counter, + depth_ptr, depth_stride, + z_value, s_value); + } + + + /* Color write */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + if ((shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) && + ((cbuf < key->nr_cbufs) || (cbuf == 1 && dual_source_blend))) + { + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + LLVMValueRef color_ptr; + color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan], + &loop_state.counter, 1, ""); + lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color_ptr); + } + } + } + } + + if (key->occlusion_count) { + LLVMValueRef counter = lp_jit_thread_data_counter(gallivm, thread_data_ptr); + lp_build_name(counter, "counter"); + lp_build_occlusion_count(gallivm, type, + lp_build_mask_value(&mask), counter); + } + + mask_val = lp_build_mask_end(&mask); + LLVMBuildStore(builder, mask_val, mask_ptr); + lp_build_for_loop_end(&loop_state); +} + + +/** + * This function will reorder pixels from the fragment shader SoA to memory layout AoS + * + * Fragment Shader outputs pixels in small 2x2 blocks + * e.g. (0, 0), (1, 0), (0, 1), (1, 1) ; (2, 0) ... + * + * However in memory pixels are stored in rows + * e.g. (0, 0), (1, 0), (2, 0), (3, 0) ; (0, 1) ... 
+ * + * @param type fragment shader type (4x or 8x float) + * @param num_fs number of fs_src + * @param dst_channels number of output channels + * @param fs_src output from fragment shader + * @param dst pointer to store result + * @param pad_inline is channel padding inline or at end of row + * @return the number of dsts + */ +static int +generate_fs_twiddle(struct gallivm_state *gallivm, + struct lp_type type, + unsigned num_fs, + unsigned dst_channels, + LLVMValueRef fs_src[][4], + LLVMValueRef* dst, + bool pad_inline) +{ + LLVMValueRef src[16]; + + bool swizzle_pad; + bool twiddle; + bool split; + + unsigned pixels = type.length / 4; + unsigned reorder_group; + unsigned src_channels; + unsigned src_count; + unsigned i; + + src_channels = dst_channels < 3 ? dst_channels : 4; + src_count = num_fs * src_channels; + + assert(pixels == 2 || pixels == 1); + assert(num_fs * src_channels <= Elements(src)); + + /* + * Transpose from SoA -> AoS + */ + for (i = 0; i < num_fs; ++i) { + lp_build_transpose_aos_n(gallivm, type, &fs_src[i][0], src_channels, &src[i * src_channels]); + } + + /* + * Pick transformation options + */ + swizzle_pad = false; + twiddle = false; + split = false; + reorder_group = 0; + + if (dst_channels == 1) { + twiddle = true; + + if (pixels == 2) { + split = true; + } + } else if (dst_channels == 2) { + if (pixels == 1) { + reorder_group = 1; + } + } else if (dst_channels > 2) { + if (pixels == 1) { + reorder_group = 2; + } else { + twiddle = true; + } + + if (!pad_inline && dst_channels == 3 && pixels > 1) { + swizzle_pad = true; + } + } + + /* + * Split the src in half + */ + if (split) { + for (i = num_fs; i > 0; --i) { + src[(i - 1)*2 + 1] = lp_build_extract_range(gallivm, src[i - 1], 4, 4); + src[(i - 1)*2 + 0] = lp_build_extract_range(gallivm, src[i - 1], 0, 4); + } + + src_count *= 2; + type.length = 4; + } + + /* + * Ensure pixels are in memory order + */ + if (reorder_group) { + /*
Twiddle pixels by reordering the array, e.g.: + * + * src_count = 8 -> 0 2 1 3 4 6 5 7 + * src_count = 16 -> 0 1 4 5 2 3 6 7 8 9 12 13 10 11 14 15 + */ + const unsigned reorder_sw[] = { 0, 2, 1, 3 }; + + for (i = 0; i < src_count; ++i) { + unsigned group = i / reorder_group; + unsigned block = (group / 4) * 4 * reorder_group; + unsigned j = block + (reorder_sw[group % 4] * reorder_group) + (i % reorder_group); + dst[i] = src[j]; + } + } else if (twiddle) { + /* Twiddle pixels across elements of array */ + lp_bld_quad_twiddle(gallivm, type, src, src_count, dst); + } else { + /* Do nothing */ + memcpy(dst, src, sizeof(LLVMValueRef) * src_count); + } + + /* + * Moves any padding between pixels to the end + * e.g. RGBXRGBX -> RGBRGBXX + */ + if (swizzle_pad) { + unsigned char swizzles[16]; + unsigned elems = pixels * dst_channels; + + for (i = 0; i < type.length; ++i) { + if (i < elems) + swizzles[i] = i % dst_channels + (i / dst_channels) * 4; + else + swizzles[i] = LP_BLD_SWIZZLE_DONTCARE; + } + + for (i = 0; i < src_count; ++i) { + dst[i] = lp_build_swizzle_aos_n(gallivm, dst[i], swizzles, type.length, type.length); + } + } + + return src_count; +} + + +/** + * Load an unswizzled block of pixels from memory + */ +static void +load_unswizzled_block(struct gallivm_state *gallivm, + LLVMValueRef base_ptr, + LLVMValueRef stride, + unsigned block_width, + unsigned block_height, + LLVMValueRef* dst, + struct lp_type dst_type, + unsigned dst_count, + unsigned dst_alignment) +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned row_size = dst_count / block_height; + unsigned i; + + /* Ensure block exactly fits into dst */ + assert((block_width * block_height) % dst_count == 0); + + for (i = 0; i < dst_count; ++i) { + unsigned x = i % row_size; + unsigned y = i / row_size; + + LLVMValueRef bx = lp_build_const_int32(gallivm, x * (dst_type.width / 8) * dst_type.length); + LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, ""); + + 
LLVMValueRef gep[2]; + LLVMValueRef dst_ptr; + + gep[0] = lp_build_const_int32(gallivm, 0); + gep[1] = LLVMBuildAdd(builder, bx, by, ""); + + dst_ptr = LLVMBuildGEP(builder, base_ptr, gep, 2, ""); + dst_ptr = LLVMBuildBitCast(builder, dst_ptr, LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0), ""); + + dst[i] = LLVMBuildLoad(builder, dst_ptr, ""); + + lp_set_load_alignment(dst[i], dst_alignment); + } +} + + +/** + * Store an unswizzled block of pixels to memory + */ +static void +store_unswizzled_block(struct gallivm_state *gallivm, + LLVMValueRef base_ptr, + LLVMValueRef stride, + unsigned block_width, + unsigned block_height, + LLVMValueRef* src, + struct lp_type src_type, + unsigned src_count, + unsigned src_alignment) +{ + LLVMBuilderRef builder = gallivm->builder; + unsigned row_size = src_count / block_height; + unsigned i; + + /* Ensure src exactly fits into block */ + assert((block_width * block_height) % src_count == 0); + + for (i = 0; i < src_count; ++i) { + unsigned x = i % row_size; + unsigned y = i / row_size; + + LLVMValueRef bx = lp_build_const_int32(gallivm, x * (src_type.width / 8) * src_type.length); + LLVMValueRef by = LLVMBuildMul(builder, lp_build_const_int32(gallivm, y), stride, ""); + + LLVMValueRef gep[2]; + LLVMValueRef src_ptr; + + gep[0] = lp_build_const_int32(gallivm, 0); + gep[1] = LLVMBuildAdd(builder, bx, by, ""); + + src_ptr = LLVMBuildGEP(builder, base_ptr, gep, 2, ""); + src_ptr = LLVMBuildBitCast(builder, src_ptr, LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0), ""); + + src_ptr = LLVMBuildStore(builder, src[i], src_ptr); + + lp_set_store_alignment(src_ptr, src_alignment); + } +} + + +/** + * Checks if a format description is an arithmetic format + * + * A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5. 
+ */ +static inline boolean +is_arithmetic_format(const struct util_format_description *format_desc) +{ + boolean arith = false; + unsigned i; + + for (i = 0; i < format_desc->nr_channels; ++i) { + arith |= format_desc->channel[i].size != format_desc->channel[0].size; + arith |= (format_desc->channel[i].size % 8) != 0; + } + + return arith; +} + + +/** + * Checks if this format requires special handling due to required expansion + * to floats for blending, and furthermore has "natural" packed AoS -> unpacked + * SoA conversion. + */ +static inline boolean +format_expands_to_float_soa(const struct util_format_description *format_desc) +{ + if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + return true; + } + return false; +} + + +/** + * Retrieves the type representing the memory layout for a format + * + * e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte + */ +static inline void +lp_mem_type_from_format_desc(const struct util_format_description *format_desc, + struct lp_type* type) +{ + unsigned i; + unsigned chan; + + if (format_expands_to_float_soa(format_desc)) { + /* just make this a uint with width of block */ + type->floating = false; + type->fixed = false; + type->sign = false; + type->norm = false; + type->width = format_desc->block.bits; + type->length = 1; + return; + } + + for (i = 0; i < 4; i++) + if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + chan = i; + + memset(type, 0, sizeof(struct lp_type)); + type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT; + type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED; + type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED; + type->norm = format_desc->channel[chan].normalized; + + if (is_arithmetic_format(format_desc)) { + type->width = 0; + type->length = 1; + + for (i = 0; i < format_desc->nr_channels; ++i) { + type->width += format_desc->channel[i].size; + } + 
} else { + type->width = format_desc->channel[chan].size; + type->length = format_desc->nr_channels; + } +} + + +/** + * Retrieves the type for a format which is usable in the blending code. + * + * e.g. RGBA16F = 4x float, R3G3B2 = 3x byte + */ +static inline void +lp_blend_type_from_format_desc(const struct util_format_description *format_desc, + struct lp_type* type) +{ + unsigned i; + unsigned chan; + + if (format_expands_to_float_soa(format_desc)) { + /* always use ordinary floats for blending */ + type->floating = true; + type->fixed = false; + type->sign = true; + type->norm = false; + type->width = 32; + type->length = 4; + return; + } + + for (i = 0; i < 4; i++) + if (format_desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) + break; + chan = i; + + memset(type, 0, sizeof(struct lp_type)); + type->floating = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT; + type->fixed = format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED; + type->sign = format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED; + type->norm = format_desc->channel[chan].normalized; + type->width = format_desc->channel[chan].size; + type->length = format_desc->nr_channels; + + for (i = 1; i < format_desc->nr_channels; ++i) { + if (format_desc->channel[i].size > type->width) + type->width = format_desc->channel[i].size; + } + + if (type->floating) { + type->width = 32; + } else { + if (type->width <= 8) { + type->width = 8; + } else if (type->width <= 16) { + type->width = 16; + } else { + type->width = 32; + } + } + + if (is_arithmetic_format(format_desc) && type->length == 3) { + type->length = 4; + } +} + + +/** + * Scale a normalized value from src_bits to dst_bits. 
+ * + * The exact calculation is + * + * dst = iround(src * dst_mask / src_mask) + * + * or with integer rounding + * + * dst = src * (2*dst_mask + sign(src)*src_mask) / (2*src_mask) + * + * where + * + * src_mask = (1 << src_bits) - 1 + * dst_mask = (1 << dst_bits) - 1 + * + * but we try to avoid division and multiplication through shifts. + */ +static inline LLVMValueRef +scale_bits(struct gallivm_state *gallivm, + int src_bits, + int dst_bits, + LLVMValueRef src, + struct lp_type src_type) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef result = src; + + if (dst_bits < src_bits) { + int delta_bits = src_bits - dst_bits; + + if (delta_bits <= dst_bits) { + /* + * Approximate the rescaling with a single shift. + * + * This gives the wrong rounding. + */ + + result = LLVMBuildLShr(builder, + src, + lp_build_const_int_vec(gallivm, src_type, delta_bits), + ""); + + } else { + /* + * Try more accurate rescaling. + */ + + /* + * Drop the least significant bits to make space for the multiplication. + * + * XXX: A better approach would be to use a wider integer type as intermediate. But + * this is enough to convert alpha from 16bits -> 2 when rendering to + * PIPE_FORMAT_R10G10B10A2_UNORM. + */ + result = LLVMBuildLShr(builder, + src, + lp_build_const_int_vec(gallivm, src_type, dst_bits), + ""); + + + result = LLVMBuildMul(builder, + result, + lp_build_const_int_vec(gallivm, src_type, (1LL << dst_bits) - 1), + ""); + + /* + * Add a rounding term before the division. + * + * TODO: Handle signed integers too. + */ + if (!src_type.sign) { + result = LLVMBuildAdd(builder, + result, + lp_build_const_int_vec(gallivm, src_type, (1LL << (delta_bits - 1))), + ""); + } + + /* + * Approximate the division by src_mask with a src_bits shift. + * + * Given the src has already been shifted by dst_bits, all we need + * to do is to shift by the difference. 
+ */ + + result = LLVMBuildLShr(builder, + result, + lp_build_const_int_vec(gallivm, src_type, delta_bits), + ""); + } + + } else if (dst_bits > src_bits) { + /* Scale up bits */ + int db = dst_bits - src_bits; + + /* Shift left by difference in bits */ + result = LLVMBuildShl(builder, + src, + lp_build_const_int_vec(gallivm, src_type, db), + ""); + + if (db < src_bits) { + /* Enough bits in src to fill the remainder */ + LLVMValueRef lower = LLVMBuildLShr(builder, + src, + lp_build_const_int_vec(gallivm, src_type, src_bits - db), + ""); + + result = LLVMBuildOr(builder, result, lower, ""); + } else if (db > src_bits) { + /* Need to repeatedly copy src bits to fill remainder in dst */ + unsigned n; + + for (n = src_bits; n < dst_bits; n *= 2) { + LLVMValueRef shuv = lp_build_const_int_vec(gallivm, src_type, n); + + result = LLVMBuildOr(builder, + result, + LLVMBuildLShr(builder, result, shuv, ""), + ""); + } + } + } + + return result; +} + +/** + * If RT is a smallfloat (needing denorms) format + */ +static inline int +have_smallfloat_format(struct lp_type dst_type, + enum pipe_format format) +{ + return ((dst_type.floating && dst_type.width != 32) || + /* due to format handling hacks this format doesn't have floating set + * here (and actually has width set to 32 too) so special case this. */ + (format == PIPE_FORMAT_R11G11B10_FLOAT)); +} + + +/** + * Convert from memory format to blending format + * + * e.g. 
GL_R3G3B2 is 1 byte in memory but 3 bytes for blending + */ +static void +convert_to_blend_type(struct gallivm_state *gallivm, + unsigned block_size, + const struct util_format_description *src_fmt, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef* src, // and dst + unsigned num_srcs) +{ + LLVMValueRef *dst = src; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type blend_type; + struct lp_type mem_type; + unsigned i, j, k; + unsigned pixels = block_size / num_srcs; + bool is_arith; + + /* + * full custom path for packed floats and srgb formats - none of the later + * functions would do anything useful, and given the lp_type representation they + * can't be fixed. Should really have some SoA blend path for these kind of + * formats rather than hacking them in here. + */ + if (format_expands_to_float_soa(src_fmt)) { + LLVMValueRef tmpsrc[4]; + /* + * This is pretty suboptimal for this case blending in SoA would be much + * better, since conversion gets us SoA values so need to convert back. + */ + assert(src_type.width == 32 || src_type.width == 16); + assert(dst_type.floating); + assert(dst_type.width == 32); + assert(dst_type.length % 4 == 0); + assert(num_srcs % 4 == 0); + + if (src_type.width == 16) { + /* expand 4x16bit values to 4x32bit */ + struct lp_type type32x4 = src_type; + LLVMTypeRef ltype32x4; + unsigned num_fetch = dst_type.length == 8 ? 
num_srcs / 2 : num_srcs / 4; + type32x4.width = 32; + ltype32x4 = lp_build_vec_type(gallivm, type32x4); + for (i = 0; i < num_fetch; i++) { + src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, ""); + } + src_type.width = 32; + } + for (i = 0; i < 4; i++) { + tmpsrc[i] = src[i]; + } + for (i = 0; i < num_srcs / 4; i++) { + LLVMValueRef tmpsoa[4]; + LLVMValueRef tmps = tmpsrc[i]; + if (dst_type.length == 8) { + LLVMValueRef shuffles[8]; + unsigned j; + /* fetch was 4 values but need 8-wide output values */ + tmps = lp_build_concat(gallivm, &tmpsrc[i * 2], src_type, 2); + /* + * for 8-wide aos transpose would give us wrong order not matching + * incoming converted fs values and mask. ARGH. + */ + for (j = 0; j < 4; j++) { + shuffles[j] = lp_build_const_int32(gallivm, j * 2); + shuffles[j + 4] = lp_build_const_int32(gallivm, j * 2 + 1); + } + tmps = LLVMBuildShuffleVector(builder, tmps, tmps, + LLVMConstVector(shuffles, 8), ""); + } + if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) { + lp_build_r11g11b10_to_float(gallivm, tmps, tmpsoa); + } + else { + lp_build_unpack_rgba_soa(gallivm, src_fmt, dst_type, tmps, tmpsoa); + } + lp_build_transpose_aos(gallivm, dst_type, tmpsoa, &src[i * 4]); + } + return; + } + + lp_mem_type_from_format_desc(src_fmt, &mem_type); + lp_blend_type_from_format_desc(src_fmt, &blend_type); + + /* Is the format arithmetic */ + is_arith = blend_type.length * blend_type.width != mem_type.width * mem_type.length; + is_arith &= !(mem_type.width == 16 && mem_type.floating); + + /* Pad if necessary */ + if (!is_arith && src_type.length < dst_type.length) { + for (i = 0; i < num_srcs; ++i) { + dst[i] = lp_build_pad_vector(gallivm, src[i], dst_type.length); + } + + src_type.length = dst_type.length; + } + + /* Special case for half-floats */ + if (mem_type.width == 16 && mem_type.floating) { + assert(blend_type.width == 32 && blend_type.floating); + lp_build_conv_auto(gallivm, src_type, &dst_type, dst, num_srcs, dst); + is_arith = false; + } + + if 
(!is_arith) { + return; + } + + src_type.width = blend_type.width * blend_type.length; + blend_type.length *= pixels; + src_type.length *= pixels / (src_type.length / mem_type.length); + + for (i = 0; i < num_srcs; ++i) { + LLVMValueRef chans[4]; + LLVMValueRef res = NULL; + + dst[i] = LLVMBuildZExt(builder, src[i], lp_build_vec_type(gallivm, src_type), ""); + + for (j = 0; j < src_fmt->nr_channels; ++j) { + unsigned mask = 0; + unsigned sa = src_fmt->channel[j].shift; +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned from_lsb = j; +#else + unsigned from_lsb = src_fmt->nr_channels - j - 1; +#endif + + for (k = 0; k < src_fmt->channel[j].size; ++k) { + mask |= 1 << k; + } + + /* Extract bits from source */ + chans[j] = LLVMBuildLShr(builder, + dst[i], + lp_build_const_int_vec(gallivm, src_type, sa), + ""); + + chans[j] = LLVMBuildAnd(builder, + chans[j], + lp_build_const_int_vec(gallivm, src_type, mask), + ""); + + /* Scale bits */ + if (src_type.norm) { + chans[j] = scale_bits(gallivm, src_fmt->channel[j].size, + blend_type.width, chans[j], src_type); + } + + /* Insert bits into correct position */ + chans[j] = LLVMBuildShl(builder, + chans[j], + lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width), + ""); + + if (j == 0) { + res = chans[j]; + } else { + res = LLVMBuildOr(builder, res, chans[j], ""); + } + } + + dst[i] = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, blend_type), ""); + } +} + + +/** + * Convert from blending format to memory format + * + * e.g. 
GL_R3G3B2 is 3 bytes for blending but 1 byte in memory + */ +static void +convert_from_blend_type(struct gallivm_state *gallivm, + unsigned block_size, + const struct util_format_description *src_fmt, + struct lp_type src_type, + struct lp_type dst_type, + LLVMValueRef* src, // and dst + unsigned num_srcs) +{ + LLVMValueRef* dst = src; + unsigned i, j, k; + struct lp_type mem_type; + struct lp_type blend_type; + LLVMBuilderRef builder = gallivm->builder; + unsigned pixels = block_size / num_srcs; + bool is_arith; + + /* + * full custom path for packed floats and srgb formats - none of the later + * functions would do anything useful, and given the lp_type representation they + * can't be fixed. Should really have some SoA blend path for these kind of + * formats rather than hacking them in here. + */ + if (format_expands_to_float_soa(src_fmt)) { + /* + * This is pretty suboptimal for this case blending in SoA would be much + * better - we need to transpose the AoS values back to SoA values for + * conversion/packing. + */ + assert(src_type.floating); + assert(src_type.width == 32); + assert(src_type.length % 4 == 0); + assert(dst_type.width == 32 || dst_type.width == 16); + + for (i = 0; i < num_srcs / 4; i++) { + LLVMValueRef tmpsoa[4], tmpdst; + lp_build_transpose_aos(gallivm, src_type, &src[i * 4], tmpsoa); + /* really really need SoA here */ + + if (src_fmt->format == PIPE_FORMAT_R11G11B10_FLOAT) { + tmpdst = lp_build_float_to_r11g11b10(gallivm, tmpsoa); + } + else { + tmpdst = lp_build_float_to_srgb_packed(gallivm, src_fmt, + src_type, tmpsoa); + } + + if (src_type.length == 8) { + LLVMValueRef tmpaos, shuffles[8]; + unsigned j; + /* + * for 8-wide aos transpose has given us wrong order not matching + * output order. HMPF. Also need to split the output values manually. 
+ */ + for (j = 0; j < 4; j++) { + shuffles[j * 2] = lp_build_const_int32(gallivm, j); + shuffles[j * 2 + 1] = lp_build_const_int32(gallivm, j + 4); + } + tmpaos = LLVMBuildShuffleVector(builder, tmpdst, tmpdst, + LLVMConstVector(shuffles, 8), ""); + src[i * 2] = lp_build_extract_range(gallivm, tmpaos, 0, 4); + src[i * 2 + 1] = lp_build_extract_range(gallivm, tmpaos, 4, 4); + } + else { + src[i] = tmpdst; + } + } + if (dst_type.width == 16) { + struct lp_type type16x8 = dst_type; + struct lp_type type32x4 = dst_type; + LLVMTypeRef ltype16x4, ltypei64, ltypei128; + unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4; + type16x8.length = 8; + type32x4.width = 32; + ltypei128 = LLVMIntTypeInContext(gallivm->context, 128); + ltypei64 = LLVMIntTypeInContext(gallivm->context, 64); + ltype16x4 = lp_build_vec_type(gallivm, dst_type); + /* We could do vector truncation but it doesn't generate very good code */ + for (i = 0; i < num_fetch; i++) { + src[i] = lp_build_pack2(gallivm, type32x4, type16x8, + src[i], lp_build_zero(gallivm, type32x4)); + src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, ""); + src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, ""); + src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, ""); + } + } + return; + } + + lp_mem_type_from_format_desc(src_fmt, &mem_type); + lp_blend_type_from_format_desc(src_fmt, &blend_type); + + is_arith = (blend_type.length * blend_type.width != mem_type.width * mem_type.length); + + /* Special case for half-floats */ + if (mem_type.width == 16 && mem_type.floating) { + int length = dst_type.length; + assert(blend_type.width == 32 && blend_type.floating); + + dst_type.length = src_type.length; + + lp_build_conv_auto(gallivm, src_type, &dst_type, dst, num_srcs, dst); + + dst_type.length = length; + is_arith = false; + } + + /* Remove any padding */ + if (!is_arith && (src_type.length % mem_type.length)) { + src_type.length -= (src_type.length % mem_type.length); + + for (i = 0; i < num_srcs; ++i) 
{ + dst[i] = lp_build_extract_range(gallivm, dst[i], 0, src_type.length); + } + } + + /* No bit arithmetic to do */ + if (!is_arith) { + return; + } + + src_type.length = pixels; + src_type.width = blend_type.length * blend_type.width; + dst_type.length = pixels; + + for (i = 0; i < num_srcs; ++i) { + LLVMValueRef chans[4]; + LLVMValueRef res = NULL; + + dst[i] = LLVMBuildBitCast(builder, src[i], lp_build_vec_type(gallivm, src_type), ""); + + for (j = 0; j < src_fmt->nr_channels; ++j) { + unsigned mask = 0; + unsigned sa = src_fmt->channel[j].shift; +#ifdef PIPE_ARCH_LITTLE_ENDIAN + unsigned from_lsb = j; +#else + unsigned from_lsb = src_fmt->nr_channels - j - 1; +#endif + + assert(blend_type.width > src_fmt->channel[j].size); + + for (k = 0; k < blend_type.width; ++k) { + mask |= 1 << k; + } + + /* Extract bits */ + chans[j] = LLVMBuildLShr(builder, + dst[i], + lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width), + ""); + + chans[j] = LLVMBuildAnd(builder, + chans[j], + lp_build_const_int_vec(gallivm, src_type, mask), + ""); + + /* Scale down bits */ + if (src_type.norm) { + chans[j] = scale_bits(gallivm, blend_type.width, + src_fmt->channel[j].size, chans[j], src_type); + } + + /* Insert bits */ + chans[j] = LLVMBuildShl(builder, + chans[j], + lp_build_const_int_vec(gallivm, src_type, sa), + ""); + + sa += src_fmt->channel[j].size; + + if (j == 0) { + res = chans[j]; + } else { + res = LLVMBuildOr(builder, res, chans[j], ""); + } + } + + assert (dst_type.width != 24); + + dst[i] = LLVMBuildTrunc(builder, res, lp_build_vec_type(gallivm, dst_type), ""); + } +} + + +/** + * Convert alpha to same blend type as src + */ +static void +convert_alpha(struct gallivm_state *gallivm, + struct lp_type row_type, + struct lp_type alpha_type, + const unsigned block_size, + const unsigned block_height, + const unsigned src_count, + const unsigned dst_channels, + const bool pad_inline, + LLVMValueRef* src_alpha) +{ + LLVMBuilderRef builder = gallivm->builder; + 
unsigned i, j; + unsigned length = row_type.length; + row_type.length = alpha_type.length; + + /* Twiddle the alpha to match pixels */ + lp_bld_quad_twiddle(gallivm, alpha_type, src_alpha, block_height, src_alpha); + + /* + * TODO this should use single lp_build_conv call for + * src_count == 1 && dst_channels == 1 case (dropping the concat below) + */ + for (i = 0; i < block_height; ++i) { + lp_build_conv(gallivm, alpha_type, row_type, &src_alpha[i], 1, &src_alpha[i], 1); + } + + alpha_type = row_type; + row_type.length = length; + + /* If only one channel we can only need the single alpha value per pixel */ + if (src_count == 1 && dst_channels == 1) { + + lp_build_concat_n(gallivm, alpha_type, src_alpha, block_height, src_alpha, src_count); + } else { + /* If there are more srcs than rows then we need to split alpha up */ + if (src_count > block_height) { + for (i = src_count; i > 0; --i) { + unsigned pixels = block_size / src_count; + unsigned idx = i - 1; + + src_alpha[idx] = lp_build_extract_range(gallivm, src_alpha[(idx * pixels) / 4], + (idx * pixels) % 4, pixels); + } + } + + /* If there is a src for each pixel broadcast the alpha across whole row */ + if (src_count == block_size) { + for (i = 0; i < src_count; ++i) { + src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]); + } + } else { + unsigned pixels = block_size / src_count; + unsigned channels = pad_inline ? TGSI_NUM_CHANNELS : dst_channels; + unsigned alpha_span = 1; + LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + + /* Check if we need 2 src_alphas for our shuffles */ + if (pixels > alpha_type.length) { + alpha_span = 2; + } + + /* Broadcast alpha across all channels, e.g. 
a1a2 to a1a1a1a1a2a2a2a2 */ + for (j = 0; j < row_type.length; ++j) { + if (j < pixels * channels) { + shuffles[j] = lp_build_const_int32(gallivm, j / channels); + } else { + shuffles[j] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + } + } + + for (i = 0; i < src_count; ++i) { + unsigned idx1 = i, idx2 = i; + + if (alpha_span > 1){ + idx1 *= alpha_span; + idx2 = idx1 + 1; + } + + src_alpha[i] = LLVMBuildShuffleVector(builder, + src_alpha[idx1], + src_alpha[idx2], + LLVMConstVector(shuffles, row_type.length), + ""); + } + } + } +} + + +/** + * Generates the blend function for unswizzled colour buffers + * Also generates the read & write from colour buffer + */ +static void +generate_unswizzled_blend(struct gallivm_state *gallivm, + unsigned rt, + struct lp_fragment_shader_variant *variant, + enum pipe_format out_format, + unsigned int num_fs, + struct lp_type fs_type, + LLVMValueRef* fs_mask, + LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][4], + LLVMValueRef context_ptr, + LLVMValueRef color_ptr, + LLVMValueRef stride, + unsigned partial_mask, + boolean do_branch) +{ + const unsigned alpha_channel = 3; + const unsigned block_width = LP_RASTER_BLOCK_SIZE; + const unsigned block_height = LP_RASTER_BLOCK_SIZE; + const unsigned block_size = block_width * block_height; + const unsigned lp_integer_vector_width = 128; + + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS]; + LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS]; + LLVMValueRef src_alpha[4 * 4]; + LLVMValueRef src1_alpha[4 * 4]; + LLVMValueRef src_mask[4 * 4]; + LLVMValueRef src[4 * 4]; + LLVMValueRef src1[4 * 4]; + LLVMValueRef dst[4 * 4]; + LLVMValueRef blend_color; + LLVMValueRef blend_alpha; + LLVMValueRef i32_zero; + LLVMValueRef check_mask; + LLVMValueRef undef_src_val; + + struct lp_build_mask_context mask_ctx; + struct lp_type mask_type; + struct lp_type blend_type; + struct lp_type row_type; + struct lp_type dst_type; + + unsigned char 
swizzle[TGSI_NUM_CHANNELS]; + unsigned vector_width; + unsigned src_channels = TGSI_NUM_CHANNELS; + unsigned dst_channels; + unsigned dst_count; + unsigned src_count; + unsigned i, j; + + const struct util_format_description* out_format_desc = util_format_description(out_format); + + unsigned dst_alignment; + + bool pad_inline = is_arithmetic_format(out_format_desc); + bool has_alpha = false; + const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable && + util_blend_state_is_dual(&variant->key.blend, 0); + + const boolean is_1d = variant->key.resource_1d; + unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs; + LLVMValueRef fpstate = 0; + + /* Get type from output format */ + lp_blend_type_from_format_desc(out_format_desc, &row_type); + lp_mem_type_from_format_desc(out_format_desc, &dst_type); + + /* + * Technically this code should go into lp_build_smallfloat_to_float + * and lp_build_float_to_smallfloat but due to the + * http://llvm.org/bugs/show_bug.cgi?id=6393 + * llvm reorders the mxcsr intrinsics in a way that breaks the code. + * So the ordering is important here and there shouldn't be any + * llvm ir instrunctions in this function before + * this, otherwise half-float format conversions won't work + * (again due to llvm bug #6393). + */ + if (have_smallfloat_format(dst_type, out_format)) { + /* We need to make sure that denorms are ok for half float + conversions */ + fpstate = lp_build_fpstate_get(gallivm); + lp_build_fpstate_set_denorms_zero(gallivm, FALSE); + } + + mask_type = lp_int32_vec4_type(); + mask_type.length = fs_type.length; + + for (i = num_fs; i < num_fullblock_fs; i++) { + fs_mask[i] = lp_build_zero(gallivm, mask_type); + } + + /* Do not bother executing code when mask is empty.. 
*/ + if (do_branch) { + check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type)); + + for (i = 0; i < num_fullblock_fs; ++i) { + check_mask = LLVMBuildOr(builder, check_mask, fs_mask[i], ""); + } + + lp_build_mask_begin(&mask_ctx, gallivm, mask_type, check_mask); + lp_build_mask_check(&mask_ctx); + } + + partial_mask |= !variant->opaque; + i32_zero = lp_build_const_int32(gallivm, 0); + + undef_src_val = lp_build_undef(gallivm, fs_type); + + row_type.length = fs_type.length; + vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width; + + /* Compute correct swizzle and count channels */ + memset(swizzle, LP_BLD_SWIZZLE_DONTCARE, TGSI_NUM_CHANNELS); + dst_channels = 0; + + for (i = 0; i < TGSI_NUM_CHANNELS; ++i) { + /* Ensure channel is used */ + if (out_format_desc->swizzle[i] >= TGSI_NUM_CHANNELS) { + continue; + } + + /* Ensure not already written to (happens in case with GL_ALPHA) */ + if (swizzle[out_format_desc->swizzle[i]] < TGSI_NUM_CHANNELS) { + continue; + } + + /* Ensure we havn't already found all channels */ + if (dst_channels >= out_format_desc->nr_channels) { + continue; + } + + swizzle[out_format_desc->swizzle[i]] = i; + ++dst_channels; + + if (i == alpha_channel) { + has_alpha = true; + } + } + + if (format_expands_to_float_soa(out_format_desc)) { + /* + * the code above can't work for layout_other + * for srgb it would sort of work but we short-circuit swizzles, etc. + * as that is done as part of unpack / pack. 
+ */ + dst_channels = 4; /* HACK: this is fake 4 really but need it due to transpose stuff later */ + has_alpha = true; + swizzle[0] = 0; + swizzle[1] = 1; + swizzle[2] = 2; + swizzle[3] = 3; + pad_inline = true; /* HACK: prevent rgbxrgbx->rgbrgbxx conversion later */ + } + + /* If 3 channels then pad to include alpha for 4 element transpose */ + if (dst_channels == 3 && !has_alpha) { + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + if (swizzle[i] > TGSI_NUM_CHANNELS) + swizzle[i] = 3; + } + if (out_format_desc->nr_channels == 4) { + dst_channels = 4; + } + } + + /* + * Load shader output + */ + for (i = 0; i < num_fullblock_fs; ++i) { + /* Always load alpha for use in blending */ + LLVMValueRef alpha; + if (i < num_fs) { + alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], ""); + } + else { + alpha = undef_src_val; + } + + /* Load each channel */ + for (j = 0; j < dst_channels; ++j) { + assert(swizzle[j] < 4); + if (i < num_fs) { + fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], ""); + } + else { + fs_src[i][j] = undef_src_val; + } + } + + /* If 3 channels then pad to include alpha for 4 element transpose */ + /* + * XXX If we include that here maybe could actually use it instead of + * separate alpha for blending? + */ + if (dst_channels == 3 && !has_alpha) { + fs_src[i][3] = alpha; + } + + /* We split the row_mask and row_alpha as we want 128bit interleave */ + if (fs_type.length == 8) { + src_mask[i*2 + 0] = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels); + src_mask[i*2 + 1] = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels); + + src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels); + src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels); + } else { + src_mask[i] = fs_mask[i]; + src_alpha[i] = alpha; + } + } + if (dual_source_blend) { + /* same as above except different src/dst, skip masks and comments... 
*/ + for (i = 0; i < num_fullblock_fs; ++i) { + LLVMValueRef alpha; + if (i < num_fs) { + alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], ""); + } + else { + alpha = undef_src_val; + } + + for (j = 0; j < dst_channels; ++j) { + assert(swizzle[j] < 4); + if (i < num_fs) { + fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], ""); + } + else { + fs_src1[i][j] = undef_src_val; + } + } + if (dst_channels == 3 && !has_alpha) { + fs_src1[i][3] = alpha; + } + if (fs_type.length == 8) { + src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels); + src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels); + } else { + src1_alpha[i] = alpha; + } + } + } + + if (util_format_is_pure_integer(out_format)) { + /* + * In this case fs_type was really ints or uints disguised as floats, + * fix that up now. + */ + fs_type.floating = 0; + fs_type.sign = dst_type.sign; + for (i = 0; i < num_fullblock_fs; ++i) { + for (j = 0; j < dst_channels; ++j) { + fs_src[i][j] = LLVMBuildBitCast(builder, fs_src[i][j], + lp_build_vec_type(gallivm, fs_type), ""); + } + if (dst_channels == 3 && !has_alpha) { + fs_src[i][3] = LLVMBuildBitCast(builder, fs_src[i][3], + lp_build_vec_type(gallivm, fs_type), ""); + } + } + } + + /* + * Pixel twiddle from fragment shader order to memory order + */ + src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, + dst_channels, fs_src, src, pad_inline); + if (dual_source_blend) { + generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels, + fs_src1, src1, pad_inline); + } + + src_channels = dst_channels < 3 ? 
dst_channels : 4; + if (src_count != num_fullblock_fs * src_channels) { + unsigned ds = src_count / (num_fullblock_fs * src_channels); + row_type.length /= ds; + fs_type.length = row_type.length; + } + + blend_type = row_type; + mask_type.length = 4; + + /* Convert src to row_type */ + if (dual_source_blend) { + struct lp_type old_row_type = row_type; + lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src); + src_count = lp_build_conv_auto(gallivm, fs_type, &old_row_type, src1, src_count, src1); + } + else { + src_count = lp_build_conv_auto(gallivm, fs_type, &row_type, src, src_count, src); + } + + /* If the rows are not an SSE vector, combine them to become SSE size! */ + if ((row_type.width * row_type.length) % 128) { + unsigned bits = row_type.width * row_type.length; + unsigned combined; + + assert(src_count >= (vector_width / bits)); + + dst_count = src_count / (vector_width / bits); + + combined = lp_build_concat_n(gallivm, row_type, src, src_count, src, dst_count); + if (dual_source_blend) { + lp_build_concat_n(gallivm, row_type, src1, src_count, src1, dst_count); + } + + row_type.length *= combined; + src_count /= combined; + + bits = row_type.width * row_type.length; + assert(bits == 128 || bits == 256); + } + + + /* + * Blend Colour conversion + */ + blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr); + blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), ""); + blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), ""); + + /* Convert */ + lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1); + + if (out_format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + /* + * since blending is done with floats, there was no conversion. + * However, the rules according to fixed point renderbuffers still + * apply, that is we must clamp inputs to 0.0/1.0. 
+ * (This would apply to separate alpha conversion too but we currently + * force has_alpha to be true.) + * TODO: should skip this with "fake" blend, since post-blend conversion + * will clamp anyway. + * TODO: could also skip this if fragment color clamping is enabled. We + * don't support it natively so it gets baked into the shader however, so + * can't really tell here. + */ + struct lp_build_context f32_bld; + assert(row_type.floating); + lp_build_context_init(&f32_bld, gallivm, row_type); + for (i = 0; i < src_count; i++) { + src[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src[i]); + } + if (dual_source_blend) { + for (i = 0; i < src_count; i++) { + src1[i] = lp_build_clamp_zero_one_nanzero(&f32_bld, src1[i]); + } + } + /* probably can't be different than row_type but better safe than sorry... */ + lp_build_context_init(&f32_bld, gallivm, blend_type); + blend_color = lp_build_clamp(&f32_bld, blend_color, f32_bld.zero, f32_bld.one); + } + + /* Extract alpha */ + blend_alpha = lp_build_extract_broadcast(gallivm, blend_type, row_type, blend_color, lp_build_const_int32(gallivm, 3)); + + /* Swizzle to appropriate channels, e.g. from RGBA to BGRA BGRA */ + pad_inline &= (dst_channels * (block_size / src_count) * row_type.width) != vector_width; + if (pad_inline) { + /* Use all 4 channels e.g. from RGBA RGBA to RGxx RGxx */ + blend_color = lp_build_swizzle_aos_n(gallivm, blend_color, swizzle, TGSI_NUM_CHANNELS, row_type.length); + } else { + /* Only use dst_channels e.g. 
RGBA RGBA to RG RG xxxx */ + blend_color = lp_build_swizzle_aos_n(gallivm, blend_color, swizzle, dst_channels, row_type.length); + } + + /* + * Mask conversion + */ + lp_bld_quad_twiddle(gallivm, mask_type, &src_mask[0], block_height, &src_mask[0]); + + if (src_count < block_height) { + lp_build_concat_n(gallivm, mask_type, src_mask, 4, src_mask, src_count); + } else if (src_count > block_height) { + for (i = src_count; i > 0; --i) { + unsigned pixels = block_size / src_count; + unsigned idx = i - 1; + + src_mask[idx] = lp_build_extract_range(gallivm, src_mask[(idx * pixels) / 4], + (idx * pixels) % 4, pixels); + } + } + + assert(mask_type.width == 32); + + for (i = 0; i < src_count; ++i) { + unsigned pixels = block_size / src_count; + unsigned pixel_width = row_type.width * dst_channels; + + if (pixel_width == 24) { + mask_type.width = 8; + mask_type.length = vector_width / mask_type.width; + } else { + mask_type.length = pixels; + mask_type.width = row_type.width * dst_channels; + + src_mask[i] = LLVMBuildIntCast(builder, src_mask[i], lp_build_int_vec_type(gallivm, mask_type), ""); + + mask_type.length *= dst_channels; + mask_type.width /= dst_channels; + } + + src_mask[i] = LLVMBuildBitCast(builder, src_mask[i], lp_build_int_vec_type(gallivm, mask_type), ""); + src_mask[i] = lp_build_pad_vector(gallivm, src_mask[i], row_type.length); + } + + /* + * Alpha conversion + */ + if (!has_alpha) { + struct lp_type alpha_type = fs_type; + alpha_type.length = 4; + convert_alpha(gallivm, row_type, alpha_type, + block_size, block_height, + src_count, dst_channels, + pad_inline, src_alpha); + if (dual_source_blend) { + convert_alpha(gallivm, row_type, alpha_type, + block_size, block_height, + src_count, dst_channels, + pad_inline, src1_alpha); + } + } + + + /* + * Load dst from memory + */ + if (src_count < block_height) { + dst_count = block_height; + } else { + dst_count = src_count; + } + + dst_type.length *= block_size / dst_count; + + if 
(format_expands_to_float_soa(out_format_desc)) { + /* + * we need multiple values at once for the conversion, so can as well + * load them vectorized here too instead of concatenating later. + * (Still need concatenation later for 8-wide vectors). + */ + dst_count = block_height; + dst_type.length = block_width; + } + + /* + * Compute the alignment of the destination pointer in bytes + * We fetch 1-4 pixels, if the format has pot alignment then those fetches + * are always aligned by MIN2(16, fetch_width) except for buffers (not + * 1d tex but can't distinguish here) so need to stick with per-pixel + * alignment in this case. + */ + if (is_1d) { + dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8); + } + else { + dst_alignment = dst_type.length * dst_type.width / 8; + } + /* Force power-of-two alignment by extracting only the least-significant-bit */ + dst_alignment = 1 << (ffs(dst_alignment) - 1); + /* + * Resource base and stride pointers are aligned to 16 bytes, so that's + * the maximum alignment we can guarantee + */ + dst_alignment = MIN2(16, dst_alignment); + + if (is_1d) { + load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1, + dst, dst_type, dst_count / 4, dst_alignment); + for (i = dst_count / 4; i < dst_count; i++) { + dst[i] = lp_build_undef(gallivm, dst_type); + } + + } + else { + load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, + dst, dst_type, dst_count, dst_alignment); + } + + + /* + * Convert from dst/output format to src/blending format. + * + * This is necessary as we can only read 1 row from memory at a time, + * so the minimum dst_count will ever be at this point is 4. + * + * With, for example, R8 format you can have all 16 pixels in a 128 bit vector, + * this will take the 4 dsts and combine them into 1 src so we can perform blending + * on all 16 pixels in that single vector at once. 
+ */ + if (dst_count > src_count) { + lp_build_concat_n(gallivm, dst_type, dst, 4, dst, src_count); + } + + /* + * Blending + */ + /* XXX this is broken for RGB8 formats - + * they get expanded from 12 to 16 elements (to include alpha) + * by convert_to_blend_type then reduced to 15 instead of 12 + * by convert_from_blend_type (a simple fix though breaks A8...). + * R16G16B16 also crashes differently however something going wrong + * inside llvm handling npot vector sizes seemingly. + * It seems some cleanup could be done here (like skipping conversion/blend + * when not needed). + */ + convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count); + + /* + * FIXME: Really should get logic ops / masks out of generic blend / row + * format. Logic ops will definitely not work on the blend float format + * used for SRGB here and I think OpenGL expects this to work as expected + * (that is incoming values converted to srgb then logic op applied). + */ + for (i = 0; i < src_count; ++i) { + dst[i] = lp_build_blend_aos(gallivm, + &variant->key.blend, + out_format, + row_type, + rt, + src[i], + has_alpha ? NULL : src_alpha[i], + src1[i], + has_alpha ? NULL : src1_alpha[i], + dst[i], + partial_mask ? src_mask[i] : NULL, + blend_color, + has_alpha ? NULL : blend_alpha, + swizzle, + pad_inline ? 
4 : dst_channels); + } + + convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count); + + /* Split the blend rows back to memory rows */ + if (dst_count > src_count) { + row_type.length = dst_type.length * (dst_count / src_count); + + if (src_count == 1) { + dst[1] = lp_build_extract_range(gallivm, dst[0], row_type.length / 2, row_type.length / 2); + dst[0] = lp_build_extract_range(gallivm, dst[0], 0, row_type.length / 2); + + row_type.length /= 2; + src_count *= 2; + } + + dst[3] = lp_build_extract_range(gallivm, dst[1], row_type.length / 2, row_type.length / 2); + dst[2] = lp_build_extract_range(gallivm, dst[1], 0, row_type.length / 2); + dst[1] = lp_build_extract_range(gallivm, dst[0], row_type.length / 2, row_type.length / 2); + dst[0] = lp_build_extract_range(gallivm, dst[0], 0, row_type.length / 2); + + row_type.length /= 2; + src_count *= 2; + } + + /* + * Store blend result to memory + */ + if (is_1d) { + store_unswizzled_block(gallivm, color_ptr, stride, block_width, 1, + dst, dst_type, dst_count / 4, dst_alignment); + } + else { + store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height, + dst, dst_type, dst_count, dst_alignment); + } + + if (have_smallfloat_format(dst_type, out_format)) { + lp_build_fpstate_set(gallivm, fpstate); + } + + if (do_branch) { + lp_build_mask_end(&mask_ctx); + } +} + + +/** + * Generate the runtime callable function for the whole fragment pipeline. + * Note that the function which we generate operates on a block of 16 + * pixels at at time. The block contains 2x2 quads. Each quad contains + * 2x2 pixels. 
+ */ +static void +generate_fragment(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, + struct lp_fragment_shader_variant *variant, + unsigned partial_mask) +{ + struct gallivm_state *gallivm = variant->gallivm; + const struct lp_fragment_shader_variant_key *key = &variant->key; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; + char func_name[64]; + struct lp_type fs_type; + struct lp_type blend_type; + LLVMTypeRef fs_elem_type; + LLVMTypeRef blend_vec_type; + LLVMTypeRef arg_types[13]; + LLVMTypeRef func_type; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context); + LLVMValueRef context_ptr; + LLVMValueRef x; + LLVMValueRef y; + LLVMValueRef a0_ptr; + LLVMValueRef dadx_ptr; + LLVMValueRef dady_ptr; + LLVMValueRef color_ptr_ptr; + LLVMValueRef stride_ptr; + LLVMValueRef depth_ptr; + LLVMValueRef depth_stride; + LLVMValueRef mask_input; + LLVMValueRef thread_data_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + struct lp_build_sampler_soa *sampler; + struct lp_build_interp_soa_context interp; + LLVMValueRef fs_mask[16 / 4]; + LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4]; + LLVMValueRef function; + LLVMValueRef facing; + unsigned num_fs; + unsigned i; + unsigned chan; + unsigned cbuf; + boolean cbuf0_write_all; + const boolean dual_source_blend = key->blend.rt[0].blend_enable && + util_blend_state_is_dual(&key->blend, 0); + + assert(lp_native_vector_width / 32 >= 4); + + /* Adjust color input interpolation according to flatshade state: + */ + memcpy(inputs, shader->inputs, shader->info.base.num_inputs * sizeof inputs[0]); + for (i = 0; i < shader->info.base.num_inputs; i++) { + if (inputs[i].interp == LP_INTERP_COLOR) { + if (key->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_PERSPECTIVE; + } + } + + /* check if writes to cbuf[0] are to be copied to all cbufs */ + cbuf0_write_all 
= + shader->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; + + /* TODO: actually pick these based on the fs and color buffer + * characteristics. */ + + memset(&fs_type, 0, sizeof fs_type); + fs_type.floating = TRUE; /* floating point values */ + fs_type.sign = TRUE; /* values are signed */ + fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + fs_type.width = 32; /* 32-bit float */ + fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */ + + memset(&blend_type, 0, sizeof blend_type); + blend_type.floating = FALSE; /* values are integers */ + blend_type.sign = FALSE; /* values are unsigned */ + blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */ + blend_type.width = 8; /* 8-bit ubyte values */ + blend_type.length = 16; /* 16 elements per vector */ + + /* + * Generate the function prototype. Any change here must be reflected in + * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa. + */ + + fs_elem_type = lp_build_elem_type(gallivm, fs_type); + + blend_vec_type = lp_build_vec_type(gallivm, blend_type); + + util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s", + shader->no, variant->no, partial_mask ? 
"partial" : "whole"); + + arg_types[0] = variant->jit_context_ptr_type; /* context */ + arg_types[1] = int32_type; /* x */ + arg_types[2] = int32_type; /* y */ + arg_types[3] = int32_type; /* facing */ + arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ + arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ + arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ + arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ + arg_types[8] = LLVMPointerType(int8_type, 0); /* depth */ + arg_types[9] = int32_type; /* mask_input */ + arg_types[10] = variant->jit_thread_data_ptr_type; /* per thread data */ + arg_types[11] = LLVMPointerType(int32_type, 0); /* stride */ + arg_types[12] = int32_type; /* depth_stride */ + + func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), + arg_types, Elements(arg_types), 0); + + function = LLVMAddFunction(gallivm->module, func_name, func_type); + LLVMSetFunctionCallConv(function, LLVMCCallConv); + + variant->function[partial_mask] = function; + + /* XXX: need to propagate noalias down into color param now we are + * passing a pointer-to-pointer? 
+ */ + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(function, 0); + x = LLVMGetParam(function, 1); + y = LLVMGetParam(function, 2); + facing = LLVMGetParam(function, 3); + a0_ptr = LLVMGetParam(function, 4); + dadx_ptr = LLVMGetParam(function, 5); + dady_ptr = LLVMGetParam(function, 6); + color_ptr_ptr = LLVMGetParam(function, 7); + depth_ptr = LLVMGetParam(function, 8); + mask_input = LLVMGetParam(function, 9); + thread_data_ptr = LLVMGetParam(function, 10); + stride_ptr = LLVMGetParam(function, 11); + depth_stride = LLVMGetParam(function, 12); + + lp_build_name(context_ptr, "context"); + lp_build_name(x, "x"); + lp_build_name(y, "y"); + lp_build_name(a0_ptr, "a0"); + lp_build_name(dadx_ptr, "dadx"); + lp_build_name(dady_ptr, "dady"); + lp_build_name(color_ptr_ptr, "color_ptr_ptr"); + lp_build_name(depth_ptr, "depth"); + lp_build_name(thread_data_ptr, "thread_data"); + lp_build_name(mask_input, "mask_input"); + lp_build_name(stride_ptr, "stride_ptr"); + lp_build_name(depth_stride, "depth_stride"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); + builder = gallivm->builder; + assert(builder); + LLVMPositionBuilderAtEnd(builder, block); + + /* code generated texture sampling */ + sampler = lp_llvm_sampler_soa_create(key->state); + + num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */ + /* for 1d resources only run "upper half" of stamp */ + if (key->resource_1d) + num_fs /= 2; + + { + LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs); + LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type); + LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type, + num_loop, "mask_store"); + LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS]; + boolean pixel_center_integer = + 
shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER]; + + /* + * The shader input interpolation info is not explicitely baked in the + * shader key, but everything it derives from (TGSI, and flatshade) is + * already included in the shader key. + */ + lp_build_interp_soa_init(&interp, + gallivm, + shader->info.base.num_inputs, + inputs, + pixel_center_integer, + builder, fs_type, + a0_ptr, dadx_ptr, dady_ptr, + x, y); + + for (i = 0; i < num_fs; i++) { + LLVMValueRef mask; + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store, + &indexi, 1, "mask_ptr"); + + if (partial_mask) { + mask = generate_quad_mask(gallivm, fs_type, + i*fs_type.length/4, mask_input); + } + else { + mask = lp_build_const_int_vec(gallivm, fs_type, ~0); + } + LLVMBuildStore(builder, mask, mask_ptr); + } + + generate_fs_loop(gallivm, + shader, key, + builder, + fs_type, + context_ptr, + num_loop, + &interp, + sampler, + mask_store, /* output */ + color_store, + depth_ptr, + depth_stride, + facing, + thread_data_ptr); + + for (i = 0; i < num_fs; i++) { + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store, + &indexi, 1, ""); + fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask"); + /* This is fucked up need to reorganize things */ + for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + ptr = LLVMBuildGEP(builder, + color_store[cbuf * !cbuf0_write_all][chan], + &indexi, 1, ""); + fs_out_color[cbuf][chan][i] = ptr; + } + } + if (dual_source_blend) { + /* only support one dual source blend target hence always use output 1 */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + ptr = LLVMBuildGEP(builder, + color_store[1][chan], + &indexi, 1, ""); + fs_out_color[1][chan][i] = ptr; + } + } + } + } + + sampler->destroy(sampler); + + /* Loop over color outputs / color buffers to do blending. 
+ */ + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + if (key->cbuf_format[cbuf] != PIPE_FORMAT_NONE) { + LLVMValueRef color_ptr; + LLVMValueRef stride; + LLVMValueRef index = lp_build_const_int32(gallivm, cbuf); + + boolean do_branch = ((key->depth.enabled + || key->stencil[0].enabled + || key->alpha.enabled) + && !shader->info.base.uses_kill); + + color_ptr = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, color_ptr_ptr, + &index, 1, ""), + ""); + + lp_build_name(color_ptr, "color_ptr%d", cbuf); + + stride = LLVMBuildLoad(builder, + LLVMBuildGEP(builder, stride_ptr, &index, 1, ""), + ""); + + generate_unswizzled_blend(gallivm, cbuf, variant, + key->cbuf_format[cbuf], + num_fs, fs_type, fs_mask, fs_out_color, + context_ptr, color_ptr, stride, + partial_mask, do_branch); + } + } + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, function); +} + + +static void +dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key) +{ + unsigned i; + + debug_printf("fs variant %p:\n", (void *) key); + + if (key->flatshade) { + debug_printf("flatshade = 1\n"); + } + for (i = 0; i < key->nr_cbufs; ++i) { + debug_printf("cbuf_format[%u] = %s\n", i, util_format_name(key->cbuf_format[i])); + } + if (key->depth.enabled) { + debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format)); + debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE)); + debug_printf("depth.writemask = %u\n", key->depth.writemask); + } + + for (i = 0; i < 2; ++i) { + if (key->stencil[i].enabled) { + debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE)); + debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE)); + debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE)); + debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE)); + debug_printf("stencil[%u].valuemask = 0x%x\n", i, 
key->stencil[i].valuemask); + debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask); + } + } + + if (key->alpha.enabled) { + debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE)); + } + + if (key->occlusion_count) { + debug_printf("occlusion_count = 1\n"); + } + + if (key->blend.logicop_enable) { + debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE)); + } + else if (key->blend.rt[0].blend_enable) { + debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE)); + debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE)); + debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE)); + debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE)); + debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE)); + debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE)); + } + debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask); + if (key->blend.alpha_to_coverage) { + debug_printf("blend.alpha_to_coverage is enabled\n"); + } + for (i = 0; i < key->nr_samplers; ++i) { + const struct lp_static_sampler_state *sampler = &key->state[i].sampler_state; + debug_printf("sampler[%u] = \n", i); + debug_printf(" .wrap = %s %s %s\n", + util_dump_tex_wrap(sampler->wrap_s, TRUE), + util_dump_tex_wrap(sampler->wrap_t, TRUE), + util_dump_tex_wrap(sampler->wrap_r, TRUE)); + debug_printf(" .min_img_filter = %s\n", + util_dump_tex_filter(sampler->min_img_filter, TRUE)); + debug_printf(" .min_mip_filter = %s\n", + util_dump_tex_mipfilter(sampler->min_mip_filter, TRUE)); + debug_printf(" .mag_img_filter = %s\n", + util_dump_tex_filter(sampler->mag_img_filter, TRUE)); + if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) + 
debug_printf(" .compare_func = %s\n", util_dump_func(sampler->compare_func, TRUE)); + debug_printf(" .normalized_coords = %u\n", sampler->normalized_coords); + debug_printf(" .min_max_lod_equal = %u\n", sampler->min_max_lod_equal); + debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero); + debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod); + debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod); + } + for (i = 0; i < key->nr_sampler_views; ++i) { + const struct lp_static_texture_state *texture = &key->state[i].texture_state; + debug_printf("texture[%u] = \n", i); + debug_printf(" .format = %s\n", + util_format_name(texture->format)); + debug_printf(" .target = %s\n", + util_dump_tex_target(texture->target, TRUE)); + debug_printf(" .level_zero_only = %u\n", + texture->level_zero_only); + debug_printf(" .pot = %u %u %u\n", + texture->pot_width, + texture->pot_height, + texture->pot_depth); + } +} + + +void +lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) +{ + debug_printf("llvmpipe: Fragment shader #%u variant #%u:\n", + variant->shader->no, variant->no); + tgsi_dump(variant->shader->base.tokens, 0); + dump_fs_variant_key(&variant->key); + debug_printf("variant->opaque = %u\n", variant->opaque); + debug_printf("\n"); +} + + +/** + * Generate a new fragment shader variant from the shader code and + * other state indicated by the key. 
+ */ +static struct lp_fragment_shader_variant * +generate_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, + const struct lp_fragment_shader_variant_key *key) +{ + struct lp_fragment_shader_variant *variant; + const struct util_format_description *cbuf0_format_desc; + boolean fullcolormask; + char module_name[64]; + + variant = CALLOC_STRUCT(lp_fragment_shader_variant); + if(!variant) + return NULL; + + util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u", + shader->no, shader->variants_created); + + variant->gallivm = gallivm_create(module_name, lp->context); + if (!variant->gallivm) { + FREE(variant); + return NULL; + } + + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + variant->no = shader->variants_created++; + + memcpy(&variant->key, key, shader->variant_key_size); + + /* + * Determine whether we are touching all channels in the color buffer. + */ + fullcolormask = FALSE; + if (key->nr_cbufs == 1) { + cbuf0_format_desc = util_format_description(key->cbuf_format[0]); + fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask); + } + + variant->opaque = + !key->blend.logicop_enable && + !key->blend.rt[0].blend_enable && + fullcolormask && + !key->stencil[0].enabled && + !key->alpha.enabled && + !key->blend.alpha_to_coverage && + !key->depth.enabled && + !shader->info.base.uses_kill + ? 
TRUE : FALSE; + + if ((shader->info.base.num_tokens <= 1) && + !key->depth.enabled && !key->stencil[0].enabled) { + variant->ps_inv_multiplier = 0; + } else { + variant->ps_inv_multiplier = 1; + } + + if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) { + lp_debug_fs_variant(variant); + } + + lp_jit_init_types(variant); + + if (variant->jit_function[RAST_EDGE_TEST] == NULL) + generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + + if (variant->jit_function[RAST_WHOLE] == NULL) { + if (variant->opaque) { + /* Specialized shader, which doesn't need to read the color buffer. */ + generate_fragment(lp, shader, variant, RAST_WHOLE); + } + } + + /* + * Compile everything + */ + + gallivm_compile_module(variant->gallivm); + + variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module); + + if (variant->function[RAST_EDGE_TEST]) { + variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func) + gallivm_jit_function(variant->gallivm, + variant->function[RAST_EDGE_TEST]); + } + + if (variant->function[RAST_WHOLE]) { + variant->jit_function[RAST_WHOLE] = (lp_jit_frag_func) + gallivm_jit_function(variant->gallivm, + variant->function[RAST_WHOLE]); + } else if (!variant->jit_function[RAST_WHOLE]) { + variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; + } + + gallivm_free_ir(variant->gallivm); + + return variant; +} + + +static void * +llvmpipe_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_fragment_shader *shader; + int nr_samplers; + int nr_sampler_views; + int i; + + shader = CALLOC_STRUCT(lp_fragment_shader); + if (!shader) + return NULL; + + shader->no = fs_no++; + make_empty_list(&shader->variants); + + /* get/save the summary info for this shader */ + lp_build_tgsi_info(templ->tokens, &shader->info); + + /* we need to keep a local copy of the tokens */ + shader->base.tokens = tgsi_dup_tokens(templ->tokens); 
+ + shader->draw_data = draw_create_fragment_shader(llvmpipe->draw, templ); + if (shader->draw_data == NULL) { + FREE((void *) shader->base.tokens); + FREE(shader); + return NULL; + } + + nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; + nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + + shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, + state[MAX2(nr_samplers, nr_sampler_views)]); + + for (i = 0; i < shader->info.base.num_inputs; i++) { + shader->inputs[i].usage_mask = shader->info.base.input_usage_mask[i]; + shader->inputs[i].cyl_wrap = shader->info.base.input_cylindrical_wrap[i]; + + switch (shader->info.base.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + shader->inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + shader->inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + case TGSI_INTERPOLATE_COLOR: + shader->inputs[i].interp = LP_INTERP_COLOR; + break; + default: + assert(0); + break; + } + + switch (shader->info.base.input_semantic_name[i]) { + case TGSI_SEMANTIC_FACE: + shader->inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + /* Position was already emitted above + */ + shader->inputs[i].interp = LP_INTERP_POSITION; + shader->inputs[i].src_index = 0; + continue; + } + + shader->inputs[i].src_index = i+1; + } + + if (LP_DEBUG & DEBUG_TGSI) { + unsigned attrib; + debug_printf("llvmpipe: Create fragment shader #%u %p:\n", + shader->no, (void *) shader); + tgsi_dump(templ->tokens, 0); + debug_printf("usage masks:\n"); + for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.base.input_usage_mask[attrib]; + debug_printf(" IN[%u].%s%s%s%s\n", + attrib, + usage_mask & TGSI_WRITEMASK_X ? "x" : "", + usage_mask & TGSI_WRITEMASK_Y ? "y" : "", + usage_mask & TGSI_WRITEMASK_Z ? 
"z" : "", + usage_mask & TGSI_WRITEMASK_W ? "w" : ""); + } + debug_printf("\n"); + } + + return shader; +} + + +static void +llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + if (llvmpipe->fs == fs) + return; + + llvmpipe->fs = (struct lp_fragment_shader *) fs; + + draw_bind_fragment_shader(llvmpipe->draw, + (llvmpipe->fs ? llvmpipe->fs->draw_data : NULL)); + + llvmpipe->dirty |= LP_NEW_FS; +} + + +/** + * Remove shader variant from two lists: the shader's variant list + * and the context's variant list. + */ +void +llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) +{ + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached" + " #%u v total cached #%u\n", + variant->shader->no, + variant->no, + variant->shader->variants_created, + variant->shader->variants_cached, + lp->nr_fs_variants); + } + + gallivm_destroy(variant->gallivm); + + /* remove from shader's list */ + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + + /* remove from context's list */ + remove_from_list(&variant->list_item_global); + lp->nr_fs_variants--; + lp->nr_fs_instrs -= variant->nr_instrs; + + FREE(variant); +} + + +static void +llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_fragment_shader *shader = fs; + struct lp_fs_variant_list_item *li; + + assert(fs != llvmpipe->fs); + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not sufficient we need to wait on it too. 
+ */ + llvmpipe_finish(pipe, __FUNCTION__); + + /* Delete all the variants */ + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct lp_fs_variant_list_item *next = next_elem(li); + llvmpipe_remove_shader_variant(llvmpipe, li->base); + li = next; + } + + /* Delete draw module's data */ + draw_delete_fragment_shader(llvmpipe->draw, shader->draw_data); + + assert(shader->variants_cached == 0); + FREE((void *) shader->base.tokens); + FREE(shader); +} + + + +static void +llvmpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + struct pipe_constant_buffer *cb) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct pipe_resource *constants = cb ? cb->buffer : NULL; + + assert(shader < PIPE_SHADER_TYPES); + assert(index < Elements(llvmpipe->constants[shader])); + + /* note: reference counting */ + util_copy_constant_buffer(&llvmpipe->constants[shader][index], cb); + + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY) { + /* Pass the constants to the 'draw' module */ + const unsigned size = cb ? cb->buffer_size : 0; + const ubyte *data; + + if (constants) { + data = (ubyte *) llvmpipe_resource_data(constants); + } + else if (cb && cb->user_buffer) { + data = (ubyte *) cb->user_buffer; + } + else { + data = NULL; + } + + if (data) + data += cb->buffer_offset; + + draw_set_mapped_constant_buffer(llvmpipe->draw, shader, + index, data, size); + } + + llvmpipe->dirty |= LP_NEW_CONSTANTS; + + if (cb && cb->user_buffer) { + pipe_resource_reference(&constants, NULL); + } +} + + +/** + * Return the blend factor equivalent to a destination alpha of one. 
+ */ +static inline unsigned +force_dst_alpha_one(unsigned factor, boolean clamped_zero) +{ + switch(factor) { + case PIPE_BLENDFACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return PIPE_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + if (clamped_zero) + return PIPE_BLENDFACTOR_ZERO; + else + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; + } + + return factor; +} + + +/** + * We need to generate several variants of the fragment pipeline to match + * all the combinations of the contributing state atoms. + * + * TODO: there is actually no reason to tie this to context state -- the + * generated code could be cached globally in the screen. + */ +static void +make_variant_key(struct llvmpipe_context *lp, + struct lp_fragment_shader *shader, + struct lp_fragment_shader_variant_key *key) +{ + unsigned i; + + memset(key, 0, shader->variant_key_size); + + if (lp->framebuffer.zsbuf) { + enum pipe_format zsbuf_format = lp->framebuffer.zsbuf->format; + const struct util_format_description *zsbuf_desc = + util_format_description(zsbuf_format); + + if (lp->depth_stencil->depth.enabled && + util_format_has_depth(zsbuf_desc)) { + key->zsbuf_format = zsbuf_format; + memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth); + } + if (lp->depth_stencil->stencil[0].enabled && + util_format_has_stencil(zsbuf_desc)) { + key->zsbuf_format = zsbuf_format; + memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil); + } + if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) { + key->resource_1d = TRUE; + } + } + + /* + * Propagate the depth clamp setting from the rasterizer state. + * depth_clip == 0 implies depth clamping is enabled. + * + * When clip_halfz is enabled, then always clamp the depth values. + */ + if (lp->rasterizer->clip_halfz) { + key->depth_clamp = 1; + } else { + key->depth_clamp = (lp->rasterizer->depth_clip == 0) ? 
1 : 0; + } + + /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */ + if (!lp->framebuffer.nr_cbufs || + !lp->framebuffer.cbufs[0] || + !util_format_is_pure_integer(lp->framebuffer.cbufs[0]->format)) { + key->alpha.enabled = lp->depth_stencil->alpha.enabled; + } + if(key->alpha.enabled) + key->alpha.func = lp->depth_stencil->alpha.func; + /* alpha.ref_value is passed in jit_context */ + + key->flatshade = lp->rasterizer->flatshade; + if (lp->active_occlusion_queries) { + key->occlusion_count = TRUE; + } + + if (lp->framebuffer.nr_cbufs) { + memcpy(&key->blend, lp->blend, sizeof key->blend); + } + + key->nr_cbufs = lp->framebuffer.nr_cbufs; + + if (!key->blend.independent_blend_enable) { + /* we always need independent blend otherwise the fixups below won't work */ + for (i = 1; i < key->nr_cbufs; i++) { + memcpy(&key->blend.rt[i], &key->blend.rt[0], sizeof(key->blend.rt[0])); + } + key->blend.independent_blend_enable = 1; + } + + for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { + struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i]; + + if (lp->framebuffer.cbufs[i]) { + enum pipe_format format = lp->framebuffer.cbufs[i]->format; + const struct util_format_description *format_desc; + + key->cbuf_format[i] = format; + + /* + * Figure out if this is a 1d resource. Note that OpenGL allows crazy + * mixing of 2d textures with height 1 and 1d textures, so make sure + * we pick 1d if any cbuf or zsbuf is 1d. + */ + if (llvmpipe_resource_is_1d(lp->framebuffer.cbufs[i]->texture)) { + key->resource_1d = TRUE; + } + + format_desc = util_format_description(format); + assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); + + /* + * Mask out color channels not present in the color buffer. + */ + blend_rt->colormask &= util_format_colormask(format_desc); + + /* + * Disable blend for integer formats. 
+ */ + if (util_format_is_pure_integer(format)) { + blend_rt->blend_enable = 0; + } + + /* + * Our swizzled render tiles always have an alpha channel, but the + * linear render target format often does not, so force here the dst + * alpha to be one. + * + * This is not a mere optimization. Wrong results will be produced if + * the dst alpha is used, the dst format does not have alpha, and the + * previous rendering was not flushed from the swizzled to linear + * buffer. For example, NonPowTwo DCT. + * + * TODO: This should be generalized to all channels for better + * performance, but only alpha causes correctness issues. + * + * Also, force rgb/alpha func/factors match, to make AoS blending + * easier. + */ + if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W || + format_desc->swizzle[3] == format_desc->swizzle[0]) { + /* Doesn't cover mixed snorm/unorm but can't render to them anyway */ + boolean clamped_zero = !util_format_is_float(format) && + !util_format_is_snorm(format); + blend_rt->rgb_src_factor = + force_dst_alpha_one(blend_rt->rgb_src_factor, clamped_zero); + blend_rt->rgb_dst_factor = + force_dst_alpha_one(blend_rt->rgb_dst_factor, clamped_zero); + blend_rt->alpha_func = blend_rt->rgb_func; + blend_rt->alpha_src_factor = blend_rt->rgb_src_factor; + blend_rt->alpha_dst_factor = blend_rt->rgb_dst_factor; + } + } + else { + /* no color buffer for this fragment output */ + key->cbuf_format[i] = PIPE_FORMAT_NONE; + blend_rt->colormask = 0x0; + blend_rt->blend_enable = 0; + } + } + + /* This value will be the same for all the variants of a given shader: + */ + key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; + + for(i = 0; i < key->nr_samplers; ++i) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_sampler_state(&key->state[i].sampler_state, + lp->samplers[PIPE_SHADER_FRAGMENT][i]); + } + } + + /* + * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes + * are dx10-style? 
Can't really have mixed opcodes, at least not + * if we want to skip the holes here (without rescanning tgsi). + */ + if (shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + key->nr_sampler_views = shader->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + for(i = 0; i < key->nr_sampler_views; ++i) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { + lp_sampler_static_texture_state(&key->state[i].texture_state, + lp->sampler_views[PIPE_SHADER_FRAGMENT][i]); + } + } + } + else { + key->nr_sampler_views = key->nr_samplers; + for(i = 0; i < key->nr_sampler_views; ++i) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_texture_state(&key->state[i].texture_state, + lp->sampler_views[PIPE_SHADER_FRAGMENT][i]); + } + } + } +} + + + +/** + * Update fragment shader state. This is called just prior to drawing + * something when some fragment-related state has changed. + */ +void +llvmpipe_update_fs(struct llvmpipe_context *lp) +{ + struct lp_fragment_shader *shader = lp->fs; + struct lp_fragment_shader_variant_key key; + struct lp_fragment_shader_variant *variant = NULL; + struct lp_fs_variant_list_item *li; + + make_variant_key(lp, shader, &key); + + /* Search the variants for one which matches the key */ + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, shader->variant_key_size) == 0) { + variant = li->base; + break; + } + li = next_elem(li); + } + + if (variant) { + /* Move this variant to the head of the list to implement LRU + * deletion of shader's when we have too many. + */ + move_to_head(&lp->fs_variants_list, &variant->list_item_global); + } + else { + /* variant not found, create it now */ + int64_t t0, t1, dt; + unsigned i; + unsigned variants_to_cull; + + if (0) { + debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n", + lp->nr_fs_variants, + lp->nr_fs_instrs, + lp->nr_fs_variants ? 
lp->nr_fs_instrs / lp->nr_fs_variants : 0); + } + + /* First, check if we've exceeded the max number of shader variants. + * If so, free 25% of them (the least recently used ones). + */ + variants_to_cull = lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS ? LP_MAX_SHADER_VARIANTS / 4 : 0; + + if (variants_to_cull || + lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) { + struct pipe_context *pipe = &lp->pipe; + + /* + * XXX: we need to flush the context until we have some sort of + * reference counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + /* + * We need to re-check lp->nr_fs_variants because an arbitrarliy large + * number of shader variants (potentially all of them) could be + * pending for destruction on flush. + */ + + for (i = 0; i < variants_to_cull || lp->nr_fs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) { + struct lp_fs_variant_list_item *item; + if (is_empty_list(&lp->fs_variants_list)) { + break; + } + item = last_elem(&lp->fs_variants_list); + assert(item); + assert(item->base); + llvmpipe_remove_shader_variant(lp, item->base); + } + } + + /* + * Generate the new variant. + */ + t0 = os_time_get(); + variant = generate_variant(lp, shader, &key); + t1 = os_time_get(); + dt = t1 - t0; + LP_COUNT_ADD(llvm_compile_time, dt); + LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. 
omit in/out test */ + + /* Put the new variant into the list */ + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&lp->fs_variants_list, &variant->list_item_global); + lp->nr_fs_variants++; + lp->nr_fs_instrs += variant->nr_instrs; + shader->variants_cached++; + } + } + + /* Bind this variant */ + lp_setup_set_fs_variant(lp->setup, variant); +} + + + + + +void +llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state; + llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state; + llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state; + + llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer; +} + +/* + * Rasterization is disabled if there is no pixel shader and + * both depth and stencil testing are disabled: + * http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125 + */ +boolean +llvmpipe_rasterization_disabled(struct llvmpipe_context *lp) +{ + boolean null_fs = !lp->fs || lp->fs->info.base.num_tokens <= 1; + + return (null_fs && + !lp->depth_stencil->depth.enabled && + !lp->depth_stencil->stencil[0].enabled); +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h new file mode 100644 index 000000000..2ddd85188 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -0,0 +1,157 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef LP_STATE_FS_H_ +#define LP_STATE_FS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ +#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ +#include "lp_bld_interp.h" /* for struct lp_shader_input */ + + +struct tgsi_token; +struct lp_fragment_shader; + + +/** Indexes into jit_function[] array */ +#define RAST_WHOLE 0 +#define RAST_EDGE_TEST 1 + + +struct lp_sampler_static_state +{ + /* + * These attributes are effectively interleaved for more sane key handling. + * However, there might be lots of null space if the amount of samplers and + * textures isn't the same. 
+ */ + struct lp_static_sampler_state sampler_state; + struct lp_static_texture_state texture_state; +}; + + +struct lp_fragment_shader_variant_key +{ + struct pipe_depth_state depth; + struct pipe_stencil_state stencil[2]; + struct pipe_blend_state blend; + + struct { + unsigned enabled:1; + unsigned func:3; + } alpha; + + unsigned nr_cbufs:8; + unsigned nr_samplers:8; /* actually derivable from just the shader */ + unsigned nr_sampler_views:8; /* actually derivable from just the shader */ + unsigned flatshade:1; + unsigned occlusion_count:1; + unsigned resource_1d:1; + unsigned depth_clamp:1; + + enum pipe_format zsbuf_format; + enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS]; + + struct lp_sampler_static_state state[PIPE_MAX_SHADER_SAMPLER_VIEWS]; +}; + + +/** doubly-linked list item */ +struct lp_fs_variant_list_item +{ + struct lp_fragment_shader_variant *base; + struct lp_fs_variant_list_item *next, *prev; +}; + + +struct lp_fragment_shader_variant +{ + struct lp_fragment_shader_variant_key key; + + boolean opaque; + uint8_t ps_inv_multiplier; + + struct gallivm_state *gallivm; + + LLVMTypeRef jit_context_ptr_type; + LLVMTypeRef jit_thread_data_ptr_type; + LLVMTypeRef jit_linear_context_ptr_type; + + LLVMValueRef function[2]; + + lp_jit_frag_func jit_function[2]; + + /* Total number of LLVM instructions generated */ + unsigned nr_instrs; + + struct lp_fs_variant_list_item list_item_global, list_item_local; + struct lp_fragment_shader *shader; + + /* For debugging/profiling purposes */ + unsigned no; +}; + + +/** Subclass of pipe_shader_state */ +struct lp_fragment_shader +{ + struct pipe_shader_state base; + + struct lp_tgsi_info info; + + struct lp_fs_variant_list_item variants; + + struct draw_fragment_shader *draw_data; + + /* For debugging/profiling purposes */ + unsigned variant_key_size; + unsigned no; + unsigned variants_created; + unsigned variants_cached; + + /** Fragment shader input interpolation info */ + struct lp_shader_input 
inputs[PIPE_MAX_SHADER_INPUTS]; +}; + + +void +lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant); + +void +llvmpipe_remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant); + +boolean +llvmpipe_rasterization_disabled(struct llvmpipe_context *lp); + + +#endif /* LP_STATE_FS_H_ */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c new file mode 100644 index 000000000..7ea7a3906 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_gs.c @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "lp_context.h" +#include "lp_state.h" +#include "lp_texture.h" +#include "lp_debug.h" + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" + + +static void * +llvmpipe_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_geometry_shader *state; + + state = CALLOC_STRUCT(lp_geometry_shader); + if (state == NULL ) + goto no_state; + + /* debug */ + if (LP_DEBUG & DEBUG_TGSI) { + debug_printf("llvmpipe: Create geometry shader %p:\n", (void *)state); + tgsi_dump(templ->tokens, 0); + } + + /* copy stream output info */ + state->no_tokens = !templ->tokens; + memcpy(&state->stream_output, &templ->stream_output, sizeof state->stream_output); + + if (templ->tokens) { + state->dgs = draw_create_geometry_shader(llvmpipe->draw, templ); + if (state->dgs == NULL) { + goto no_dgs; + } + } + + return state; + +no_dgs: + FREE( state ); +no_state: + return NULL; +} + + +static void +llvmpipe_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + llvmpipe->gs = (struct lp_geometry_shader *)gs; + + draw_bind_geometry_shader(llvmpipe->draw, + (llvmpipe->gs ? 
llvmpipe->gs->dgs : NULL)); + + llvmpipe->dirty |= LP_NEW_GS; +} + + +static void +llvmpipe_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + + struct lp_geometry_shader *state = + (struct lp_geometry_shader *)gs; + + if (!state) { + return; + } + + draw_delete_geometry_shader(llvmpipe->draw, state->dgs); + FREE(state); +} + + +void +llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_gs_state = llvmpipe_create_gs_state; + llvmpipe->pipe.bind_gs_state = llvmpipe_bind_gs_state; + llvmpipe->pipe.delete_gs_state = llvmpipe_delete_gs_state; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c new file mode 100644 index 000000000..94ebf8fff --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -0,0 +1,154 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "lp_context.h" +#include "lp_state.h" +#include "lp_setup.h" +#include "draw/draw_context.h" + +struct lp_rast_state { + struct pipe_rasterizer_state lp_state; + struct pipe_rasterizer_state draw_state; +}; + +/* State which might be handled in either the draw module or locally. + * This function is used to turn that state off in one of the two + * places. + */ +static void +clear_flags(struct pipe_rasterizer_state *rast) +{ + rast->light_twoside = 0; + rast->offset_tri = 0; + rast->offset_line = 0; + rast->offset_point = 0; + rast->offset_units = 0.0f; + rast->offset_scale = 0.0f; +} + + + +static void * +llvmpipe_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + boolean need_pipeline; + + /* Partition rasterizer state into what we want the draw module to + * handle, and what we'll look after ourselves. + */ + struct lp_rast_state *state = MALLOC_STRUCT(lp_rast_state); + if (state == NULL) + return NULL; + + memcpy(&state->draw_state, rast, sizeof *rast); + memcpy(&state->lp_state, rast, sizeof *rast); + + /* We rely on draw module to do unfilled polyons, AA lines and + * points and stipple. + * + * Over time, reduce this list of conditions, and expand the list + * of flags which get cleared in clear_flags(). 
+ */ + need_pipeline = (rast->fill_front != PIPE_POLYGON_MODE_FILL || + rast->fill_back != PIPE_POLYGON_MODE_FILL || + rast->point_smooth || + rast->line_smooth || + rast->line_stipple_enable || + rast->poly_stipple_enable); + + /* If not using the pipeline, clear out the flags which we can + * handle ourselves. If we *are* using the pipeline, do everything + * on the pipeline and clear those flags on our internal copy of + * the state. + */ + if (need_pipeline) + clear_flags(&state->lp_state); + else + clear_flags(&state->draw_state); + + return state; +} + + + +static void +llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_rast_state *state = + (const struct lp_rast_state *) handle; + + if (state) { + llvmpipe->rasterizer = &state->lp_state; + draw_set_rasterizer_state(llvmpipe->draw, &state->draw_state, handle); + + /* XXX: just pass lp_state directly to setup. + */ + lp_setup_set_triangle_state( llvmpipe->setup, + state->lp_state.cull_face, + state->lp_state.front_ccw, + state->lp_state.scissor, + state->lp_state.half_pixel_center, + state->lp_state.bottom_edge_rule); + lp_setup_set_flatshade_first( llvmpipe->setup, + state->lp_state.flatshade_first); + lp_setup_set_line_state( llvmpipe->setup, + state->lp_state.line_width); + lp_setup_set_point_state( llvmpipe->setup, + state->lp_state.point_size, + state->lp_state.point_size_per_vertex, + state->lp_state.sprite_coord_enable, + state->lp_state.sprite_coord_mode); + } + else { + llvmpipe->rasterizer = NULL; + draw_set_rasterizer_state(llvmpipe->draw, NULL, handle); + } + + llvmpipe->dirty |= LP_NEW_RASTERIZER; +} + + +static void +llvmpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) +{ + FREE( rasterizer ); +} + + + +void +llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state; + 
llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state; + llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c new file mode 100644 index 000000000..b205f02fd --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -0,0 +1,390 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include "draw/draw_context.h" + +#include "lp_context.h" +#include "lp_screen.h" +#include "lp_state.h" +#include "lp_debug.h" +#include "state_tracker/sw_winsys.h" + + +static void * +llvmpipe_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct pipe_sampler_state *state = mem_dup(sampler, sizeof *sampler); + + if (LP_PERF & PERF_NO_MIP_LINEAR) { + if (state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + state->min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + } + + if (LP_PERF & PERF_NO_MIPMAPS) + state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + + if (LP_PERF & PERF_NO_LINEAR) { + state->mag_img_filter = PIPE_TEX_FILTER_NEAREST; + state->min_img_filter = PIPE_TEX_FILTER_NEAREST; + } + + return state; +} + + +static void +llvmpipe_bind_sampler_states(struct pipe_context *pipe, + unsigned shader, + unsigned start, + unsigned num, + void **samplers) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + unsigned i; + + assert(shader < PIPE_SHADER_TYPES); + assert(start + num <= Elements(llvmpipe->samplers[shader])); + + draw_flush(llvmpipe->draw); + + /* set the new samplers */ + for (i = 0; i < num; i++) { + llvmpipe->samplers[shader][start + i] = samplers[i]; + } + + /* find highest non-null samplers[] entry */ + { + unsigned j = MAX2(llvmpipe->num_samplers[shader], start + num); + while (j > 0 && llvmpipe->samplers[shader][j - 1] == NULL) + j--; + llvmpipe->num_samplers[shader] = j; + } + + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + draw_set_samplers(llvmpipe->draw, + shader, + llvmpipe->samplers[shader], + llvmpipe->num_samplers[shader]); + } + + llvmpipe->dirty |= LP_NEW_SAMPLER; +} + + +static void +llvmpipe_set_sampler_views(struct pipe_context *pipe, + unsigned shader, + unsigned start, + 
unsigned num, + struct pipe_sampler_view **views) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + + assert(shader < PIPE_SHADER_TYPES); + assert(start + num <= Elements(llvmpipe->sampler_views[shader])); + + draw_flush(llvmpipe->draw); + + /* set the new sampler views */ + for (i = 0; i < num; i++) { + /* Note: we're using pipe_sampler_view_release() here to work around + * a possible crash when the old view belongs to another context that + * was already destroyed. + */ + pipe_sampler_view_release(pipe, + &llvmpipe->sampler_views[shader][start + i]); + pipe_sampler_view_reference(&llvmpipe->sampler_views[shader][start + i], + views[i]); + } + + /* find highest non-null sampler_views[] entry */ + { + unsigned j = MAX2(llvmpipe->num_sampler_views[shader], start + num); + while (j > 0 && llvmpipe->sampler_views[shader][j - 1] == NULL) + j--; + llvmpipe->num_sampler_views[shader] = j; + } + + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + draw_set_sampler_views(llvmpipe->draw, + shader, + llvmpipe->sampler_views[shader], + llvmpipe->num_sampler_views[shader]); + } + + llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW; +} + + +static struct pipe_sampler_view * +llvmpipe_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) +{ + struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + /* + * XXX we REALLY want to see the correct bind flag here but the OpenGL + * state tracker can't guarantee that at least for texture buffer objects. 
+ */ + if (!(texture->bind & PIPE_BIND_SAMPLER_VIEW)) + debug_printf("Illegal sampler view creation without bind flag\n"); + + if (view) { + *view = *templ; + view->reference.count = 1; + view->texture = NULL; + pipe_resource_reference(&view->texture, texture); + view->context = pipe; + +#ifdef DEBUG + /* + * This is possibly too lenient, but the primary reason is just + * to catch state trackers which forget to initialize this, so + * it only catches clearly impossible view targets. + */ + if (view->target != texture->target) { + if (view->target == PIPE_TEXTURE_1D) + assert(texture->target == PIPE_TEXTURE_1D_ARRAY); + else if (view->target == PIPE_TEXTURE_1D_ARRAY) + assert(texture->target == PIPE_TEXTURE_1D); + else if (view->target == PIPE_TEXTURE_2D) + assert(texture->target == PIPE_TEXTURE_2D_ARRAY || + texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_CUBE_ARRAY); + else if (view->target == PIPE_TEXTURE_2D_ARRAY) + assert(texture->target == PIPE_TEXTURE_2D || + texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_CUBE_ARRAY); + else if (view->target == PIPE_TEXTURE_CUBE) + assert(texture->target == PIPE_TEXTURE_CUBE_ARRAY || + texture->target == PIPE_TEXTURE_2D_ARRAY); + else if (view->target == PIPE_TEXTURE_CUBE_ARRAY) + assert(texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_2D_ARRAY); + else + assert(0); + } +#endif + } + + return view; +} + + +static void +llvmpipe_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + + +static void +llvmpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + +static void +prepare_shader_sampling( + struct llvmpipe_context *lp, + unsigned num, + struct pipe_sampler_view **views, + unsigned shader_type, + struct pipe_resource *mapped_tex[PIPE_MAX_SHADER_SAMPLER_VIEWS]) +{ + + unsigned i; + uint32_t 
row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; + const void *addr; + + assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + if (!num) + return; + + for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) { + struct pipe_sampler_view *view = i < num ? views[i] : NULL; + + if (view) { + struct pipe_resource *tex = view->texture; + struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex); + unsigned width0 = tex->width0; + unsigned num_layers = tex->depth0; + unsigned first_level = 0; + unsigned last_level = 0; + + /* We're referencing the texture's internal data, so save a + * reference to it. + */ + pipe_resource_reference(&mapped_tex[i], tex); + + if (!lp_tex->dt) { + /* regular texture - setup array of mipmap level offsets */ + struct pipe_resource *res = view->texture; + int j; + + if (llvmpipe_resource_is_texture(res)) { + first_level = view->u.tex.first_level; + last_level = view->u.tex.last_level; + assert(first_level <= last_level); + assert(last_level <= res->last_level); + addr = lp_tex->tex_data; + + for (j = first_level; j <= last_level; j++) { + mip_offsets[j] = lp_tex->mip_offsets[j]; + row_stride[j] = lp_tex->row_stride[j]; + img_stride[j] = lp_tex->img_stride[j]; + } + if (view->target == PIPE_TEXTURE_1D_ARRAY || + view->target == PIPE_TEXTURE_2D_ARRAY || + view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { + num_layers = view->u.tex.last_layer - view->u.tex.first_layer + 1; + for (j = first_level; j <= last_level; j++) { + mip_offsets[j] += view->u.tex.first_layer * + lp_tex->img_stride[j]; + } + if (view->target == PIPE_TEXTURE_CUBE || + view->target == PIPE_TEXTURE_CUBE_ARRAY) { + assert(num_layers % 6 == 0); + } + assert(view->u.tex.first_layer <= view->u.tex.last_layer); + assert(view->u.tex.last_layer < res->array_size); + } + } + else { + unsigned view_blocksize = util_format_get_blocksize(view->format); + addr = lp_tex->data; + /* 
probably don't really need to fill that out */ + mip_offsets[0] = 0; + row_stride[0] = 0; + img_stride[0] = 0; + + /* everything specified in number of elements here. */ + width0 = view->u.buf.last_element - view->u.buf.first_element + 1; + addr = (uint8_t *)addr + view->u.buf.first_element * + view_blocksize; + assert(view->u.buf.first_element <= view->u.buf.last_element); + assert(view->u.buf.last_element * view_blocksize < res->width0); + } + } + else { + /* display target texture/surface */ + /* + * XXX: Where should this be unmapped? + */ + struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); + struct sw_winsys *winsys = screen->winsys; + addr = winsys->displaytarget_map(winsys, lp_tex->dt, + PIPE_TRANSFER_READ); + row_stride[0] = lp_tex->row_stride[0]; + img_stride[0] = lp_tex->img_stride[0]; + mip_offsets[0] = 0; + assert(addr); + } + draw_set_mapped_texture(lp->draw, + shader_type, + i, + width0, tex->height0, num_layers, + first_level, last_level, + addr, + row_stride, img_stride, mip_offsets); + } + } +} + + +/** + * Called during state validation when LP_NEW_SAMPLER_VIEW is set. + */ +void +llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, + unsigned num, + struct pipe_sampler_view **views) +{ + prepare_shader_sampling(lp, num, views, PIPE_SHADER_VERTEX, + lp->mapped_vs_tex); +} + +void +llvmpipe_cleanup_vertex_sampling(struct llvmpipe_context *ctx) +{ + unsigned i; + for (i = 0; i < Elements(ctx->mapped_vs_tex); i++) { + pipe_resource_reference(&ctx->mapped_vs_tex[i], NULL); + } +} + + +/** + * Called during state validation when LP_NEW_SAMPLER_VIEW is set. 
+ */ +void +llvmpipe_prepare_geometry_sampling(struct llvmpipe_context *lp, + unsigned num, + struct pipe_sampler_view **views) +{ + prepare_shader_sampling(lp, num, views, PIPE_SHADER_GEOMETRY, + lp->mapped_gs_tex); +} + +void +llvmpipe_cleanup_geometry_sampling(struct llvmpipe_context *ctx) +{ + unsigned i; + for (i = 0; i < Elements(ctx->mapped_gs_tex); i++) { + pipe_resource_reference(&ctx->mapped_gs_tex[i], NULL); + } +} + +void +llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state; + + llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states; + llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; + llvmpipe->pipe.set_sampler_views = llvmpipe_set_sampler_views; + llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; + llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c new file mode 100644 index 000000000..6397b5196 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -0,0 +1,1021 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/simple_list.h" +#include "os/os_time.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_bitarit.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_type.h" + +#include "lp_perf.h" +#include "lp_debug.h" +#include "lp_flush.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_state.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" + + +/** Setup shader number (for debugging) */ +static unsigned setup_no = 0; + + +/* currently organized to interpolate full float[4] attributes even + * when some elements are unused. Later, can pack vertex data more + * closely. 
+ */ + + +struct lp_setup_args +{ + /* Function arguments: + */ + LLVMValueRef v0; + LLVMValueRef v1; + LLVMValueRef v2; + LLVMValueRef facing; /* boolean */ + LLVMValueRef a0; + LLVMValueRef dadx; + LLVMValueRef dady; + + /* Derived: + */ + LLVMValueRef x0_center; + LLVMValueRef y0_center; + LLVMValueRef dy20_ooa; + LLVMValueRef dy01_ooa; + LLVMValueRef dx20_ooa; + LLVMValueRef dx01_ooa; + struct lp_build_context bld; +}; + + +static void +store_coef(struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef dadx, + LLVMValueRef dady) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef idx = lp_build_const_int32(gallivm, slot); + + LLVMBuildStore(builder, + a0, + LLVMBuildGEP(builder, args->a0, &idx, 1, "")); + + LLVMBuildStore(builder, + dadx, + LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); + + LLVMBuildStore(builder, + dady, + LLVMBuildGEP(builder, args->dady, &idx, 1, "")); +} + + + +static void +emit_constant_coef4(struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef vert) +{ + store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? 
+ */ +static void +emit_facing_coef(struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot ) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef a0_0 = args->facing; + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); + LLVMValueRef a0, face_val; + const unsigned char swizzles[4] = { PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ZERO, + PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ZERO }; + /* Our face val is either 1 or 0 so we do + * face = (val * 2) - 1 + * to make it 1 or -1 + */ + face_val = + LLVMBuildFAdd(builder, + LLVMBuildFMul(builder, a0_0f, + lp_build_const_float(gallivm, 2.0), + ""), + lp_build_const_float(gallivm, -1.0), + "facing"); + face_val = lp_build_broadcast_scalar(&args->bld, face_val); + a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles); + + store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero); +} + + +static LLVMValueRef +vert_attrib(struct gallivm_state *gallivm, + LLVMValueRef vert, + int attr, + int elem, + const char *name) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef idx[2]; + idx[0] = lp_build_const_int32(gallivm, attr); + idx[1] = lp_build_const_int32(gallivm, elem); + return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); +} + + +static void +lp_twoside(struct gallivm_state *gallivm, + struct lp_setup_args *args, + const struct lp_setup_variant_key *key, + int bcolor_slot, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef a0_back, a1_back, a2_back; + LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot); + + LLVMValueRef facing = args->facing; + LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, + lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */ + + a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back"); + a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back"); + a2_back 
= LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back"); + + /* Possibly swap the front and back attrib values, + * + * Prefer select to if so we don't have to worry about phis or + * allocas. + */ + attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], ""); + attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], ""); + attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], ""); + +} + +static void +lp_do_offset_tri(struct gallivm_state *gallivm, + struct lp_setup_args *args, + const struct lp_setup_variant_key *key, + LLVMValueRef inv_det, + LLVMValueRef dxyz01, + LLVMValueRef dxyz20, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + struct lp_build_context flt_scalar_bld; + struct lp_build_context int_scalar_bld; + struct lp_build_context *bld = &args->bld; + LLVMValueRef zoffset, mult; + LLVMValueRef z0_new, z1_new, z2_new; + LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; + LLVMValueRef z0z1, z0z1z2; + LLVMValueRef max, max_value, res12; + LLVMValueRef shuffles[4]; + LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef onei = lp_build_const_int32(gallivm, 1); + LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); + LLVMValueRef twoi = lp_build_const_int32(gallivm, 2); + LLVMValueRef threei = lp_build_const_int32(gallivm, 3); + + /* (res12) = cross(e,f).xy */ + shuffles[0] = twoi; + shuffles[1] = zeroi; + shuffles[2] = onei; + shuffles[3] = twoi; + dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), ""); + + shuffles[0] = onei; + shuffles[1] = twoi; + shuffles[2] = twoi; + shuffles[3] = zeroi; + dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), ""); + + dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20"); + + shuffles[0] = twoi; + shuffles[1] = threei; + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + 
dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20, + LLVMConstVector(shuffles, 4), ""); + + res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12"); + + /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ + dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy"); + dzdxdzdy = lp_build_abs(bld, dzdxdzdy); + + dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, ""); + dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, ""); + + /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */ + max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); + max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max"); + + mult = LLVMBuildFMul(b, max_value, + lp_build_const_float(gallivm, key->pgon_offset_scale), ""); + + lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32)); + + if (key->floating_point_depth) { + /* + * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) + + * MAX2(dzdx, dzdy) * pgon_offset_scale + * + * NOTE: Assumes IEEE float32. + */ + LLVMValueRef c23_shifted, exp_mask, bias, exp; + LLVMValueRef maxz_value, maxz0z1_value; + + lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32)); + + c23_shifted = lp_build_const_int32(gallivm, 23 << 23); + exp_mask = lp_build_const_int32(gallivm, 0xff << 23); + + maxz0z1_value = lp_build_max(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[0], twoi, ""), + LLVMBuildExtractElement(b, attribv[1], twoi, "")); + + maxz_value = lp_build_max(&flt_scalar_bld, + LLVMBuildExtractElement(b, attribv[2], twoi, ""), + maxz0z1_value); + + exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, ""); + exp = lp_build_and(&int_scalar_bld, exp, exp_mask); + exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted); + /* Clamping to zero means mrd will be zero for very small numbers, + * but specs do not indicate this should be prevented by clamping + * mrd to smallest normal number instead. 
*/ + exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero); + exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, ""); + + bias = LLVMBuildFMul(b, exp, + lp_build_const_float(gallivm, key->pgon_offset_units), + "bias"); + + zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset"); + } else { + /* + * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale + */ + zoffset = LLVMBuildFAdd(b, + lp_build_const_float(gallivm, key->pgon_offset_units), + mult, "zoffset"); + } + + if (key->pgon_offset_clamp > 0) { + zoffset = lp_build_min(&flt_scalar_bld, + lp_build_const_float(gallivm, key->pgon_offset_clamp), + zoffset); + } + else if (key->pgon_offset_clamp < 0) { + zoffset = lp_build_max(&flt_scalar_bld, + lp_build_const_float(gallivm, key->pgon_offset_clamp), + zoffset); + } + + /* yuck */ + shuffles[0] = twoi; + shuffles[1] = lp_build_const_int32(gallivm, 6); + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), ""); + shuffles[0] = zeroi; + shuffles[1] = onei; + shuffles[2] = lp_build_const_int32(gallivm, 6); + shuffles[3] = LLVMGetUndef(shuf_type); + z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), ""); + zoffset = lp_build_broadcast_scalar(bld, zoffset); + + /* clamp and do offset */ + /* + * FIXME I suspect the clamp (is that even right to always clamp to fixed + * 0.0/1.0?) should really be per fragment? 
+ */ + z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one); + + /* insert into args->a0.z, a1.z, a2.z: + */ + z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, ""); + z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, ""); + z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, ""); + attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, ""); + attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, ""); + attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, ""); +} + +static void +load_attribute(struct gallivm_state *gallivm, + struct lp_setup_args *args, + const struct lp_setup_variant_key *key, + unsigned vert_attr, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); + + /* Load the vertex data + */ + attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + + /* Potentially modify it according to twoside, etc: + */ + if (key->twoside) { + if (vert_attr == key->color_slot && key->bcolor_slot >= 0) + lp_twoside(gallivm, args, key, key->bcolor_slot, attribv); + else if (vert_attr == key->spec_slot && key->bspec_slot >= 0) + lp_twoside(gallivm, args, key, key->bspec_slot, attribv); + } +} + +/* + * FIXME: interpolation is always done wrt fb origin (0/0). + * However, if some (small) tri is far away from the origin and gradients + * are large, this can lead to HUGE errors, since the a0 value calculated + * here can get very large (with the actual values inside the triangle way + * smaller), leading to complete loss of accuracy. 
This could be prevented + * by using some point inside (or at corner) of the tri as interpolation + * origin, or just use barycentric interpolation (which GL suggests and is + * what real hw does - you can get the barycentric coordinates from the + * edge functions in rasterization in principle (though we skip these + * sometimes completely in case of tris covering a block fully, + * which obviously wouldn't work)). + */ +static void +emit_coef4( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMValueRef attr_0; + LLVMValueRef dy20_ooa = args->dy20_ooa; + LLVMValueRef dy01_ooa = args->dy01_ooa; + LLVMValueRef dx20_ooa = args->dx20_ooa; + LLVMValueRef dx01_ooa = args->dx01_ooa; + LLVMValueRef x0_center = args->x0_center; + LLVMValueRef y0_center = args->y0_center; + LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); + LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); + + /* Calculate dadx (vec4f) + */ + LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); + LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); + LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); + + /* Calculate dady (vec4f) + */ + LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); + LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); + LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); + + /* Calculate a0 - the attribute value at the origin + */ + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); + attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); + + store_coef(gallivm, args, slot, attr_0, dadx, dady); +} + + +static void +emit_linear_coef( 
struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef attribv[3]) +{ + /* nothing to do anymore */ + emit_coef4(gallivm, + args, slot, + attribv[0], + attribv[1], + attribv[2]); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void +apply_perspective_corr( struct gallivm_state *gallivm, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef attribv[3]) +{ + LLVMBuilderRef b = gallivm->builder; + + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld, + vert_attrib(gallivm, args->v0, 0, 3, "v0_oow")); + LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld, + vert_attrib(gallivm, args->v1, 0, 3, "v1_oow")); + LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld, + vert_attrib(gallivm, args->v2, 0, 3, "v2_oow")); + + attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a"); + attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a"); + attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a"); +} + + +/** + * Applys cylindrical wrapping to vertex attributes if enabled. + * Input coordinates must be in [0, 1] range, otherwise results are undefined. 
+ * + * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags + */ +static void +emit_apply_cyl_wrap(struct gallivm_state *gallivm, + struct lp_setup_args *args, + uint cyl_wrap, + LLVMValueRef attribv[3]) + +{ + LLVMBuilderRef builder = gallivm->builder; + struct lp_type type = args->bld.type; + LLVMTypeRef float_vec_type = args->bld.vec_type; + LLVMValueRef pos_half; + LLVMValueRef neg_half; + LLVMValueRef cyl_mask; + LLVMValueRef offset; + LLVMValueRef delta; + LLVMValueRef one; + + if (!cyl_wrap) + return; + + /* Constants */ + pos_half = lp_build_const_vec(gallivm, type, +0.5f); + neg_half = lp_build_const_vec(gallivm, type, -0.5f); + cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4); + + one = lp_build_const_vec(gallivm, type, 1.0f); + one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), ""); + one = LLVMBuildAnd(builder, one, cyl_mask, ""); + + /* Edge v0 -> v1 */ + delta = LLVMBuildFSub(builder, attribv[1], attribv[0], ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); + + /* Edge v1 -> v2 */ + delta = LLVMBuildFSub(builder, attribv[2], attribv[1], ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = 
LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); + + /* Edge v2 -> v0 */ + delta = LLVMBuildFSub(builder, attribv[0], attribv[2], ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); + + offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); + offset = LLVMBuildAnd(builder, offset, one, ""); + offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); + attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); +} + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +static void +emit_tri_coef( struct gallivm_state *gallivm, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args) +{ + unsigned slot; + + LLVMValueRef attribs[3]; + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + if (key->flatshade_first) { + emit_constant_coef4(gallivm, args, slot+1, attribs[0]); + } + else { + emit_constant_coef4(gallivm, args, slot+1, attribs[2]); + } + break; + + case LP_INTERP_LINEAR: + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); + emit_linear_coef(gallivm, args, slot+1, attribs); + break; + + case LP_INTERP_PERSPECTIVE: + load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); + emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); + apply_perspective_corr(gallivm, args, slot+1, attribs); + emit_linear_coef(gallivm, args, slot+1, attribs); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel 
interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + emit_facing_coef(gallivm, args, slot+1); + break; + + default: + assert(0); + } + } +} + + +/* XXX: generic code: + */ +static void +set_noalias(LLVMBuilderRef builder, + LLVMValueRef function, + const LLVMTypeRef *arg_types, + int nr_args) +{ + int i; + for(i = 0; i < nr_args; ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), + LLVMNoAliasAttribute); +} + +static void +init_args(struct gallivm_state *gallivm, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args) +{ + LLVMBuilderRef b = gallivm->builder; + LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); + LLVMValueRef onei = lp_build_const_int32(gallivm, 1); + LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); + LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; + LLVMValueRef e, f, ef, ooa; + LLVMValueRef shuffles[4], shuf10; + LLVMValueRef attr_pos[3]; + struct lp_type typef4 = lp_type_float_vec(32, 128); + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, typef4); + args->bld = bld; + + /* The internal position input is in slot zero: + */ + load_attribute(gallivm, args, key, 0, attr_pos); + + pixel_center = lp_build_const_vec(gallivm, typef4, + key->pixel_center_half ? 
0.5 : 0.0); + + /* + * xy are first two elems in v0a/v1a/v2a but just use vec4 arit + * also offset_tri uses actually xyz in them + */ + xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" ); + + dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01"); + dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20"); + + shuffles[0] = onei; + shuffles[1] = zeroi; + shuffles[2] = LLVMGetUndef(shuf_type); + shuffles[3] = LLVMGetUndef(shuf_type); + shuf10 = LLVMConstVector(shuffles, 4); + + dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, ""); + + ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); + e = LLVMBuildExtractElement(b, ef, zeroi, ""); + f = LLVMBuildExtractElement(b, ef, onei, ""); + + ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa"); + + ooa = lp_build_broadcast_scalar(&bld, ooa); + + /* tri offset calc shares a lot of arithmetic, do it here */ + if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) { + lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); + } + + dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); + dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); + + args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei); + args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei); + + args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi); + args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi); + + args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); + args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); + + emit_linear_coef(gallivm, args, 0, attr_pos); +} + +/** + * Generate the runtime callable function for the coefficient calculation. 
+ * + */ +static struct lp_setup_variant * +generate_setup_variant(struct lp_setup_variant_key *key, + struct llvmpipe_context *lp) +{ + struct lp_setup_variant *variant = NULL; + struct gallivm_state *gallivm; + struct lp_setup_args args; + char func_name[64]; + LLVMTypeRef vec4f_type; + LLVMTypeRef func_type; + LLVMTypeRef arg_types[7]; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + int64_t t0 = 0, t1; + + if (0) + goto fail; + + variant = CALLOC_STRUCT(lp_setup_variant); + if (variant == NULL) + goto fail; + + variant->no = setup_no++; + + util_snprintf(func_name, sizeof(func_name), "setup_variant_%u", + variant->no); + + variant->gallivm = gallivm = gallivm_create(func_name, lp->context); + if (!variant->gallivm) { + goto fail; + } + + builder = gallivm->builder; + + if (LP_DEBUG & DEBUG_COUNTERS) { + t0 = os_time_get(); + } + + memcpy(&variant->key, key, key->size); + variant->list_item_global.base = variant; + + /* Currently always deal with full 4-wide vertex attributes from + * the vertices. 
+ */ + + vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); + + arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ + arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ + arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ + arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */ + arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ + arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ + arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ + + func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), + arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); + if (!variant->function) + goto fail; + + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + args.v0 = LLVMGetParam(variant->function, 0); + args.v1 = LLVMGetParam(variant->function, 1); + args.v2 = LLVMGetParam(variant->function, 2); + args.facing = LLVMGetParam(variant->function, 3); + args.a0 = LLVMGetParam(variant->function, 4); + args.dadx = LLVMGetParam(variant->function, 5); + args.dady = LLVMGetParam(variant->function, 6); + + lp_build_name(args.v0, "in_v0"); + lp_build_name(args.v1, "in_v1"); + lp_build_name(args.v2, "in_v2"); + lp_build_name(args.facing, "in_facing"); + lp_build_name(args.a0, "out_a0"); + lp_build_name(args.dadx, "out_dadx"); + lp_build_name(args.dady, "out_dady"); + + /* + * Function body + */ + block = LLVMAppendBasicBlockInContext(gallivm->context, + variant->function, "entry"); + LLVMPositionBuilderAtEnd(builder, block); + + set_noalias(builder, variant->function, arg_types, Elements(arg_types)); + init_args(gallivm, &variant->key, &args); + emit_tri_coef(gallivm, &variant->key, &args); + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, variant->function); + + gallivm_compile_module(gallivm); + + variant->jit_function = (lp_jit_setup_triangle) + gallivm_jit_function(gallivm, 
variant->function); + if (!variant->jit_function) + goto fail; + + gallivm_free_ir(variant->gallivm); + + /* + * Update timing information: + */ + if (LP_DEBUG & DEBUG_COUNTERS) { + t1 = os_time_get(); + LP_COUNT_ADD(llvm_compile_time, t1 - t0); + LP_COUNT_ADD(nr_llvm_compiles, 1); + } + + return variant; + +fail: + if (variant) { + if (variant->gallivm) { + gallivm_destroy(variant->gallivm); + } + FREE(variant); + } + + return NULL; +} + + + +static void +lp_make_setup_variant_key(struct llvmpipe_context *lp, + struct lp_setup_variant_key *key) +{ + struct lp_fragment_shader *fs = lp->fs; + unsigned i; + + assert(sizeof key->inputs[0] == sizeof(uint)); + + key->num_inputs = fs->info.base.num_inputs; + key->flatshade_first = lp->rasterizer->flatshade_first; + key->pixel_center_half = lp->rasterizer->half_pixel_center; + key->twoside = lp->rasterizer->light_twoside; + key->size = Offset(struct lp_setup_variant_key, + inputs[key->num_inputs]); + + key->color_slot = lp->color_slot [0]; + key->bcolor_slot = lp->bcolor_slot[0]; + key->spec_slot = lp->color_slot [1]; + key->bspec_slot = lp->bcolor_slot[1]; + assert(key->color_slot == lp->color_slot [0]); + assert(key->bcolor_slot == lp->bcolor_slot[0]); + assert(key->spec_slot == lp->color_slot [1]); + assert(key->bspec_slot == lp->bcolor_slot[1]); + + /* + * If depth is floating point, depth bias is calculated with respect + * to the primitive's maximum Z value. Retain the original depth bias + * value until that stage. 
+ */ + key->floating_point_depth = lp->floating_point_depth; + + if (key->floating_point_depth) { + key->pgon_offset_units = (float) lp->rasterizer->offset_units; + } else { + key->pgon_offset_units = + (float) (lp->rasterizer->offset_units * lp->mrd); + } + + key->pgon_offset_scale = lp->rasterizer->offset_scale; + key->pgon_offset_clamp = lp->rasterizer->offset_clamp; + key->pad = 0; + memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); + for (i = 0; i < key->num_inputs; i++) { + if (key->inputs[i].interp == LP_INTERP_COLOR) { + if (lp->rasterizer->flatshade) + key->inputs[i].interp = LP_INTERP_CONSTANT; + else + key->inputs[i].interp = LP_INTERP_PERSPECTIVE; + } + } + +} + + +static void +remove_setup_variant(struct llvmpipe_context *lp, + struct lp_setup_variant *variant) +{ + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del setup_variant #%u total %u\n", + variant->no, lp->nr_setup_variants); + } + + if (variant->gallivm) { + gallivm_destroy(variant->gallivm); + } + + remove_from_list(&variant->list_item_global); + lp->nr_setup_variants--; + FREE(variant); +} + + + +/* When the number of setup variants exceeds a threshold, cull a + * fraction (currently a quarter) of them. + */ +static void +cull_setup_variants(struct llvmpipe_context *lp) +{ + struct pipe_context *pipe = &lp->pipe; + int i; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { + struct lp_setup_variant_list_item *item; + if (is_empty_list(&lp->setup_variants_list)) { + break; + } + item = last_elem(&lp->setup_variants_list); + assert(item); + assert(item->base); + remove_setup_variant(lp, item->base); + } +} + + +/** + * Update fragment/vertex shader linkage state. 
This is called just + * prior to drawing something when some fragment-related state has + * changed. + */ +void +llvmpipe_update_setup(struct llvmpipe_context *lp) +{ + struct lp_setup_variant_key *key = &lp->setup_variant.key; + struct lp_setup_variant *variant = NULL; + struct lp_setup_variant_list_item *li; + + lp_make_setup_variant_key(lp, key); + + foreach(li, &lp->setup_variants_list) { + if(li->base->key.size == key->size && + memcmp(&li->base->key, key, key->size) == 0) { + variant = li->base; + break; + } + } + + if (variant) { + move_to_head(&lp->setup_variants_list, &variant->list_item_global); + } + else { + if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { + cull_setup_variants(lp); + } + + variant = generate_setup_variant(key, lp); + if (variant) { + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; + } + } + + lp_setup_set_setup_variant(lp->setup, variant); +} + +void +lp_delete_setup_variants(struct llvmpipe_context *lp) +{ + struct lp_setup_variant_list_item *li; + li = first_elem(&lp->setup_variants_list); + while(!at_end(&lp->setup_variants_list, li)) { + struct lp_setup_variant_list_item *next = next_elem(li); + remove_setup_variant(lp, li->base); + li = next; + } +} + +void +lp_dump_setup_coef(const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]) +{ + int i, slot; + + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + float a0 = sa0 [0][i]; + float dadx = sdadx[0][i]; + float dady = sdady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], a0, dadx, dady); + } + + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned usage_mask = key->inputs[slot].usage_mask; + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = sa0 [1 + slot][i]; + float dadx = sdadx[1 + slot][i]; + float dady = sdady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, 
"xyzw"[i], a0, dadx, dady); + } + } + } +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h new file mode 100644 index 000000000..82af8350f --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_setup.h @@ -0,0 +1,85 @@ +#ifndef LP_STATE_SETUP_H +#define LP_STATE_SETUP_H + +#include "lp_bld_interp.h" + + +struct llvmpipe_context; +struct lp_setup_variant; + +struct lp_setup_variant_list_item +{ + struct lp_setup_variant *base; + struct lp_setup_variant_list_item *next, *prev; +}; + + +struct lp_setup_variant_key { + unsigned size:16; + unsigned num_inputs:8; + int color_slot:8; + + int bcolor_slot:8; + int spec_slot:8; + int bspec_slot:8; + unsigned flatshade_first:1; + unsigned pixel_center_half:1; + unsigned twoside:1; + unsigned floating_point_depth:1; + unsigned pad:4; + + /* TODO: get those floats out of the key and use a jit_context for setup */ + float pgon_offset_units; + float pgon_offset_scale; + float pgon_offset_clamp; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; +}; + + +typedef void (*lp_jit_setup_triangle)( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4] ); + + + + +/* At this stage, for a given variant key, we create a + * draw_vertex_info struct telling the draw module how to format the + * vertices, and an llvm-generated function which calculates the + * attribute interpolants (a0, dadx, dady) from three of those + * vertices. + */ +struct lp_setup_variant { + struct lp_setup_variant_key key; + + struct lp_setup_variant_list_item list_item_global; + + struct gallivm_state *gallivm; + + /* XXX: this is a pointer to the LLVM IR. Once jit_function is + * generated, we never need to use the IR again - need to find a + * way to release this data without destroying the generated + * assembly. 
+ */ + LLVMValueRef function; + + /* The actual generated setup function: + */ + lp_jit_setup_triangle jit_function; + + unsigned no; +}; + +void lp_delete_setup_variants(struct llvmpipe_context *lp); + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]); + +#endif diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c new file mode 100644 index 000000000..2af04cdf1 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_so.c @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "lp_context.h" +#include "lp_state.h" +#include "lp_texture.h" + +#include "util/u_memory.h" +#include "draw/draw_context.h" + +static struct pipe_stream_output_target * +llvmpipe_create_so_target(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct draw_so_target *t; + + t = CALLOC_STRUCT(draw_so_target); + if (!t) + return NULL; + + t->target.context = pipe; + t->target.reference.count = 1; + pipe_resource_reference(&t->target.buffer, buffer); + t->target.buffer_offset = buffer_offset; + t->target.buffer_size = buffer_size; + return &t->target; +} + +static void +llvmpipe_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + FREE(target); +} + +static void +llvmpipe_set_so_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + int i; + for (i = 0; i < num_targets; i++) { + const boolean append = (offsets[i] == (unsigned)-1); + pipe_so_target_reference((struct pipe_stream_output_target **)&llvmpipe->so_targets[i], targets[i]); + /* If we're not appending then lets set the internal + offset to what was requested */ + if (!append && llvmpipe->so_targets[i]) { + llvmpipe->so_targets[i]->internal_offset = offsets[i]; + } + } + + for (; i < llvmpipe->num_so_targets; i++) { + pipe_so_target_reference((struct pipe_stream_output_target **)&llvmpipe->so_targets[i], NULL); + } + llvmpipe->num_so_targets = num_targets; +} + +void +llvmpipe_init_so_funcs(struct llvmpipe_context *pipe) +{ + pipe->pipe.create_stream_output_target = llvmpipe_create_so_target; + pipe->pipe.stream_output_target_destroy = llvmpipe_so_target_destroy; + pipe->pipe.set_stream_output_targets = 
llvmpipe_set_so_targets; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c new file mode 100644 index 000000000..c879ba975 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keithw@vmware.com> + */ + +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_framebuffer.h" +#include "util/u_surface.h" +#include "lp_context.h" +#include "lp_scene.h" +#include "lp_state.h" +#include "lp_setup.h" + +#include "draw/draw_context.h" + +#include "util/u_format.h" + + +/** + * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. + */ +void +llvmpipe_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + + boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb); + + assert(fb->width <= LP_MAX_WIDTH); + assert(fb->height <= LP_MAX_HEIGHT); + + if (changed) { + /* + * If no depth buffer is bound, send the utility function the default + * format for no bound depth (PIPE_FORMAT_NONE). + */ + enum pipe_format depth_format = fb->zsbuf ? + fb->zsbuf->format : PIPE_FORMAT_NONE; + const struct util_format_description *depth_desc = + util_format_description(depth_format); + + util_copy_framebuffer_state(&lp->framebuffer, fb); + + if (LP_PERF & PERF_NO_DEPTH) { + pipe_surface_reference(&lp->framebuffer.zsbuf, NULL); + } + + /* + * Calculate the floating point depth sense and Minimum Resolvable Depth + * value for the llvmpipe module. This is separate from the draw module. + */ + lp->floating_point_depth = + (util_get_depth_format_type(depth_desc) == UTIL_FORMAT_TYPE_FLOAT); + + lp->mrd = util_get_depth_format_mrd(depth_desc); + + /* Tell the draw module how deep the Z/depth buffer is. 
*/ + draw_set_zs_format(lp->draw, depth_format); + + lp_setup_bind_framebuffer( lp->setup, &lp->framebuffer ); + + lp->dirty |= LP_NEW_FRAMEBUFFER; + } +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c new file mode 100644 index 000000000..1e93fd867 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keithw@vmware.com>
+ */
+
+
+#include "lp_context.h"
+#include "lp_state.h"
+
+#include "draw/draw_context.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+
+/**
+ * Create a vertex-elements CSO: a heap copy of the caller's attribute
+ * layout.  Returns NULL on allocation failure (caller must cope).
+ */
+static void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+                                      unsigned count,
+                                      const struct pipe_vertex_element *attribs)
+{
+   struct lp_velems_state *velems;
+   assert(count <= PIPE_MAX_ATTRIBS);
+   velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
+   if (velems) {
+      velems->count = count;
+      /* copy only the 'count' elements actually supplied */
+      memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+   }
+   return velems;
+}
+
+/**
+ * Bind a vertex-elements CSO and forward the layout to the draw module.
+ * A NULL bind only clears the context pointer and sets the dirty flag.
+ */
+static void
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+                                    void *velems)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
+
+   llvmpipe->velems = lp_velems;
+
+   llvmpipe->dirty |= LP_NEW_VERTEX;
+
+   if (velems)
+      draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
+}
+
+/** Release a CSO allocated by llvmpipe_create_vertex_elements_state(). */
+static void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+   FREE( velems );
+}
+
+/**
+ * Set a range of vertex buffers.  util_set_vertex_buffers_count() updates
+ * the context's buffer array and count; the draw module gets the same range.
+ */
+static void
+llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
+                            unsigned start_slot, unsigned count,
+                            const struct pipe_vertex_buffer *buffers)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   assert(count <= PIPE_MAX_ATTRIBS);
+
+   util_set_vertex_buffers_count(llvmpipe->vertex_buffer,
+                                 &llvmpipe->num_vertex_buffers,
+                                 buffers, start_slot, count);
+
+   llvmpipe->dirty |= LP_NEW_VERTEX;
+
+   draw_set_vertex_buffers(llvmpipe->draw, start_slot, count, buffers);
+}
+
+
+/**
+ * Record the current index buffer (copied by value; NULL clears it).
+ */
+static void
+llvmpipe_set_index_buffer(struct pipe_context *pipe,
+                          const struct pipe_index_buffer *ib)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   if (ib)
+      memcpy(&llvmpipe->index_buffer, ib,
sizeof(llvmpipe->index_buffer)); + else + memset(&llvmpipe->index_buffer, 0, sizeof(llvmpipe->index_buffer)); +} + +void +llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; + llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; + llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; + + llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; + llvmpipe->pipe.set_index_buffer = llvmpipe_set_index_buffer; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c new file mode 100644 index 000000000..826ee5b72 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_state.h"
+
+
+/**
+ * Create a vertex shader CSO.  Vertex shading is delegated entirely to the
+ * draw module; llvmpipe keeps only the opaque draw_vertex_shader pointer.
+ * Dumps the TGSI tokens when the DEBUG_TGSI flag is set.
+ */
+static void *
+llvmpipe_create_vs_state(struct pipe_context *pipe,
+                         const struct pipe_shader_state *templ)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct draw_vertex_shader *vs;
+
+   vs = draw_create_vertex_shader(llvmpipe->draw, templ);
+   if (vs == NULL) {
+      return NULL;
+   }
+
+   if (LP_DEBUG & DEBUG_TGSI) {
+      debug_printf("llvmpipe: Create vertex shader %p:\n", (void *) vs);
+      tgsi_dump(templ->tokens, 0);
+   }
+
+   return vs;
+}
+
+
+/**
+ * Bind a vertex shader CSO.  Early-out if it is already bound; otherwise
+ * hand it to the draw module and mark the VS state dirty.
+ */
+static void
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct draw_vertex_shader *vs = (struct draw_vertex_shader *)_vs;
+
+   if (llvmpipe->vs == vs)
+      return;
+
+   draw_bind_vertex_shader(llvmpipe->draw, vs);
+
+   llvmpipe->vs = vs;
+
+   llvmpipe->dirty |= LP_NEW_VS;
+}
+
+
+/**
+ * Destroy a vertex shader CSO created by llvmpipe_create_vs_state().
+ * NOTE(review): assumes the CSO is not currently bound — per the gallium
+ * contract the state tracker unbinds before deleting; verify against callers.
+ */
+static void
+llvmpipe_delete_vs_state(struct pipe_context *pipe, void *_vs)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct draw_vertex_shader *vs = (struct draw_vertex_shader *)_vs;
+
+   draw_delete_vertex_shader(llvmpipe->draw, vs);
+}
+
+
+
+/** Plug the VS CSO hooks into the pipe_context vtable. */
+void
+llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe)
+{
+   llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state;
+   llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state;
+   llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c new file mode 100644 index 000000000..96f8ed82c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.c @@ -0,0 +1,229 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+#include "util/u_rect.h"
+#include "util/u_surface.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_limits.h"
+#include "lp_surface.h"
+#include "lp_texture.h"
+#include "lp_query.h"
+
+
+/**
+ * pipe_context::resource_copy_region hook.  Flushes any rendering still
+ * pending against either resource (blocking), then performs the copy on
+ * the CPU via the shared utility helper.
+ */
+static void
+lp_resource_copy(struct pipe_context *pipe,
+                 struct pipe_resource *dst, unsigned dst_level,
+                 unsigned dstx, unsigned dsty, unsigned dstz,
+                 struct pipe_resource *src, unsigned src_level,
+                 const struct pipe_box *src_box)
+{
+   /* dst will be written: flush for CPU write access */
+   llvmpipe_flush_resource(pipe,
+                           dst, dst_level,
+                           FALSE, /* read_only */
+                           TRUE, /* cpu_access */
+                           FALSE, /* do_not_block */
+                           "blit dest");
+
+   /* src is only read: flush for CPU read access */
+   llvmpipe_flush_resource(pipe,
+                           src, src_level,
+                           TRUE, /* read_only */
+                           TRUE, /* cpu_access */
+                           FALSE, /* do_not_block */
+                           "blit src");
+
+   util_resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz,
+                             src, src_level, src_box);
+}
+
+
+/**
+ * pipe_context::blit hook.  Works on a local copy of the blit info and
+ * bails out early when: the render condition says skip, the blit is an
+ * (unimplemented) MSAA color resolve, a plain copy_region suffices, or
+ * the blitter cannot handle the format pair.  Otherwise saves all state
+ * the blitter clobbers and runs the generic util_blitter path.
+ */
+static void lp_blit(struct pipe_context *pipe,
+                    const struct pipe_blit_info *blit_info)
+{
+   struct llvmpipe_context *lp = llvmpipe_context(pipe);
+   struct pipe_blit_info info = *blit_info;
+
+   if (blit_info->render_condition_enable && !llvmpipe_check_render_cond(lp))
+      return;
+
+   /* multisample -> single-sample color resolve is not implemented */
+   if (info.src.resource->nr_samples > 1 &&
+       info.dst.resource->nr_samples <= 1 &&
+       !util_format_is_depth_or_stencil(info.src.resource->format) &&
+       !util_format_is_pure_integer(info.src.resource->format)) {
+      debug_printf("llvmpipe: color resolve unimplemented\n");
+      return;
+   }
+
+   if (util_try_blit_via_copy_region(pipe, &info)) {
+      return; /* done */
+   }
+
+   if (!util_blitter_is_blit_supported(lp->blitter, &info)) {
+      debug_printf("llvmpipe: blit unsupported %s -> %s\n",
+                   util_format_short_name(info.src.resource->format),
+                   util_format_short_name(info.dst.resource->format));
+      return;
+   }
+
+   /* XXX turn off occlusion and streamout queries */
+
+   /* save every piece of state util_blitter will overwrite */
+   util_blitter_save_vertex_buffer_slot(lp->blitter, lp->vertex_buffer);
+   util_blitter_save_vertex_elements(lp->blitter,
(void*)lp->velems);
+   util_blitter_save_vertex_shader(lp->blitter, (void*)lp->vs);
+   util_blitter_save_geometry_shader(lp->blitter, (void*)lp->gs);
+   util_blitter_save_so_targets(lp->blitter, lp->num_so_targets,
+                     (struct pipe_stream_output_target**)lp->so_targets);
+   util_blitter_save_rasterizer(lp->blitter, (void*)lp->rasterizer);
+   util_blitter_save_viewport(lp->blitter, &lp->viewports[0]);
+   util_blitter_save_scissor(lp->blitter, &lp->scissors[0]);
+   util_blitter_save_fragment_shader(lp->blitter, lp->fs);
+   util_blitter_save_blend(lp->blitter, (void*)lp->blend);
+   util_blitter_save_depth_stencil_alpha(lp->blitter, (void*)lp->depth_stencil);
+   util_blitter_save_stencil_ref(lp->blitter, &lp->stencil_ref);
+   /*util_blitter_save_sample_mask(sp->blitter, lp->sample_mask);*/
+   util_blitter_save_framebuffer(lp->blitter, &lp->framebuffer);
+   util_blitter_save_fragment_sampler_states(lp->blitter,
+                     lp->num_samplers[PIPE_SHADER_FRAGMENT],
+                     (void**)lp->samplers[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_fragment_sampler_views(lp->blitter,
+                     lp->num_sampler_views[PIPE_SHADER_FRAGMENT],
+                     lp->sampler_views[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_render_condition(lp->blitter, lp->render_cond_query,
+                                      lp->render_cond_cond, lp->render_cond_mode);
+   /* performs the blit and restores all saved state */
+   util_blitter_blit(lp->blitter, &info);
+}
+
+
+/**
+ * pipe_context::flush_resource hook.  Intentionally a no-op: llvmpipe
+ * resources need no layout conversion before external use.
+ */
+static void
+lp_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+{
+}
+
+
+/**
+ * Create a pipe_surface view of a resource.  A missing render-target /
+ * depth-stencil bind flag is only warned about (debug_printf); creation
+ * still proceeds.  Returns NULL on allocation failure.
+ */
+static struct pipe_surface *
+llvmpipe_create_surface(struct pipe_context *pipe,
+                        struct pipe_resource *pt,
+                        const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_surface *ps;
+
+   if (!(pt->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET)))
+      debug_printf("Illegal surface creation without bind flag\n");
+
+   ps = CALLOC_STRUCT(pipe_surface);
+   if (ps) {
+      pipe_reference_init(&ps->reference, 1);
+      /* surface holds a reference on the underlying resource */
+      pipe_resource_reference(&ps->texture, pt);
+      ps->context = pipe;
+      ps->format = surf_tmpl->format;
+      if (llvmpipe_resource_is_texture(pt)) {
+         assert(surf_tmpl->u.tex.level <=
pt->last_level); + assert(surf_tmpl->u.tex.first_layer <= surf_tmpl->u.tex.last_layer); + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + } + else { + /* setting width as number of elements should get us correct renderbuffer width */ + ps->width = surf_tmpl->u.buf.last_element - surf_tmpl->u.buf.first_element + 1; + ps->height = pt->height0; + ps->u.buf.first_element = surf_tmpl->u.buf.first_element; + ps->u.buf.last_element = surf_tmpl->u.buf.last_element; + assert(ps->u.buf.first_element <= ps->u.buf.last_element); + assert(util_format_get_blocksize(surf_tmpl->format) * + (ps->u.buf.last_element + 1) <= pt->width0); + } + } + return ps; +} + + +static void +llvmpipe_surface_destroy(struct pipe_context *pipe, + struct pipe_surface *surf) +{ + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For llvmpipe, nothing to do. 
+    */
+   assert(surf->texture);
+   /* drops this surface's reference on the underlying resource */
+   pipe_resource_reference(&surf->texture, NULL);
+   FREE(surf);
+}
+
+
+/**
+ * pipe_context::clear_render_target hook.  Honors the current render
+ * condition, then delegates to the shared software clear helper.
+ */
+static void
+llvmpipe_clear_render_target(struct pipe_context *pipe,
+                             struct pipe_surface *dst,
+                             const union pipe_color_union *color,
+                             unsigned dstx, unsigned dsty,
+                             unsigned width, unsigned height)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   if (!llvmpipe_check_render_cond(llvmpipe))
+      return;
+
+   util_clear_render_target(pipe, dst, color,
+                            dstx, dsty, width, height);
+}
+
+
+/**
+ * pipe_context::clear_depth_stencil hook.  Same render-condition guard
+ * as the color clear; clear_flags selects depth and/or stencil planes.
+ */
+static void
+llvmpipe_clear_depth_stencil(struct pipe_context *pipe,
+                             struct pipe_surface *dst,
+                             unsigned clear_flags,
+                             double depth,
+                             unsigned stencil,
+                             unsigned dstx, unsigned dsty,
+                             unsigned width, unsigned height)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+   if (!llvmpipe_check_render_cond(llvmpipe))
+      return;
+
+   util_clear_depth_stencil(pipe, dst, clear_flags,
+                            depth, stencil,
+                            dstx, dsty, width, height);
+}
+
+
+/** Plug the surface/clear/copy/blit hooks into the pipe_context vtable. */
+void
+llvmpipe_init_surface_functions(struct llvmpipe_context *lp)
+{
+   lp->pipe.clear_render_target = llvmpipe_clear_render_target;
+   lp->pipe.clear_depth_stencil = llvmpipe_clear_depth_stencil;
+   lp->pipe.create_surface = llvmpipe_create_surface;
+   lp->pipe.surface_destroy = llvmpipe_surface_destroy;
+   /* These three operate on whole resources rather than surfaces */
+   lp->pipe.resource_copy_region = lp_resource_copy;
+   lp->pipe.blit = lp_blit;
+   lp->pipe.flush_resource = lp_flush_resource;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h
new file mode 100644
index 000000000..b50dc21f4
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_surface.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2007 VMware, Inc.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keithw@vmware.com> + */ + +#ifndef LP_SURFACE_H +#define LP_SURFACE_H + + +struct llvmpipe_context; + + +extern void +llvmpipe_init_surface_functions(struct llvmpipe_context *lp); + + +#endif /* LP_SURFACE_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h new file mode 100644 index 000000000..e1b51c9c9 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test.h @@ -0,0 +1,140 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Shared testing code. 
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_TEST_H
+#define LP_TEST_H
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include "gallivm/lp_bld.h"
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_dump.h"
+
+#include "gallivm/lp_bld_type.h"
+
+
+/* number of samples used by the random test drivers */
+#define LP_TEST_NUM_SAMPLES 32
+
+
+/* Entry points each test program provides (presumably implemented per
+ * test binary — lp_test_arit.c etc.; verify against the build files). */
+void
+write_tsv_header(FILE *fp);
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+          unsigned long n);
+
+boolean
+test_single(unsigned verbose, FILE *fp);
+
+boolean
+test_all(unsigned verbose, FILE *fp);
+
+
+/*
+ * Cycle counter for benchmarking: MSVC intrinsic, GCC inline asm on x86,
+ * otherwise a constant 0 (timings become meaningless but code still builds).
+ */
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
+static inline uint64_t
+rdtsc(void)
+{
+   uint32_t hi, lo;
+   __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+   return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+#else
+
+#define rdtsc() 0
+
+#endif
+
+
+
+float
+random_float(void);
+
+
+void
+dump_type(FILE *fp, struct lp_type type);
+
+
+/* scalar element accessors: read/write/randomize element 'index' of a
+ * vector laid out according to 'type' */
+double
+read_elem(struct lp_type type, const void *src, unsigned index);
+
+
+void
+write_elem(struct lp_type type, void *dst, unsigned index, double src);
+
+
+void
+random_elem(struct lp_type type, void *dst, unsigned index);
+
+
+/* whole-vector variants of the above */
+void
+read_vec(struct lp_type type, const void *src, double *dst);
+
+
+void
+write_vec(struct lp_type type, void *dst, const double *src);
+
+
+void
+random_vec(struct lp_type type, void *dst);
+
+
+/* vector comparison; the _with_eps form allows per-element tolerance */
+boolean
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);
+
+
+boolean
+compare_vec(struct lp_type type, const void *res, const void *ref);
+
+
+void
+dump_vec(FILE *fp, struct lp_type type, const void *src);
+
+
+#endif /* !LP_TEST_H */
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c
new file mode 100644
index 000000000..290c523f0
--- /dev/null
+++ 
b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -0,0 +1,484 @@ +/************************************************************************** + * + * Copyright 2011 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +#include "util/u_pointer.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_arit.h" + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +typedef void (*unary_func_t)(float *out, const float *in); + + +/** + * Describe a test case of one unary function. 
+ */ +struct unary_test_t +{ + /* + * Test name -- name of the mathematical function under test. + */ + + const char *name; + + LLVMValueRef + (*builder)(struct lp_build_context *bld, LLVMValueRef a); + + /* + * Reference (pure-C) function. + */ + float + (*ref)(float a); + + /* + * Test values. + */ + const float *values; + unsigned num_values; + + /* + * Required precision in bits. + */ + double precision; +}; + + +static float negf(float x) +{ + return -x; +} + + +static float sgnf(float x) +{ + if (x > 0.0f) { + return 1.0f; + } + if (x < 0.0f) { + return -1.0f; + } + return 0.0f; +} + + +const float exp2_values[] = { + -INFINITY, + -60, + -4, + -2, + -1, + -1e-007, + 0, + 1e-007, + 0.01, + 0.1, + 0.9, + 0.99, + 1, + 2, + 4, + 60, + INFINITY, + NAN +}; + + +const float log2_values[] = { +#if 0 + /* + * Smallest denormalized number; meant just for experimentation, but not + * validation. + */ + 1.4012984643248171e-45, +#endif + -INFINITY, + 0, + 1e-007, + 0.1, + 0.5, + 0.99, + 1, + 1.01, + 1.1, + 1.9, + 1.99, + 2, + 4, + 100000, + 1e+018, + INFINITY, + NAN +}; + + +static float rcpf(float x) +{ + return 1.0/x; +} + + +const float rcp_values[] = { + -0.0, 0.0, + -1.0, 1.0, + -1e-007, 1e-007, + -4.0, 4.0, + -1e+035, -100000, + 100000, 1e+035, + 5.88e-39f, // denormal +#if (__STDC_VERSION__ >= 199901L) + INFINITY, -INFINITY, +#endif +}; + + +static float rsqrtf(float x) +{ + return 1.0/(float)sqrt(x); +} + + +const float rsqrt_values[] = { + // http://msdn.microsoft.com/en-us/library/windows/desktop/bb147346.aspx + 0.0, // must yield infinity + 1.0, // must yield 1.0 + 1e-007, 4.0, + 100000, 1e+035, + 5.88e-39f, // denormal +#if (__STDC_VERSION__ >= 199901L) + INFINITY, +#endif +}; + + +const float sincos_values[] = { + -INFINITY, + -5*M_PI/4, + -4*M_PI/4, + -4*M_PI/4, + -3*M_PI/4, + -2*M_PI/4, + -1*M_PI/4, + 1*M_PI/4, + 2*M_PI/4, + 3*M_PI/4, + 4*M_PI/4, + 5*M_PI/4, + INFINITY, + NAN +}; + +const float round_values[] = { + -10.0, -1, 0.0, 12.0, + -1.49, -0.25, 
1.25, 2.51, + -0.99, -0.01, 0.01, 0.99, + 1.401298464324817e-45f, // smallest denormal + -1.401298464324817e-45f, + 1.62981451e-08f, + -1.62981451e-08f, + 1.62981451e15f, // large number not representable as 32bit int + -1.62981451e15f, + FLT_EPSILON, + -FLT_EPSILON, + 1.0f - 0.5f*FLT_EPSILON, + -1.0f + FLT_EPSILON, + FLT_MAX, + -FLT_MAX +}; + +static float fractf(float x) +{ + x -= floorf(x); + if (x >= 1.0f) { + // clamp to the largest number smaller than one + x = 1.0f - 0.5f*FLT_EPSILON; + } + return x; +} + + +const float fract_values[] = { + // http://en.wikipedia.org/wiki/IEEE_754-1985#Examples + 0.0f, + -0.0f, + 1.0f, + -1.0f, + 0.5f, + -0.5f, + 1.401298464324817e-45f, // smallest denormal + -1.401298464324817e-45f, + 5.88e-39f, // middle denormal + 1.18e-38f, // largest denormal + -1.18e-38f, + -1.62981451e-08f, + FLT_EPSILON, + -FLT_EPSILON, + 1.0f - 0.5f*FLT_EPSILON, + -1.0f + FLT_EPSILON, + FLT_MAX, + -FLT_MAX +}; + + +/* + * Unary test cases. + */ + +static const struct unary_test_t +unary_tests[] = { + {"neg", &lp_build_negate, &negf, exp2_values, Elements(exp2_values), 20.0 }, + {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values), 20.0 }, + {"log2", &lp_build_log2_safe, &log2f, log2_values, Elements(log2_values), 20.0 }, + {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values), 18.0 }, + {"log", &lp_build_log_safe, &logf, log2_values, Elements(log2_values), 20.0 }, + {"rcp", &lp_build_rcp, &rcpf, rcp_values, Elements(rcp_values), 20.0 }, + {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values), 20.0 }, + {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values), 20.0 }, + {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values), 20.0 }, + {"sgn", &lp_build_sgn, &sgnf, exp2_values, Elements(exp2_values), 20.0 }, + {"round", &lp_build_round, &roundf, round_values, Elements(round_values), 24.0 }, + {"trunc", &lp_build_trunc, &truncf, round_values, Elements(round_values), 24.0 }, + 
{"floor", &lp_build_floor, &floorf, round_values, Elements(round_values), 24.0 }, + {"ceil", &lp_build_ceil, &ceilf, round_values, Elements(round_values), 24.0 }, + {"fract", &lp_build_fract_safe, &fractf, fract_values, Elements(fract_values), 24.0 }, +}; + + +/* + * Build LLVM function that exercises the unary operator builder. + */ +static LLVMValueRef +build_unary_test_func(struct gallivm_state *gallivm, + const struct unary_test_t *test) +{ + struct lp_type type = lp_type_float_vec(32, lp_native_vector_width); + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; + LLVMTypeRef vf32t = lp_build_vec_type(gallivm, type); + LLVMTypeRef args[2] = { LLVMPointerType(vf32t, 0), LLVMPointerType(vf32t, 0) }; + LLVMValueRef func = LLVMAddFunction(module, test->name, + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, Elements(args), 0)); + LLVMValueRef arg0 = LLVMGetParam(func, 0); + LLVMValueRef arg1 = LLVMGetParam(func, 1); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMValueRef ret; + + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, type); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + + arg1 = LLVMBuildLoad(builder, arg1, ""); + + ret = test->builder(&bld, arg1); + + LLVMBuildStore(builder, ret, arg0); + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, func); + + return func; +} + + +/* + * Flush denorms to zero. + */ +static float +flush_denorm_to_zero(float val) +{ + /* + * If we have a denorm manually set it to (+-)0. + * This is because the reference may or may not do the right thing + * otherwise because we want the result according to treating all + * denormals as zero (FTZ/DAZ). 
Not using fpclassify because + * a) some compilers are stuck at c89 (msvc) + * b) not sure it reliably works with non-standard ftz/daz mode + * And, right now we only disable denorms with jited code on x86/sse + * (albeit this should be classified as a bug) so to get results which + * match we must only flush them to zero here in that case too. + */ + union fi fi_val; + + fi_val.f = val; + +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + if ((fi_val.ui & 0x7f800000) == 0) { + fi_val.ui &= 0xff800000; + } + } +#endif + + return fi_val.f; +} + +/* + * Test one LLVM unary arithmetic builder function. + */ +static boolean +test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test) +{ + struct gallivm_state *gallivm; + LLVMValueRef test_func; + unary_func_t test_func_jit; + boolean success = TRUE; + int i, j; + int length = lp_native_vector_width / 32; + float *in, *out; + + in = align_malloc(length * 4, length * 4); + out = align_malloc(length * 4, length * 4); + + /* random NaNs or 0s could wreak havoc */ + for (i = 0; i < length; i++) { + in[i] = 1.0; + } + + gallivm = gallivm_create("test_module", LLVMGetGlobalContext()); + + test_func = build_unary_test_func(gallivm, test); + + gallivm_compile_module(gallivm); + + test_func_jit = (unary_func_t) gallivm_jit_function(gallivm, test_func); + + gallivm_free_ir(gallivm); + + for (j = 0; j < (test->num_values + length - 1) / length; j++) { + int num_vals = ((j + 1) * length <= test->num_values) ? length : + test->num_values % length; + + for (i = 0; i < num_vals; ++i) { + in[i] = test->values[i+j*length]; + } + + test_func_jit(out, in); + for (i = 0; i < num_vals; ++i) { + float testval, ref; + double error, precision; + bool pass; + + testval = flush_denorm_to_zero(in[i]); + ref = flush_denorm_to_zero(test->ref(testval)); + + if (util_inf_sign(ref) && util_inf_sign(out[i]) == util_inf_sign(ref)) { + error = 0; + } else { + error = fabs(out[i] - ref); + } + precision = error ? 
-log2(error/fabs(ref)) : FLT_MANT_DIG;
+
+         pass = precision >= test->precision;
+
+         /* NaN reference values are skipped before any failure is
+          * recorded, so they can never fail the test */
+         if (isnan(ref)) {
+            continue;
+         }
+
+         if (!pass || verbose) {
+            printf("%s(%.9g): ref = %.9g, out = %.9g, precision = %f bits, %s\n",
+                   test->name, in[i], ref, out[i], precision,
+                   pass ? "PASS" : "FAIL");
+            fflush(stdout);
+         }
+
+         if (!pass) {
+            success = FALSE;
+         }
+      }
+   }
+
+   gallivm_destroy(gallivm);
+
+   align_free(in);
+   align_free(out);
+
+   return success;
+}
+
+
+/**
+ * Run every entry of the unary_tests table; TRUE only if all pass.
+ */
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   boolean success = TRUE;
+   int i;
+
+   for (i = 0; i < Elements(unary_tests); ++i) {
+      if (!test_unary(verbose, fp, &unary_tests[i])) {
+         success = FALSE;
+      }
+   }
+
+   return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp,
+          unsigned long n)
+{
+   /*
+    * Not randomly generated test cases, so test all.
+    */
+
+   return test_all(verbose, fp);
+}
+
+
+/** No single-case mode for the arithmetic tests; trivially succeeds. */
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+   return TRUE;
+}
diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
new file mode 100644
index 000000000..37420b024
--- /dev/null
+++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -0,0 +1,737 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Unit tests for blend LLVM IR generation + * + * @author Jose Fonseca <jfonseca@vmware.com> + * + * Blend computation code derived from code written by + * @author Brian Paul <brian@vmware.com> + */ + +#include "util/u_memory.h" + +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_bld_blend.h" +#include "lp_test.h" + + +typedef void (*blend_test_ptr_t)(const void *src, const void *src1, + const void *dst, const void *con, void *res); + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "cycles_per_channel\t" + "type\t" + "sep_func\t" + "sep_src_factor\t" + "sep_dst_factor\t" + "rgb_func\t" + "rgb_src_factor\t" + "rgb_dst_factor\t" + "alpha_func\t" + "alpha_src_factor\t" + "alpha_dst_factor\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct pipe_blend_state *blend, + struct lp_type type, + double cycles, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%.1f\t", cycles / type.length); + + fprintf(fp, "%s%u%sx%u\t", + type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), + type.width, + type.norm ? "n" : "", + type.length); + + fprintf(fp, + "%s\t%s\t%s\t", + blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false", + blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? 
"true" : "false", + blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false"); + + fprintf(fp, + "%s\t%s\t%s\t%s\t%s\t%s\n", + util_dump_blend_func(blend->rt[0].rgb_func, TRUE), + util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), + util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), + util_dump_blend_func(blend->rt[0].alpha_func, TRUE), + util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), + util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); + + fflush(fp); +} + + +static void +dump_blend_type(FILE *fp, + const struct pipe_blend_state *blend, + struct lp_type type) +{ + fprintf(fp, " type=%s%u%sx%u", + type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")), + type.width, + type.norm ? "n" : "", + type.length); + + fprintf(fp, + " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s", + "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE), + "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE), + "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE), + "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE), + "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE), + "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE)); + + fprintf(fp, " ...\n"); + fflush(fp); +} + + +static LLVMValueRef +add_blend_test(struct gallivm_state *gallivm, + const struct pipe_blend_state *blend, + struct lp_type type) +{ + LLVMModuleRef module = gallivm->module; + LLVMContextRef context = gallivm->context; + LLVMTypeRef vec_type; + LLVMTypeRef args[5]; + LLVMValueRef func; + LLVMValueRef src_ptr; + LLVMValueRef src1_ptr; + LLVMValueRef dst_ptr; + LLVMValueRef const_ptr; + LLVMValueRef res_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM; + const unsigned rt = 0; + const unsigned char swizzle[4] = { 0, 1, 2, 3 }; + LLVMValueRef src; + LLVMValueRef src1; 
+ LLVMValueRef dst; + LLVMValueRef con; + LLVMValueRef res; + + vec_type = lp_build_vec_type(gallivm, type); + + args[4] = args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0); + func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 5, 0)); + LLVMSetFunctionCallConv(func, LLVMCCallConv); + src_ptr = LLVMGetParam(func, 0); + src1_ptr = LLVMGetParam(func, 1); + dst_ptr = LLVMGetParam(func, 2); + const_ptr = LLVMGetParam(func, 3); + res_ptr = LLVMGetParam(func, 4); + + block = LLVMAppendBasicBlockInContext(context, func, "entry"); + builder = gallivm->builder; + LLVMPositionBuilderAtEnd(builder, block); + + src = LLVMBuildLoad(builder, src_ptr, "src"); + src1 = LLVMBuildLoad(builder, src1_ptr, "src1"); + dst = LLVMBuildLoad(builder, dst_ptr, "dst"); + con = LLVMBuildLoad(builder, const_ptr, "const"); + + res = lp_build_blend_aos(gallivm, blend, format, type, rt, src, NULL, + src1, NULL, dst, NULL, con, NULL, swizzle, 4); + + lp_build_name(res, "res"); + + LLVMBuildStore(builder, res, res_ptr); + + LLVMBuildRetVoid(builder);; + + gallivm_verify_function(gallivm, func); + + return func; +} + + +static void +compute_blend_ref_term(unsigned rgb_factor, + unsigned alpha_factor, + const double *factor, + const double *src, + const double *src1, + const double *dst, + const double *con, + double *term) +{ + double temp; + + switch (rgb_factor) { + case PIPE_BLENDFACTOR_ONE: + term[0] = factor[0]; /* R */ + term[1] = factor[1]; /* G */ + term[2] = factor[2]; /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term[0] = factor[0] * src[0]; /* R */ + term[1] = factor[1] * src[1]; /* G */ + term[2] = factor[2] * src[2]; /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term[0] = factor[0] * src[3]; /* R */ + term[1] = factor[1] * src[3]; /* G */ + term[2] = factor[2] * src[3]; /* B */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + term[0] = factor[0] * dst[0]; /* R */ + term[1] = factor[1] * dst[1]; /* G */ + term[2] = 
factor[2] * dst[2]; /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term[0] = factor[0] * dst[3]; /* R */ + term[1] = factor[1] * dst[3]; /* G */ + term[2] = factor[2] * dst[3]; /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + temp = MIN2(src[3], 1.0f - dst[3]); + term[0] = factor[0] * temp; /* R */ + term[1] = factor[1] * temp; /* G */ + term[2] = factor[2] * temp; /* B */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term[0] = factor[0] * con[0]; /* R */ + term[1] = factor[1] * con[1]; /* G */ + term[2] = factor[2] * con[2]; /* B */ + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term[0] = factor[0] * con[3]; /* R */ + term[1] = factor[1] * con[3]; /* G */ + term[2] = factor[2] * con[3]; /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + term[0] = factor[0] * src1[0]; /* R */ + term[1] = factor[1] * src1[1]; /* G */ + term[2] = factor[2] * src1[2]; /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + term[0] = factor[0] * src1[3]; /* R */ + term[1] = factor[1] * src1[3]; /* G */ + term[2] = factor[2] * src1[3]; /* B */ + break; + case PIPE_BLENDFACTOR_ZERO: + term[0] = 0.0f; /* R */ + term[1] = 0.0f; /* G */ + term[2] = 0.0f; /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + term[0] = factor[0] * (1.0f - src[0]); /* R */ + term[1] = factor[1] * (1.0f - src[1]); /* G */ + term[2] = factor[2] * (1.0f - src[2]); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + term[0] = factor[0] * (1.0f - src[3]); /* R */ + term[1] = factor[1] * (1.0f - src[3]); /* G */ + term[2] = factor[2] * (1.0f - src[3]); /* B */ + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + term[0] = factor[0] * (1.0f - dst[3]); /* R */ + term[1] = factor[1] * (1.0f - dst[3]); /* G */ + term[2] = factor[2] * (1.0f - dst[3]); /* B */ + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + term[0] = factor[0] * (1.0f - dst[0]); /* R */ + term[1] = factor[1] * (1.0f - dst[1]); /* G */ + term[2] = factor[2] * (1.0f - dst[2]); /* B */ + break; + case 
PIPE_BLENDFACTOR_INV_CONST_COLOR: + term[0] = factor[0] * (1.0f - con[0]); /* R */ + term[1] = factor[1] * (1.0f - con[1]); /* G */ + term[2] = factor[2] * (1.0f - con[2]); /* B */ + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + term[0] = factor[0] * (1.0f - con[3]); /* R */ + term[1] = factor[1] * (1.0f - con[3]); /* G */ + term[2] = factor[2] * (1.0f - con[3]); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + term[0] = factor[0] * (1.0f - src1[0]); /* R */ + term[1] = factor[1] * (1.0f - src1[1]); /* G */ + term[2] = factor[2] * (1.0f - src1[2]); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + term[0] = factor[0] * (1.0f - src1[3]); /* R */ + term[1] = factor[1] * (1.0f - src1[3]); /* G */ + term[2] = factor[2] * (1.0f - src1[3]); /* B */ + break; + default: + assert(0); + } + + /* + * Compute src/first term A + */ + switch (alpha_factor) { + case PIPE_BLENDFACTOR_ONE: + term[3] = factor[3]; /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_SRC_ALPHA: + term[3] = factor[3] * src[3]; /* A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_DST_ALPHA: + term[3] = factor[3] * dst[3]; /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + term[3] = src[3]; /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_CONST_ALPHA: + term[3] = factor[3] * con[3]; /* A */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + term[3] = factor[3] * src1[3]; /* A */ + break; + case PIPE_BLENDFACTOR_ZERO: + term[3] = 0.0f; /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + term[3] = factor[3] * (1.0f - src[3]); /* A */ + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + term[3] = factor[3] * (1.0f - dst[3]); /* A */ + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + term[3] = factor[3] * (1.0f - con[3]); + break; + case 
PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + term[3] = factor[3] * (1.0f - src1[3]); /* A */ + break; + default: + assert(0); + } +} + + +static void +compute_blend_ref(const struct pipe_blend_state *blend, + const double *src, + const double *src1, + const double *dst, + const double *con, + double *res) +{ + double src_term[4]; + double dst_term[4]; + + compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor, + src, src, src1, dst, con, src_term); + compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor, + dst, src, src1, dst, con, dst_term); + + /* + * Combine RGB terms + */ + switch (blend->rt[0].rgb_func) { + case PIPE_BLEND_ADD: + res[0] = src_term[0] + dst_term[0]; /* R */ + res[1] = src_term[1] + dst_term[1]; /* G */ + res[2] = src_term[2] + dst_term[2]; /* B */ + break; + case PIPE_BLEND_SUBTRACT: + res[0] = src_term[0] - dst_term[0]; /* R */ + res[1] = src_term[1] - dst_term[1]; /* G */ + res[2] = src_term[2] - dst_term[2]; /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + res[0] = dst_term[0] - src_term[0]; /* R */ + res[1] = dst_term[1] - src_term[1]; /* G */ + res[2] = dst_term[2] - src_term[2]; /* B */ + break; + case PIPE_BLEND_MIN: + res[0] = MIN2(src_term[0], dst_term[0]); /* R */ + res[1] = MIN2(src_term[1], dst_term[1]); /* G */ + res[2] = MIN2(src_term[2], dst_term[2]); /* B */ + break; + case PIPE_BLEND_MAX: + res[0] = MAX2(src_term[0], dst_term[0]); /* R */ + res[1] = MAX2(src_term[1], dst_term[1]); /* G */ + res[2] = MAX2(src_term[2], dst_term[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (blend->rt[0].alpha_func) { + case PIPE_BLEND_ADD: + res[3] = src_term[3] + dst_term[3]; /* A */ + break; + case PIPE_BLEND_SUBTRACT: + res[3] = src_term[3] - dst_term[3]; /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + res[3] = dst_term[3] - src_term[3]; /* A */ + break; + case PIPE_BLEND_MIN: + res[3] = 
MIN2(src_term[3], dst_term[3]); /* A */ + break; + case PIPE_BLEND_MAX: + res[3] = MAX2(src_term[3], dst_term[3]); /* A */ + break; + default: + assert(0); + } +} + + +PIPE_ALIGN_STACK +static boolean +test_one(unsigned verbose, + FILE *fp, + const struct pipe_blend_state *blend, + struct lp_type type) +{ + struct gallivm_state *gallivm; + LLVMValueRef func = NULL; + blend_test_ptr_t blend_test_ptr; + boolean success; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; + double cycles_avg = 0.0; + unsigned i, j; + const unsigned stride = lp_type_width(type)/8; + + if(verbose >= 1) + dump_blend_type(stdout, blend, type); + + gallivm = gallivm_create("test_module", LLVMGetGlobalContext()); + + func = add_blend_test(gallivm, blend, type); + + gallivm_compile_module(gallivm); + + blend_test_ptr = (blend_test_ptr_t)gallivm_jit_function(gallivm, func); + + gallivm_free_ir(gallivm); + + success = TRUE; + + { + uint8_t *src, *src1, *dst, *con, *res, *ref; + src = align_malloc(stride, stride); + src1 = align_malloc(stride, stride); + dst = align_malloc(stride, stride); + con = align_malloc(stride, stride); + res = align_malloc(stride, stride); + ref = align_malloc(stride, stride); + + for(i = 0; i < n && success; ++i) { + int64_t start_counter = 0; + int64_t end_counter = 0; + + random_vec(type, src); + random_vec(type, src1); + random_vec(type, dst); + random_vec(type, con); + + { + double fsrc[LP_MAX_VECTOR_LENGTH]; + double fsrc1[LP_MAX_VECTOR_LENGTH]; + double fdst[LP_MAX_VECTOR_LENGTH]; + double fcon[LP_MAX_VECTOR_LENGTH]; + double fref[LP_MAX_VECTOR_LENGTH]; + + read_vec(type, src, fsrc); + read_vec(type, src1, fsrc1); + read_vec(type, dst, fdst); + read_vec(type, con, fcon); + + for(j = 0; j < type.length; j += 4) + compute_blend_ref(blend, fsrc + j, fsrc1 + j, fdst + j, fcon + j, fref + j); + + write_vec(type, ref, fref); + } + + start_counter = rdtsc(); + blend_test_ptr(src, src1, dst, con, res); + end_counter = rdtsc(); + + cycles[i] = 
end_counter - start_counter; + + if(!compare_vec(type, res, ref)) { + success = FALSE; + + if(verbose < 1) + dump_blend_type(stderr, blend, type); + fprintf(stderr, "MISMATCH\n"); + + fprintf(stderr, " Src: "); + dump_vec(stderr, type, src); + fprintf(stderr, "\n"); + + fprintf(stderr, " Src1: "); + dump_vec(stderr, type, src1); + fprintf(stderr, "\n"); + + fprintf(stderr, " Dst: "); + dump_vec(stderr, type, dst); + fprintf(stderr, "\n"); + + fprintf(stderr, " Con: "); + dump_vec(stderr, type, con); + fprintf(stderr, "\n"); + + fprintf(stderr, " Res: "); + dump_vec(stderr, type, res); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref: "); + dump_vec(stderr, type, ref); + fprintf(stderr, "\n"); + } + } + align_free(src); + align_free(src1); + align_free(dst); + align_free(con); + align_free(res); + align_free(ref); + } + + /* + * Unfortunately the output of cycle counter is not very reliable as it comes + * -- sometimes we get outliers (due IRQs perhaps?) which are + * better removed to avoid random or biased data. 
+ */ + { + double sum = 0.0, sum2 = 0.0; + double avg, std; + unsigned m; + + for(i = 0; i < n; ++i) { + sum += cycles[i]; + sum2 += cycles[i]*cycles[i]; + } + + avg = sum/n; + std = sqrtf((sum2 - n*avg*avg)/n); + + m = 0; + sum = 0.0; + for(i = 0; i < n; ++i) { + if(fabs(cycles[i] - avg) <= 4.0*std) { + sum += cycles[i]; + ++m; + } + } + + cycles_avg = sum/m; + + } + + if(fp) + write_tsv_row(fp, blend, type, cycles_avg, success); + + gallivm_destroy(gallivm); + + return success; +} + + +const unsigned +blend_factors[] = { + PIPE_BLENDFACTOR_ZERO, + PIPE_BLENDFACTOR_ONE, + PIPE_BLENDFACTOR_SRC_COLOR, + PIPE_BLENDFACTOR_SRC_ALPHA, + PIPE_BLENDFACTOR_DST_COLOR, + PIPE_BLENDFACTOR_DST_ALPHA, + PIPE_BLENDFACTOR_CONST_COLOR, + PIPE_BLENDFACTOR_CONST_ALPHA, + PIPE_BLENDFACTOR_SRC1_COLOR, + PIPE_BLENDFACTOR_SRC1_ALPHA, + PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, + PIPE_BLENDFACTOR_INV_SRC_COLOR, + PIPE_BLENDFACTOR_INV_SRC_ALPHA, + PIPE_BLENDFACTOR_INV_DST_COLOR, + PIPE_BLENDFACTOR_INV_DST_ALPHA, + PIPE_BLENDFACTOR_INV_CONST_COLOR, + PIPE_BLENDFACTOR_INV_CONST_ALPHA, + PIPE_BLENDFACTOR_INV_SRC1_COLOR, + PIPE_BLENDFACTOR_INV_SRC1_ALPHA, +}; + + +const unsigned +blend_funcs[] = { + PIPE_BLEND_ADD, + PIPE_BLEND_SUBTRACT, + PIPE_BLEND_REVERSE_SUBTRACT, + PIPE_BLEND_MIN, + PIPE_BLEND_MAX +}; + + +const struct lp_type blend_types[] = { + /* float, fixed, sign, norm, width, len */ + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, /* f32 x 4 */ + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */ +}; + + +const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]); +const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]); +const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]); + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + const unsigned *rgb_func; + const unsigned *rgb_src_factor; + const unsigned *rgb_dst_factor; + const unsigned *alpha_func; + const unsigned *alpha_src_factor; + const unsigned *alpha_dst_factor; + struct pipe_blend_state 
blend; + const struct lp_type *type; + boolean success = TRUE; + + for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) { + for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) { + for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) { + for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) { + for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) { + for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) { + for(type = blend_types; type < &blend_types[num_types]; ++type) { + + if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) + continue; + + memset(&blend, 0, sizeof blend); + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = *rgb_func; + blend.rt[0].rgb_src_factor = *rgb_src_factor; + blend.rt[0].rgb_dst_factor = *rgb_dst_factor; + blend.rt[0].alpha_func = *alpha_func; + blend.rt[0].alpha_src_factor = *alpha_src_factor; + blend.rt[0].alpha_dst_factor = *alpha_dst_factor; + blend.rt[0].colormask = PIPE_MASK_RGBA; + + if(!test_one(verbose, fp, &blend, *type)) + success = FALSE; + + } + } + } + } + } + } + } + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, + unsigned long n) +{ + const unsigned *rgb_func; + const unsigned *rgb_src_factor; + const unsigned *rgb_dst_factor; + const unsigned *alpha_func; + const unsigned *alpha_src_factor; + const unsigned *alpha_dst_factor; + struct pipe_blend_state blend; + const struct lp_type *type; + unsigned long i; + boolean success = TRUE; + + for(i = 0; i < n; ++i) { + rgb_func = &blend_funcs[rand() % num_funcs]; + alpha_func = &blend_funcs[rand() % num_funcs]; + rgb_src_factor = &blend_factors[rand() % num_factors]; + alpha_src_factor = &blend_factors[rand() % num_factors]; + + do { + rgb_dst_factor = 
&blend_factors[rand() % num_factors]; + } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); + + do { + alpha_dst_factor = &blend_factors[rand() % num_factors]; + } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE); + + type = &blend_types[rand() % num_types]; + + memset(&blend, 0, sizeof blend); + blend.rt[0].blend_enable = 1; + blend.rt[0].rgb_func = *rgb_func; + blend.rt[0].rgb_src_factor = *rgb_src_factor; + blend.rt[0].rgb_dst_factor = *rgb_dst_factor; + blend.rt[0].alpha_func = *alpha_func; + blend.rt[0].alpha_src_factor = *alpha_src_factor; + blend.rt[0].alpha_dst_factor = *alpha_dst_factor; + blend.rt[0].colormask = PIPE_MASK_RGBA; + + if(!test_one(verbose, fp, &blend, *type)) + success = FALSE; + } + + return success; +} + + +boolean +test_single(unsigned verbose, FILE *fp) +{ + printf("no test_single()"); + return TRUE; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c new file mode 100644 index 000000000..8290da400 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -0,0 +1,453 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Unit tests for type conversion. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_pointer.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_test.h" + + +typedef void (*conv_test_ptr_t)(const void *src, const void *dst); + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "cycles_per_channel\t" + "src_type\t" + "dst_type\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + struct lp_type src_type, + struct lp_type dst_type, + double cycles, + boolean success) +{ + fprintf(fp, "%s\t", success ? 
"pass" : "fail"); + + fprintf(fp, "%.1f\t", cycles / MAX2(src_type.length, dst_type.length)); + + dump_type(fp, src_type); + fprintf(fp, "\t"); + + dump_type(fp, dst_type); + fprintf(fp, "\n"); + + fflush(fp); +} + + +static void +dump_conv_types(FILE *fp, + struct lp_type src_type, + struct lp_type dst_type) +{ + fprintf(fp, "src_type="); + dump_type(fp, src_type); + + fprintf(fp, " dst_type="); + dump_type(fp, dst_type); + + fprintf(fp, " ...\n"); + fflush(fp); +} + + +static LLVMValueRef +add_conv_test(struct gallivm_state *gallivm, + struct lp_type src_type, unsigned num_srcs, + struct lp_type dst_type, unsigned num_dsts) +{ + LLVMModuleRef module = gallivm->module; + LLVMContextRef context = gallivm->context; + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef args[2]; + LLVMValueRef func; + LLVMValueRef src_ptr; + LLVMValueRef dst_ptr; + LLVMBasicBlockRef block; + LLVMValueRef src[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef dst[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + args[0] = LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0); + args[1] = LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0); + + func = LLVMAddFunction(module, "test", + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, 2, 0)); + LLVMSetFunctionCallConv(func, LLVMCCallConv); + src_ptr = LLVMGetParam(func, 0); + dst_ptr = LLVMGetParam(func, 1); + + block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMPositionBuilderAtEnd(builder, block); + + for(i = 0; i < num_srcs; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); + LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, ""); + src[i] = LLVMBuildLoad(builder, ptr, ""); + } + + lp_build_conv(gallivm, src_type, dst_type, src, num_srcs, dst, num_dsts); + + for(i = 0; i < num_dsts; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0); + LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, ""); + LLVMBuildStore(builder, dst[i], ptr); + } + 
+ LLVMBuildRetVoid(builder);; + + gallivm_verify_function(gallivm, func); + + return func; +} + + +PIPE_ALIGN_STACK +static boolean +test_one(unsigned verbose, + FILE *fp, + struct lp_type src_type, + struct lp_type dst_type) +{ + struct gallivm_state *gallivm; + LLVMValueRef func = NULL; + conv_test_ptr_t conv_test_ptr; + boolean success; + const unsigned n = LP_TEST_NUM_SAMPLES; + int64_t cycles[LP_TEST_NUM_SAMPLES]; + double cycles_avg = 0.0; + unsigned num_srcs; + unsigned num_dsts; + double eps; + unsigned i, j; + + if ((src_type.width >= dst_type.width && src_type.length > dst_type.length) || + (src_type.width <= dst_type.width && src_type.length < dst_type.length)) { + return TRUE; + } + + /* Known failures + * - fixed point 32 -> float 32 + * - float 32 -> signed normalised integer 32 + */ + if ((src_type.floating && !dst_type.floating && dst_type.sign && dst_type.norm && src_type.width == dst_type.width) || + (!src_type.floating && dst_type.floating && src_type.fixed && src_type.width == dst_type.width)) { + return TRUE; + } + + /* Known failures + * - fixed point 32 -> float 32 + * - float 32 -> signed normalised integer 32 + */ + if ((src_type.floating && !dst_type.floating && dst_type.sign && dst_type.norm && src_type.width == dst_type.width) || + (!src_type.floating && dst_type.floating && src_type.fixed && src_type.width == dst_type.width)) { + return TRUE; + } + + if(verbose >= 1) + dump_conv_types(stderr, src_type, dst_type); + + if (src_type.length > dst_type.length) { + num_srcs = 1; + num_dsts = src_type.length/dst_type.length; + } + else if (src_type.length < dst_type.length) { + num_dsts = 1; + num_srcs = dst_type.length/src_type.length; + } + else { + num_dsts = 1; + num_srcs = 1; + } + + /* We must not loose or gain channels. 
Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type)); + + gallivm = gallivm_create("test_module", LLVMGetGlobalContext()); + + func = add_conv_test(gallivm, src_type, num_srcs, dst_type, num_dsts); + + gallivm_compile_module(gallivm); + + conv_test_ptr = (conv_test_ptr_t)gallivm_jit_function(gallivm, func); + + gallivm_free_ir(gallivm); + + success = TRUE; + for(i = 0; i < n && success; ++i) { + unsigned src_stride = src_type.length*src_type.width/8; + unsigned dst_stride = dst_type.length*dst_type.width/8; + PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + PIPE_ALIGN_VAR(LP_MIN_VECTOR_ALIGN) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + int64_t start_counter = 0; + int64_t end_counter = 0; + + for(j = 0; j < num_srcs; ++j) { + random_vec(src_type, src + j*src_stride); + read_vec(src_type, src + j*src_stride, fref + j*src_type.length); + } + + for(j = 0; j < num_dsts; ++j) { + write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length); + } + + start_counter = rdtsc(); + conv_test_ptr(src, dst); + end_counter = rdtsc(); + + cycles[i] = end_counter - start_counter; + + for(j = 0; j < num_dsts; ++j) { + if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps)) + success = FALSE; + } + + if (!success || verbose >= 3) { + if(verbose < 1) + dump_conv_types(stderr, src_type, dst_type); + if (success) { + fprintf(stderr, "PASS\n"); + } + else { + fprintf(stderr, "MISMATCH\n"); + } + + for(j = 0; j < num_srcs; ++j) { + fprintf(stderr, " Src%u: ", j); + dump_vec(stderr, src_type, src + j*src_stride); + fprintf(stderr, "\n"); + } + +#if 1 + fprintf(stderr, " Ref: "); + for(j = 0; j < src_type.length*num_srcs; ++j) + fprintf(stderr, " %f", fref[j]); + fprintf(stderr, "\n"); +#endif + 
+ for(j = 0; j < num_dsts; ++j) { + fprintf(stderr, " Dst%u: ", j); + dump_vec(stderr, dst_type, dst + j*dst_stride); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref%u: ", j); + dump_vec(stderr, dst_type, ref + j*dst_stride); + fprintf(stderr, "\n"); + } + } + } + + /* + * Unfortunately the output of cycle counter is not very reliable as it comes + * -- sometimes we get outliers (due IRQs perhaps?) which are + * better removed to avoid random or biased data. + */ + { + double sum = 0.0, sum2 = 0.0; + double avg, std; + unsigned m; + + for(i = 0; i < n; ++i) { + sum += cycles[i]; + sum2 += cycles[i]*cycles[i]; + } + + avg = sum/n; + std = sqrtf((sum2 - n*avg*avg)/n); + + m = 0; + sum = 0.0; + for(i = 0; i < n; ++i) { + if(fabs(cycles[i] - avg) <= 4.0*std) { + sum += cycles[i]; + ++m; + } + } + + cycles_avg = sum/m; + + } + + if(fp) + write_tsv_row(fp, src_type, dst_type, cycles_avg, success); + + gallivm_destroy(gallivm); + + return success; +} + + +const struct lp_type conv_types[] = { + /* float, fixed, sign, norm, width, len */ + + /* Float */ + { TRUE, FALSE, TRUE, TRUE, 32, 4 }, + { TRUE, FALSE, TRUE, FALSE, 32, 4 }, + { TRUE, FALSE, FALSE, TRUE, 32, 4 }, + { TRUE, FALSE, FALSE, FALSE, 32, 4 }, + + { TRUE, FALSE, TRUE, TRUE, 32, 8 }, + { TRUE, FALSE, TRUE, FALSE, 32, 8 }, + { TRUE, FALSE, FALSE, TRUE, 32, 8 }, + { TRUE, FALSE, FALSE, FALSE, 32, 8 }, + + /* Fixed */ + { FALSE, TRUE, TRUE, TRUE, 32, 4 }, + { FALSE, TRUE, TRUE, FALSE, 32, 4 }, + { FALSE, TRUE, FALSE, TRUE, 32, 4 }, + { FALSE, TRUE, FALSE, FALSE, 32, 4 }, + + { FALSE, TRUE, TRUE, TRUE, 32, 8 }, + { FALSE, TRUE, TRUE, FALSE, 32, 8 }, + { FALSE, TRUE, FALSE, TRUE, 32, 8 }, + { FALSE, TRUE, FALSE, FALSE, 32, 8 }, + + /* Integer */ + { FALSE, FALSE, TRUE, TRUE, 32, 4 }, + { FALSE, FALSE, TRUE, FALSE, 32, 4 }, + { FALSE, FALSE, FALSE, TRUE, 32, 4 }, + { FALSE, FALSE, FALSE, FALSE, 32, 4 }, + + { FALSE, FALSE, TRUE, TRUE, 32, 8 }, + { FALSE, FALSE, TRUE, FALSE, 32, 8 }, + { FALSE, FALSE, FALSE, TRUE, 
32, 8 }, + { FALSE, FALSE, FALSE, FALSE, 32, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 16, 8 }, + { FALSE, FALSE, TRUE, FALSE, 16, 8 }, + { FALSE, FALSE, FALSE, TRUE, 16, 8 }, + { FALSE, FALSE, FALSE, FALSE, 16, 8 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 16 }, + { FALSE, FALSE, TRUE, FALSE, 8, 16 }, + { FALSE, FALSE, FALSE, TRUE, 8, 16 }, + { FALSE, FALSE, FALSE, FALSE, 8, 16 }, + + { FALSE, FALSE, TRUE, TRUE, 8, 4 }, + { FALSE, FALSE, TRUE, FALSE, 8, 4 }, + { FALSE, FALSE, FALSE, TRUE, 8, 4 }, + { FALSE, FALSE, FALSE, FALSE, 8, 4 }, + + { FALSE, FALSE, FALSE, TRUE, 8, 8 }, +}; + + +const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + const struct lp_type *src_type; + const struct lp_type *dst_type; + boolean success = TRUE; + int error_count = 0; + + for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) { + for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) { + + if(src_type == dst_type) + continue; + + if(!test_one(verbose, fp, *src_type, *dst_type)){ + success = FALSE; + ++error_count; + } + } + } + + fprintf(stderr, "%d failures\n", error_count); + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, + unsigned long n) +{ + const struct lp_type *src_type; + const struct lp_type *dst_type; + unsigned long i; + boolean success = TRUE; + + for(i = 0; i < n; ++i) { + src_type = &conv_types[rand() % num_types]; + + do { + dst_type = &conv_types[rand() % num_types]; + } while (src_type == dst_type || src_type->norm != dst_type->norm); + + if(!test_one(verbose, fp, *src_type, *dst_type)) + success = FALSE; + } + + return success; +} + + +boolean +test_single(unsigned verbose, FILE *fp) +{ + /* float, fixed, sign, norm, width, len */ + struct lp_type f32x4_type = + { TRUE, FALSE, TRUE, TRUE, 32, 4 }; + struct lp_type ub8x4_type = + { FALSE, FALSE, FALSE, TRUE, 8, 16 }; + + boolean success; + + success = test_one(verbose, fp, f32x4_type, 
ub8x4_type); + + return success; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c new file mode 100644 index 000000000..d9abd1ae3 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -0,0 +1,384 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <stdlib.h> +#include <stdio.h> +#include <float.h> + +#include "util/u_memory.h" +#include "util/u_pointer.h" +#include "util/u_string.h" +#include "util/u_format.h" +#include "util/u_format_tests.h" +#include "util/u_format_s3tc.h" + +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_format.h" +#include "gallivm/lp_bld_init.h" + +#include "lp_test.h" + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + const struct util_format_description *desc, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%s\n", desc->name); + + fflush(fp); +} + + +typedef void +(*fetch_ptr_t)(void *unpacked, const void *packed, + unsigned i, unsigned j); + + +static LLVMValueRef +add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose, + const struct util_format_description *desc, + struct lp_type type) +{ + char name[256]; + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef args[4]; + LLVMValueRef func; + LLVMValueRef packed_ptr; + LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context)); + LLVMValueRef rgba_ptr; + LLVMValueRef i; + LLVMValueRef j; + LLVMBasicBlockRef block; + LLVMValueRef rgba; + + util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name, + type.floating ? 
"float" : "unorm8"); + + args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0); + args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0); + args[3] = args[2] = LLVMInt32TypeInContext(context); + + func = LLVMAddFunction(module, name, + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, Elements(args), 0)); + LLVMSetFunctionCallConv(func, LLVMCCallConv); + rgba_ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 1); + i = LLVMGetParam(func, 2); + j = LLVMGetParam(func, 3); + + block = LLVMAppendBasicBlockInContext(context, func, "entry"); + LLVMPositionBuilderAtEnd(builder, block); + + rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE, + packed_ptr, offset, i, j); + + LLVMBuildStore(builder, rgba, rgba_ptr); + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, func); + + return func; +} + + +PIPE_ALIGN_STACK +static boolean +test_format_float(unsigned verbose, FILE *fp, + const struct util_format_description *desc) +{ + struct gallivm_state *gallivm; + LLVMValueRef fetch = NULL; + fetch_ptr_t fetch_ptr; + PIPE_ALIGN_VAR(16) uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + PIPE_ALIGN_VAR(16) float unpacked[4]; + boolean first = TRUE; + boolean success = TRUE; + unsigned i, j, k, l; + + gallivm = gallivm_create("test_module_float", LLVMGetGlobalContext()); + + fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type()); + + gallivm_compile_module(gallivm); + + fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch); + + gallivm_free_ir(gallivm); + + for (l = 0; l < util_format_nr_test_cases; ++l) { + const struct util_format_test_case *test = &util_format_test_cases[l]; + + if (test->format == desc->format) { + + if (first) { + printf("Testing %s (float) ...\n", + desc->name); + fflush(stdout); + first = FALSE; + } + + /* To ensure it's 16-byte aligned */ + memcpy(packed, test->packed, sizeof packed); + + for (i = 0; i < desc->block.height; ++i) { + for (j = 0; j < desc->block.width; ++j) { + 
boolean match = TRUE; + + memset(unpacked, 0, sizeof unpacked); + + fetch_ptr(unpacked, packed, j, i); + + for(k = 0; k < 4; ++k) { + if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) { + match = FALSE; + } + + if (util_is_double_nan(test->unpacked[i][j][k]) != util_is_nan(unpacked[k])) { + match = FALSE; + } + + if (!util_is_double_inf_or_nan(test->unpacked[i][j][k]) && + fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON) { + match = FALSE; + } + } + + if (!match) { + printf("FAILED\n"); + printf(" Packed: %02x %02x %02x %02x\n", + test->packed[0], test->packed[1], test->packed[2], test->packed[3]); + printf(" Unpacked (%u,%u): %.9g %.9g %.9g %.9g obtained\n", + j, i, + unpacked[0], unpacked[1], unpacked[2], unpacked[3]); + printf(" %.9g %.9g %.9g %.9g expected\n", + test->unpacked[i][j][0], + test->unpacked[i][j][1], + test->unpacked[i][j][2], + test->unpacked[i][j][3]); + fflush(stdout); + success = FALSE; + } + } + } + } + } + + gallivm_destroy(gallivm); + + if(fp) + write_tsv_row(fp, desc, success); + + return success; +} + + +PIPE_ALIGN_STACK +static boolean +test_format_unorm8(unsigned verbose, FILE *fp, + const struct util_format_description *desc) +{ + struct gallivm_state *gallivm; + LLVMValueRef fetch = NULL; + fetch_ptr_t fetch_ptr; + PIPE_ALIGN_VAR(16) uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + uint8_t unpacked[4]; + boolean first = TRUE; + boolean success = TRUE; + unsigned i, j, k, l; + + gallivm = gallivm_create("test_module_unorm8", LLVMGetGlobalContext()); + + fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type()); + + gallivm_compile_module(gallivm); + + fetch_ptr = (fetch_ptr_t) gallivm_jit_function(gallivm, fetch); + + gallivm_free_ir(gallivm); + + for (l = 0; l < util_format_nr_test_cases; ++l) { + const struct util_format_test_case *test = &util_format_test_cases[l]; + + if (test->format == desc->format) { + + if (first) { + printf("Testing %s (unorm8) ...\n", + 
desc->name); + first = FALSE; + } + + /* To ensure it's 16-byte aligned */ + /* Could skip this and use unaligned lp_build_fetch_rgba_aos */ + memcpy(packed, test->packed, sizeof packed); + + for (i = 0; i < desc->block.height; ++i) { + for (j = 0; j < desc->block.width; ++j) { + boolean match; + + memset(unpacked, 0, sizeof unpacked); + + fetch_ptr(unpacked, packed, j, i); + + match = TRUE; + for(k = 0; k < 4; ++k) { + int error = float_to_ubyte(test->unpacked[i][j][k]) - unpacked[k]; + + if (util_is_double_nan(test->unpacked[i][j][k])) + continue; + + if (error < 0) + error = -error; + + if (error > 1) + match = FALSE; + } + + if (!match) { + printf("FAILED\n"); + printf(" Packed: %02x %02x %02x %02x\n", + test->packed[0], test->packed[1], test->packed[2], test->packed[3]); + printf(" Unpacked (%u,%u): %02x %02x %02x %02x obtained\n", + j, i, + unpacked[0], unpacked[1], unpacked[2], unpacked[3]); + printf(" %02x %02x %02x %02x expected\n", + float_to_ubyte(test->unpacked[i][j][0]), + float_to_ubyte(test->unpacked[i][j][1]), + float_to_ubyte(test->unpacked[i][j][2]), + float_to_ubyte(test->unpacked[i][j][3])); + + success = FALSE; + } + } + } + } + } + + gallivm_destroy(gallivm); + + if(fp) + write_tsv_row(fp, desc, success); + + return success; +} + + + + +static boolean +test_one(unsigned verbose, FILE *fp, + const struct util_format_description *format_desc) +{ + boolean success = TRUE; + + if (!test_format_float(verbose, fp, format_desc)) { + success = FALSE; + } + + if (!test_format_unorm8(verbose, fp, format_desc)) { + success = FALSE; + } + + return success; +} + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + enum pipe_format format; + boolean success = TRUE; + + util_format_s3tc_init(); + + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if (!format_desc) { + continue; + } + + + /* + * TODO: test more + */ + + if 
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + continue; + } + + if (util_format_is_pure_integer(format)) + continue; + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && + !util_format_s3tc_enabled) { + continue; + } + + if (!test_one(verbose, fp, format_desc)) { + success = FALSE; + } + } + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, + unsigned long n) +{ + return test_all(verbose, fp); +} + + +boolean +test_single(unsigned verbose, FILE *fp) +{ + printf("no test_single()"); + return TRUE; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c new file mode 100644 index 000000000..d835dbbd6 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -0,0 +1,418 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Shared testing code. + * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#include "util/u_cpu_detect.h" +#include "util/u_math.h" + +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_debug.h" +#include "lp_test.h" + + +void +dump_type(FILE *fp, + struct lp_type type) +{ + fprintf(fp, "%s%s%u%sx%u", + type.sign ? (type.floating || type.fixed ? "" : "s") : "u", + type.floating ? "f" : (type.fixed ? "h" : "i"), + type.width, + type.norm ? "n" : "", + type.length); +} + + +double +read_elem(struct lp_type type, const void *src, unsigned index) +{ + double scale = lp_const_scale(type); + double value; + assert(index < type.length); + if (type.floating) { + switch(type.width) { + case 32: + value = *((const float *)src + index); + break; + case 64: + value = *((const double *)src + index); + break; + default: + assert(0); + return 0.0; + } + } + else { + if(type.sign) { + switch(type.width) { + case 8: + value = *((const int8_t *)src + index); + break; + case 16: + value = *((const int16_t *)src + index); + break; + case 32: + value = *((const int32_t *)src + index); + break; + case 64: + value = *((const int64_t *)src + index); + break; + default: + assert(0); + return 0.0; + } + } + else { + switch(type.width) { + case 8: + value = *((const uint8_t *)src + index); + break; + case 16: + value = *((const uint16_t *)src + index); + break; + case 32: + value = *((const uint32_t *)src + index); + break; + case 64: + value = *((const uint64_t *)src + index); + break; + default: + assert(0); + return 0.0; + } + } + } + return value/scale; +} + + +void 
+write_elem(struct lp_type type, void *dst, unsigned index, double value) +{ + assert(index < type.length); + if(!type.sign && value < 0.0) + value = 0.0; + if(type.norm && value < -1.0) + value = -1.0; + if(type.norm && value > 1.0) + value = 1.0; + if (type.floating) { + switch(type.width) { + case 32: + *((float *)dst + index) = (float)(value); + break; + case 64: + *((double *)dst + index) = value; + break; + default: + assert(0); + } + } + else { + double scale = lp_const_scale(type); + value = round(value*scale); + if(type.sign) { + long long lvalue = (long long)value; + lvalue = MIN2(lvalue, ((long long)1 << (type.width - 1)) - 1); + switch(type.width) { + case 8: + *((int8_t *)dst + index) = (int8_t)lvalue; + break; + case 16: + *((int16_t *)dst + index) = (int16_t)lvalue; + break; + case 32: + *((int32_t *)dst + index) = (int32_t)lvalue; + break; + case 64: + *((int64_t *)dst + index) = (int64_t)lvalue; + break; + default: + assert(0); + } + } + else { + unsigned long long lvalue = (long long)value; + lvalue = MIN2(lvalue, ((unsigned long long)1 << type.width) - 1); + switch(type.width) { + case 8: + *((uint8_t *)dst + index) = (uint8_t)lvalue; + break; + case 16: + *((uint16_t *)dst + index) = (uint16_t)lvalue; + break; + case 32: + *((uint32_t *)dst + index) = (uint32_t)lvalue; + break; + case 64: + *((uint64_t *)dst + index) = (uint64_t)lvalue; + break; + default: + assert(0); + } + } + } +} + + +void +random_elem(struct lp_type type, void *dst, unsigned index) +{ + double value; + assert(index < type.length); + value = (double)rand()/(double)RAND_MAX; + if(!type.norm) { + if (type.floating) { + value *= 2.0; + } + else { + unsigned long long mask; + if (type.fixed) + mask = ((unsigned long long)1 << (type.width / 2)) - 1; + else if (type.sign) + mask = ((unsigned long long)1 << (type.width - 1)) - 1; + else + mask = ((unsigned long long)1 << type.width) - 1; + value += (double)(mask & rand()); + } + } + if(!type.sign) + if(rand() & 1) + value = -value; 
+ write_elem(type, dst, index, value); +} + + +void +read_vec(struct lp_type type, const void *src, double *dst) +{ + unsigned i; + for (i = 0; i < type.length; ++i) + dst[i] = read_elem(type, src, i); +} + + +void +write_vec(struct lp_type type, void *dst, const double *src) +{ + unsigned i; + for (i = 0; i < type.length; ++i) + write_elem(type, dst, i, src[i]); +} + + +float +random_float(void) +{ + return (float)((double)rand()/(double)RAND_MAX); +} + + +void +random_vec(struct lp_type type, void *dst) +{ + unsigned i; + for (i = 0; i < type.length; ++i) + random_elem(type, dst, i); +} + + +boolean +compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps) +{ + unsigned i; + eps *= type.floating ? 8.0 : 2.0; + for (i = 0; i < type.length; ++i) { + double res_elem = read_elem(type, res, i); + double ref_elem = read_elem(type, ref, i); + double delta = res_elem - ref_elem; + if (ref_elem < -1.0 || ref_elem > 1.0) { + delta /= ref_elem; + } + delta = fabs(delta); + if (delta >= eps) { + return FALSE; + } + } + + return TRUE; +} + + +boolean +compare_vec(struct lp_type type, const void *res, const void *ref) +{ + double eps = lp_const_eps(type); + return compare_vec_with_eps(type, res, ref, eps); +} + + +void +dump_vec(FILE *fp, struct lp_type type, const void *src) +{ + unsigned i; + for (i = 0; i < type.length; ++i) { + if(i) + fprintf(fp, " "); + if (type.floating) { + double value; + switch(type.width) { + case 32: + value = *((const float *)src + i); + break; + case 64: + value = *((const double *)src + i); + break; + default: + assert(0); + value = 0.0; + } + fprintf(fp, "%f", value); + } + else { + if(type.sign && !type.norm) { + long long value; + const char *format; + switch(type.width) { + case 8: + value = *((const int8_t *)src + i); + format = "%3lli"; + break; + case 16: + value = *((const int16_t *)src + i); + format = "%5lli"; + break; + case 32: + value = *((const int32_t *)src + i); + format = "%10lli"; + break; + case 
64: + value = *((const int64_t *)src + i); + format = "%20lli"; + break; + default: + assert(0); + value = 0.0; + format = "?"; + } + fprintf(fp, format, value); + } + else { + unsigned long long value; + const char *format; + switch(type.width) { + case 8: + value = *((const uint8_t *)src + i); + format = type.norm ? "%2x" : "%4llu"; + break; + case 16: + value = *((const uint16_t *)src + i); + format = type.norm ? "%4x" : "%6llx"; + break; + case 32: + value = *((const uint32_t *)src + i); + format = type.norm ? "%8x" : "%11llx"; + break; + case 64: + value = *((const uint64_t *)src + i); + format = type.norm ? "%16x" : "%21llx"; + break; + default: + assert(0); + value = 0.0; + format = "?"; + } + fprintf(fp, format, value); + } + } + } +} + + +int main(int argc, char **argv) +{ + unsigned verbose = 0; + FILE *fp = NULL; + unsigned long n = 1000; + unsigned i; + boolean success; + boolean single = FALSE; + unsigned fpstate; + + util_cpu_detect(); + fpstate = util_fpstate_get(); + util_fpstate_set_denorms_to_zero(fpstate); + + if (!lp_build_init()) + return 1; + + for(i = 1; i < argc; ++i) { + if(strcmp(argv[i], "-v") == 0) + ++verbose; + else if(strcmp(argv[i], "-s") == 0) + single = TRUE; + else if(strcmp(argv[i], "-o") == 0) + fp = fopen(argv[++i], "wt"); + else + n = atoi(argv[i]); + } + +#ifdef DEBUG + if (verbose >= 2) { + gallivm_debug |= GALLIVM_DEBUG_IR; + gallivm_debug |= GALLIVM_DEBUG_ASM; + } +#endif + + if(fp) { + /* Warm up the caches */ + test_some(0, NULL, 100); + + write_tsv_header(fp); + } + + if (single) + success = test_single(verbose, fp); + else if (n) + success = test_some(verbose, fp, n); + else + success = test_all(verbose, fp); + + if(fp) + fclose(fp); + + return success ? 
0 : 1; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c new file mode 100644 index 000000000..fe4ce0fc5 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_test_printf.c @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <stdlib.h> +#include <stdio.h> + +#include "util/u_pointer.h" +#include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_assert.h" +#include "gallivm/lp_bld_printf.h" + +#include "lp_test.h" + + +struct printf_test_case { + int foo; +}; + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "format\n"); + + fflush(fp); +} + + + +typedef void (*test_printf_t)(int i); + + +static LLVMValueRef +add_printf_test(struct gallivm_state *gallivm) +{ + LLVMModuleRef module = gallivm->module; + LLVMTypeRef args[1] = { LLVMIntTypeInContext(gallivm->context, 32) }; + LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), args, 1, 0)); + LLVMBuilderRef builder = gallivm->builder; + LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(gallivm->context, func, "entry"); + + LLVMSetFunctionCallConv(func, LLVMCCallConv); + + LLVMPositionBuilderAtEnd(builder, block); + lp_build_printf(gallivm, "hello, world\n"); + lp_build_printf(gallivm, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 5, 0), + LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 6, 0)); + + /* Also test lp_build_assert(). This should not fail. 
*/ + lp_build_assert(gallivm, LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), 1, 0), "assert(1)"); + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, func); + + return func; +} + + +PIPE_ALIGN_STACK +static boolean +test_printf(unsigned verbose, FILE *fp, + const struct printf_test_case *testcase) +{ + struct gallivm_state *gallivm; + LLVMValueRef test; + test_printf_t test_printf_func; + boolean success = TRUE; + + gallivm = gallivm_create("test_module", LLVMGetGlobalContext()); + + test = add_printf_test(gallivm); + + gallivm_compile_module(gallivm); + + test_printf_func = (test_printf_t) gallivm_jit_function(gallivm, test); + + gallivm_free_ir(gallivm); + + test_printf_func(0); + + gallivm_destroy(gallivm); + + return success; +} + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + boolean success = TRUE; + + test_printf(verbose, fp, NULL); + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, + unsigned long n) +{ + return test_all(verbose, fp); +} + + +boolean +test_single(unsigned verbose, FILE *fp) +{ + printf("no test_single()"); + return TRUE; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c new file mode 100644 index 000000000..316d1c550 --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -0,0 +1,321 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - LLVM pipe driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" +#include "lp_jit.h" +#include "lp_tex_sample.h" +#include "lp_state_fs.h" +#include "lp_debug.h" + + +/** + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. + */ +struct llvmpipe_sampler_dynamic_state +{ + struct lp_sampler_dynamic_state base; + + const struct lp_sampler_static_state *static_state; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct lp_llvm_sampler_soa +{ + struct lp_build_sampler_soa base; + + struct llvmpipe_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. 
+ * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned texture_unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS); + + /* context[0] */ + indices[0] = lp_build_const_int32(gallivm, 0); + /* context[0].textures */ + indices[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_TEXTURES); + /* context[0].textures[unit] */ + indices[2] = lp_build_const_int32(gallivm, texture_unit); + /* context[0].textures[unit].member */ + indices[3] = lp_build_const_int32(gallivm, member_index); + + ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), ""); + + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.texture%u.%s", texture_unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. + * + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * llvmpipe internals. 
+ */ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ + static LLVMValueRef \ + lp_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \ + struct gallivm_state *gallivm, \ + LLVMValueRef context_ptr, \ + unsigned texture_unit) \ + { \ + return lp_llvm_texture_member(base, gallivm, context_ptr, \ + texture_unit, _index, #_name, _emit_load ); \ + } + + +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT, TRUE) +LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(first_level, LP_JIT_TEXTURE_FIRST_LEVEL, TRUE) +LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) +LP_LLVM_TEXTURE_MEMBER(base_ptr, LP_JIT_TEXTURE_BASE, TRUE) +LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE) +LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE) +LP_LLVM_TEXTURE_MEMBER(mip_offsets, LP_JIT_TEXTURE_MIP_OFFSETS, FALSE) + + +/** + * Fetch the specified member of the lp_jit_sampler structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. 
+ * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +lp_llvm_sampler_member(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned sampler_unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(sampler_unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = lp_build_const_int32(gallivm, 0); + /* context[0].samplers */ + indices[1] = lp_build_const_int32(gallivm, LP_JIT_CTX_SAMPLERS); + /* context[0].samplers[unit] */ + indices[2] = lp_build_const_int32(gallivm, sampler_unit); + /* context[0].samplers[unit].member */ + indices[3] = lp_build_const_int32(gallivm, member_index); + + ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), ""); + + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.sampler%u.%s", sampler_unit, member_name); + + return res; +} + + +#define LP_LLVM_SAMPLER_MEMBER(_name, _index, _emit_load) \ + static LLVMValueRef \ + lp_llvm_sampler_##_name( const struct lp_sampler_dynamic_state *base, \ + struct gallivm_state *gallivm, \ + LLVMValueRef context_ptr, \ + unsigned sampler_unit) \ + { \ + return lp_llvm_sampler_member(base, gallivm, context_ptr, \ + sampler_unit, _index, #_name, _emit_load ); \ + } + + +LP_LLVM_SAMPLER_MEMBER(min_lod, LP_JIT_SAMPLER_MIN_LOD, TRUE) +LP_LLVM_SAMPLER_MEMBER(max_lod, LP_JIT_SAMPLER_MAX_LOD, TRUE) +LP_LLVM_SAMPLER_MEMBER(lod_bias, LP_JIT_SAMPLER_LOD_BIAS, TRUE) +LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE) + + +static void +lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +/** + * Fetch filtered values from texture. + * The 'texel' parameter returns four vectors corresponding to R, G, B, A. 
+ */ +static void +lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, + struct gallivm_state *gallivm, + const struct lp_sampler_params *params) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + unsigned texture_index = params->texture_index; + unsigned sampler_index = params->sampler_index; + + assert(sampler_index < PIPE_MAX_SAMPLERS); + assert(texture_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); + + if (LP_PERF & PERF_NO_TEX) { + lp_build_sample_nop(gallivm, params->type, params->coords, params->texel); + return; + } + + lp_build_sample_soa(&sampler->dynamic_state.static_state[texture_index].texture_state, + &sampler->dynamic_state.static_state[sampler_index].sampler_state, + &sampler->dynamic_state.base, + gallivm, params); +} + +/** + * Fetch the texture size. + */ +static void +lp_llvm_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, + struct gallivm_state *gallivm, + struct lp_type type, + unsigned texture_unit, + unsigned target, + LLVMValueRef context_ptr, + boolean is_sviewinfo, + enum lp_sampler_lod_property lod_property, + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef *sizes_out) +{ + struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; + + assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS); + + lp_build_size_query_soa(gallivm, + &sampler->dynamic_state.static_state[texture_unit].texture_state, + &sampler->dynamic_state.base, + type, + texture_unit, + target, + context_ptr, + is_sviewinfo, + lod_property, + explicit_lod, + sizes_out); +} + + +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state) +{ + struct lp_llvm_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(lp_llvm_sampler_soa); + if(!sampler) + return NULL; + + sampler->base.destroy = lp_llvm_sampler_soa_destroy; + sampler->base.emit_tex_sample = lp_llvm_sampler_soa_emit_fetch_texel; + sampler->base.emit_size_query = 
lp_llvm_sampler_soa_emit_size_query; + sampler->dynamic_state.base.width = lp_llvm_texture_width; + sampler->dynamic_state.base.height = lp_llvm_texture_height; + sampler->dynamic_state.base.depth = lp_llvm_texture_depth; + sampler->dynamic_state.base.first_level = lp_llvm_texture_first_level; + sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level; + sampler->dynamic_state.base.base_ptr = lp_llvm_texture_base_ptr; + sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride; + sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride; + sampler->dynamic_state.base.mip_offsets = lp_llvm_texture_mip_offsets; + sampler->dynamic_state.base.min_lod = lp_llvm_sampler_min_lod; + sampler->dynamic_state.base.max_lod = lp_llvm_sampler_max_lod; + sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias; + sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color; + + sampler->dynamic_state.static_state = static_state; + + return &sampler->base; +} + diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h new file mode 100644 index 000000000..f4aff226c --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef LP_TEX_SAMPLE_H +#define LP_TEX_SAMPLE_H + + +#include "gallivm/lp_bld.h" + + +struct lp_sampler_static_state; + + +/** + * Pure-LLVM texture sampling code generator. + * + */ +struct lp_build_sampler_soa * +lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key); + + +#endif /* LP_TEX_SAMPLE_H */ diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c new file mode 100644 index 000000000..af46342fd --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.c @@ -0,0 +1,815 @@ +/************************************************************************** + * + * Copyright 2006 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keithw@vmware.com> + * Michel Dänzer <daenzer@vmware.com> + */ + +#include <stdio.h> + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "util/u_inlines.h" +#include "util/u_cpu_detect.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/simple_list.h" +#include "util/u_transfer.h" + +#include "lp_context.h" +#include "lp_flush.h" +#include "lp_screen.h" +#include "lp_texture.h" +#include "lp_setup.h" +#include "lp_state.h" +#include "lp_rast.h" + +#include "state_tracker/sw_winsys.h" + + +#ifdef DEBUG +static struct llvmpipe_resource resource_list; +#endif +static unsigned id_counter = 0; + + +/** + * Conventional allocation path for non-display textures: + * Compute strides and allocate data (unless asked not to). + */ +static boolean +llvmpipe_texture_layout(struct llvmpipe_screen *screen, + struct llvmpipe_resource *lpr, + boolean allocate) +{ + struct pipe_resource *pt = &lpr->base; + unsigned level; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + uint64_t total_size = 0; + unsigned layers = pt->array_size; + /* XXX: + * This alignment here (same for displaytarget) was added for the purpose of + * ARB_map_buffer_alignment. I am not convinced it's needed for non-buffer + * resources. Otherwise we'd want the max of cacheline size and 16 (max size + * of a block for all formats) though this should not be strictly necessary + * neither. In any case it can only affect compressed or 1d textures. 
+ */ + unsigned mip_align = MAX2(64, util_cpu_caps.cacheline); + + assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS); + assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS); + + for (level = 0; level <= pt->last_level; level++) { + uint64_t mipsize; + unsigned align_x, align_y, nblocksx, nblocksy, block_size, num_slices; + + /* Row stride and image stride */ + + /* For non-compressed formats we need 4x4 pixel alignment + * so we can read/write LP_RASTER_BLOCK_SIZE when rendering to them. + * We also want cache line size in x direction, + * otherwise same cache line could end up in multiple threads. + * For explicit 1d resources however we reduce this to 4x1 and + * handle specially in render output code (as we need to do special + * handling there for buffers in any case). + */ + if (util_format_is_compressed(pt->format)) + align_x = align_y = 1; + else { + align_x = LP_RASTER_BLOCK_SIZE; + if (llvmpipe_resource_is_1d(&lpr->base)) + align_y = 1; + else + align_y = LP_RASTER_BLOCK_SIZE; + } + + nblocksx = util_format_get_nblocksx(pt->format, + align(width, align_x)); + nblocksy = util_format_get_nblocksy(pt->format, + align(height, align_y)); + block_size = util_format_get_blocksize(pt->format); + + if (util_format_is_compressed(pt->format)) + lpr->row_stride[level] = nblocksx * block_size; + else + lpr->row_stride[level] = align(nblocksx * block_size, util_cpu_caps.cacheline); + + /* if row_stride * height > LP_MAX_TEXTURE_SIZE */ + if ((uint64_t)lpr->row_stride[level] * nblocksy > LP_MAX_TEXTURE_SIZE) { + /* image too large */ + goto fail; + } + + lpr->img_stride[level] = lpr->row_stride[level] * nblocksy; + + /* Number of 3D image slices, cube faces or texture array layers */ + if (lpr->base.target == PIPE_TEXTURE_CUBE) { + assert(layers == 6); + } + + if (lpr->base.target == PIPE_TEXTURE_3D) + num_slices = depth; + else if (lpr->base.target == PIPE_TEXTURE_1D_ARRAY || + lpr->base.target == PIPE_TEXTURE_2D_ARRAY || + lpr->base.target == 
PIPE_TEXTURE_CUBE || + lpr->base.target == PIPE_TEXTURE_CUBE_ARRAY) + num_slices = layers; + else + num_slices = 1; + + /* if img_stride * num_slices_faces > LP_MAX_TEXTURE_SIZE */ + mipsize = (uint64_t)lpr->img_stride[level] * num_slices; + if (mipsize > LP_MAX_TEXTURE_SIZE) { + /* volume too large */ + goto fail; + } + + lpr->mip_offsets[level] = total_size; + + total_size += align((unsigned)mipsize, mip_align); + if (total_size > LP_MAX_TEXTURE_SIZE) { + goto fail; + } + + /* Compute size of next mipmap level */ + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + if (allocate) { + lpr->tex_data = align_malloc(total_size, mip_align); + if (!lpr->tex_data) { + return FALSE; + } + else { + memset(lpr->tex_data, 0, total_size); + } + } + + return TRUE; + +fail: + return FALSE; +} + + +/** + * Check the size of the texture specified by 'res'. + * \return TRUE if OK, FALSE if too large. + */ +static boolean +llvmpipe_can_create_resource(struct pipe_screen *screen, + const struct pipe_resource *res) +{ + struct llvmpipe_resource lpr; + memset(&lpr, 0, sizeof(lpr)); + lpr.base = *res; + return llvmpipe_texture_layout(llvmpipe_screen(screen), &lpr, false); +} + + +static boolean +llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, + struct llvmpipe_resource *lpr) +{ + struct sw_winsys *winsys = screen->winsys; + + /* Round up the surface size to a multiple of the tile size to + * avoid tile clipping. 
+ */ + const unsigned width = MAX2(1, align(lpr->base.width0, TILE_SIZE)); + const unsigned height = MAX2(1, align(lpr->base.height0, TILE_SIZE)); + + lpr->dt = winsys->displaytarget_create(winsys, + lpr->base.bind, + lpr->base.format, + width, height, + 64, + &lpr->row_stride[0] ); + + if (lpr->dt == NULL) + return FALSE; + + { + void *map = winsys->displaytarget_map(winsys, lpr->dt, + PIPE_TRANSFER_WRITE); + + if (map) + memset(map, 0, height * lpr->row_stride[0]); + + winsys->displaytarget_unmap(winsys, lpr->dt); + } + + return TRUE; +} + + +static struct pipe_resource * +llvmpipe_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(_screen); + struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource); + if (!lpr) + return NULL; + + lpr->base = *templat; + pipe_reference_init(&lpr->base.reference, 1); + lpr->base.screen = &screen->base; + + /* assert(lpr->base.bind); */ + + if (llvmpipe_resource_is_texture(&lpr->base)) { + if (lpr->base.bind & (PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) { + /* displayable surface */ + if (!llvmpipe_displaytarget_layout(screen, lpr)) + goto fail; + } + else { + /* texture map */ + if (!llvmpipe_texture_layout(screen, lpr, true)) + goto fail; + } + } + else { + /* other data (vertex buffer, const buffer, etc) */ + const uint bytes = templat->width0; + assert(util_format_get_blocksize(templat->format) == 1); + assert(templat->height0 == 1); + assert(templat->depth0 == 1); + assert(templat->last_level == 0); + /* + * Reserve some extra storage since if we'd render to a buffer we + * read/write always LP_RASTER_BLOCK_SIZE pixels, but the element + * offset doesn't need to be aligned to LP_RASTER_BLOCK_SIZE. 
+ */ + lpr->data = align_malloc(bytes + (LP_RASTER_BLOCK_SIZE - 1) * 4 * sizeof(float), 64); + + /* + * buffers don't really have stride but it's probably safer + * (for code doing same calculations for buffers and textures) + * to put something sane in there. + */ + lpr->row_stride[0] = bytes; + if (!lpr->data) + goto fail; + memset(lpr->data, 0, bytes); + } + + lpr->id = id_counter++; + +#ifdef DEBUG + insert_at_tail(&resource_list, lpr); +#endif + + return &lpr->base; + + fail: + FREE(lpr); + return NULL; +} + + +static void +llvmpipe_resource_destroy(struct pipe_screen *pscreen, + struct pipe_resource *pt) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(pscreen); + struct llvmpipe_resource *lpr = llvmpipe_resource(pt); + + if (lpr->dt) { + /* display target */ + struct sw_winsys *winsys = screen->winsys; + winsys->displaytarget_destroy(winsys, lpr->dt); + } + else if (llvmpipe_resource_is_texture(pt)) { + /* free linear image data */ + if (lpr->tex_data) { + align_free(lpr->tex_data); + lpr->tex_data = NULL; + } + } + else if (!lpr->userBuffer) { + assert(lpr->data); + align_free(lpr->data); + } + +#ifdef DEBUG + if (lpr->next) + remove_from_list(lpr); +#endif + + FREE(lpr); +} + + +/** + * Map a resource for read/write. 
+ */
+void *
+llvmpipe_resource_map(struct pipe_resource *resource,
+                      unsigned level,
+                      unsigned layer,
+                      enum lp_texture_usage tex_usage)
+{
+   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+   assert(level < LP_MAX_TEXTURE_LEVELS);
+   assert(layer < (u_minify(resource->depth0, level) + resource->array_size - 1));
+   assert(tex_usage == LP_TEX_USAGE_READ ||
+          tex_usage == LP_TEX_USAGE_READ_WRITE ||
+          tex_usage == LP_TEX_USAGE_WRITE_ALL);
+
+   if (lpr->dt) {
+      /* Display target: map the storage through the winsys. */
+      struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen);
+      struct sw_winsys *winsys = screen->winsys;
+      const unsigned dt_usage = (tex_usage == LP_TEX_USAGE_READ)
+         ? PIPE_TRANSFER_READ : PIPE_TRANSFER_READ_WRITE;
+      uint8_t *map;
+
+      /* Display targets only have a single level/layer. */
+      assert(level == 0);
+      assert(layer == 0);
+
+      /* FIXME: keep map count? */
+      map = winsys->displaytarget_map(winsys, lpr->dt, dt_usage);
+
+      /* install this linear image in texture data structure */
+      lpr->tex_data = map;
+
+      return map;
+   }
+
+   if (llvmpipe_resource_is_texture(resource)) {
+      /* Regular texture: address the image/slice inside tex_data. */
+      return llvmpipe_get_texture_image_address(lpr, layer, level);
+   }
+
+   /* Non-texture resource (vertex/const buffer etc): plain data block. */
+   return lpr->data;
+}
+ */
+void
+llvmpipe_resource_unmap(struct pipe_resource *resource,
+                        unsigned level,
+                        unsigned layer)
+{
+   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+   /* Only display targets need an explicit unmap; malloc-backed
+    * textures and buffers stay mapped for the resource's lifetime.
+    */
+   if (lpr->dt) {
+      /* display target */
+      struct llvmpipe_screen *lp_screen = llvmpipe_screen(resource->screen);
+      struct sw_winsys *winsys = lp_screen->winsys;
+
+      assert(level == 0);
+      assert(layer == 0);
+
+      winsys->displaytarget_unmap(winsys, lpr->dt);
+   }
+}
+
+
+/**
+ * Return the raw data pointer of a non-texture resource (buffer).
+ */
+void *
+llvmpipe_resource_data(struct pipe_resource *resource)
+{
+   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+   assert(!llvmpipe_resource_is_texture(resource));
+
+   return lpr->data;
+}
+
+
+/**
+ * Wrap a winsys handle (shared displaytarget) in a new llvmpipe resource.
+ * Returns NULL on allocation or winsys failure.
+ */
+static struct pipe_resource *
+llvmpipe_resource_from_handle(struct pipe_screen *screen,
+                              const struct pipe_resource *template,
+                              struct winsys_handle *whandle)
+{
+   struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
+   struct llvmpipe_resource *lpr;
+
+   /* XXX Seems like from_handled depth textures doesn't work that well */
+
+   lpr = CALLOC_STRUCT(llvmpipe_resource);
+   if (!lpr) {
+      goto no_lpr;
+   }
+
+   lpr->base = *template;
+   pipe_reference_init(&lpr->base.reference, 1);
+   lpr->base.screen = screen;
+
+   /*
+    * Looks like unaligned displaytargets work just fine,
+    * at least sampler/render ones.
+ */ +#if 0 + assert(lpr->base.width0 == width); + assert(lpr->base.height0 == height); +#endif + + lpr->dt = winsys->displaytarget_from_handle(winsys, + template, + whandle, + &lpr->row_stride[0]); + if (!lpr->dt) { + goto no_dt; + } + + lpr->id = id_counter++; + +#ifdef DEBUG + insert_at_tail(&resource_list, lpr); +#endif + + return &lpr->base; + +no_dt: + FREE(lpr); +no_lpr: + return NULL; +} + + +static boolean +llvmpipe_resource_get_handle(struct pipe_screen *screen, + struct pipe_resource *pt, + struct winsys_handle *whandle) +{ + struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys; + struct llvmpipe_resource *lpr = llvmpipe_resource(pt); + + assert(lpr->dt); + if (!lpr->dt) + return FALSE; + + return winsys->displaytarget_get_handle(winsys, lpr->dt, whandle); +} + + +static void * +llvmpipe_transfer_map( struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer ) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct llvmpipe_resource *lpr = llvmpipe_resource(resource); + struct llvmpipe_transfer *lpt; + struct pipe_transfer *pt; + ubyte *map; + enum pipe_format format; + enum lp_texture_usage tex_usage; + const char *mode; + + assert(resource); + assert(level <= resource->last_level); + + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) { + boolean read_only = !(usage & PIPE_TRANSFER_WRITE); + boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK); + if (!llvmpipe_flush_resource(pipe, resource, + level, + read_only, + TRUE, /* cpu_access */ + do_not_block, + __FUNCTION__)) { + /* + * It would have blocked, but state tracker requested no to. 
+ */ + assert(do_not_block); + return NULL; + } + } + + /* Check if we're mapping the current constant buffer */ + if ((usage & PIPE_TRANSFER_WRITE) && + (resource->bind & PIPE_BIND_CONSTANT_BUFFER)) { + unsigned i; + for (i = 0; i < Elements(llvmpipe->constants[PIPE_SHADER_FRAGMENT]); ++i) { + if (resource == llvmpipe->constants[PIPE_SHADER_FRAGMENT][i].buffer) { + /* constants may have changed */ + llvmpipe->dirty |= LP_NEW_CONSTANTS; + break; + } + } + } + + lpt = CALLOC_STRUCT(llvmpipe_transfer); + if (!lpt) + return NULL; + pt = &lpt->base; + pipe_resource_reference(&pt->resource, resource); + pt->box = *box; + pt->level = level; + pt->stride = lpr->row_stride[level]; + pt->layer_stride = lpr->img_stride[level]; + pt->usage = usage; + *transfer = pt; + + assert(level < LP_MAX_TEXTURE_LEVELS); + + /* + printf("tex_transfer_map(%d, %d %d x %d of %d x %d, usage %d )\n", + transfer->x, transfer->y, transfer->width, transfer->height, + transfer->texture->width0, + transfer->texture->height0, + transfer->usage); + */ + + if (usage == PIPE_TRANSFER_READ) { + tex_usage = LP_TEX_USAGE_READ; + mode = "read"; + } + else { + tex_usage = LP_TEX_USAGE_READ_WRITE; + mode = "read/write"; + } + + if (0) { + printf("transfer map tex %u mode %s\n", lpr->id, mode); + } + + format = lpr->base.format; + + map = llvmpipe_resource_map(resource, + level, + box->z, + tex_usage); + + + /* May want to do different things here depending on read/write nature + * of the map: + */ + if (usage & PIPE_TRANSFER_WRITE) { + /* Do something to notify sharing contexts of a texture change. 
+ */ + screen->timestamp++; + } + + map += + box->y / util_format_get_blockheight(format) * pt->stride + + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + + return map; +} + + +static void +llvmpipe_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) +{ + assert(transfer->resource); + + llvmpipe_resource_unmap(transfer->resource, + transfer->level, + transfer->box.z); + + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For llvmpipe, nothing to do. + */ + assert (transfer->resource); + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + +unsigned int +llvmpipe_is_resource_referenced( struct pipe_context *pipe, + struct pipe_resource *presource, + unsigned level) +{ + struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); + + /* + * XXX checking only resources with the right bind flags + * is unsafe since with opengl state tracker we can end up + * with resources bound to places they weren't supposed to be + * (buffers bound as sampler views is one possibility here). + */ + if (!(presource->bind & (PIPE_BIND_DEPTH_STENCIL | + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_SAMPLER_VIEW))) + return LP_UNREFERENCED; + + return lp_setup_is_resource_referenced(llvmpipe->setup, presource); +} + + +/** + * Returns the largest possible alignment for a format in llvmpipe + */ +unsigned +llvmpipe_get_format_alignment( enum pipe_format format ) +{ + const struct util_format_description *desc = util_format_description(format); + unsigned size = 0; + unsigned bytes; + unsigned i; + + for (i = 0; i < desc->nr_channels; ++i) { + size += desc->channel[i].size; + } + + bytes = size / 8; + + if (!util_is_power_of_two(bytes)) { + bytes /= desc->nr_channels; + } + + if (bytes % 2 || bytes < 1) { + return 1; + } else { + return bytes; + } +} + + +/** + * Create buffer which wraps user-space data. 
+ */
+struct pipe_resource *
+llvmpipe_user_buffer_create(struct pipe_screen *screen,
+                            void *ptr,
+                            unsigned bytes,
+                            unsigned bind_flags)
+{
+   struct llvmpipe_resource *buffer;
+
+   buffer = CALLOC_STRUCT(llvmpipe_resource);
+   if(!buffer)
+      return NULL;
+
+   pipe_reference_init(&buffer->base.reference, 1);
+   buffer->base.screen = screen;
+   buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */
+   buffer->base.bind = bind_flags;
+   buffer->base.usage = PIPE_USAGE_IMMUTABLE;
+   buffer->base.flags = 0;
+   buffer->base.width0 = bytes;
+   buffer->base.height0 = 1;
+   buffer->base.depth0 = 1;
+   buffer->base.array_size = 1;
+   /* userBuffer marks the data as caller-owned: resource_destroy
+    * will not free it (see llvmpipe_resource_destroy).
+    */
+   buffer->userBuffer = TRUE;
+   buffer->data = ptr;
+
+   return &buffer->base;
+}
+
+
+/**
+ * Compute size (in bytes) needed to store a texture image / mipmap level,
+ * for just one cube face, one array layer or one 3D texture slice
+ */
+static unsigned
+tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level)
+{
+   return lpr->img_stride[level];
+}
+
+
+/**
+ * Return pointer to a 2D texture image/face/slice.
+ * No tiled/linear conversion is done.
+ */
+ubyte *
+llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr,
+                                   unsigned face_slice, unsigned level)
+{
+   unsigned offset;
+
+   assert(llvmpipe_resource_is_texture(&lpr->base));
+
+   /* Start of the requested mip level, then step over preceding
+    * faces/layers/slices within that level.
+    */
+   offset = lpr->mip_offsets[level];
+
+   if (face_slice > 0)
+      offset += face_slice * tex_image_face_size(lpr, level);
+
+   return (ubyte *) lpr->tex_data + offset;
+}
+
+
+/**
+ * Return size of resource in bytes
+ */
+unsigned
+llvmpipe_resource_size(const struct pipe_resource *resource)
+{
+   const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource);
+   unsigned size = 0;
+
+   if (llvmpipe_resource_is_texture(resource)) {
+      /* Note this will always return 0 for displaytarget resources */
+      /* NOTE(review): total_alloc_size does not appear to be assigned
+       * anywhere in this file (llvmpipe_texture_layout computes total_size
+       * but never stores it), so this may report 0 for malloc'ed textures
+       * too — confirm against the rest of the driver.
+       */
+      size = lpr->total_alloc_size;
+   }
+   else {
+      size = resource->width0;
+   }
+   return size;
+}
+
+
+#ifdef DEBUG
+/**
+ * Debug helper: dump every live resource on the global tracking list.
+ */
+void
+llvmpipe_print_resources(void)
+{
+   struct llvmpipe_resource *lpr;
+   unsigned n = 0, total = 0;
+
+   debug_printf("LLVMPIPE: current resources:\n");
+   foreach(lpr, &resource_list) {
+      unsigned size = llvmpipe_resource_size(&lpr->base);
+      debug_printf("resource %u at %p, size %ux%ux%u: %u bytes, refcount %u\n",
+                   lpr->id, (void *) lpr,
+                   lpr->base.width0, lpr->base.height0, lpr->base.depth0,
+                   size, lpr->base.reference.count);
+      total += size;
+      n++;
+   }
+   debug_printf("LLVMPIPE: total size of %u resources: %u\n", n, total);
+}
+#endif
+
+
+/**
+ * Plug the resource entry points into the pipe_screen vtable.
+ */
+void
+llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
+{
+#ifdef DEBUG
+   /* init linked list for tracking resources */
+   {
+      static boolean first_call = TRUE;
+      if (first_call) {
+         memset(&resource_list, 0, sizeof(resource_list));
+         make_empty_list(&resource_list);
+         first_call = FALSE;
+      }
+   }
+#endif
+
+   screen->resource_create = llvmpipe_resource_create;
+   screen->resource_destroy = llvmpipe_resource_destroy;
+   screen->resource_from_handle = llvmpipe_resource_from_handle;
+   screen->resource_get_handle = llvmpipe_resource_get_handle;
+   screen->can_create_resource = llvmpipe_can_create_resource;
+}
+
+
+void
+llvmpipe_init_context_resource_funcs(struct pipe_context *pipe) +{ + pipe->transfer_map = llvmpipe_transfer_map; + pipe->transfer_unmap = llvmpipe_transfer_unmap; + + pipe->transfer_flush_region = u_default_transfer_flush_region; + pipe->transfer_inline_write = u_default_transfer_inline_write; +} diff --git a/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h new file mode 100644 index 000000000..3d315bb9a --- /dev/null +++ b/lib/mesa/src/gallium/drivers/llvmpipe/lp_texture.h @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef LP_TEXTURE_H +#define LP_TEXTURE_H + + +#include "pipe/p_state.h" +#include "util/u_debug.h" +#include "lp_limits.h" + + +enum lp_texture_usage +{ + LP_TEX_USAGE_READ = 100, + LP_TEX_USAGE_READ_WRITE, + LP_TEX_USAGE_WRITE_ALL +}; + + +struct pipe_context; +struct pipe_screen; +struct llvmpipe_context; + +struct sw_displaytarget; + + +/** + * llvmpipe subclass of pipe_resource. A texture, drawing surface, + * vertex buffer, const buffer, etc. + * Textures are stored differently than other types of objects such as + * vertex buffers and const buffers. + * The latter are simple malloc'd blocks of memory. + */ +struct llvmpipe_resource +{ + struct pipe_resource base; + + /** Row stride in bytes */ + unsigned row_stride[LP_MAX_TEXTURE_LEVELS]; + /** Image stride (for cube maps, array or 3D textures) in bytes */ + unsigned img_stride[LP_MAX_TEXTURE_LEVELS]; + /** Offset to start of mipmap level, in bytes */ + unsigned mip_offsets[LP_MAX_TEXTURE_LEVELS]; + /** allocated total size (for non-display target texture resources only) */ + unsigned total_alloc_size; + + /** + * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET + * usage. + */ + struct sw_displaytarget *dt; + + /** + * Malloc'ed data for regular textures, or a mapping to dt above. + */ + void *tex_data; + + /** + * Data for non-texture resources. + */ + void *data; + + boolean userBuffer; /** Is this a user-space buffer? 
 */
+   unsigned timestamp;
+
+   unsigned id;  /**< temporary, for debugging */
+
+#ifdef DEBUG
+   /** for linked list */
+   struct llvmpipe_resource *prev, *next;
+#endif
+};
+
+
+/** llvmpipe subclass of pipe_transfer (map bookkeeping). */
+struct llvmpipe_transfer
+{
+   struct pipe_transfer base;
+
+   unsigned long offset;
+};
+
+
+/** cast wrappers */
+static inline struct llvmpipe_resource *
+llvmpipe_resource(struct pipe_resource *pt)
+{
+   return (struct llvmpipe_resource *) pt;
+}
+
+
+static inline const struct llvmpipe_resource *
+llvmpipe_resource_const(const struct pipe_resource *pt)
+{
+   return (const struct llvmpipe_resource *) pt;
+}
+
+
+static inline struct llvmpipe_transfer *
+llvmpipe_transfer(struct pipe_transfer *pt)
+{
+   return (struct llvmpipe_transfer *) pt;
+}
+
+
+void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen);
+void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe);
+
+
+/**
+ * Is this resource stored as a texture (mip levels / slices) rather
+ * than a plain linear buffer?
+ */
+static inline boolean
+llvmpipe_resource_is_texture(const struct pipe_resource *resource)
+{
+   switch (resource->target) {
+   case PIPE_BUFFER:
+      return FALSE;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return TRUE;
+   default:
+      assert(0);
+      return FALSE;
+   }
+}
+
+
+/**
+ * Is this a one-dimensional resource (buffer or 1D texture/array)?
+ */
+static inline boolean
+llvmpipe_resource_is_1d(const struct pipe_resource *resource)
+{
+   switch (resource->target) {
+   case PIPE_BUFFER:
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      return TRUE;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      return FALSE;
+   default:
+      assert(0);
+      return FALSE;
+   }
+}
+
+
+/** Byte stride between array layers / cube faces / 3D slices. */
+static inline unsigned
+llvmpipe_layer_stride(struct pipe_resource *resource,
+                      unsigned level)
+{
+   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+   assert(level < LP_MAX_TEXTURE_2D_LEVELS);
+   return lpr->img_stride[level];
+}
+
+
+/** Byte stride between rows of a given mip level. */
+static inline unsigned
+llvmpipe_resource_stride(struct pipe_resource *resource,
+                         unsigned level)
+{
+   struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+   assert(level < LP_MAX_TEXTURE_2D_LEVELS);
+   return lpr->row_stride[level];
+}
+
+
+void *
+llvmpipe_resource_map(struct pipe_resource *resource,
+                      unsigned level,
+                      unsigned layer,
+                      enum lp_texture_usage tex_usage);
+
+void
+llvmpipe_resource_unmap(struct pipe_resource *resource,
+                        unsigned level,
+                        unsigned layer);
+
+
+void *
+llvmpipe_resource_data(struct pipe_resource *resource);
+
+
+unsigned
+llvmpipe_resource_size(const struct pipe_resource *resource);
+
+
+ubyte *
+llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr,
+                                   unsigned face_slice, unsigned level);
+
+
+extern void
+llvmpipe_print_resources(void);
+
+
+/* Bitmask results for llvmpipe_is_resource_referenced(). */
+#define LP_UNREFERENCED         0
+#define LP_REFERENCED_FOR_READ  (1 << 0)
+#define LP_REFERENCED_FOR_WRITE (1 << 1)
+
+unsigned int
+llvmpipe_is_resource_referenced( struct pipe_context *pipe,
+                                 struct pipe_resource *presource,
+                                 unsigned level);
+
+unsigned
+llvmpipe_get_format_alignment(enum pipe_format format);
+
+#endif /* LP_TEXTURE_H */